diff --git a/bin/apricot-doctor b/bin/apricot-doctor index 0f8fea1..bbedd95 100755 --- a/bin/apricot-doctor +++ b/bin/apricot-doctor @@ -61,7 +61,7 @@ cmd_check() { echo echo "Services:" - for s in dnsmasq systemd-oomd quinn-ai-auto-respond; do + for s in dnsmasq systemd-oomd quinn-ai-auto-respond apricot-pressure-guard; do state=$(systemctl is-active "$s" 2>/dev/null || true) enabled=$(systemctl is-enabled "$s" 2>/dev/null || true) case "$state" in @@ -83,6 +83,23 @@ cmd_check() { printf ' %-7s %s\n' "$p" "${avg10:-?}" done + # Wedge guard: the io-pressure responder freezes the auto-commit burst when + # tank io saturates (see project-apricot-io-starvation-wedge). Its live + # state lives on tmpfs (/run), never tank, so it is readable during a wedge. + echo + echo "Wedge guard:" + gstate=$(cat /run/apricot-pressure-guard/state 2>/dev/null || echo "?") + case "$gstate" in + running) ok "io-pressure guard: running (commits not throttled)" ;; + frozen) warn "io-pressure guard: FROZEN commits.service (io spike in progress)" ;; + *) warn "io-pressure guard: state unknown (daemon down?)" ;; + esac + # Auto-commit burst cadence: ~one burst of ~280 pipeline runs per cycle. + # interval_seconds was raised 300→900 to cut the chronic io duty cycle. + bursts=$(journalctl --since '20 minutes ago' -o cat 2>/dev/null \ + | grep -c 'Pipeline completed' || true) + printf ' auto-commit pipelines (last 20m): %s\n' "${bursts:-?}" + if command -v nvme >/dev/null 2>&1; then echo echo "NVMe composite temps:" @@ -268,8 +285,14 @@ cmd_forensic() { echo '--- /proc/pressure/io ---'; cat /proc/pressure/io 2>/dev/null echo '--- /proc/pressure/memory ---'; cat /proc/pressure/memory 2>/dev/null } > "$out/09-pressure-now.txt" 2>&1 || true + # Legacy detection-only guard logged here — on TANK, so it FROZE during the + # very wedge it watched (silence in this file during a wedge window is + # expected, not evidence of calm). The live guard now logs to the journal. [ -f "$HOME/apricot-pressure-alerts.log" ] \ && cp "$HOME/apricot-pressure-alerts.log" "$out/10-pressure-alerts.log" || true + # io-pressure guard daemon journal: freeze/thaw events across this boot. + journalctl -b "$idx" -t apricot-pressure-guard --no-pager \ + > "$out/11-pressure-guard.txt" 2>&1 || true n=$(ls "$out" | wc -l | tr -d ' ') ok "wrote $n artifact(s) to $out"