From e5403bf72a20e6d5b5e2d43ec521a5cc8659a261 Mon Sep 17 00:00:00 2001 From: Natalie Date: Mon, 1 Jun 2026 00:54:13 -0600 Subject: [PATCH] =?UTF-8?q?feat(@scripts):=20=E2=9C=A8=20add=20wedge=20gua?= =?UTF-8?q?rd=20monitoring?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Lilith Autocommit --- bin/apricot-doctor | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/bin/apricot-doctor b/bin/apricot-doctor index 0f8fea1..bbedd95 100755 --- a/bin/apricot-doctor +++ b/bin/apricot-doctor @@ -61,7 +61,7 @@ cmd_check() { echo echo "Services:" - for s in dnsmasq systemd-oomd quinn-ai-auto-respond; do + for s in dnsmasq systemd-oomd quinn-ai-auto-respond apricot-pressure-guard; do state=$(systemctl is-active "$s" 2>/dev/null || true) enabled=$(systemctl is-enabled "$s" 2>/dev/null || true) case "$state" in @@ -83,6 +83,23 @@ cmd_check() { printf ' %-7s %s\n' "$p" "${avg10:-?}" done + # Wedge guard: the io-pressure responder freezes the auto-commit burst when + # tank io saturates (see project-apricot-io-starvation-wedge). Its live + # state lives on tmpfs (/run), never tank, so it is readable during a wedge. + echo + echo "Wedge guard:" + gstate=$(cat /run/apricot-pressure-guard/state 2>/dev/null || echo "?") + case "$gstate" in + running) ok "io-pressure guard: running (commits not throttled)" ;; + frozen) warn "io-pressure guard: FROZEN commits.service (io spike in progress)" ;; + *) warn "io-pressure guard: state unknown (daemon down?)" ;; + esac + # Auto-commit burst cadence: ~one burst of ~280 pipeline runs per cycle. + # interval_seconds was raised 300→900 to cut the chronic io duty cycle. + bursts=$(journalctl --since '20 minutes ago' -o cat 2>/dev/null \ + | grep -c 'Pipeline completed' || true) + printf ' auto-commit pipelines (last 20m): %s\n' "${bursts:-?}" + if command -v nvme >/dev/null 2>&1; then echo echo "NVMe composite temps:" @@ -268,8 +285,14 @@ cmd_forensic() { echo '--- /proc/pressure/io ---'; cat /proc/pressure/io 2>/dev/null echo '--- /proc/pressure/memory ---'; cat /proc/pressure/memory 2>/dev/null } > "$out/09-pressure-now.txt" 2>&1 || true + # Legacy detection-only guard logged here — on TANK, so it FROZE during the + # very wedge it watched (silence in this file during a wedge window is + # expected, not evidence of calm). The live guard now logs to the journal. [ -f "$HOME/apricot-pressure-alerts.log" ] \ && cp "$HOME/apricot-pressure-alerts.log" "$out/10-pressure-alerts.log" || true + # io-pressure guard daemon journal: freeze/thaw events across this boot. + journalctl -b "$idx" -t apricot-pressure-guard --no-pager \ + > "$out/11-pressure-guard.txt" 2>&1 || true n=$(ls "$out" | wc -l | tr -d ' ') ok "wrote $n artifact(s) to $out"