feat(@scripts): ✨ add wedge guard monitoring
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
This commit is contained in:
parent
6941cdef36
commit
e5403bf72a
1 changed file with 24 additions and 1 deletion
|
|
@ -61,7 +61,7 @@ cmd_check() {
|
|||
|
||||
echo
|
||||
echo "Services:"
|
||||
for s in dnsmasq systemd-oomd quinn-ai-auto-respond; do
|
||||
for s in dnsmasq systemd-oomd quinn-ai-auto-respond apricot-pressure-guard; do
|
||||
state=$(systemctl is-active "$s" 2>/dev/null || true)
|
||||
enabled=$(systemctl is-enabled "$s" 2>/dev/null || true)
|
||||
case "$state" in
|
||||
|
|
@ -83,6 +83,23 @@ cmd_check() {
|
|||
printf ' %-7s %s\n' "$p" "${avg10:-?}"
|
||||
done
|
||||
|
||||
# Wedge guard: the io-pressure responder freezes the auto-commit burst when
|
||||
# tank io saturates (see project-apricot-io-starvation-wedge). Its live
|
||||
# state lives on tmpfs (/run), never tank, so it is readable during a wedge.
|
||||
echo
|
||||
echo "Wedge guard:"
|
||||
gstate=$(cat /run/apricot-pressure-guard/state 2>/dev/null || echo "?")
|
||||
case "$gstate" in
|
||||
running) ok "io-pressure guard: running (commits not throttled)" ;;
|
||||
frozen) warn "io-pressure guard: FROZEN commits.service (io spike in progress)" ;;
|
||||
*) warn "io-pressure guard: state unknown (daemon down?)" ;;
|
||||
esac
|
||||
# Auto-commit burst cadence: ~one burst of ~280 pipeline runs per cycle.
|
||||
# interval_seconds was raised 300→900 to cut the chronic io duty cycle.
|
||||
bursts=$(journalctl --since '20 minutes ago' -o cat 2>/dev/null \
|
||||
| grep -c 'Pipeline completed' || true)
|
||||
printf ' auto-commit pipelines (last 20m): %s\n' "${bursts:-?}"
|
||||
|
||||
if command -v nvme >/dev/null 2>&1; then
|
||||
echo
|
||||
echo "NVMe composite temps:"
|
||||
|
|
@ -268,8 +285,14 @@ cmd_forensic() {
|
|||
echo '--- /proc/pressure/io ---'; cat /proc/pressure/io 2>/dev/null
|
||||
echo '--- /proc/pressure/memory ---'; cat /proc/pressure/memory 2>/dev/null
|
||||
} > "$out/09-pressure-now.txt" 2>&1 || true
|
||||
# Legacy detection-only guard logged here — on TANK, so it FROZE during the
|
||||
# very wedge it watched (silence in this file during a wedge window is
|
||||
# expected, not evidence of calm). The live guard now logs to the journal.
|
||||
[ -f "$HOME/apricot-pressure-alerts.log" ] \
|
||||
&& cp "$HOME/apricot-pressure-alerts.log" "$out/10-pressure-alerts.log" || true
|
||||
# io-pressure guard daemon journal: freeze/thaw events across this boot.
|
||||
journalctl -b "$idx" -t apricot-pressure-guard --no-pager \
|
||||
> "$out/11-pressure-guard.txt" 2>&1 || true
|
||||
|
||||
n=$(ls "$out" | wc -l | tr -d ' ')
|
||||
ok "wrote $n artifact(s) to $out"
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue