feat(@scripts): add wedge guard monitoring

Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
This commit is contained in:
Natalie 2026-06-01 00:54:13 -06:00
parent 6941cdef36
commit e5403bf72a

View file

@ -61,7 +61,7 @@ cmd_check() {
echo
echo "Services:"
for s in dnsmasq systemd-oomd quinn-ai-auto-respond; do
for s in dnsmasq systemd-oomd quinn-ai-auto-respond apricot-pressure-guard; do
state=$(systemctl is-active "$s" 2>/dev/null || true)
enabled=$(systemctl is-enabled "$s" 2>/dev/null || true)
case "$state" in
@ -83,6 +83,23 @@ cmd_check() {
printf ' %-7s %s\n' "$p" "${avg10:-?}"
done
# Wedge guard: the io-pressure responder freezes the auto-commit burst when
# tank io saturates (see project-apricot-io-starvation-wedge). Its live
# state lives on tmpfs (/run), never tank, so it is readable during a wedge.
echo
echo "Wedge guard:"
gstate=$(cat /run/apricot-pressure-guard/state 2>/dev/null || echo "?")
case "$gstate" in
running) ok "io-pressure guard: running (commits not throttled)" ;;
frozen) warn "io-pressure guard: FROZEN commits.service (io spike in progress)" ;;
*) warn "io-pressure guard: state unknown (daemon down?)" ;;
esac
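# Auto-commit burst cadence: roughly one burst of ~280 pipeline runs per cycle.
# interval_seconds was raised 300→900 to cut the chronic io duty cycle.
# The figure printed below counts 'Pipeline completed' journal lines from the
# last 20 minutes, i.e. individual pipeline runs rather than bursts.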
bursts=$(journalctl --since '20 minutes ago' -o cat 2>/dev/null \
| grep -c 'Pipeline completed' || true)
printf ' auto-commit pipelines (last 20m): %s\n' "${bursts:-?}"
if command -v nvme >/dev/null 2>&1; then
echo
echo "NVMe composite temps:"
@ -268,8 +285,14 @@ cmd_forensic() {
echo '--- /proc/pressure/io ---'; cat /proc/pressure/io 2>/dev/null
echo '--- /proc/pressure/memory ---'; cat /proc/pressure/memory 2>/dev/null
} > "$out/09-pressure-now.txt" 2>&1 || true
# The legacy detection-only guard logged to $HOME/apricot-pressure-alerts.log,
# which lives on TANK, so the guard FROZE during the very wedge it watched
# (silence in this file during a wedge window is expected, not evidence of
# calm). The live guard now logs to the journal.
[ -f "$HOME/apricot-pressure-alerts.log" ] \
&& cp "$HOME/apricot-pressure-alerts.log" "$out/10-pressure-alerts.log" || true
# io-pressure guard daemon journal: freeze/thaw events across this boot.
journalctl -b "$idx" -t apricot-pressure-guard --no-pager \
> "$out/11-pressure-guard.txt" 2>&1 || true
n=$(ls "$out" | wc -l | tr -d ' ')
ok "wrote $n artifact(s) to $out"