session-tools/bin/host-probe

84 lines
2.2 KiB
Text
Raw Normal View History

#!/bin/sh
# host-probe <host> [port] — one-shot: print state and exit
# host-probe --watch <host> [port] — loop, emit only on state change
#
# Distinguishes three states by probing layers independently:
#   up      ICMP + TCP accept + SSH banner exchange all succeed
#   wedged  ICMP + TCP accept succeed, banner exchange times out
#           (kernel networking alive, userspace frozen — classic
#           D-state / OOM / disk hang signature)
#   down    no ICMP or no TCP accept
#
# Suitable both as a standalone check and as the command body for the
# Monitor tool (one stdout line per state change).
#
# Env:
#   HOST_PROBE_INTERVAL  seconds between polls in --watch mode (default 30)
#   HOST_PROBE_TIMEOUT   per-probe timeout in seconds (default 3)
set -e -u  # strict mode: stop on unhandled failures and on unset variables

# Tunables. Each falls back to its default when the environment variable is
# unset or empty.
timeout="${HOST_PROBE_TIMEOUT:-3}"     # seconds allowed per probe
interval="${HOST_PROBE_INTERVAL:-30}"  # seconds between polls in --watch mode
# usage — print this script's leading comment header as help text, then exit.
#
# Emits every '#'-prefixed line that follows the shebang, with the leading
# "#" and at most one following space removed, and stops at the first line
# that is not a comment. Stopping there (instead of at the first blank line)
# keeps script code out of the help text when no blank line separates the
# header from the body.
#
# Arguments: none (the script is read via $0)
# Outputs:   help text on stdout
# Exits:     always, with status 2
usage() {
  #   1d         skip the shebang line
  #   /^#/!q     the first non-comment line ends the header
  #   s/.../p    strip the comment marker and print
  sed -n -e '1d' -e '/^#/!q' -e 's/^# \{0,1\}//p' "$0"
  exit 2
}
# Command-line handling.
#   --watch as the first word selects loop mode and is consumed;
#   an empty/missing first word or a help spelling prints usage and exits.
watch=false
if [ "${1:-}" = --watch ]; then
  watch=true
  shift
else
  case "${1:-}" in
    '' | -h | --help | help) usage ;;
  esac
fi

# What remains must be <host> with an optional [port].
if [ $# -lt 1 ] || [ $# -gt 2 ]; then
  usage
fi
host=$1
port=${2:-22}  # port defaults to 22 when omitted or empty
# probe_icmp — succeed if $host answers a single ICMP echo within $timeout.
#
# ping's -W flag is not portable in its units: Linux iputils reads it as
# seconds, while macOS and FreeBSD read it as milliseconds. Passing the raw
# seconds value there would wait only a few milliseconds and misreport
# reachable hosts as down, so the value is converted on those platforms.
#
# Globals:  host, timeout (read); wait_arg (scratch)
# Returns:  ping's exit status (0 when a reply was received)
probe_icmp() {
  case $(uname -s 2>/dev/null) in
    Darwin | FreeBSD)
      # awk rather than $(( )) so a fractional timeout such as 0.5 still works.
      wait_arg=$(awk -v t="$timeout" 'BEGIN { printf "%d", t * 1000 }')
      ;;
    *)
      wait_arg=$timeout
      ;;
  esac
  ping -c1 -W"$wait_arg" "$host" >/dev/null 2>&1
}
# probe_tcp — succeed when a TCP connection to $host:$port can be opened.
#
# The BSD/macOS connect-timeout spelling (-G) is tried first; if that
# invocation fails for any reason, the probe is retried with -w, the form
# Linux nc accepts. The function's status is that of the last nc that ran.
probe_tcp() {
  if nc -z -G"$timeout" "$host" "$port" >/dev/null 2>&1; then
    return 0
  fi
  nc -z -w"$timeout" "$host" "$port" >/dev/null 2>&1
}
# probe_banner — succeed if the service sends any bytes unprompted within
# $timeout seconds of connecting.
#
# Globals:  host, port, timeout (read)
#           banner (written: up to the first 100 bytes received)
# Returns:  0 if at least one byte arrived, non-zero otherwise
probe_banner() {
  # SSH banner arrives unsolicited within milliseconds on a healthy sshd.
  # Frozen userspace: TCP accepts but no banner ever lands.
  banner=$(
    (
      # -w is the timeout flag both BSD/macOS and Linux nc accept. The
      # macOS-only -G made Linux nc exit immediately with a usage error,
      # which left the banner empty and classified healthy hosts as wedged.
      nc -w"$timeout" "$host" "$port" </dev/null &
      nc_pid=$!
      # Watchdog: hard deadline in case nc's own timeout does not fire.
      # Its stdout is detached so that, once nc has exited, the sleeping
      # watchdog does not keep the pipe to head open until the deadline.
      ( sleep "$timeout"; kill "$nc_pid" 2>/dev/null ) >/dev/null &
      watchdog_pid=$!
      wait "$nc_pid" 2>/dev/null || :
      # nc is finished: cancel the watchdog so it cannot later signal an
      # unrelated process that happens to reuse nc's PID.
      kill "$watchdog_pid" 2>/dev/null || :
    ) 2>/dev/null | head -c 100
  )
  [ -n "$banner" ]
}
# classify — run the probes in layer order and print exactly one state word.
#
#   down    the ICMP probe or the TCP probe failed
#   wedged  both of those passed but no banner arrived
#   up      all three probes passed
#
# Probes short-circuit: a later probe runs only if the earlier ones passed.
classify() {
  if probe_icmp && probe_tcp; then
    if probe_banner; then
      echo up
    else
      echo wedged
    fi
  else
    echo down
  fi
}
# stamp — print the current UTC time of day as HH:MM:SSZ.
stamp() {
  # %T is the standard shorthand for %H:%M:%S.
  date -u '+%TZ'
}
# One-shot mode: print the current state once and exit successfully.
case "$watch" in
  false)
    classify
    exit 0
    ;;
esac

# Watch mode: poll forever, printing a timestamped line only when the state
# differs from the last one reported. The first poll always prints, because
# no state word equals the empty initial value.
last_state=""
while true; do
  current=$(classify)
  [ "$current" = "$last_state" ] || {
    echo "[$(stamp)] $host:$port $current"
    last_state=$current
  }
  sleep "$interval"
done