84 lines
2.2 KiB
Text
84 lines
2.2 KiB
Text
|
|
#!/bin/sh
|
||
|
|
# host-probe <host> [port]            — one-shot: print state and exit
# host-probe --watch <host> [port]    — loop, emit only on state change
|
||
|
|
#
|
||
|
|
# Distinguishes three states by probing layers independently:
|
||
|
|
#   up       ICMP + TCP accept + SSH banner exchange all succeed
#   wedged   ICMP + TCP accept succeed, banner exchange times out
#            (kernel networking alive, userspace frozen — classic
#            D-state / OOM / disk hang signature)
#   down     no ICMP or no TCP accept
|
||
|
|
#
|
||
|
|
# Suitable both as a standalone check and as the command body for the
|
||
|
|
# Monitor tool (one stdout line per state change).
|
||
|
|
#
|
||
|
|
# Env:
|
||
|
|
#   HOST_PROBE_INTERVAL   seconds between polls in --watch mode (default 30)
#   HOST_PROBE_TIMEOUT    per-probe timeout in seconds (default 3)
|
||
|
|
|
||
|
|
# Strict mode: abort on any unhandled failure or use of an unset variable.
set -eu

# Tunables; each falls back to its default when the environment variable
# is unset or empty.
timeout="${HOST_PROBE_TIMEOUT:-3}"
interval="${HOST_PROBE_INTERVAL:-30}"
|
||
|
|
|
||
|
|
# Print this script's header comment on stdout -- line 2 up to and including
# the first blank line, with the leading "# " (or bare "#") removed -- and
# exit with status 2.
usage() {
  # "|| :" keeps a sed failure (e.g. an unreadable $0) from aborting the
  # script under "set -e" before the exit below is reached.
  sed -n -e '2,/^$/{' -e 's/^# \{0,1\}//' -e 'p' -e '}' "$0" || :
  exit 2
}
|
||
|
|
|
||
|
|
# Command line: an optional leading --watch, then <host> [port].
watch=false
case "${1:-}" in
  '' | -h | --help | help)
    usage
    ;;
  --watch)
    watch=true
    shift
    ;;
esac

# Exactly one or two positional arguments must remain at this point.
if [ $# -lt 1 ] || [ $# -gt 2 ]; then
  usage
fi

host="$1"
port="${2:-22}"
|
||
|
|
|
||
|
|
# Send one ICMP echo request to $host and wait up to $timeout seconds for
# the reply.
# Globals: host, timeout (read); icmp_wait (written, scratch).
# Returns: ping's exit status (0 when a reply was received).
probe_icmp() {
  icmp_wait=$timeout
  # ping's -W is in seconds on Linux (iputils) but in milliseconds on
  # macOS and FreeBSD; unscaled, the default of 3 would wait only 3 ms
  # there and report reachable hosts as down.
  case "$(uname -s 2>/dev/null || :)" in
    Darwin | FreeBSD)
      case "$timeout" in
        '' | *[!0-9]*) ;;                     # not a plain integer: pass through
        *) icmp_wait=$((timeout * 1000)) ;;
      esac
      ;;
  esac
  ping -c1 -W"$icmp_wait" "$host" >/dev/null 2>&1
}
|
||
|
|
|
||
|
|
# Check that $host accepts a TCP connection on $port within $timeout
# seconds, without sending any data (nc -z).
# Returns: 0 if either nc invocation connects, otherwise the status of the
# second attempt.
probe_tcp() {
  # First try -G, the BSD/macOS connect-timeout flag.
  if nc -z -G"$timeout" "$host" "$port" >/dev/null 2>&1; then
    return 0
  fi
  # Otherwise retry with -w, the timeout flag used by Linux nc.
  nc -z -w"$timeout" "$host" "$port" >/dev/null 2>&1
}
|
||
|
|
|
||
|
|
# Connect to $host:$port with nc and print up to 100 bytes of whatever the
# peer sends first, giving up after $timeout seconds.
# Arguments: $1 - nc timeout flag to use (-G or -w).
# Globals:   host, port, timeout (read).
_banner_read() {
  (
    nc "$1$timeout" "$host" "$port" </dev/null &
    nc_pid=$!
    # Watchdog: kill nc once $timeout seconds have passed without it
    # exiting. Its output is detached so it cannot hold the pipe to head
    # open after nc has gone.
    ( sleep "$timeout"; kill "$nc_pid" 2>/dev/null ) >/dev/null 2>&1 &
    dog_pid=$!
    # "|| :" so a killed/failed nc does not stop this subshell under
    # "set -e" before the watchdog is cleaned up.
    wait "$nc_pid" 2>/dev/null || :
    kill "$dog_pid" 2>/dev/null || :
  ) 2>/dev/null | head -c 100
}

# Succeed when the service on $host:$port sends any data after connect.
# SSH banner arrives unsolicited within milliseconds on a healthy sshd.
# Frozen userspace: TCP accepts but no banner ever lands.
# Globals: banner (written) - the bytes received, empty if none.
# Returns: 0 if a banner was received, non-zero otherwise.
probe_banner() {
  # -G is the BSD/macOS connect-timeout flag. Linux nc rejects it and exits
  # without output, so an empty result is retried with -w, mirroring
  # probe_tcp. A host that sends nothing can therefore take up to
  # 2 x $timeout.
  banner=$(_banner_read -G)
  [ -n "$banner" ] || banner=$(_banner_read -w)
  [ -n "$banner" ]
}
|
||
|
|
|
||
|
|
# Run the probes in order (ICMP, TCP, banner) and print one word on stdout:
#   down    - the ICMP or the TCP probe failed
#   wedged  - ICMP and TCP succeeded but no banner was received
#   up      - all three probes succeeded
# Later probes are skipped once an earlier one has failed.
classify() {
  if ! { probe_icmp && probe_tcp; }; then
    echo down
  elif probe_banner; then
    echo up
  else
    echo wedged
  fi
}
|
||
|
|
|
||
|
|
# Print the current UTC time of day as HH:MM:SSZ.
stamp() {
  date -u '+%H:%M:%SZ'
}
|
||
|
|
|
||
|
|
# One-shot mode (no --watch): print the current state once and stop.
case "$watch" in
  false)
    classify
    exit 0
    ;;
esac
|
||
|
|
|
||
|
|
# Watch mode: poll forever, printing a timestamped line only when the
# classified state differs from the previous poll. The first poll always
# prints, because the remembered state starts out empty.
last_state=""
while true; do
  current_state=$(classify)
  [ "$current_state" = "$last_state" ] || {
    echo "[$(stamp)] $host:$port $current_state"
    last_state=$current_state
  }
  sleep "$interval"
done
|