#!/bin/sh
# fleet-status — one-screen dashboard of every agent node, in the terminal.
#
# For each agent host in mesh-hosts.json (ssh_user != null): read its
# data/agent-status.json (locally for this node, over ssh for the rest — using
# the fleet ssh names the agents themselves maintain) and render a table:
#
#   NODE      LOC   ROUTE   HEAD     AGE   HOSTNAME   DISCOVERED
#   fennel    HOME  en0     af54b67  4s    fennel     apricot=10.0.0.118 ...
#
# AGE is seconds since the agent's last cycle — STALE (>90s) means the agent is
# down or wedged on that node. "no status" = agent not yet running new code
# (e.g. waiting on its next pull).
#
# Read-only; safe from anywhere on the mesh.

set -eu

# Resolve $0 through any chain of symlinks so that $self ends up naming the
# real script file rather than a link to it.
self=$0
while [ -L "$self" ]; do
    link=$(readlink "$self")
    # A relative link target is relative to the directory holding the link.
    case $link in
        /*) self=$link ;;
        *)  self=$(dirname "$self")/$link ;;
    esac
done

# Start in the script's own directory and climb towards "/" until a directory
# containing data/mesh-hosts.json is found.
# CDPATH is cleared for this one cd: with CDPATH set, cd of a relative operand
# may resolve through CDPATH (landing in a different directory) and then
# prints the resolved path on stdout, which would end up inside $root.
root=$(CDPATH= cd -- "$(dirname "$self")" && pwd)
while [ "$root" != "/" ] && [ ! -f "$root/data/mesh-hosts.json" ]; do
    root=$(dirname "$root")
done
data_file="$root/data/mesh-hosts.json"

# Preflight: without the host list or jq nothing below can work.
[ -f "$data_file" ] || { echo "fleet-status: cannot locate data/mesh-hosts.json" >&2; exit 1; }
command -v jq >/dev/null || { echo "fleet-status: jq not installed" >&2; exit 1; }

# Per-run context shared by every row: the epoch second that ages are measured
# against, and this machine's hostname cut at the first dot (compared with
# node names and aliases to decide which node is "us").
now=$(date +%s)
short=$(hostname 2>/dev/null | sed 's/\..*$//')

# Column header; the widths match the per-row format used further down.
printf '%-11s %-5s %-7s %-9s %-7s %-11s %s\n' \
    NODE LOC ROUTE HEAD AGE HOSTNAME DISCOVERED
# is_self_node NODE
# Succeeds when NODE refers to the machine this script is running on: either
# NODE equals our short hostname, or our short hostname appears in the
# "aliases" of NODE's entry in mesh-hosts.json.
# Reads the globals $short and $data_file.
is_self_node() {
    [ "$1" = "$short" ] && return 0
    jq -e --arg n "$1" --arg h "$short" \
        '.hosts[] | select(.name == $n) | .aliases | index($h)' \
        "$data_file" >/dev/null 2>&1
}

# fetch_status NODE
# Prints NODE's raw agent-status.json on stdout: read from $root/data for this
# machine, over ssh for every other node. Prints nothing when the file is
# missing or the node cannot be reached; always returns 0 so that one dead
# node does not abort the whole dashboard under `set -e`.
fetch_status() {
    if is_self_node "$1"; then
        cat "$root/data/agent-status.json" 2>/dev/null || true
    else
        # -n keeps ssh from reading the node list on the loop's stdin.
        ssh -n -o ConnectTimeout=5 -o BatchMode=yes "$1" \
            'cat ~/net-tools/data/agent-status.json 2>/dev/null' 2>/dev/null || true
    fi
}

# render_row NODE NOW
# Reads one status document (a JSON object) on stdin and prints its table row.
# NOW is the epoch second that AGE is measured against. Every field is
# optional, so a status file written by an older agent still yields a row:
#   - missing or empty text fields show as "-"
#   - a missing "self" falls back to NODE
#   - a missing or non-numeric "ts" shows "?" in AGE (freshness unknown)
#   - fractional ages are floored so they fit the column
render_row() {
    jq -r --arg node "$1" --argjson now "$2" '
        def cell: if . == null or . == "" then "-" else tostring end;
        (if (.ts | type) == "number" then ($now - .ts | floor) else null end) as $age
        | [ (.self // $node | cell),
            (.location | cell),
            (.lan_route_via | cell),
            (.head | cell),
            (if $age == null then "?" elif $age > 90 then "STALE" else "\($age)s" end),
            (.hostname | cell | split(".")[0] | cell),
            (.discovered
             | if type == "object" then to_entries else [] end
             | map("\(.key)=\(.value)") | join(" ") | cell)
          ] | @tsv
    ' | awk -F'\t' '{printf "%-11s %-5s %-7s %-9s %-7s %-11s %s\n", $1,$2,$3,$4,$5,$6,$7}'
}

# One line per agent host (ssh_user != null). Every node prints *something*:
# a rendered row, or a note explaining why there is none.
jq -r '.hosts[] | select(.ssh_user != null) | .name' "$data_file" | while IFS= read -r node; do
    raw=$(fetch_status "$node")

    # Only a JSON object is a usable status document; empty output, invalid
    # JSON, or any other JSON value is reported as "no status".
    if [ -z "$raw" ] || ! printf '%s' "$raw" | jq -e 'type == "object"' >/dev/null 2>&1; then
        printf '%-11s %s\n' "$node" "— no status (agent down, unreachable, or awaiting pull)"
        continue
    fi

    # The pipeline's exit status is awk's, so a jq failure shows up only as
    # empty output; catch that here rather than silently dropping the node.
    row=$(printf '%s' "$raw" | render_row "$node" "$now")
    if [ -n "$row" ]; then
        printf '%s\n' "$row"
    else
        printf '%-11s %s\n' "$node" "— status unreadable (unexpected format)"
    fi
done
