#!/usr/bin/env bash
#
# Deploy the headless `claire agent` peer node to a Linux host (apricot|black).
# Runs FROM plum. Idempotent. Ships code + systemd unit + peer config. The peer
# config carries plum's URL only; the HMAC sync secret is seeded into the
# remote vault and read from there at runtime.
#
#   scripts/deploy-agent.sh apricot
#
# Requires: `remote-run` on PATH (~/Code/@scripts/session-tools), ssh access,
# python3.12+ on the remote (uv is used when present, else venv + pip), and
# NTP-synced clocks (HMAC skew window 300s).
#
# Environment:
#   CLAIRE_TOML       plum's claire.toml (default: ~/.config/claire/claire.toml)
#   CLAIRE_SSH_ALIAS  force the ssh alias used for the bare ssh/rsync legs
set -euo pipefail

HOST="${1:?usage: deploy-agent.sh <host>}"
# Repository root: the parent of the directory holding this script.
SRC="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
REMOTE_DIR="Code/@projects/@claire" # relative to remote $HOME
PLUM_TOML="${CLAIRE_TOML:-$HOME/.config/claire/claire.toml}"

# Print a highlighted progress line to stdout.
say() { printf '\033[1;35m▸\033[0m %s\n' "$*"; }

# Fail early with a readable message instead of a Python traceback.
plum_python="$SRC/.venv/bin/python"
[[ -x "$plum_python" ]] || {
  echo "ERROR: $plum_python not found or not executable" >&2
  exit 1
}
[[ -r "$PLUM_TOML" ]] || {
  echo "ERROR: cannot read $PLUM_TOML (set CLAIRE_TOML to override)" >&2
  exit 1
}

# Plum's bind URL — the peer's event-sync target. The HMAC secret is NO LONGER
# injected here: it lives in the vault (~/.vault/claire-sync-secret.txt), seeded
# below and read at runtime, so rotation is just a vault push.
#
# NOTE(review): a wildcard or unset bind host collapses to 127.0.0.1, which the
# remote host can only reach if something forwards plum's port to the remote's
# loopback — confirm that is the intended topology.
PLUM_URL="$("$plum_python" - "$PLUM_TOML" <<'PY'
import sys, tomllib, pathlib

c = tomllib.loads(pathlib.Path(sys.argv[1]).read_text())
web = c.get("web", {})
host = web.get("host", "127.0.0.1")
if host in ("0.0.0.0", "::", ""):
    host = "127.0.0.1"
if ":" in host and not host.startswith("["):
    # A bare IPv6 literal must be bracketed to form a valid URL authority.
    host = f"[{host}]"
print(f"http://{host}:{web.get('port', 8765)}")
PY
)"
say "plum peer URL = $PLUM_URL"

# Resolve a reachable SSH transport. The host LABEL stays $HOST (claire
# identity / sessions.host / per_host config), but the plum↔host route flaps:
# `.lan` is unreachable off-site and the direct WG relay can drop, so fall back
# to the `-wg` (direct WireGuard) then `-j` (black jump-host) aliases defined in
# ~/.ssh/config. Only the bare ssh/rsync legs need this — `remote-run` does its
# own routing. Override with CLAIRE_SSH_ALIAS=<alias> to force one.
say "[$HOST] resolve ssh transport + clock"
# Candidate aliases, in probe order: the override (whitespace-separated) when
# CLAIRE_SSH_ALIAS is set, otherwise the bare alias, then -wg, then -j.
candidates=()
if [[ -n "${CLAIRE_SSH_ALIAS:-}" ]]; then
  read -r -a candidates <<<"$CLAIRE_SSH_ALIAS"
else
  candidates=("$HOST" "${HOST}-wg" "${HOST}-j")
fi

SSH=""
# The ${arr[@]+...} form keeps an empty array safe under `set -u` on old bash.
for cand in ${candidates[@]+"${candidates[@]}"}; do
  # BatchMode=yes: never prompt, so a candidate needing interaction counts as down.
  if ssh -o ConnectTimeout=8 -o BatchMode=yes "$cand" 'true' 2>/dev/null; then
    SSH="$cand"
    break
  fi
done
[[ -n "$SSH" ]] || {
  echo "ERROR: no reachable ssh transport for $HOST (tried ${candidates[*]-})" >&2
  exit 1
}
[[ "$SSH" == "$HOST" ]] || say "[$HOST] direct route down — using ssh transport '$SSH'"

# Clock check: report the remote's NTP sync state with a label, and warn
# (without aborting) when it is not confirmed.
ntp_state="$(ssh "$SSH" 'timedatectl show -p NTPSynchronized --value 2>/dev/null || echo unknown')"
say "[$HOST] NTPSynchronized = ${ntp_state:-unknown}"
if [[ "$ntp_state" != "yes" ]]; then
  echo "WARNING: $HOST clock is not confirmed NTP-synced (got '${ntp_state:-unknown}')" >&2
fi

say "[$HOST] rsync source"
ssh "$SSH" "mkdir -p ~/$REMOTE_DIR"
# --delete mirrors the tree; the excludes protect the remote venv, its local
# claire.toml, and build/cache directories from being removed or overwritten.
rsync -az --delete -e ssh \
  --exclude='.venv/' --exclude='.git/' --exclude='__pycache__/' \
  --exclude='*.pyc' --exclude='.pytest_cache/' --exclude='.ruff_cache/' \
  --exclude='claire.toml' \
  --exclude='src/claire/web/app/node_modules/' \
  --exclude='src/claire/web/app/dist/' \
  "$SRC/" "${SSH}:${REMOTE_DIR}/"

say "[$HOST] install (uv if present, else python venv+pip) + init"
# Backslash-newlines inside the double quotes are removed locally, so the
# remote still receives a single command line. The venv step is chained with
# && so a failed venv creation stops the install instead of running pip anyway.
remote-run "$HOST" "export PATH=\"\$HOME/.local/bin:\$PATH\"; cd ~/$REMOTE_DIR && \
if command -v uv >/dev/null 2>&1; then \
  { [ -d .venv ] || uv venv; } && uv pip install -e .; \
else \
  { [ -d .venv ] || python3 -m venv .venv; } && .venv/bin/pip install -q -e .; \
fi && .venv/bin/claire init"

say "[$HOST] seed vault (BEFORE agent starts — it reads the HMAC secret from here)"
ssh "$SSH" 'mkdir -p ~/.vault && chmod 700 ~/.vault'
# Copies the whole local vault (minus backups and *.prev.txt), not only the
# sync secret; --chmod pins directories to 700 and files to 600 on the remote.
rsync -az --no-owner --no-group --chmod=D700,F600 -e ssh \
  --exclude='.vault-backups/' --exclude='*.prev.txt' \
  "$HOME/.vault/" "${SSH}:.vault/"

# Gate: the agent will 401 forever without the shared secret present.
ssh "$SSH" '[ -s ~/.vault/claire-sync-secret.txt ]' || {
  echo "ERROR: ~/.vault/claire-sync-secret.txt missing on $HOST after seed" >&2
  exit 1
}

say "[$HOST] configure peer (url only — secret is vault-sourced)"
remote-run "$HOST" "cd ~/$REMOTE_DIR && .venv/bin/claire agent add-peer --url '$PLUM_URL' && chmod 600 ~/.config/claire/claire.toml"

say "[$HOST] enable local orchestrator ([] claire) — MCP → plum's central endpoint"
remote-run "$HOST" "cd ~/$REMOTE_DIR && .venv/bin/claire agent enable-orchestrator --mcp-url '$PLUM_URL/mcp/' && chmod 600 ~/.config/claire/claire.toml"

say "[$HOST] install + enable systemd --user unit"
# Remote script notes:
#  * `set -e` makes every install step a gate regardless of how remote-run
#    invokes the shell.
#  * restart (not just enable --now) so a redeploy actually loads the new code.
#  * enable-linger is best-effort, hence `|| true`.
#  * Real gate: is-active is non-zero iff the unit failed to come up. Its exit
#    code is captured rather than allowed to abort, so the status dump still
#    prints when the unit is down, which is when it is most useful.
#  * The status dump is cosmetic — piping to head closes the pipe early
#    (SIGPIPE), so it stays non-fatal; the captured code decides the outcome.
remote-run "$HOST" "
  set -e
  mkdir -p ~/.config/systemd/user
  cp ~/$REMOTE_DIR/deployments/systemd/claire-agent.service ~/.config/systemd/user/
  systemctl --user daemon-reload
  systemctl --user enable claire-agent.service
  systemctl --user restart claire-agent.service
  loginctl enable-linger \$(whoami) 2>/dev/null || true
  sleep 2
  rc=0
  systemctl --user is-active claire-agent.service || rc=\$?
  systemctl --user --no-pager status claire-agent.service 2>&1 | head -5 || true
  exit \$rc
"

say "[$HOST] done."