feat(claire): add session cleanup and kick delay logic

Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
This commit is contained in:
Natalie 2026-06-02 15:02:47 -07:00
parent c6bd77cc5b
commit 1d431b5056
2 changed files with 43 additions and 17 deletions

View file

@ -0,0 +1 @@
{"sessionId":"c5a10f83-d897-4eb1-aa4f-a4264850dbf3","pid":15738,"procStart":"Mon Jun 1 06:29:14 2026","acquiredAt":1780437249785}

View file

@ -601,6 +601,17 @@ def ensure_running(
and r.cwd.rstrip("/") == norm_target
}
# Kill any pane lingering at this slug before spawning. Reaching here means
# the fast path above found no VALID live session to preserve, so a pane at
# the slug is an orphan from an earlier cycle whose discovery timed out
# (the tmux session spawned, but its uuid never got persisted). Left alone,
# such orphans accumulate — and since turns are delivered by slug match, a duplicate
# orchestrator would receive every turn too, splitting delivery. Best-effort.
try:
rcl.kill(match=slug, yes=True)
except RclaudeError:
pass
# Spawn a fresh session. Pass `.mcp.json` via `--mcp-config`. Path must
# be remote-absolute so the spawned `claude` reads it.
try:
@ -612,23 +623,37 @@ def ensure_running(
except RclaudeError:
return None
# Claude doesn't flush its session JSONL to disk until the first user
# message is processed — discovery would otherwise spin until timeout
# on an empty filesystem. Send a no-op kick so the JSONL appears.
# Wait briefly for tmux + claude to reach the prompt before sending.
time.sleep(2.0)
try:
_send_kick(rcl=rcl, cwd=effective_cwd)
except RclaudeError:
# Kick failure isn't fatal — discovery may still succeed if the
# session writes its JSONL for other reasons.
pass
new_uuid = discover_session(
cwd=effective_cwd, host=cfg.orchestrator.host, rclaude=rcl,
timeout_s=discover_timeout_s,
ignore_uuids=pre_uuids,
)
# Claude doesn't flush its session JSONL to disk until it processes its
# first message — until then discovery has nothing to find. The kick both
# introduces Claude to its role and triggers that first flush.
#
# Timing is a race: a freshly spawned `claude` needs a few seconds to
# reach a live input prompt, and a kick sent before then is silently
# dropped — the session never processes it, never flushes, discovery
# fails, and the orchestrator never comes up (observed: panes stuck at
# R:0 with the kick text unconsumed). The delay scales with host load, so
# a single fixed sleep is unreliable. Instead, poll: send the kick, look
# for the new session, and re-send on each miss until it appears or the
# budget is spent. Re-sending is harmless — Claude just re-acknowledges
# once ready. capture-pane can't gate readiness here: the orchestrator may
# live on a remote host, where capture-pane is unsafe.
new_uuid: str | None = None
time.sleep(2.0) # let tmux attach + claude begin launching
deadline = time.time() + discover_timeout_s
while True:
try:
_send_kick(rcl=rcl, cwd=effective_cwd)
except RclaudeError:
pass # transient — the next iteration re-sends
# Clamp to ≥1s so a tiny `discover_timeout_s` still gets one real
# discovery attempt; cap at 4s so a miss re-kicks promptly.
new_uuid = discover_session(
cwd=effective_cwd, host=cfg.orchestrator.host, rclaude=rcl,
timeout_s=max(1.0, min(4.0, deadline - time.time())),
ignore_uuids=pre_uuids,
)
if new_uuid is not None or time.time() >= deadline:
break
if new_uuid is None:
return None
write_session_uuid(