#!/bin/sh
# wg-dns-sync — render dnsmasq records for the wg1 mesh from data/mesh-hosts.json
# (+ data/lan-state.json overlay) and install to /etc/dnsmasq.d/wg-mesh.conf.
#
# Source of truth: data/mesh-hosts.json (located by walking up from this script,
#                  resolving symlinks first — so it works when invoked via a
#                  PATH symlink in ~/.local/bin, not only from the repo dir).
# Output file:     /etc/dnsmasq.d/wg-mesh.conf
# Daemon:          dnsmasq.service (restarted only if conf changed)
# Runs on:         apricot (the mesh DNS host). Harmless no-op elsewhere if the
#                  listen address isn't local to the box, but only apricot should
#                  install this.
#
# Renders the host records (both views) into one conf, from hosts[]:
#   1. <host>.wg   -> mesh IP (10.9.0.x)
#   2. <host>.lan  -> current LAN IP (lan-state overlay over static seed)
# (The old *.local platform service records are RETIRED — platform uses .com,
#  infra uses .lan — and are no longer rendered here.)
#
# Why a separate conf file (not editing the platform's own dnsmasq confs):
#   the platform's loopback Traefik uses split-horizon 127.0.0.1 records for its
#   own vhosts; mesh clients (phones on DNS=10.9.0.2) need the LAN/mesh IP, so
#   this writes a SECOND conf bound only to the wg1 listen address.
#
# Idempotent: re-run is a no-op if the rendered conf matches what's installed.
#
# Usage:
#   wg-dns-sync            # render + install + restart dnsmasq if changed
#   wg-dns-sync --dry-run  # print rendered conf, no install
#
# Exit codes:
#   0  success (or unchanged no-op)
#   1  missing dependency (jq) / invalid or unlocatable JSON / no listen address
#   2  sudo required but not available non-interactively
#   3  dnsmasq failed to start after install (rolled back)

# Strict mode: abort on any unhandled command failure (-e) and on use of an
# unset variable (-u).
set -eu

# Only the first argument is inspected. --dry-run selects print-only mode;
# anything else (including no argument) leaves the script in install mode.
case ${1:-} in
    --dry-run) dry_run=1 ;;
    *)         dry_run=0 ;;
esac

# --- locate the repo + data file, surviving symlink invocation -----------------
# Follow $0 through every symlink hop by hand (macOS has no `readlink -f`), then
# climb from the real script directory until a directory containing
# data/mesh-hosts.json is found.
self=$0
until [ ! -L "$self" ]; do
    hop=$(readlink "$self")
    if [ "${hop#/}" != "$hop" ]; then
        # Absolute link target: use it as-is.
        self=$hop
    else
        # Relative link target: resolve against the directory holding the link.
        self=$(dirname "$self")/$hop
    fi
done

root=$(cd "$(dirname "$self")" && pwd)
# Stop climbing at the first directory that has the data file, or at "/".
until [ "$root" = "/" ] || [ -f "$root/data/mesh-hosts.json" ]; do
    root=$(dirname "$root")
done

data_file="$root/data/mesh-hosts.json"
if [ ! -f "$data_file" ]; then
    echo "wg-dns-sync: cannot locate data/mesh-hosts.json (from $self)" >&2
    exit 1
fi

# Destination of the rendered conf.
target=/etc/dnsmasq.d/wg-mesh.conf

# Preflight: jq must be on PATH, and the data file must parse as JSON.
if ! command -v jq >/dev/null; then
    echo "wg-dns-sync: jq not installed" >&2
    exit 1
fi
if ! jq empty "$data_file"; then
    echo "wg-dns-sync: invalid JSON in $data_file" >&2
    exit 1
fi

# Segment-aware listen address.
#   - If this host is named as some segment's dns_host, use that segment's
#     dns_listen (e.g. citron -> nyc3 -> 127.0.0.1,10.9.0.7).
#   - Otherwise use the legacy global .dnsmasq.listen_address (apricot's
#     historical behavior).
# WG_DNS_SELF, when non-empty, replaces hostname self-detection (tests /
# deliberate ops).
dns_self=${WG_DNS_SELF:-}
if [ -z "$dns_self" ]; then
    # Short hostname; fall back to `uname -n` if `hostname` yields nothing.
    dns_self=$(hostname 2>/dev/null | cut -d. -f1)
    if [ -z "$dns_self" ]; then
        dns_self=$(uname -n | cut -d. -f1)
    fi
fi

# First matching segment wins; jq errors are discarded so a missing or oddly
# shaped .mesh.segments simply falls through to the global address.
seg_listen=$(jq -r --arg s "$dns_self" '
    (.mesh.segments // {}) | to_entries[]
    | select(.value | type == "object")
    | select(.value.dns_host == $s) | .value.dns_listen' "$data_file" 2>/dev/null | head -1)

case $seg_listen in
    '' | null) listen=$(jq -r '.dnsmasq.listen_address // empty' "$data_file") ;;
    *)         listen=$seg_listen ;;
esac

if [ -z "$listen" ]; then
    echo "wg-dns-sync: missing listen address (no segment match and no .dnsmasq.listen_address)" >&2
    exit 1
fi

# read_overlay FILE
#   Print FILE's contents when it holds exactly one JSON object; print '{}' in
#   every other case (missing file, unparsable JSON, empty file, or a non-object
#   value such as an array, string or number).
#   The overlay is later indexed by host name inside jq; indexing anything but
#   an object with a string key is a jq error that would abort the whole sync,
#   so only objects are accepted here.
read_overlay() {
    if [ -f "$1" ] \
        && jq -e -s 'length == 1 and (.[0] | type == "object")' "$1" >/dev/null 2>&1; then
        cat "$1"
    else
        printf '{}\n'
    fi
}

# Optional overlay of current LAN IPs, keyed by host name; '{}' when unusable.
state_file="$root/data/lan-state.json"
overlay=$(read_overlay "$state_file")

# --- render --------------------------------------------------------------------
# The conf is rendered into a private temp file that is removed on any exit.
tmp=$(mktemp "${TMPDIR:-/tmp}/wg-mesh.conf.XXXXXX")
trap 'rm -f "$tmp"' EXIT

# sha256_of FILE
#   Print the hex SHA-256 digest of FILE, using sha256sum when it is on PATH and
#   `shasum -a 256` otherwise.
sha256_of() {
    if command -v sha256sum >/dev/null 2>&1; then
        sha256sum "$1" | awk '{print $1}'
    else
        shasum -a 256 "$1" | awk '{print $1}'
    fi
}

data_sha=$(sha256_of "$data_file")
when=$(date -u +%Y-%m-%dT%H:%M:%SZ)
host=$(hostname -s 2>/dev/null || hostname)

# render_conf
#   Write the complete dnsmasq conf to stdout.
#   Reads globals: when, host, data_sha, data_file, state_file, overlay, listen.
#   Sections, in order: provenance header, listen-address/bind-dynamic,
#   .wg records, .lan records. Each host contributes one record per name and
#   alias in each view.
render_conf() {
    printf '# Generated by net-tools/bin/wg-dns-sync — DO NOT EDIT MANUALLY\n'
    printf '# To change records: edit data/mesh-hosts.json (+ lan-state.json overlay) and re-run.\n'
    printf '# rendered_at:   %s\n' "$when"
    printf '# rendered_on:   %s\n' "$host"
    printf '# source_sha256: %s\n' "$data_sha"
    if [ -f "$state_file" ]; then
        printf '# lan_state_sha256: %s\n' "$(sha256_of "$state_file")"
    fi
    printf '\n'
    printf '# Bind only to the wg1 IP so this view is invisible to LAN/loopback clients\n'
    printf '# (which lilith-local.conf serves with split-horizon 127.0.0.1 records).\n'
    printf 'listen-address=%s\n' "$listen"
    # bind-dynamic (not bind-interfaces): binds the listen-address as it appears,
    # so dnsmasq does not lose the boot race against wg1 coming up.
    printf 'bind-dynamic\n'
    printf '\n'
    printf '# === Mesh host records (.wg -> mesh IP) — from hosts[] ===\n'
    # Hosts with no .wg address are skipped, mirroring the null guard in the
    # .lan view below; otherwise jq would interpolate the literal string "null"
    # as the record's address.
    jq -r '
        .hosts[]
        | . as $h
        | select($h.wg != null)
        | ([$h.name] + ($h.aliases // []))[]
          | "address=/\(.).wg/\($h.wg)  # \($h.role)"
    ' "$data_file"
    printf '\n'
    printf '# === LAN host records (.lan -> current LAN IP) — overlay over static seed ===\n'
    # The overlay entry for a host (if any) wins over its static .lan seed;
    # hosts with neither are omitted from this view.
    jq -r --argjson ov "$overlay" '
        .hosts[]
        | . as $h
        | (($ov[$h.name]) // $h.lan) as $lan
        | select($lan != null)
        | ([$h.name] + ($h.aliases // []))[]
          | "address=/\(.).lan/\($lan)  # \($h.role)"
    ' "$data_file"
}

render_conf > "$tmp"

# Dry run: print the rendered conf and stop before anything on the system is
# touched.
[ "$dry_run" -eq 1 ] && {
    cat "$tmp"
    exit 0
}

# --- install (idempotent) ------------------------------------------------------
# conf_body FILE
#   Print FILE minus its "# rendered_at:" header line. An unreadable or missing
#   FILE yields empty output rather than an error.
conf_body() {
    grep -v '^# rendered_at:' "$1" 2>/dev/null || true
}

# same_conf A B
#   Succeed when A and B are identical once the render timestamp is ignored.
#   A byte-for-byte compare can never match here: every render stamps a fresh
#   "# rendered_at:" value, which would force a reinstall and a dnsmasq restart
#   on every run even when no record changed.
same_conf() {
    [ "$(conf_body "$1")" = "$(conf_body "$2")" ]
}

if [ -f "$target" ] && same_conf "$tmp" "$target"; then
    echo "wg-dns-sync: $target already up to date"
    exit 0
fi

# Privilege prefix for the install steps: empty when already root, "sudo" when
# running unprivileged with non-interactive sudo available. Exits 2 when neither
# holds.
SUDO=
if [ "$(id -u)" -ne 0 ]; then
    SUDO="sudo"
    # `sudo -n` fails instead of prompting, so this never blocks on a password.
    if ! command -v sudo >/dev/null 2>&1 || ! sudo -n true 2>/dev/null; then
        echo "wg-dns-sync: need root to write $target (run with sudo)" >&2
        exit 2
    fi
fi

# Keep a copy of the currently installed conf (if there is one) so a render that
# dnsmasq rejects can be reverted. An empty $backup means nothing was installed
# before this run.
if [ -f "$target" ]; then
    backup=/var/lib/wg-mesh.conf.prev
    $SUDO cp "$target" "$backup"
else
    backup=""
fi

$SUDO cp "$tmp" "$target"
echo "wg-dns-sync: installed $target"

# Restart dnsmasq so it picks up the new conf; on failure, undo the install.
if ! command -v systemctl >/dev/null 2>&1; then
    echo "wg-dns-sync: systemctl not found — reload dnsmasq manually" >&2
elif $SUDO systemctl restart dnsmasq 2>/dev/null; then
    echo "wg-dns-sync: dnsmasq restarted"
else
    echo "wg-dns-sync: dnsmasq failed to restart — rolling back" >&2
    # Put the previous conf back, or remove ours when there was no previous one.
    if [ -n "$backup" ]; then
        $SUDO cp "$backup" "$target"
    else
        $SUDO rm -f "$target"
    fi
    # Best-effort restart on the reverted state; its result does not change the
    # exit code.
    $SUDO systemctl restart dnsmasq || true
    exit 3
fi
