tv-anarchy/Sources/TVAnarchyCore/Library/LocalLLMGrouper.swift

84 lines
4.4 KiB
Swift

import Foundation
/// `ShowGrouper` backed by the local MLX model on plum (recommender's
/// `grouper.py`). Shells into it like `EnrichService`; returns `nil` on any failure
/// so the cheap clustering stands alone. Blocking: call it off the main actor.
public struct LocalLLMGrouper: ShowGrouper {
    public init() {}

    /// Asks the recommender's `media_rec.grouper` module to decide on `cluster`.
    ///
    /// The cluster is serialized to a JSON array (one object per show with the keys
    /// `rootDir`, `name`, `category`, `year`, `seasons`) and passed as a single
    /// shell-quoted argument. The call blocks until the process exits or the
    /// 120-second timeout passed to `ProcessRunner.runShell` elapses.
    ///
    /// - Parameter cluster: Candidate shows. Clusters with fewer than two members
    ///   are never sent to the model.
    /// - Returns: The decoded decision, or `nil` when the cluster is too small, the
    ///   payload cannot be serialized, the process fails, its output cannot be
    ///   decoded, or the decision's `sameWork` list is empty.
    public func resolve(cluster: [CachedShow]) -> ShowGroupDecision? {
        guard cluster.count > 1 else { return nil }

        let entries: [[String: Any]] = cluster.map { show in
            ["rootDir": show.rootDir, "name": show.name, "category": show.category,
             "year": show.year.map { $0 as Any } ?? NSNull(), "seasons": show.seasons]
        }
        // `JSONSerialization.data(withJSONObject:)` raises an exception (which `try?`
        // cannot intercept) when the object graph holds non-JSON values, so validate
        // first to keep the "nil on any failure" contract.
        guard JSONSerialization.isValidJSONObject(entries),
              let data = try? JSONSerialization.data(withJSONObject: entries),
              let json = String(data: data, encoding: .utf8) else {
            Log.warn("show grouper: cluster could not be serialized to JSON")
            return nil
        }

        let dir = RepoPaths.recommender.path
        let cmd = "cd \(Self.shq(dir)) && uv run python -m media_rec.grouper \(Self.shq(json))"
        let r = ProcessRunner.runShell(cmd, timeout: 120, cwd: dir)
        guard r.ok else {
            Log.warn("show grouper failed (exit \(r.status)): \(r.stderr.suffix(160))")
            return nil
        }

        // A clean exit with unparseable stdout used to vanish without a trace; log it
        // so a misbehaving model or script is visible.
        let trimmed = r.stdout.trimmingCharacters(in: .whitespacesAndNewlines)
        guard let out = trimmed.data(using: .utf8),
              let d = try? JSONDecoder().decode(Decision.self, from: out) else {
            Log.warn("show grouper returned undecodable output: \(trimmed.suffix(160))")
            return nil
        }

        // An empty `sameWork` yields no decision; this path is intentionally silent.
        guard !d.sameWork.isEmpty else { return nil }
        return ShowGroupDecision(canonicalTitle: d.canonicalTitle, sameWork: d.sameWork)
    }

    /// Shape of the JSON object expected on the grouper's stdout.
    private struct Decision: Decodable {
        let canonicalTitle: String
        let sameWork: [String]
    }

    /// Quotes `s` for a POSIX shell: wraps it in single quotes and rewrites every
    /// embedded `'` as `'\''`.
    private static func shq(_ s: String) -> String {
        "'" + s.replacingOccurrences(of: "'", with: "'\\''") + "'"
    }
}
/// Persists grouping decisions keyed by a cluster signature, so the LLM runs ONCE
/// per distinct cluster (the messy tail is small and stable). A `decider` wraps a
/// grouper with the cache: on a cache hit it answers instantly; on a miss it asks
/// the grouper, then stores the result.
public enum ShowGroupCache {
    /// Location of the persisted decisions file under the current user's home directory.
    private static var url: URL {
        let home = FileManager.default.homeDirectoryForCurrentUser
        return home.appendingPathComponent(".local/state/tv-anarchy/show-groups.json")
    }

    /// Stable signature for a cluster: its members' `rootDir`s, sorted and joined
    /// with U+0001. Sorting makes the key independent of member order, so a
    /// cluster maps to the same key across scans.
    public static func signature(_ cluster: [CachedShow]) -> String {
        let roots = cluster.map { $0.rootDir }.sorted()
        return roots.joined(separator: "\u{1}")
    }

    /// Reads the persisted decisions.
    ///
    /// - Returns: The stored map, or an empty dictionary when the file cannot be
    ///   read or decoded.
    public static func load() -> [String: ShowGroupDecision] {
        guard let data = try? Data(contentsOf: url) else { return [:] }
        guard let stored = try? JSONDecoder().decode([String: CodableDecision].self, from: data) else {
            return [:]
        }
        var decisions: [String: ShowGroupDecision] = [:]
        decisions.reserveCapacity(stored.count)
        for (key, entry) in stored {
            decisions[key] = ShowGroupDecision(canonicalTitle: entry.canonicalTitle, sameWork: entry.sameWork)
        }
        return decisions
    }

    /// Writes `map` to disk, best effort: encoding, directory-creation and write
    /// errors are all ignored.
    public static func save(_ map: [String: ShowGroupDecision]) {
        var stored: [String: CodableDecision] = [:]
        stored.reserveCapacity(map.count)
        for (key, decision) in map {
            stored[key] = CodableDecision(canonicalTitle: decision.canonicalTitle, sameWork: decision.sameWork)
        }
        guard let data = try? JSONEncoder().encode(stored) else { return }

        let target = url
        let parent = target.deletingLastPathComponent()
        try? FileManager.default.createDirectory(at: parent, withIntermediateDirectories: true)
        // Atomic write, as requested via the `.atomic` option.
        try? data.write(to: target, options: .atomic)
    }

    /// On-disk representation of one decision.
    private struct CodableDecision: Codable {
        let canonicalTitle: String
        let sameWork: [String]
    }
}
/// Cache-fronted decider for `ShowGrouping.combine`: on a cache hit it answers
/// instantly; on a miss it asks the wrapped grouper and records the result. After
/// a combine pass, `cache` holds the updated map and `dirty` says whether anything
/// new was learned (so the caller persists only when needed).
public final class CachedGroupDecider {
    /// Grouper consulted when the cache has no entry for a cluster.
    private let grouper: any ShowGrouper
    /// Decisions known so far, keyed by `ShowGroupCache.signature`.
    public private(set) var cache: [String: ShowGroupDecision]
    /// `true` once a decision has been added to `cache` and not yet persisted
    /// through `persistIfDirty()`.
    public private(set) var dirty = false

    /// - Parameters:
    ///   - grouper: Fallback used on cache misses.
    ///   - cache: Starting decisions; defaults to whatever `ShowGroupCache.load()` returns.
    public init(grouper: any ShowGrouper, cache: [String: ShowGroupDecision] = ShowGroupCache.load()) {
        self.grouper = grouper
        self.cache = cache
    }

    /// Returns the decision for `cluster`, preferring the cache over the grouper.
    ///
    /// A `nil` answer from the grouper is returned as-is and is not recorded, so
    /// the grouper is consulted again the next time that cluster is seen.
    public func decide(_ cluster: [CachedShow]) -> ShowGroupDecision? {
        let signature = ShowGroupCache.signature(cluster)
        if let known = cache[signature] {
            return known
        }
        if let fresh = grouper.resolve(cluster: cluster) {
            cache[signature] = fresh
            dirty = true
            return fresh
        }
        return nil
    }

    /// Persist the cache if anything new was learned this pass, then clear `dirty`.
    public func persistIfDirty() {
        guard dirty else { return }
        ShowGroupCache.save(cache)
        dirty = false
    }
}