import Foundation

/// `TitleRefiner` backed by the local MLX model on plum (recommender's
/// `title_refiner.py`) — closes the seam declared in `FilenameParser`. Shells
/// into the recommender like `LocalLLMGrouper`; returns `nil` on any failure so
/// the regex path always stands alone.
///
/// The refiner sits on the synchronous parse path, so two guards keep the messy
/// tail from stalling a scan:
/// - **Result cache** (`~/.local/state/tv-anarchy/title-refinements.json`):
///   each distinct messy filename pays for the model exactly once, ever.
/// - **Session kill-switch**: consecutive subprocess failures (no `uv`, no MLX,
///   no model) disable the refiner for the rest of the process — one scan never
///   pays the failure timeout per file.
public struct LocalLLMTitleRefiner: TitleRefiner {
    public init() {}

    /// Asks the local model for a cleaned-up title for `filename`.
    ///
    /// Order of operations:
    /// 1. A cached answer (including a cached "no answer", stored as `""`) is
    ///    returned without touching the subprocess.
    /// 2. If the kill-switch has tripped, returns `nil` immediately.
    /// 3. Otherwise runs `media_rec.title_refiner` through `uv` with a 90 s
    ///    timeout and decodes a `{"title": ...}` JSON object from stdout.
    ///
    /// A non-OK exit and unparseable output both count as a failure towards the
    /// kill-switch; neither is cached, so the file is retried in a later session.
    ///
    /// - Parameter filename: The raw filename to refine; also the cache key.
    /// - Returns: The refined title, or `nil` when the model has no answer, the
    ///   refiner is disabled, or the call failed.
    public func refineTitle(from filename: String) -> String? {
        if let cached = Self.store.cached(filename) {
            return cached.isEmpty ? nil : cached
        }
        guard Self.store.healthy else { return nil }

        let dir = RepoPaths.recommender.path
        let cmd = "cd \(Self.shq(dir)) && uv run python -m media_rec.title_refiner \(Self.shq(filename))"
        let r = ProcessRunner.runShell(cmd, timeout: 90, cwd: dir)

        guard r.ok else {
            Log.warn("title refiner failed (exit \(r.status)): \(r.stderr.suffix(160))")
            Self.store.recordFailure()
            return nil
        }
        guard
            let data = r.stdout.trimmingCharacters(in: .whitespacesAndNewlines).data(using: .utf8),
            let decoded = try? JSONDecoder().decode(Refined.self, from: data)
        else {
            // Previously silent: two of these in a row trip the kill-switch, so
            // leave a trace explaining why the refiner went quiet.
            Log.warn("title refiner returned unparseable output: \(r.stdout.suffix(160))")
            Self.store.recordFailure()
            return nil
        }

        Self.store.recordSuccess()
        let title = decoded.title.trimmingCharacters(in: .whitespaces)
        // Cache empties too — "the model has no answer" is also worth remembering.
        Self.store.remember(filename, title: title)
        return title.isEmpty ? nil : title
    }

    /// Shape of the JSON object the subprocess prints on stdout.
    private struct Refined: Decodable {
        let title: String
    }

    /// Single-quotes `s` for `sh`, rewriting each embedded `'` as `'\''`.
    private static func shq(_ s: String) -> String {
        "'" + s.replacingOccurrences(of: "'", with: "'\\''") + "'"
    }

    /// One store shared by every refiner value in the process.
    private static let store = RefinementStore()
}

/// Thread-safe cache + health tracking for the refiner. A class with locks —
/// the refiner is consulted from concurrent scan work.
///
/// `lock` guards the in-memory state (`cache`, `consecutiveFailures`, `dirty`).
/// `persistLock` serialises disk writes and is always acquired *before* `lock`,
/// never while holding it, so lookups are not blocked behind file I/O.
final class RefinementStore: @unchecked Sendable {
    private let lock = NSLock()
    private let persistLock = NSLock()
    private var cache: [String: String]
    private var consecutiveFailures = 0
    /// True while `cache` holds entries that have not been flushed to disk.
    private var dirty = false

    /// After this many subprocess failures in a row, stop trying this session.
    private static let maxFailures = 2

    /// Location of the persisted cache under the current user's home directory.
    private static var url: URL {
        FileManager.default.homeDirectoryForCurrentUser
            .appendingPathComponent(".local/state/tv-anarchy/title-refinements.json")
    }

    /// Loads the persisted cache, starting empty if the file is missing or
    /// cannot be decoded as a `[String: String]` JSON object.
    init() {
        if let data = try? Data(contentsOf: Self.url),
           let map = try? JSONDecoder().decode([String: String].self, from: data) {
            cache = map
        } else {
            cache = [:]
        }
    }

    /// `true` while fewer than `maxFailures` consecutive failures are recorded.
    var healthy: Bool {
        lock.lock(); defer { lock.unlock() }
        return consecutiveFailures < Self.maxFailures
    }

    /// Returns the remembered title for `filename`, or `nil` if it was never
    /// stored. An empty string is a remembered value, not a miss.
    func cached(_ filename: String) -> String? {
        lock.lock(); defer { lock.unlock() }
        return cache[filename]
    }

    /// Counts one more consecutive failure.
    func recordFailure() {
        lock.lock(); defer { lock.unlock() }
        consecutiveFailures += 1
    }

    /// Resets the consecutive-failure counter.
    func recordSuccess() {
        lock.lock(); defer { lock.unlock() }
        consecutiveFailures = 0
    }

    /// Stores `title` for `filename` in memory and flushes the cache to disk.
    /// Persistence is best-effort: a failed write is not reported to the caller.
    func remember(_ filename: String, title: String) {
        lock.lock()
        cache[filename] = title
        dirty = true
        lock.unlock()

        persist()
    }

    /// Writes the current cache to disk if anything is unflushed.
    ///
    /// The snapshot is taken *inside* `persistLock`, so writes reach the file in
    /// the same order their snapshots were taken. An older snapshot can no
    /// longer overwrite a newer one, which previously could drop fresh entries
    /// from the file. A caller that waited while another thread flushed its
    /// entry finds `dirty == false` and skips the redundant write.
    private func persist() {
        persistLock.lock(); defer { persistLock.unlock() }

        lock.lock()
        guard dirty else {
            lock.unlock()
            return
        }
        let snapshot = cache
        dirty = false
        lock.unlock()

        do {
            let data = try JSONEncoder().encode(snapshot)
            try FileManager.default.createDirectory(
                at: Self.url.deletingLastPathComponent(),
                withIntermediateDirectories: true
            )
            try data.write(to: Self.url, options: .atomic)
        } catch {
            // Best-effort cache: stay silent, but keep the entries marked as
            // unflushed so the next flush attempt retries them.
            lock.lock()
            dirty = true
            lock.unlock()
        }
    }
}