tv-anarchy/Sources/TVAnarchyCore/Metadata/LocalLLMTitleRefiner.swift
Natalie 8f12f470b7 feat(metadata): add local llm title refiner integration
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
2026-06-09 21:10:47 -07:00

101 lines
4 KiB
Swift

import Foundation
/// `TitleRefiner` backed by the local MLX model on plum (recommender's
/// `title_refiner.py`) — closes the seam declared in `FilenameParser`. Shells
/// into the recommender like `LocalLLMGrouper`; returns `nil` on any failure so
/// the regex path always stands alone.
///
/// The refiner sits on the synchronous parse path, so two guards keep the messy
/// tail from stalling a scan:
/// - **Result cache** (`~/.local/state/tv-anarchy/title-refinements.json`):
///   a filename the model has already answered (even with an empty title) is
///   served from the cache instead of re-running the model.
/// - **Session kill-switch**: consecutive failures (no `uv`, no MLX, no model,
///   or output that is not the expected JSON) disable the refiner for the rest
///   of the process, so one scan never pays the failure timeout per file.
public struct LocalLLMTitleRefiner: TitleRefiner {
    public init() {}

    /// Asks the local model for a cleaner title for `filename`.
    ///
    /// - Parameter filename: The raw filename; it is both the cache key and the
    ///   single (shell-quoted) argument handed to `media_rec.title_refiner`.
    /// - Returns: The refined title, or `nil` when the cached or fresh answer is
    ///   empty, the kill-switch has tripped, the subprocess fails, or its
    ///   stdout is not a JSON object with a string `title`.
    public func refineTitle(from filename: String) -> String? {
        // A cached empty string means "already asked, the model had no answer".
        if let cached = Self.store.cached(filename) { return cached.isEmpty ? nil : cached }
        guard Self.store.healthy else { return nil }

        let dir = RepoPaths.recommender.path
        let cmd = "cd \(Self.shq(dir)) && uv run python -m media_rec.title_refiner \(Self.shq(filename))"
        let r = ProcessRunner.runShell(cmd, timeout: 90, cwd: dir)

        guard r.ok else {
            Log.warn("title refiner failed (exit \(r.status)): \(r.stderr.suffix(160))")
            Self.store.recordFailure()
            return nil
        }

        // A zero exit with unusable stdout still counts toward the kill-switch,
        // so log it too — otherwise the refiner can switch itself off silently.
        let output = r.stdout.trimmingCharacters(in: .whitespacesAndNewlines)
        guard let data = output.data(using: .utf8),
              let decoded = try? JSONDecoder().decode(Refined.self, from: data) else {
            Log.warn("title refiner returned unparseable output: \(output.prefix(160))")
            Self.store.recordFailure()
            return nil
        }

        Self.store.recordSuccess()
        let title = decoded.title.trimmingCharacters(in: .whitespaces)
        // Cache empties too — "the model has no answer" is also worth remembering.
        Self.store.remember(filename, title: title)
        return title.isEmpty ? nil : title
    }

    /// Shape of the JSON object expected on the subprocess's stdout.
    private struct Refined: Decodable { let title: String }

    /// Single-quotes `s` for the shell, rewriting each embedded `'` as `'\''`.
    private static func shq(_ s: String) -> String {
        "'" + s.replacingOccurrences(of: "'", with: "'\\''") + "'"
    }

    /// Process-wide cache and health state shared by every refiner value.
    private static let store = RefinementStore()
}
/// Thread-safe cache + health tracking for the refiner. A class guarded by
/// locks — the refiner is consulted from concurrent scan work.
final class RefinementStore: @unchecked Sendable {
    /// Guards `cache` and `consecutiveFailures`; never held across disk I/O.
    private let lock = NSLock()
    /// Serializes disk writes so an older snapshot can never land after (and
    /// overwrite) a newer one. Always acquired before `lock`, never while
    /// holding it.
    private let persistLock = NSLock()
    /// Filename → refined title; an empty title records "no answer".
    private var cache: [String: String]
    private var consecutiveFailures = 0

    /// After this many subprocess failures in a row, stop trying this session.
    private static let maxFailures = 2

    /// Location of the persisted cache under the current user's home directory.
    private static var url: URL {
        FileManager.default.homeDirectoryForCurrentUser
            .appendingPathComponent(".local/state/tv-anarchy/title-refinements.json")
    }

    /// Loads the persisted cache; a missing or undecodable file yields an empty one.
    init() {
        if let data = try? Data(contentsOf: Self.url),
           let map = try? JSONDecoder().decode([String: String].self, from: data) {
            cache = map
        } else {
            cache = [:]
        }
    }

    /// `true` while fewer than `maxFailures` failures have occurred in a row.
    var healthy: Bool {
        lock.lock(); defer { lock.unlock() }
        return consecutiveFailures < Self.maxFailures
    }

    /// Returns the remembered title for `filename` (possibly empty), or `nil`
    /// when nothing has been remembered for it.
    func cached(_ filename: String) -> String? {
        lock.lock(); defer { lock.unlock() }
        return cache[filename]
    }

    /// Counts one more consecutive failure.
    func recordFailure() {
        lock.lock(); defer { lock.unlock() }
        consecutiveFailures += 1
    }

    /// Resets the consecutive-failure count.
    func recordSuccess() {
        lock.lock(); defer { lock.unlock() }
        consecutiveFailures = 0
    }

    /// Stores `title` for `filename` in memory, then rewrites the cache file.
    ///
    /// Persistence is best-effort: encoding, directory-creation and write
    /// errors are ignored, leaving the entry in memory only.
    func remember(_ filename: String, title: String) {
        lock.lock()
        cache[filename] = title
        lock.unlock()

        // Take the snapshot *inside* the persistence lock: every write then
        // reflects a state at least as new as the write before it, so a slow
        // writer cannot clobber the file with stale contents.
        persistLock.lock(); defer { persistLock.unlock() }
        lock.lock()
        let snapshot = cache
        lock.unlock()

        guard let data = try? JSONEncoder().encode(snapshot) else { return }
        try? FileManager.default.createDirectory(at: Self.url.deletingLastPathComponent(),
                                                 withIntermediateDirectories: true)
        try? data.write(to: Self.url, options: .atomic)
    }
}