101 lines
4 KiB
Swift
101 lines
4 KiB
Swift
import Foundation
|
|
|
|
/// `TitleRefiner` backed by the local MLX model on plum (recommender's
/// `title_refiner.py`) — closes the seam declared in `FilenameParser`. Shells
/// into the recommender like `LocalLLMGrouper`; returns `nil` on any failure so
/// the regex path always stands alone.
///
/// The refiner sits on the synchronous parse path, so two guards keep the messy
/// tail from stalling a scan:
/// - **Result cache** (`~/.local/state/tv-anarchy/title-refinements.json`):
///   an answer (including "no title") is remembered per filename, so a distinct
///   messy filename is not sent to the model again once answered.
/// - **Session kill-switch**: consecutive failures — a failed subprocess (no
///   `uv`, no MLX, no model) or output that does not decode — disable the
///   refiner for the rest of the process, so one scan never pays the failure
///   timeout per file.
public struct LocalLLMTitleRefiner: TitleRefiner {
    public init() {}

    /// Asks the recommender's `media_rec.title_refiner` module for a title.
    ///
    /// - Parameter filename: The raw filename; passed to the subprocess as a
    ///   single shell-quoted argument and used as the cache key.
    /// - Returns: The whitespace-trimmed title, or `nil` when the cached or
    ///   fresh answer is empty, the refiner is disabled for this session, the
    ///   subprocess fails, or its output cannot be decoded.
    public func refineTitle(from filename: String) -> String? {
        // A cached empty string means the model was already asked and had no answer.
        if let cached = Self.store.cached(filename) { return cached.isEmpty ? nil : cached }
        guard Self.store.healthy else { return nil }

        let dir = RepoPaths.recommender.path
        let cmd = "cd \(Self.shq(dir)) && uv run python -m media_rec.title_refiner \(Self.shq(filename))"
        let r = ProcessRunner.runShell(cmd, timeout: 90, cwd: dir)

        guard r.ok else {
            Log.warn("title refiner failed (exit \(r.status)): \(r.stderr.suffix(160))")
            Self.store.recordFailure()
            return nil
        }
        guard let data = r.stdout.trimmingCharacters(in: .whitespacesAndNewlines).data(using: .utf8),
              let decoded = try? JSONDecoder().decode(Refined.self, from: data) else {
            // Counts toward the kill-switch too, so leave a trace of why it tripped.
            Log.warn("title refiner returned unparseable output: \(r.stdout.suffix(160))")
            Self.store.recordFailure()
            return nil
        }

        Self.store.recordSuccess()
        let title = decoded.title.trimmingCharacters(in: .whitespaces)
        // Cache empties too — "the model has no answer" is also worth remembering.
        Self.store.remember(filename, title: title)
        return title.isEmpty ? nil : title
    }

    /// Shape of the JSON object the subprocess is expected to print on stdout.
    private struct Refined: Decodable { let title: String }

    /// Wraps `s` in single quotes for the shell, rewriting each embedded `'`
    /// as `'\''` so the value stays a single argument.
    private static func shq(_ s: String) -> String { "'" + s.replacingOccurrences(of: "'", with: "'\\''") + "'" }

    /// Process-wide cache and health state shared by every refiner value.
    private static let store = RefinementStore()
}
|
|
|
|
/// Thread-safe cache + health tracking for the refiner. A class with locks —
/// the refiner is consulted from concurrent scan work.
///
/// `lock` guards the in-memory state; `persistLock` orders disk writes so a
/// stale snapshot can never overwrite a newer one.
final class RefinementStore: @unchecked Sendable {
    /// Guards `cache` and `consecutiveFailures`.
    private let lock = NSLock()
    /// Serializes snapshot-and-write in `remember`; never held by readers.
    private let persistLock = NSLock()
    /// Filename → refined title; an empty title records "no answer".
    private var cache: [String: String]
    private var consecutiveFailures = 0

    /// After this many subprocess failures in a row, stop trying this session.
    private static let maxFailures = 2

    /// Location of the persisted cache under the current user's home directory.
    private static var url: URL {
        FileManager.default.homeDirectoryForCurrentUser
            .appendingPathComponent(".local/state/tv-anarchy/title-refinements.json")
    }

    /// Loads the persisted cache; a missing or undecodable file yields an empty cache.
    init() {
        if let data = try? Data(contentsOf: Self.url),
           let map = try? JSONDecoder().decode([String: String].self, from: data) {
            cache = map
        } else {
            cache = [:]
        }
    }

    /// `true` while fewer than `maxFailures` failures have occurred in a row.
    var healthy: Bool {
        lock.lock(); defer { lock.unlock() }
        return consecutiveFailures < Self.maxFailures
    }

    /// Returns the remembered title for `filename` (possibly empty), or `nil`
    /// if nothing has been remembered for it.
    func cached(_ filename: String) -> String? {
        lock.lock(); defer { lock.unlock() }
        return cache[filename]
    }

    /// Counts one more consecutive failure.
    func recordFailure() {
        lock.lock(); defer { lock.unlock() }
        consecutiveFailures += 1
    }

    /// Resets the consecutive-failure count.
    func recordSuccess() {
        lock.lock(); defer { lock.unlock() }
        consecutiveFailures = 0
    }

    /// Stores `title` for `filename` in memory and rewrites the cache file.
    ///
    /// Persistence is best-effort: encoding, directory-creation and write
    /// errors are ignored, leaving the in-memory entry in place.
    func remember(_ filename: String, title: String) {
        // Publish to memory first so concurrent readers see it without waiting on I/O.
        lock.lock()
        cache[filename] = title
        lock.unlock()

        // One writer at a time, each snapshotting *after* it owns the write slot:
        // every write then contains all entries of the write before it, so a
        // slow writer can no longer clobber the file with an older snapshot.
        persistLock.lock(); defer { persistLock.unlock() }
        lock.lock()
        let snapshot = cache
        lock.unlock()

        guard let data = try? JSONEncoder().encode(snapshot) else { return }
        try? FileManager.default.createDirectory(at: Self.url.deletingLastPathComponent(),
                                                 withIntermediateDirectories: true)
        try? data.write(to: Self.url, options: .atomic)
    }
}
|