tv-anarchy/Sources/TVAnarchyCore/Library/LibraryScanner.swift
Natalie 83a21ca105 feat(library): optimize scan merging with cached metadata
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
2026-06-09 22:16:25 -07:00

357 lines
19 KiB
Swift
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import Foundation
/// Direct Swift scan of local media roots — a faithful port of plum-control-mcp's
/// `media/library.ts` (SxxEyy parse, show-root bucketing, release-noise name
/// normalization). The primary library source is black's prebuilt index (see
/// `scanFromIndex`); this local walk only covers a configured, media-tree-structured
/// `MEDIA_ROOTS` dir. It never walks the old NFS `~/media` mount — this project does
/// not depend on NFS. (Loose downloaded files are matched separately by
/// `DownloadsIndex`, by filename.)
public enum LibraryScanner {
/// Lower-case file extensions that count as video files during a walk.
private static let videoExt: Set<String> = [
    "mkv", "mp4", "m4v", "avi", "mov", "webm",
]
/// Season/episode marker `S(\d{1,2})E(\d{1,3})`, case-insensitive. Compiled once;
/// the pattern is a fixed literal, so a compile failure is a programmer error and
/// is allowed to trap.
private static let sxxeyy: NSRegularExpression = {
    let pattern = "S(\\d{1,2})E(\\d{1,3})"
    return try! NSRegularExpression(pattern: pattern, options: [.caseInsensitive])
}()
/// Media-tree-structured roots for the offline library walk
/// (`<root>/<category>/<show>/…`), read from the colon-separated `MEDIA_ROOTS`
/// environment variable.
///
/// (Distinct from `DownloadsIndex`, which matches loose downloaded files by name
/// and is NOT tree-structured.)
///
/// - Returns: The non-empty entries of `MEDIA_ROOTS` in order, or an empty list
///   when the variable is unset or empty — there is no `~/media` default.
public static func mediaRoots() -> [String] {
    let configured = ProcessInfo.processInfo.environment["MEDIA_ROOTS"] ?? ""
    guard !configured.isEmpty else { return [] }
    // Empty pieces (leading/trailing/doubled colons) are dropped by the split itself.
    return configured
        .split(separator: ":", omittingEmptySubsequences: true)
        .map { String($0) }
}
/// Reports whether at least one configured media root exists and holds content.
/// (No autofs to coax now that `~/media` is gone, so a single directory read per
/// root suffices.) MUST be called off the main actor — a directory read can block
/// on slow disks.
///
/// - Returns: `true` as soon as one root from `mediaRoots()` can be listed and is
///   non-empty; `false` when every root is missing, unreadable or empty.
public static func rootsAvailable() -> Bool {
    let fileManager = FileManager.default
    for root in mediaRoots() {
        // A root that cannot be listed simply does not count.
        guard let entries = try? fileManager.contentsOfDirectory(atPath: root) else { continue }
        if !entries.isEmpty { return true }
    }
    return false
}
/// One video discovered by a scan.
///
/// `season`/`episode` are nil for non-episodic files (movies/clips). In the local
/// walk `size` is only filled in for those non-episodic files (it is used to pick
/// the main file in a movie folder) — episodic files get `0` there. The index
/// path (`scanFromIndex`) carries the size column for every row.
private struct FoundFile {
    /// Absolute path of the video.
    let path: String
    /// File size in bytes, or `0` when it was not collected.
    let size: Int64
    /// Season number parsed from the file name, if any.
    let season: Int?
    /// Episode number parsed from the file name, if any.
    let episode: Int?
    /// Content modification date, when known.
    let mtime: Date?
}
/// Walks every configured media root and groups the videos found into shows.
///
/// - Parameter onProgress: If given, called periodically with the running count
///   of directories read — a live, honest progress proxy for the UI (there is no
///   known total to make a determinate %). The count is cumulative across all
///   roots and never decreases; it advances in the coarse steps the walker
///   reports, so it may trail the true number slightly.
/// - Returns: The shows produced by the shared grouping step, sorted by name.
public static func scan(onProgress: ((Int) -> Void)? = nil) -> [CachedShow] {
    let fm = FileManager.default
    var grouped: [String: [FoundFile]] = [:]
    var mediaRootForKey: [String: String] = [:]
    // Directories already accounted for by roots walked earlier. The walker counts
    // from zero for each root, so without this offset the number handed to the UI
    // would jump backwards whenever a new root starts.
    var finishedRootsTotal = 0
    for root in mediaRoots() {
        var isDir: ObjCBool = false
        guard fm.fileExists(atPath: root, isDirectory: &isDir), isDir.boolValue else { continue }
        let base = finishedRootsTotal
        var lastReported = 0
        var report: ((Int) -> Void)? = nil
        if let callback = onProgress {
            report = { count in
                lastReported = count
                callback(base + count)
            }
        }
        let found = walkForVideos(rootURL: URL(fileURLWithPath: root, isDirectory: true),
                                  rootPath: root, maxDepth: 4, onProgress: report)
        for f in found {
            let key = showRoot(for: f.path, mediaRoot: root)
            grouped[key, default: []].append(f)
            mediaRootForKey[key] = root
        }
        finishedRootsTotal += lastReported
    }
    return group(grouped: grouped, mediaRootForKey: mediaRootForKey)
}
/// Parses the index TSV produced by black's `build_index.sh` (one
/// `size<TAB>mtime-epoch<TAB>path` line per video, black-side absolute paths) into
/// the same `CachedShow` list a local walk produces — running the IDENTICAL
/// grouping rules so there's no second source of truth. Paths are kept black-side
/// (the canonical identity); at launch a downloaded copy plays on plum's VLC and
/// anything else routes to black (mpv) — see `PlayerController`. This is the
/// primary, NFS-free library path: black builds the index out-of-band, plum just
/// parses it.
///
/// - Parameter tsv: Full text of the index. Lines may end in LF or CRLF. Rows
///   without exactly three columns, or with an empty path column, are skipped.
///   An unparsable size becomes `0`; an unparsable mtime becomes `nil`.
/// - Returns: The shows produced by the shared grouping step, sorted by name.
public static func scanFromIndex(_ tsv: String) -> [CachedShow] {
    let root = MediaPaths.remoteRoot
    var grouped: [String: [FoundFile]] = [:]
    var mediaRootForKey: [String: String] = [:]
    // "\r\n" is ONE Character in Swift, so splitting on "\n" alone would not break
    // a CRLF-terminated index into lines at all. Accept both terminators.
    let rows = tsv.split(whereSeparator: { $0 == "\n" || $0 == "\r\n" })
    for raw in rows {
        // maxSplits keeps any further tabs inside the path column intact.
        let cols = raw.split(separator: "\t", maxSplits: 2, omittingEmptySubsequences: false)
        guard cols.count == 3, !cols[2].isEmpty else { continue }
        let size = Int64(cols[0]) ?? 0
        let mtime = Double(cols[1]).map { Date(timeIntervalSince1970: $0) }
        let path = String(cols[2])
        let name = (path as NSString).lastPathComponent
        let se = parseSxxEyy(name)
        let f = FoundFile(path: path, size: size, season: se?.0, episode: se?.1, mtime: mtime)
        let key = showRoot(for: path, mediaRoot: root)
        grouped[key, default: []].append(f)
        mediaRootForKey[key] = root
    }
    return group(grouped: grouped, mediaRootForKey: mediaRootForKey)
}
/// Shared post-gather step: turn grouped `FoundFile`s into shows (series vs movie),
/// dedup episodes, capture year + newest mtime, merge split-season siblings, sort.
///
/// - Parameters:
///   - grouped: Found files bucketed by their show-root key (see `showRoot`).
///   - mediaRootForKey: The media root each key was discovered under.
/// - Returns: Shows ordered by name (localized, case-insensitive); shows with
///   equal names are ordered by `rootDir` so the result is reproducible.
private static func group(grouped: [String: [FoundFile]],
                          mediaRootForKey: [String: String]) -> [CachedShow] {
    var out: [CachedShow] = []
    // Dictionary iteration order differs from run to run. Walk the keys sorted so
    // the order-sensitive merge below (whose "first rootDir wins") always picks
    // the same sibling folder for the same library.
    for key in grouped.keys.sorted() {
        guard let files = grouped[key] else { continue }
        let mediaRoot = mediaRootForKey[key] ?? ""
        let comps = componentsAfter(mediaRoot: mediaRoot, path: key)
        let cat = comps.first ?? ""
        // A series only when the key is a real show FOLDER (category/show, depth
        // 2). Loose files share the category-root key; one stray SxxEyy match
        // among them must NOT flip the whole pile into a single "series" and drop
        // the rest (this once silently dropped ~940 loose files). Those go to
        // movieItems, which makes each loose file its own movie.
        let isShowFolder = comps.count >= 2
        let episodic = files.filter { $0.season != nil }
        if isShowFolder, !episodic.isEmpty {
            var eps: [CachedEpisode] = episodic.compactMap { file -> CachedEpisode? in
                guard let season = file.season, let episode = file.episode else { return nil }
                return CachedEpisode(path: file.path, season: season, episode: episode,
                                     label: episodeLabel(file.path))
            }
            eps.sort(by: episodeOrder)
            // Collapse duplicate episodes across releases (e.g. 1080p + 720p
            // both have S01E06) → one entry per season×episode. The first after
            // sort wins; alternate releases stay reachable via the player's
            // quality switcher. `season * 1000 + episode` is collision-free
            // because the parser caps episodes at three digits.
            var seen = Set<Int>()
            eps = eps.filter { seen.insert($0.season * 1000 + $0.episode).inserted }
            let year = episodic.compactMap { parseYear($0.path) }.min()
            out.append(CachedShow(name: normalizeShowName((key as NSString).lastPathComponent),
                                  rootDir: key, category: cat, kind: .series, episodes: eps, year: year,
                                  addedAt: files.compactMap(\.mtime).max()))
        } else {
            out.append(contentsOf: movieItems(key: key, files: files, mediaRoot: mediaRoot, category: cat))
        }
    }
    return mergeSeriesByName(out).sorted { lhs, rhs in
        let byName = lhs.name.localizedCaseInsensitiveCompare(rhs.name)
        if byName != .orderedSame { return byName == .orderedAscending }
        return lhs.rootDir < rhs.rootDir
    }
}
/// Merge SERIES that share a normalized name + category but live in separate
/// top-level folders (e.g. `Bridgerton.S01`, `Bridgerton.S02`, `Bridgerton.S03`
/// as siblings → one "Bridgerton" with all seasons). Episodes are unioned and
/// deduped by season×episode; the earliest year and first rootDir win. Movies
/// are left distinct.
///
/// - Parameter shows: Shows in any order; the first series seen for a given
///   name + category becomes the merge target.
/// - Returns: Non-series items and un-mergeable series in input order, followed
///   by the merged series in first-seen order.
static func mergeSeriesByName(_ shows: [CachedShow]) -> [CachedShow] {
    var mergedByIdentity: [String: CachedShow] = [:]
    var firstSeenOrder: [String] = []
    var passthrough: [CachedShow] = []

    for candidate in shows {
        if candidate.kind != .series {
            passthrough.append(candidate)
            continue
        }
        // U+0001 cannot appear in a sensible name, so it is a safe field separator.
        let identity = candidate.category + "\u{1}" + candidate.name.lowercased()
        guard var merged = mergedByIdentity[identity] else {
            mergedByIdentity[identity] = candidate
            firstSeenOrder.append(identity)
            continue
        }
        // Merge ONLY when the season sets are disjoint — one show split across
        // sibling season folders (Bridgerton S01 / S02 / S03). Overlapping
        // seasons (both start at S01) mean two DIFFERENT shows that merely share
        // a name + category (e.g. an anime and its live-action remake) — keep
        // them as separate entries rather than interleaving their episodes.
        let knownSeasons = Set(merged.episodes.map { $0.season })
        let overlaps = candidate.episodes.contains { knownSeasons.contains($0.season) }
        if overlaps {
            passthrough.append(candidate)
            continue
        }
        var combined = merged.episodes + candidate.episodes
        combined.sort(by: episodeOrder)
        var seenSlots = Set<Int>()
        combined = combined.filter { seenSlots.insert($0.season * 1000 + $0.episode).inserted }
        merged.episodes = combined
        merged.year = [merged.year, candidate.year].compactMap { $0 }.min()
        // NOTE(review): only episodes and year are merged; the target keeps its own
        // `addedAt` — confirm that is intended for "recently added" ordering.
        mergedByIdentity[identity] = merged
    }

    return passthrough + firstSeenOrder.compactMap { mergedByIdentity[$0] }
}
/// Turn a group of non-episodic videos into movie items. A movie *folder*
/// (key deeper than the category dir) yields one item — the largest non-sample
/// file. Loose files sitting directly under the category dir each become their
/// own item.
///
/// - Parameters:
///   - key: Group key — a movie folder, or the category dir for loose files.
///   - files: Every video bucketed under `key`.
///   - mediaRoot: Media root the key lives under (used to measure its depth).
///   - category: Category recorded on each produced item.
/// - Returns: One item per loose file, or a single item for a movie folder;
///   empty only when `files` is empty.
private static func movieItems(key: String, files: [FoundFile],
                               mediaRoot: String, category: String) -> [CachedShow] {
    // Prefer non-sample/extra files, but fall back to everything rather than
    // producing nothing when a folder holds only samples/extras.
    let nonSample = files.filter { !isSampleOrExtra($0.path) }
    let candidates = nonSample.isEmpty ? files : nonSample

    func makeMovie(file: String, title: String, rootDir: String, added: Date?) -> CachedShow {
        let only = CachedEpisode(path: file, season: 0, episode: 0, label: title)
        return CachedShow(name: normalizeShowName(title), rootDir: rootDir, category: category,
                          kind: .movie, episodes: [only],
                          year: parseYear(file) ?? parseYear(rootDir), addedAt: added)
    }

    let isMovieFolder = componentsAfter(mediaRoot: mediaRoot, path: key).count > 1
    guard isMovieFolder else {
        // Loose files at the category root → one movie each (rootDir = file).
        return candidates.map { file in
            let fileName = (file.path as NSString).lastPathComponent
            let title = (fileName as NSString).deletingPathExtension
            return makeMovie(file: file.path, title: title, rootDir: file.path, added: file.mtime)
        }
    }

    // Movie folder → the largest file represents it (rootDir = the folder).
    guard let largest = candidates.max(by: { $0.size < $1.size }) else { return [] }
    let newest = files.compactMap { $0.mtime }.max()
    let folderName = (key as NSString).lastPathComponent
    return [makeMovie(file: largest.path, title: folderName, rootDir: key, added: newest)]
}
/// Strict ordering for episodes: by season first, then by episode number.
private static func episodeOrder(_ lhs: CachedEpisode, _ rhs: CachedEpisode) -> Bool {
    (lhs.season, lhs.episode) < (rhs.season, rhs.episode)
}
/// Carry forward poster/overview from a prior snapshot onto a fresh scan, then
/// backfill anything still missing from the `.meta` cache. Keys are normalized
/// to black-side form so enrichment survives the legacy-plum → black-side path
/// switch (a raw rootDir match would silently drop every poster). The `.meta`
/// cache is the durable record: re-folding it here means one bad snapshot can
/// never lose artwork permanently.
///
/// - Parameters:
///   - scanned: Shows from the fresh scan.
///   - previous: Shows from the prior snapshot; on duplicate normalized root
///     dirs the first entry wins.
/// - Returns: `scanned`, in the same order, with enrichment attached.
public static func mergeEnrichment(_ scanned: [CachedShow], from previous: [CachedShow]) -> [CachedShow] {
    let previousByRoot = Dictionary(
        previous.map { (MediaPaths.toRemote($0.rootDir), $0) },
        uniquingKeysWith: { first, _ in first })

    var enriched: [CachedShow] = []
    enriched.reserveCapacity(scanned.count)
    for fresh in scanned {
        var show = fresh
        let remoteRoot = MediaPaths.toRemote(fresh.rootDir)

        if let old = previousByRoot[remoteRoot] {
            show.posterPath = old.posterPath
            show.overview = old.overview
            // Re-attach per-episode metaPath by (normalized) episode path.
            let metaByEpisodePath = Dictionary(
                old.episodes.map { (MediaPaths.toRemote($0.path), $0.metaPath) },
                uniquingKeysWith: { first, _ in first })
            for index in show.episodes.indices {
                let episodeKey = MediaPaths.toRemote(show.episodes[index].path)
                // `?? nil` flattens "no entry" and "entry without a metaPath".
                if let meta = metaByEpisodePath[episodeKey] ?? nil {
                    show.episodes[index].metaPath = meta
                }
            }
        }

        // Only consult the `.meta` cache when something is still missing.
        let needsBackfill = show.posterPath == nil || show.overview == nil
        if needsBackfill, let cached = MetaWriter.loadCache(forPath: remoteRoot) {
            if show.posterPath == nil { show.posterPath = cached.posterURL }
            if show.overview == nil { show.overview = cached.overview }
        }
        enriched.append(show)
    }
    return enriched
}
// MARK: - walk
/// Iteratively walks `rootURL` (explicit stack, no recursion) and returns every
/// file whose extension is in `videoExt`.
///
/// - Parameters:
///   - rootURL: Directory to start from.
///   - rootPath: String form of the same directory; emitted paths are built by
///     appending entry names to it.
///   - maxDepth: A directory found at depth `d` is only descended into when
///     `d < maxDepth` (the root itself is depth 0).
///   - onProgress: Called with the number of directories read so far, on every
///     32nd successfully read directory.
/// - Returns: The videos found. Episodic files carry season/episode and size 0;
///   other videos carry their prefetched size. Directories that cannot be read
///   are skipped silently.
///
/// NOTE(review): a final count is never reported, so a walk of fewer than 32
/// directories never invokes `onProgress` — confirm the UI tolerates that.
private static func walkForVideos(rootURL: URL, rootPath: String, maxDepth: Int,
onProgress: ((Int) -> Void)? = nil) -> [FoundFile] {
let fm = FileManager.default
var out: [FoundFile] = []
var dirsVisited = 0
// Prefetch is-dir + size with the directory read. Over NFS this is the hot
// path: the old code did a separate `fileExists(isDirectory:)` stat PER
// entry plus an `attributesOfItem` stat per movie file — two extra round
// trips each. `contentsOfDirectory(at:includingPropertiesForKeys:)` batches
// those attributes into the enumeration (readdirplus), and the values are
// cached on the URL, so `resourceValues` below costs no further syscall.
//
// We build emitted paths as STRINGS appended to `rootPath` rather than
// reading `url.path`, because `contentsOfDirectory(at:)` canonicalizes
// symlinks/APFS firmlinks (e.g. /var → /private/var) — so `url.path` would
// no longer be prefixed by the media root the rest of the scan compares
// against (`componentsAfter`). Appending to `rootPath` keeps the prefix.
let keys: [URLResourceKey] = [.isDirectoryKey, .fileSizeKey, .contentModificationDateKey]
let keySet = Set(keys)
// Pending directories, processed last-in-first-out.
var stack: [(url: URL, path: String, depth: Int)] = [(rootURL, rootPath, 0)]
while let top = stack.popLast() {
// An unreadable directory is skipped and not counted.
guard let entries = try? fm.contentsOfDirectory(
at: top.url, includingPropertiesForKeys: keys, options: [.skipsHiddenFiles]
) else { continue }
dirsVisited += 1
if dirsVisited % 32 == 0 { onProgress?(dirsVisited) }
for url in entries {
let name = url.lastPathComponent
let full = top.path + "/" + name
let rv = try? url.resourceValues(forKeys: keySet)
if rv?.isDirectory == true {
// Directories are never emitted; they are queued while within the depth limit.
if top.depth < maxDepth { stack.append((url, full, top.depth + 1)) }
continue
}
guard videoExt.contains((name as NSString).pathExtension.lowercased()) else { continue }
let mtime = rv?.contentModificationDate
if let (s, e) = parseSxxEyy(name) {
// Episodic file: size is not needed downstream, so it is recorded as 0.
out.append(FoundFile(path: full, size: 0, season: s, episode: e, mtime: mtime))
} else {
// Non-episodic video (movie/clip) — size (prefetched, no extra
// stat) lets a movie folder pick the main file over samples.
out.append(FoundFile(path: full, size: Int64(rv?.fileSize ?? 0),
season: nil, episode: nil, mtime: mtime))
}
}
}
return out
}
/// Group key for an episode/file: the **top-level show folder** under the
/// category (`/media/tv/Psych`), collapsing every release + season subfolder
/// beneath it into ONE show. (Was: the release/season dir, which made Psych
/// appear once per release.) Loose files directly under the category fall back
/// to the category dir so `movieItems` splits them per-file.
///
/// - Parameters:
///   - filePath: Path of the video file.
///   - mediaRoot: Media root the file was found under.
/// - Returns: `<mediaRoot>/<category>/<show>` when the file sits at least one
///   folder below the category; otherwise the file's parent directory.
private static func showRoot(for filePath: String, mediaRoot: String) -> String {
    let below = componentsAfter(mediaRoot: mediaRoot, path: filePath)
    // Fewer than <category>/<show>/…/<file> → no show folder to bucket under.
    guard below.count >= 3 else {
        return (filePath as NSString).deletingLastPathComponent
    }
    return [mediaRoot, below[0], below[1]].joined(separator: "/")
}
// MARK: - parsing
/// Extracts the first `SxxEyy` season/episode marker from a file name.
///
/// - Parameter name: File name (or any string) to search, case-insensitively.
/// - Returns: `(season, episode)` for the first match, or `nil` when there is no
///   marker or its digits do not convert to `Int`.
public static func parseSxxEyy(_ name: String) -> (Int, Int)? {
    let whole = NSRange(name.startIndex..<name.endIndex, in: name)
    guard let match = sxxeyy.firstMatch(in: name, options: [], range: whole) else { return nil }
    guard let seasonRange = Range(match.range(at: 1), in: name),
          let episodeRange = Range(match.range(at: 2), in: name) else { return nil }
    guard let season = Int(name[seasonRange]),
          let episode = Int(name[episodeRange]) else { return nil }
    return (season, episode)
}
/// Display label for an episode: the file name without directory or extension.
private static func episodeLabel(_ path: String) -> String {
    ((path as NSString).lastPathComponent as NSString).deletingPathExtension
}
/// Matches a standalone four-digit 19xx/20xx year. Compiled once from a fixed
/// literal, so a compile failure is a programmer error and is allowed to trap.
private static let yearRe = try! NSRegularExpression(pattern: "\\b(19|20)\\d{2}\\b")
/// First 19xx/20xx year in the path — the release/air year for franchise order.
///
/// - Parameter path: Any path or name; the whole string is searched.
/// - Returns: The first standalone year found, or `nil` when there is none.
static func parseYear(_ path: String) -> Int? {
    let whole = NSRange(path.startIndex..<path.endIndex, in: path)
    guard let hit = yearRe.firstMatch(in: path, options: [], range: whole) else { return nil }
    return Range(hit.range, in: path).flatMap { Int(path[$0]) }
}
/// Path components of `path` below `mediaRoot` — e.g. `/media/movies/Inception`
/// under `/media` → `["movies", "Inception"]`. First element is the category.
///
/// - Parameters:
///   - mediaRoot: Root directory, with or without a trailing slash. An empty
///     root treats every component of `path` as being below it.
///   - path: Path to decompose.
/// - Returns: The components below the root, or `[]` when `path` is the root
///   itself or is not inside it.
static func componentsAfter(mediaRoot: String, path: String) -> [String] {
    guard path.hasPrefix(mediaRoot) else { return [] }
    let rest = path.dropFirst(mediaRoot.count)
    // A bare string-prefix match is not enough: `/media2/x` starts with `/media`
    // but is a sibling, not a child. The prefix must end on a component boundary.
    let endsOnBoundary = mediaRoot.isEmpty
        || mediaRoot.hasSuffix("/")
        || rest.isEmpty
        || rest.hasPrefix("/")
    guard endsOnBoundary else { return [] }
    // Splitting drops empty pieces, so leading or doubled slashes need no
    // special handling.
    return rest.split(separator: "/").map(String.init)
}
/// Sample reels, trailers and extras shouldn't represent a movie folder.
///
/// - Parameter path: Path of the video; only its last component is examined.
/// - Returns: `true` when the file name contains a sample/extra marker word.
///
/// NOTE(review): `\b` treats `_` as a word character, so `movie_sample.mkv`
/// presumably does not match — confirm whether that is intended.
static func isSampleOrExtra(_ path: String) -> Bool {
    let fileName = (path as NSString).lastPathComponent.lowercased()
    let markers = [
        "\\bsample\\b",
        "\\b(extras?|featurettes?|trailers?|behind[ ._-]the[ ._-]scenes)\\b",
    ]
    return markers.contains { matches(fileName, $0) }
}
/// Strip bracketed groups, year-and-after, release-noise-and-after, then tidy
/// separators. Mirrors `normalizeShowName` in library.ts.
///
/// - Parameter dirName: Raw folder or file base name.
/// - Returns: The cleaned display name, or `dirName` unchanged when cleaning
///   would leave nothing.
public static func normalizeShowName(_ dirName: String) -> String {
    // Applied strictly in this order; each pass sees the previous pass's output.
    let passes: [(pattern: String, template: String)] = [
        ("\\[[^\\]]*\\]", " "),                 // [bracketed] groups
        ("\\([^)]*\\)", " "),                   // (parenthesized) groups
        ("\\b(19|20)\\d{2}\\b.*$", ""),         // a year and everything after it
        ("\\b(season\\s*\\d+|s\\d{1,2}|complete|series|repack|bluray|webrip|web-dl|hdtv|dvdrip|x264|x265|h\\.?26[45]|hevc|1080p|720p|480p|tvrip|extras?|batch|commentary)\\b.*$", ""),
        ("[._-]+", " "),                        // separators → single space
        ("\\s+", " "),                          // collapse whitespace runs
    ]
    let cleaned = passes
        .reduce(dirName) { current, pass in replace(current, pass.pattern, pass.template) }
        .trimmingCharacters(in: .whitespaces)
    return cleaned.isEmpty ? dirName : cleaned
}
// MARK: - regex helpers
/// Whether `pattern` (a regular expression, matched case-insensitively) occurs
/// anywhere in `s`.
private static func matches(_ s: String, _ pattern: String) -> Bool {
    let hit = s.range(of: pattern, options: [.regularExpression, .caseInsensitive])
    return hit != nil
}
/// Replaces every case-insensitive match of the regular expression `pattern` in
/// `s` with the template `with`. Returns `s` unchanged when the pattern does not
/// compile.
private static func replace(_ s: String, _ pattern: String, _ with: String) -> String {
    let regex: NSRegularExpression
    do {
        regex = try NSRegularExpression(pattern: pattern, options: [.caseInsensitive])
    } catch {
        return s
    }
    let whole = NSRange(s.startIndex..<s.endIndex, in: s)
    return regex.stringByReplacingMatches(in: s, options: [], range: whole, withTemplate: with)
}
}