perf: parallelize --repair with a thread pool (--workers, default 8)

Each file repair is an independent yt-dlp metadata round-trip, so --repair is
network-bound; run the repairs concurrently via ThreadPoolExecutor. Adds --workers
(default 8) to cap concurrency and a progress line every 100 files. At ~50k
tracks this turns a ~day-long sequential run into hours. Lower --workers if
YouTube rate-limits (429/403).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-13 11:13:52 -07:00
parent 0347a638cf
commit 92742b9ad6
3 changed files with 60 additions and 10 deletions

View File

@@ -336,3 +336,34 @@ def test_repair_file_dry_run_does_not_rename(tmp_path, monkeypatch):
def test_fs_safe_replaces_slash():
    """The sanitized name must not contain a forward slash."""
    sanitized = mf._fs_safe("AC/DC Live")
    assert "/" not in sanitized
# ---- parallel repair ----
def test_repair_library_parallel_visits_all(tmp_path, monkeypatch):
    """With workers=8, repair_library reports and visits every file in the tree."""
    import threading

    total = 50
    for idx in range(total):
        track_dir = tmp_path / f"Artist{idx}" / "youtube"
        track_dir.mkdir(parents=True)
        (track_dir / f"T{idx} [{YT_ID}].opus").write_text("x")

    visited = set()
    guard = threading.Lock()

    def fake(path, source, dry_run):
        # The stub may be invoked from several threads at once, so the
        # shared set is only touched while holding the lock.
        with guard:
            visited.add(path)
        return ["album=X"]

    monkeypatch.setattr(mf, "repair_file", fake)

    scanned, changed = mf.repair_library(
        str(tmp_path), dry_run=False, workers=8
    )

    assert (scanned, changed) == (total, total)
    # A set of distinct paths the same size as the library means no file
    # was skipped.
    assert len(visited) == total
def test_repair_library_default_workers_still_works(tmp_path, monkeypatch):
    """repair_library still works when the workers argument is omitted."""
    youtube_dir = tmp_path / "A" / "youtube"
    youtube_dir.mkdir(parents=True)
    (youtube_dir / f"T [{YT_ID}].opus").write_text("x")

    monkeypatch.setattr(mf, "repair_file", lambda p, s, d: ["x"])

    outcome = mf.repair_library(str(tmp_path), dry_run=False)
    assert outcome == (1, 1)