perf: parallelize --repair with a thread pool (--workers, default 8)
Each repaired file is an independent yt-dlp metadata round-trip, so repair is network-bound; the per-file repairs now run concurrently via ThreadPoolExecutor. Adds --workers (default 8) to cap concurrency, and prints a progress line every 100 files. At ~50k tracks this turns a ~day-long sequential run into hours. Lower --workers if YouTube rate-limits (429/403).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -336,3 +336,34 @@ def test_repair_file_dry_run_does_not_rename(tmp_path, monkeypatch):
|
||||
|
||||
def test_fs_safe_replaces_slash():
    """The output of ``_fs_safe`` must not contain a forward slash."""
    sanitized = mf._fs_safe("AC/DC Live")
    assert "/" not in sanitized
|
||||
|
||||
|
||||
# ---- parallel repair ----
|
||||
def test_repair_library_parallel_visits_all(tmp_path, monkeypatch):
    """Run ``repair_library`` with ``workers=8`` over 50 files and check none is missed.

    Creates 50 ``Artist<i>/youtube`` directories, each holding a single
    ``.opus`` file, replaces ``mf.repair_file`` with a stub that records the
    path it receives, and then checks both the returned ``(scanned, changed)``
    counts and the number of distinct paths the stub saw.
    """
    import threading

    total = 50
    for idx in range(total):
        folder = tmp_path / f"Artist{idx}" / "youtube"
        folder.mkdir(parents=True)
        track = folder / f"T{idx} [{YT_ID}].opus"
        track.write_text("x")

    visited = set()
    guard = threading.Lock()

    def fake(path, source, dry_run):
        # With workers=8 the stub may be invoked from several threads at
        # once, so serialize updates to the shared set.
        with guard:
            visited.add(path)
        return ["album=X"]

    monkeypatch.setattr(mf, "repair_file", fake)

    scanned, changed = mf.repair_library(str(tmp_path), dry_run=False, workers=8)

    assert (scanned, changed) == (total, total)
    assert len(visited) == total
|
||||
|
||||
|
||||
def test_repair_library_default_workers_still_works(tmp_path, monkeypatch):
    """Call ``repair_library`` without ``workers`` and expect ``(1, 1)`` for one file."""
    folder = tmp_path / "A" / "youtube"
    folder.mkdir(parents=True)
    (folder / f"T [{YT_ID}].opus").write_text("x")

    def always_changed(p, s, d):
        # Stand-in for repair_file: always returns a non-empty result.
        return ["x"]

    monkeypatch.setattr(mf, "repair_file", always_changed)

    result = mf.repair_library(str(tmp_path), dry_run=False)
    assert result == (1, 1)
|
||||
|
||||
Reference in New Issue
Block a user