perf: parallelize --repair with a thread pool (--workers, default 8)
Each repaired file is an independent yt-dlp metadata round-trip, so repair is network-bound; run them concurrently via ThreadPoolExecutor. Adds --workers (default 8) to cap concurrency and a progress line every 100 files. At ~50k tracks this turns a ~day-long sequential run into hours. Lower --workers if YouTube rate-limits (429/403). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -95,6 +95,7 @@ export LIDARR_API_KEY="your-lidarr-api-key"
|
||||
| `-o`, `--root PATH` | Output root folder (default `/media/music`). |
|
||||
| `--search-all` | Search all albums when adding an artist to Lidarr. |
|
||||
| `--repair` | Re-tag existing downloads under `--root` from source metadata (see below). |
|
||||
| `--workers N` | Parallel metadata fetches during `--repair` (default 8). |
|
||||
| `--retag-from-path` | Offline: re-tag artist/title from folder + filename (see below). |
|
||||
| `-x`, `--exclude NAME` | Folder under `--root` to skip during `--repair`/`--retag-from-path` (repeatable). |
|
||||
| `--debug` | Verbose output. |
|
||||
@@ -137,8 +138,11 @@ but it never overwrites a genuine existing artist/title with a channel name or d
|
||||
title. A bogus `NA [<id>].<ext>` filename is renamed to the recovered title, and a literal
|
||||
`NA` album with no source album is normalised to `Unknown Album`.
|
||||
|
||||
It re-queries the source over the network, so run it occasionally, not constantly. Requires
|
||||
`mutagen` (a yt-dlp dependency, usually already present). CLI-only — not exposed via the REST API.
|
||||
Each file needs its own yt-dlp network round-trip, so repair runs the fetches in a thread pool;
|
||||
`--workers N` (default 8) caps concurrency — lower it if YouTube starts rate-limiting
|
||||
(HTTP 429/403), or raise it to go faster on a large library. A progress line is printed every 100 files.
|
||||
Requires `mutagen` (a yt-dlp dependency, usually already present). CLI-only — not exposed via
|
||||
the REST API.
|
||||
|
||||
```bash
|
||||
# Preview what would change (writes nothing)
|
||||
|
||||
31
musicfetch
31
musicfetch
@@ -1020,19 +1020,31 @@ def repair_file(path: str, source: str, dry_run: bool) -> list[str]:
|
||||
return changed
|
||||
|
||||
|
||||
def repair_library(root: str, dry_run: bool, exclude=()) -> tuple[int, int]:
|
||||
"""Walk <root>/<artist>/<source>/ and re-tag audio files. Returns (scanned, changed)."""
|
||||
def repair_library(root: str, dry_run: bool, exclude=(), workers: int = 8) -> tuple[int, int]:
|
||||
"""Walk <root>/<artist>/<source>/ and re-tag audio files. Returns (scanned, changed).
|
||||
Each file is an independent yt-dlp network round-trip, so they run in a
|
||||
thread pool (network-bound); `workers` caps concurrency. Each thread owns
|
||||
its own file + request; the counts are tallied by the calling thread as results arrive, so nothing needs locking.
|
||||
Lower `workers` if YouTube starts rate-limiting (HTTP 429/403)."""
|
||||
if not os.path.isdir(root):
|
||||
err(f"Root folder not found: {root}")
|
||||
return 0, 0
|
||||
scanned = changed = 0
|
||||
for path, source, _artist in _iter_source_files(root, exclude):
|
||||
scanned += 1
|
||||
|
||||
def _one(path, source):
|
||||
try:
|
||||
if repair_file(path, source, dry_run):
|
||||
changed += 1
|
||||
return bool(repair_file(path, source, dry_run))
|
||||
except Exception as e: # noqa: BLE001 — one bad file shouldn't abort
|
||||
err(f"repair failed ({os.path.basename(path)}): {e}")
|
||||
return False
|
||||
|
||||
scanned = changed = 0
|
||||
files = ((p, s) for p, s, _a in _iter_source_files(root, exclude))
|
||||
with ThreadPoolExecutor(max_workers=max(1, workers)) as ex:
|
||||
for ok in ex.map(lambda ps: _one(*ps), files):
|
||||
scanned += 1
|
||||
changed += int(ok)
|
||||
if scanned % 100 == 0:
|
||||
print(f"… {scanned} scanned, {changed} changed", flush=True)
|
||||
verb = "Would repair" if dry_run else "Repaired"
|
||||
print(f"{verb} {changed}/{scanned} files")
|
||||
return scanned, changed
|
||||
@@ -1171,6 +1183,9 @@ def parse_args():
|
||||
help="Search all albums when adding an artist to Lidarr.")
|
||||
p.add_argument("--repair", action="store_true",
|
||||
help="Re-tag existing downloads under --root from source metadata.")
|
||||
p.add_argument("--workers", type=int, default=8,
|
||||
help="Parallel yt-dlp metadata fetches during --repair (default 8; "
|
||||
"lower if YouTube rate-limits).")
|
||||
p.add_argument("--retag-from-path", action="store_true",
|
||||
help="Offline: re-tag artist/title from folder + filename "
|
||||
"(fixes tags damaged by a prior --repair).")
|
||||
@@ -1192,7 +1207,7 @@ def main():
|
||||
return
|
||||
|
||||
if args.repair:
|
||||
repair_library(args.root, args.dry_run, args.exclude)
|
||||
repair_library(args.root, args.dry_run, args.exclude, args.workers)
|
||||
return
|
||||
|
||||
if not query:
|
||||
|
||||
@@ -336,3 +336,34 @@ def test_repair_file_dry_run_does_not_rename(tmp_path, monkeypatch):
|
||||
|
||||
def test_fs_safe_replaces_slash():
|
||||
assert "/" not in mf._fs_safe("AC/DC Live")
|
||||
|
||||
|
||||
# ---- parallel repair ----
|
||||
def test_repair_library_parallel_visits_all(tmp_path, monkeypatch):
|
||||
root = tmp_path
|
||||
n = 50
|
||||
for i in range(n):
|
||||
d = root / f"Artist{i}" / "youtube"
|
||||
d.mkdir(parents=True)
|
||||
(d / f"T{i} [{YT_ID}].opus").write_text("x")
|
||||
|
||||
import threading
|
||||
seen = set()
|
||||
lock = threading.Lock()
|
||||
|
||||
def fake(path, source, dry_run):
|
||||
with lock:
|
||||
seen.add(path)
|
||||
return ["album=X"]
|
||||
monkeypatch.setattr(mf, "repair_file", fake)
|
||||
scanned, changed = mf.repair_library(str(root), dry_run=False, workers=8)
|
||||
assert scanned == n and changed == n
|
||||
assert len(seen) == n
|
||||
|
||||
|
||||
def test_repair_library_default_workers_still_works(tmp_path, monkeypatch):
|
||||
root = tmp_path
|
||||
(root / "A" / "youtube").mkdir(parents=True)
|
||||
(root / "A" / "youtube" / f"T [{YT_ID}].opus").write_text("x")
|
||||
monkeypatch.setattr(mf, "repair_file", lambda p, s, d: ["x"])
|
||||
assert mf.repair_library(str(root), dry_run=False) == (1, 1)
|
||||
|
||||
Reference in New Issue
Block a user