From a6aa4690848441b5d360ecebde7b6cd8036ad63c Mon Sep 17 00:00:00 2001 From: zebra Date: Wed, 10 Jun 2026 22:52:39 -0700 Subject: [PATCH] perf(repair): skip YouTube JS signature step when fetching tags --repair only reads metadata (never downloads), so pass --extractor-args youtube:player_skip=js to yt-dlp. Keeps album/artist/year/title but avoids the slow, throttle-prone nsig JS step (which crawls without a JS runtime and trips YouTube rate-limiting during bulk runs). run_yt_dlp_get_metadata gains an optional extra_args param; the download path is unchanged. Co-Authored-By: Claude Opus 4.8 --- musicfetch | 15 +++++++++------ tests/test_repair.py | 36 +++++++++++++++++++++++++++++------- 2 files changed, 38 insertions(+), 13 deletions(-) diff --git a/musicfetch b/musicfetch index 0df5be6..3164f74 100755 --- a/musicfetch +++ b/musicfetch @@ -767,18 +767,21 @@ def download_single(url: str, root: str, quality: str, dry_run: bool) -> dict: return {"title": title, "artist": artist, "ok": ok} -def run_yt_dlp_get_metadata(url: str) -> Optional[dict]: +def run_yt_dlp_get_metadata(url: str, extra_args=None) -> Optional[dict]: + cmd = ["yt-dlp", "-j", "--no-playlist", *(extra_args or []), url] try: - result = subprocess.run( - ["yt-dlp", "-j", "--no-playlist", url], - capture_output=True, text=True, check=True, - ) + result = subprocess.run(cmd, capture_output=True, text=True, check=True) return json.loads(result.stdout) except (subprocess.CalledProcessError, json.JSONDecodeError) as e: err(f"yt-dlp metadata extraction failed: {e}") return None +# Repair only reads tags — skip YouTube's slow/throttled JS signature step +# (we never download here), which keeps metadata but is far faster per file. +_REPAIR_META_ARGS = ["--extractor-args", "youtube:player_skip=js"] + + def get_artist_from_metadata(meta: dict) -> str: for key in ("artist", "creator", "uploader", "channel"): if meta.get(key): @@ -893,7 +896,7 @@ def repair_file(path: str, source: str, dry_run: bool) -> list[str]: if not url: dbg(f"skip (source '{source}' not re-queryable): {path}") return [] - meta = run_yt_dlp_get_metadata(url) + meta = run_yt_dlp_get_metadata(url, _REPAIR_META_ARGS) if not meta: dbg(f"skip (no metadata): {path}") return [] diff --git a/tests/test_repair.py b/tests/test_repair.py index 4f82fd4..a690a43 100644 --- a/tests/test_repair.py +++ b/tests/test_repair.py @@ -63,7 +63,7 @@ class _FakeAudio(dict): def test_repair_file_fixes_album_and_year(monkeypatch): monkeypatch.setattr(mf, "run_yt_dlp_get_metadata", - lambda url: {"artist": "Daft Punk", "title": "Aerodynamic", + lambda url, *a: {"artist": "Daft Punk", "title": "Aerodynamic", "album": "Discovery", "release_year": 2001}) audio = _FakeAudio({"artist": ["Daft Punk"], "title": ["Aerodynamic"]}) # album/date missing monkeypatch.setattr(mf, "_open_audio", lambda path: (audio, None)) @@ -76,7 +76,7 @@ def test_repair_file_fixes_album_and_year(monkeypatch): def test_repair_file_dry_run_writes_nothing(monkeypatch): monkeypatch.setattr(mf, "run_yt_dlp_get_metadata", - lambda url: {"artist": "A", "title": "T", "album": "Alb", "release_year": 2020}) + lambda url, *a: {"artist": "A", "title": "T", "album": "Alb", "release_year": 2020}) audio = _FakeAudio({}) monkeypatch.setattr(mf, "_open_audio", lambda path: (audio, None)) changed = mf.repair_file(f"X/youtube/T [{YT_ID}].opus", "youtube", dry_run=True) @@ -88,7 +88,7 @@ def test_repair_file_dry_run_writes_nothing(monkeypatch): def test_repair_file_skips_music_video(monkeypatch): # No album AND no valid release year -> treat as a video, leave tags alone. monkeypatch.setattr(mf, "run_yt_dlp_get_metadata", - lambda url: {"title": "Artist - Song (Official Music Video)", + lambda url, *a: {"title": "Artist - Song (Official Music Video)", "uploader": "SomeVEVO", "upload_date": "20110101"}) audio = _FakeAudio({"artist": ["Real Artist"], "title": ["Song"]}) monkeypatch.setattr(mf, "_open_audio", lambda path: (audio, None)) @@ -100,7 +100,7 @@ def test_repair_file_skips_music_video(monkeypatch): def test_repair_file_fills_missing_but_never_clobbers(monkeypatch): # Source artist is a channel name; existing artist must be kept. monkeypatch.setattr(mf, "run_yt_dlp_get_metadata", - lambda url: {"artist": "SomeChannelVEVO", "title": "Channel Decorated Title", + lambda url, *a: {"artist": "SomeChannelVEVO", "title": "Channel Decorated Title", "album": "Real Album", "release_year": 2019}) audio = _FakeAudio({"artist": ["Correct Artist"], "title": ["Clean Title"]}) monkeypatch.setattr(mf, "_open_audio", lambda path: (audio, None)) @@ -112,7 +112,7 @@ def test_repair_file_fills_missing_but_never_clobbers(monkeypatch): def test_repair_file_fills_missing_artist_when_absent(monkeypatch): monkeypatch.setattr(mf, "run_yt_dlp_get_metadata", - lambda url: {"artist": "Real Artist", "title": "T", + lambda url, *a: {"artist": "Real Artist", "title": "T", "album": "Alb", "release_year": 2020}) audio = _FakeAudio({}) # nothing present -> fill artist + title too monkeypatch.setattr(mf, "_open_audio", lambda path: (audio, None)) @@ -123,7 +123,7 @@ def test_repair_file_fills_missing_artist_when_absent(monkeypatch): def test_repair_file_skips_bad_id(monkeypatch): called = {"meta": False} monkeypatch.setattr(mf, "run_yt_dlp_get_metadata", - lambda url: called.update(meta=True) or {}) + lambda url, *a: called.update(meta=True) or {}) # last bracket is a descriptor, not a real id assert mf.repair_file("X/youtube/Song [Official Video].opus", "youtube", dry_run=False) == [] assert called["meta"] is False # never hit the network @@ -132,11 +132,33 @@ def test_repair_file_skips_bad_id(monkeypatch): def test_repair_file_skips_unparseable(monkeypatch): called = {"meta": False} monkeypatch.setattr(mf, "run_yt_dlp_get_metadata", - lambda url: called.update(meta=True) or {}) + lambda url, *a: called.update(meta=True) or {}) assert mf.repair_file("X/youtube/no-id.opus", "youtube", dry_run=False) == [] assert called["meta"] is False +def test_run_yt_dlp_get_metadata_passes_extra_args(monkeypatch): + captured = {} + + class _R: + stdout = '{"title": "x"}' + monkeypatch.setattr(mf.subprocess, "run", lambda cmd, **k: captured.update(cmd=cmd) or _R()) + mf.run_yt_dlp_get_metadata("http://u", ["--extractor-args", "youtube:player_skip=js"]) + assert "youtube:player_skip=js" in captured["cmd"] + + +def test_repair_uses_player_skip_fast_args(monkeypatch): + captured = {} + + def fake_meta(url, extra_args=None): + captured["extra"] = extra_args + return {"album": "A", "release_year": 2020, "artist": "X", "title": "T"} + monkeypatch.setattr(mf, "run_yt_dlp_get_metadata", fake_meta) + monkeypatch.setattr(mf, "_open_audio", lambda p: (_FakeAudio({}), None)) + mf.repair_file(f"X/youtube/T [{YT_ID}].opus", "youtube", dry_run=True) + assert captured["extra"] == mf._REPAIR_META_ARGS + + # ---- repair_library (real temp tree, repair_file mocked) ---- def test_repair_library_scans_only_source_dirs(tmp_path, monkeypatch): root = tmp_path