perf(repair): skip YouTube JS signature step when fetching tags

--repair only reads metadata (never downloads), so pass
--extractor-args youtube:player_skip=js to yt-dlp. Keeps album/artist/year/title
but avoids the slow, throttle-prone nsig JS step (which crawls without a JS
runtime and trips YouTube rate-limiting during bulk runs). run_yt_dlp_get_metadata
gains an optional extra_args param; the download path is unchanged.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-10 22:52:39 -07:00
parent f071158c10
commit a6aa469084
2 changed files with 38 additions and 13 deletions

View File

@@ -63,7 +63,7 @@ class _FakeAudio(dict):
def test_repair_file_fixes_album_and_year(monkeypatch):
monkeypatch.setattr(mf, "run_yt_dlp_get_metadata",
lambda url: {"artist": "Daft Punk", "title": "Aerodynamic",
lambda url, *a: {"artist": "Daft Punk", "title": "Aerodynamic",
"album": "Discovery", "release_year": 2001})
audio = _FakeAudio({"artist": ["Daft Punk"], "title": ["Aerodynamic"]}) # album/date missing
monkeypatch.setattr(mf, "_open_audio", lambda path: (audio, None))
@@ -76,7 +76,7 @@ def test_repair_file_fixes_album_and_year(monkeypatch):
def test_repair_file_dry_run_writes_nothing(monkeypatch):
monkeypatch.setattr(mf, "run_yt_dlp_get_metadata",
lambda url: {"artist": "A", "title": "T", "album": "Alb", "release_year": 2020})
lambda url, *a: {"artist": "A", "title": "T", "album": "Alb", "release_year": 2020})
audio = _FakeAudio({})
monkeypatch.setattr(mf, "_open_audio", lambda path: (audio, None))
changed = mf.repair_file(f"X/youtube/T [{YT_ID}].opus", "youtube", dry_run=True)
@@ -88,7 +88,7 @@ def test_repair_file_dry_run_writes_nothing(monkeypatch):
def test_repair_file_skips_music_video(monkeypatch):
# No album AND no valid release year -> treat as a video, leave tags alone.
monkeypatch.setattr(mf, "run_yt_dlp_get_metadata",
lambda url: {"title": "Artist - Song (Official Music Video)",
lambda url, *a: {"title": "Artist - Song (Official Music Video)",
"uploader": "SomeVEVO", "upload_date": "20110101"})
audio = _FakeAudio({"artist": ["Real Artist"], "title": ["Song"]})
monkeypatch.setattr(mf, "_open_audio", lambda path: (audio, None))
@@ -100,7 +100,7 @@ def test_repair_file_skips_music_video(monkeypatch):
def test_repair_file_fills_missing_but_never_clobbers(monkeypatch):
# Source artist is a channel name; existing artist must be kept.
monkeypatch.setattr(mf, "run_yt_dlp_get_metadata",
lambda url: {"artist": "SomeChannelVEVO", "title": "Channel Decorated Title",
lambda url, *a: {"artist": "SomeChannelVEVO", "title": "Channel Decorated Title",
"album": "Real Album", "release_year": 2019})
audio = _FakeAudio({"artist": ["Correct Artist"], "title": ["Clean Title"]})
monkeypatch.setattr(mf, "_open_audio", lambda path: (audio, None))
@@ -112,7 +112,7 @@ def test_repair_file_fills_missing_but_never_clobbers(monkeypatch):
def test_repair_file_fills_missing_artist_when_absent(monkeypatch):
monkeypatch.setattr(mf, "run_yt_dlp_get_metadata",
lambda url: {"artist": "Real Artist", "title": "T",
lambda url, *a: {"artist": "Real Artist", "title": "T",
"album": "Alb", "release_year": 2020})
audio = _FakeAudio({}) # nothing present -> fill artist + title too
monkeypatch.setattr(mf, "_open_audio", lambda path: (audio, None))
@@ -123,7 +123,7 @@ def test_repair_file_fills_missing_artist_when_absent(monkeypatch):
def test_repair_file_skips_bad_id(monkeypatch):
called = {"meta": False}
monkeypatch.setattr(mf, "run_yt_dlp_get_metadata",
lambda url: called.update(meta=True) or {})
lambda url, *a: called.update(meta=True) or {})
# last bracket is a descriptor, not a real id
assert mf.repair_file("X/youtube/Song [Official Video].opus", "youtube", dry_run=False) == []
assert called["meta"] is False # never hit the network
@@ -132,11 +132,33 @@ def test_repair_file_skips_bad_id(monkeypatch):
def test_repair_file_skips_unparseable(monkeypatch):
called = {"meta": False}
monkeypatch.setattr(mf, "run_yt_dlp_get_metadata",
lambda url: called.update(meta=True) or {})
lambda url, *a: called.update(meta=True) or {})
assert mf.repair_file("X/youtube/no-id.opus", "youtube", dry_run=False) == []
assert called["meta"] is False
def test_run_yt_dlp_get_metadata_passes_extra_args(monkeypatch):
    """Extra args given to run_yt_dlp_get_metadata must show up in the spawned command."""
    seen = {}

    class _FakeCompleted:
        # Stand-in for the subprocess result; only stdout (a JSON document) is provided.
        stdout = '{"title": "x"}'

    def fake_run(cmd, **k):
        # Record the command that would have been executed instead of running it.
        seen.update(cmd=cmd)
        return _FakeCompleted()

    monkeypatch.setattr(mf.subprocess, "run", fake_run)
    mf.run_yt_dlp_get_metadata("http://u", ["--extractor-args", "youtube:player_skip=js"])
    assert "youtube:player_skip=js" in seen["cmd"]
def test_repair_uses_player_skip_fast_args(monkeypatch):
    """repair_file must call the metadata fetcher with mf._REPAIR_META_ARGS as extra_args."""
    seen = {}

    def fake_meta(url, extra_args=None):
        # Remember which extra args repair_file forwarded, then return canned tags.
        seen["extra"] = extra_args
        return {"album": "A", "release_year": 2020, "artist": "X", "title": "T"}

    def fake_open_audio(p):
        # A fresh, empty tag container on every call; second tuple slot is None.
        return (_FakeAudio({}), None)

    monkeypatch.setattr(mf, "run_yt_dlp_get_metadata", fake_meta)
    monkeypatch.setattr(mf, "_open_audio", fake_open_audio)
    mf.repair_file(f"X/youtube/T [{YT_ID}].opus", "youtube", dry_run=True)
    assert seen["extra"] == mf._REPAIR_META_ARGS
# ---- repair_library (real temp tree, repair_file mocked) ----
def test_repair_library_scans_only_source_dirs(tmp_path, monkeypatch):
root = tmp_path