Merge feat/repair-fast-meta: faster --repair via player_skip=js

This commit is contained in:
2026-06-10 22:52:39 -07:00
2 changed files with 38 additions and 13 deletions

View File

@@ -767,18 +767,21 @@ def download_single(url: str, root: str, quality: str, dry_run: bool) -> dict:
return {"title": title, "artist": artist, "ok": ok} return {"title": title, "artist": artist, "ok": ok}
def run_yt_dlp_get_metadata(url: str, extra_args=None) -> Optional[dict]:
    """Run ``yt-dlp -j --no-playlist`` on *url* and return the parsed JSON.

    Args:
        url: URL handed to yt-dlp as the final command-line argument.
        extra_args: Optional list of additional yt-dlp command-line
            arguments, inserted just before the URL. ``None`` (the default)
            adds nothing.

    Returns:
        The object decoded from yt-dlp's stdout, or ``None`` when yt-dlp
        exits with a non-zero status or its stdout is not valid JSON. In
        the failure case the problem is reported through ``err``.
    """
    cmd = ["yt-dlp", "-j", "--no-playlist", *(extra_args or []), url]
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
        return json.loads(result.stdout)
    except (subprocess.CalledProcessError, json.JSONDecodeError) as e:
        # capture_output=True swallows yt-dlp's own diagnostics, and
        # str(CalledProcessError) only carries the exit status, so surface
        # the captured stderr explicitly. JSONDecodeError has no stderr.
        stderr = getattr(e, "stderr", None)
        detail = ""
        if isinstance(stderr, str) and stderr.strip():
            detail = f"\n{stderr.strip()}"
        err(f"yt-dlp metadata extraction failed: {e}{detail}")
        return None
# Extra yt-dlp arguments used when --repair re-queries metadata.
# Repair only reads tags and never downloads media, so it skips YouTube's
# slow/throttled JS signature step; metadata is still returned, and each
# lookup is far faster per file.
_REPAIR_META_ARGS = ["--extractor-args", "youtube:player_skip=js"]
def get_artist_from_metadata(meta: dict) -> str: def get_artist_from_metadata(meta: dict) -> str:
for key in ("artist", "creator", "uploader", "channel"): for key in ("artist", "creator", "uploader", "channel"):
if meta.get(key): if meta.get(key):
@@ -893,7 +896,7 @@ def repair_file(path: str, source: str, dry_run: bool) -> list[str]:
if not url: if not url:
dbg(f"skip (source '{source}' not re-queryable): {path}") dbg(f"skip (source '{source}' not re-queryable): {path}")
return [] return []
meta = run_yt_dlp_get_metadata(url) meta = run_yt_dlp_get_metadata(url, _REPAIR_META_ARGS)
if not meta: if not meta:
dbg(f"skip (no metadata): {path}") dbg(f"skip (no metadata): {path}")
return [] return []

View File

@@ -63,7 +63,7 @@ class _FakeAudio(dict):
def test_repair_file_fixes_album_and_year(monkeypatch): def test_repair_file_fixes_album_and_year(monkeypatch):
monkeypatch.setattr(mf, "run_yt_dlp_get_metadata", monkeypatch.setattr(mf, "run_yt_dlp_get_metadata",
lambda url: {"artist": "Daft Punk", "title": "Aerodynamic", lambda url, *a: {"artist": "Daft Punk", "title": "Aerodynamic",
"album": "Discovery", "release_year": 2001}) "album": "Discovery", "release_year": 2001})
audio = _FakeAudio({"artist": ["Daft Punk"], "title": ["Aerodynamic"]}) # album/date missing audio = _FakeAudio({"artist": ["Daft Punk"], "title": ["Aerodynamic"]}) # album/date missing
monkeypatch.setattr(mf, "_open_audio", lambda path: (audio, None)) monkeypatch.setattr(mf, "_open_audio", lambda path: (audio, None))
@@ -76,7 +76,7 @@ def test_repair_file_fixes_album_and_year(monkeypatch):
def test_repair_file_dry_run_writes_nothing(monkeypatch): def test_repair_file_dry_run_writes_nothing(monkeypatch):
monkeypatch.setattr(mf, "run_yt_dlp_get_metadata", monkeypatch.setattr(mf, "run_yt_dlp_get_metadata",
lambda url: {"artist": "A", "title": "T", "album": "Alb", "release_year": 2020}) lambda url, *a: {"artist": "A", "title": "T", "album": "Alb", "release_year": 2020})
audio = _FakeAudio({}) audio = _FakeAudio({})
monkeypatch.setattr(mf, "_open_audio", lambda path: (audio, None)) monkeypatch.setattr(mf, "_open_audio", lambda path: (audio, None))
changed = mf.repair_file(f"X/youtube/T [{YT_ID}].opus", "youtube", dry_run=True) changed = mf.repair_file(f"X/youtube/T [{YT_ID}].opus", "youtube", dry_run=True)
@@ -88,7 +88,7 @@ def test_repair_file_dry_run_writes_nothing(monkeypatch):
def test_repair_file_skips_music_video(monkeypatch): def test_repair_file_skips_music_video(monkeypatch):
# No album AND no valid release year -> treat as a video, leave tags alone. # No album AND no valid release year -> treat as a video, leave tags alone.
monkeypatch.setattr(mf, "run_yt_dlp_get_metadata", monkeypatch.setattr(mf, "run_yt_dlp_get_metadata",
lambda url: {"title": "Artist - Song (Official Music Video)", lambda url, *a: {"title": "Artist - Song (Official Music Video)",
"uploader": "SomeVEVO", "upload_date": "20110101"}) "uploader": "SomeVEVO", "upload_date": "20110101"})
audio = _FakeAudio({"artist": ["Real Artist"], "title": ["Song"]}) audio = _FakeAudio({"artist": ["Real Artist"], "title": ["Song"]})
monkeypatch.setattr(mf, "_open_audio", lambda path: (audio, None)) monkeypatch.setattr(mf, "_open_audio", lambda path: (audio, None))
@@ -100,7 +100,7 @@ def test_repair_file_skips_music_video(monkeypatch):
def test_repair_file_fills_missing_but_never_clobbers(monkeypatch): def test_repair_file_fills_missing_but_never_clobbers(monkeypatch):
# Source artist is a channel name; existing artist must be kept. # Source artist is a channel name; existing artist must be kept.
monkeypatch.setattr(mf, "run_yt_dlp_get_metadata", monkeypatch.setattr(mf, "run_yt_dlp_get_metadata",
lambda url: {"artist": "SomeChannelVEVO", "title": "Channel Decorated Title", lambda url, *a: {"artist": "SomeChannelVEVO", "title": "Channel Decorated Title",
"album": "Real Album", "release_year": 2019}) "album": "Real Album", "release_year": 2019})
audio = _FakeAudio({"artist": ["Correct Artist"], "title": ["Clean Title"]}) audio = _FakeAudio({"artist": ["Correct Artist"], "title": ["Clean Title"]})
monkeypatch.setattr(mf, "_open_audio", lambda path: (audio, None)) monkeypatch.setattr(mf, "_open_audio", lambda path: (audio, None))
@@ -112,7 +112,7 @@ def test_repair_file_fills_missing_but_never_clobbers(monkeypatch):
def test_repair_file_fills_missing_artist_when_absent(monkeypatch): def test_repair_file_fills_missing_artist_when_absent(monkeypatch):
monkeypatch.setattr(mf, "run_yt_dlp_get_metadata", monkeypatch.setattr(mf, "run_yt_dlp_get_metadata",
lambda url: {"artist": "Real Artist", "title": "T", lambda url, *a: {"artist": "Real Artist", "title": "T",
"album": "Alb", "release_year": 2020}) "album": "Alb", "release_year": 2020})
audio = _FakeAudio({}) # nothing present -> fill artist + title too audio = _FakeAudio({}) # nothing present -> fill artist + title too
monkeypatch.setattr(mf, "_open_audio", lambda path: (audio, None)) monkeypatch.setattr(mf, "_open_audio", lambda path: (audio, None))
@@ -123,7 +123,7 @@ def test_repair_file_fills_missing_artist_when_absent(monkeypatch):
def test_repair_file_skips_bad_id(monkeypatch): def test_repair_file_skips_bad_id(monkeypatch):
called = {"meta": False} called = {"meta": False}
monkeypatch.setattr(mf, "run_yt_dlp_get_metadata", monkeypatch.setattr(mf, "run_yt_dlp_get_metadata",
lambda url: called.update(meta=True) or {}) lambda url, *a: called.update(meta=True) or {})
# last bracket is a descriptor, not a real id # last bracket is a descriptor, not a real id
assert mf.repair_file("X/youtube/Song [Official Video].opus", "youtube", dry_run=False) == [] assert mf.repair_file("X/youtube/Song [Official Video].opus", "youtube", dry_run=False) == []
assert called["meta"] is False # never hit the network assert called["meta"] is False # never hit the network
@@ -132,11 +132,33 @@ def test_repair_file_skips_bad_id(monkeypatch):
def test_repair_file_skips_unparseable(monkeypatch): def test_repair_file_skips_unparseable(monkeypatch):
called = {"meta": False} called = {"meta": False}
monkeypatch.setattr(mf, "run_yt_dlp_get_metadata", monkeypatch.setattr(mf, "run_yt_dlp_get_metadata",
lambda url: called.update(meta=True) or {}) lambda url, *a: called.update(meta=True) or {})
assert mf.repair_file("X/youtube/no-id.opus", "youtube", dry_run=False) == [] assert mf.repair_file("X/youtube/no-id.opus", "youtube", dry_run=False) == []
assert called["meta"] is False assert called["meta"] is False
def test_run_yt_dlp_get_metadata_passes_extra_args(monkeypatch):
    """extra_args given to run_yt_dlp_get_metadata must reach the command."""
    seen = {}

    class _Completed:
        # Minimal stand-in for the subprocess result: only stdout is read.
        stdout = '{"title": "x"}'

    def fake_run(cmd, **kwargs):
        # Record the argv instead of spawning yt-dlp.
        seen["cmd"] = cmd
        return _Completed()

    monkeypatch.setattr(mf.subprocess, "run", fake_run)

    fast_args = ["--extractor-args", "youtube:player_skip=js"]
    mf.run_yt_dlp_get_metadata("http://u", fast_args)

    assert "youtube:player_skip=js" in seen["cmd"]
def test_repair_uses_player_skip_fast_args(monkeypatch):
    """repair_file must fetch metadata with the module's fast-path args."""
    seen = {}

    def fake_meta(url, extra_args=None):
        # Capture what repair_file passes as extra yt-dlp arguments.
        seen["extra"] = extra_args
        return {"album": "A", "release_year": 2020, "artist": "X", "title": "T"}

    def fake_open(p):
        # Fresh empty tag container on every call; second slot is None.
        return _FakeAudio({}), None

    monkeypatch.setattr(mf, "run_yt_dlp_get_metadata", fake_meta)
    monkeypatch.setattr(mf, "_open_audio", fake_open)

    target = f"X/youtube/T [{YT_ID}].opus"
    mf.repair_file(target, "youtube", dry_run=True)

    assert seen["extra"] == mf._REPAIR_META_ARGS
# ---- repair_library (real temp tree, repair_file mocked) ---- # ---- repair_library (real temp tree, repair_file mocked) ----
def test_repair_library_scans_only_source_dirs(tmp_path, monkeypatch): def test_repair_library_scans_only_source_dirs(tmp_path, monkeypatch):
root = tmp_path root = tmp_path