Merge feat/repair-fast-meta: faster --repair via player_skip=js
This commit is contained in:
15
musicfetch
15
musicfetch
@@ -767,18 +767,21 @@ def download_single(url: str, root: str, quality: str, dry_run: bool) -> dict:
|
|||||||
return {"title": title, "artist": artist, "ok": ok}
|
return {"title": title, "artist": artist, "ok": ok}
|
||||||
|
|
||||||
|
|
||||||
def run_yt_dlp_get_metadata(url: str) -> Optional[dict]:
|
def run_yt_dlp_get_metadata(url: str, extra_args=None) -> Optional[dict]:
|
||||||
|
cmd = ["yt-dlp", "-j", "--no-playlist", *(extra_args or []), url]
|
||||||
try:
|
try:
|
||||||
result = subprocess.run(
|
result = subprocess.run(cmd, capture_output=True, text=True, check=True)
|
||||||
["yt-dlp", "-j", "--no-playlist", url],
|
|
||||||
capture_output=True, text=True, check=True,
|
|
||||||
)
|
|
||||||
return json.loads(result.stdout)
|
return json.loads(result.stdout)
|
||||||
except (subprocess.CalledProcessError, json.JSONDecodeError) as e:
|
except (subprocess.CalledProcessError, json.JSONDecodeError) as e:
|
||||||
err(f"yt-dlp metadata extraction failed: {e}")
|
err(f"yt-dlp metadata extraction failed: {e}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
# Repair only reads tags — skip YouTube's slow/throttled JS signature step
|
||||||
|
# (we never download here), which keeps metadata but is far faster per file.
|
||||||
|
_REPAIR_META_ARGS = ["--extractor-args", "youtube:player_skip=js"]
|
||||||
|
|
||||||
|
|
||||||
def get_artist_from_metadata(meta: dict) -> str:
|
def get_artist_from_metadata(meta: dict) -> str:
|
||||||
for key in ("artist", "creator", "uploader", "channel"):
|
for key in ("artist", "creator", "uploader", "channel"):
|
||||||
if meta.get(key):
|
if meta.get(key):
|
||||||
@@ -893,7 +896,7 @@ def repair_file(path: str, source: str, dry_run: bool) -> list[str]:
|
|||||||
if not url:
|
if not url:
|
||||||
dbg(f"skip (source '{source}' not re-queryable): {path}")
|
dbg(f"skip (source '{source}' not re-queryable): {path}")
|
||||||
return []
|
return []
|
||||||
meta = run_yt_dlp_get_metadata(url)
|
meta = run_yt_dlp_get_metadata(url, _REPAIR_META_ARGS)
|
||||||
if not meta:
|
if not meta:
|
||||||
dbg(f"skip (no metadata): {path}")
|
dbg(f"skip (no metadata): {path}")
|
||||||
return []
|
return []
|
||||||
|
|||||||
@@ -63,7 +63,7 @@ class _FakeAudio(dict):
|
|||||||
|
|
||||||
def test_repair_file_fixes_album_and_year(monkeypatch):
|
def test_repair_file_fixes_album_and_year(monkeypatch):
|
||||||
monkeypatch.setattr(mf, "run_yt_dlp_get_metadata",
|
monkeypatch.setattr(mf, "run_yt_dlp_get_metadata",
|
||||||
lambda url: {"artist": "Daft Punk", "title": "Aerodynamic",
|
lambda url, *a: {"artist": "Daft Punk", "title": "Aerodynamic",
|
||||||
"album": "Discovery", "release_year": 2001})
|
"album": "Discovery", "release_year": 2001})
|
||||||
audio = _FakeAudio({"artist": ["Daft Punk"], "title": ["Aerodynamic"]}) # album/date missing
|
audio = _FakeAudio({"artist": ["Daft Punk"], "title": ["Aerodynamic"]}) # album/date missing
|
||||||
monkeypatch.setattr(mf, "_open_audio", lambda path: (audio, None))
|
monkeypatch.setattr(mf, "_open_audio", lambda path: (audio, None))
|
||||||
@@ -76,7 +76,7 @@ def test_repair_file_fixes_album_and_year(monkeypatch):
|
|||||||
|
|
||||||
def test_repair_file_dry_run_writes_nothing(monkeypatch):
|
def test_repair_file_dry_run_writes_nothing(monkeypatch):
|
||||||
monkeypatch.setattr(mf, "run_yt_dlp_get_metadata",
|
monkeypatch.setattr(mf, "run_yt_dlp_get_metadata",
|
||||||
lambda url: {"artist": "A", "title": "T", "album": "Alb", "release_year": 2020})
|
lambda url, *a: {"artist": "A", "title": "T", "album": "Alb", "release_year": 2020})
|
||||||
audio = _FakeAudio({})
|
audio = _FakeAudio({})
|
||||||
monkeypatch.setattr(mf, "_open_audio", lambda path: (audio, None))
|
monkeypatch.setattr(mf, "_open_audio", lambda path: (audio, None))
|
||||||
changed = mf.repair_file(f"X/youtube/T [{YT_ID}].opus", "youtube", dry_run=True)
|
changed = mf.repair_file(f"X/youtube/T [{YT_ID}].opus", "youtube", dry_run=True)
|
||||||
@@ -88,7 +88,7 @@ def test_repair_file_dry_run_writes_nothing(monkeypatch):
|
|||||||
def test_repair_file_skips_music_video(monkeypatch):
|
def test_repair_file_skips_music_video(monkeypatch):
|
||||||
# No album AND no valid release year -> treat as a video, leave tags alone.
|
# No album AND no valid release year -> treat as a video, leave tags alone.
|
||||||
monkeypatch.setattr(mf, "run_yt_dlp_get_metadata",
|
monkeypatch.setattr(mf, "run_yt_dlp_get_metadata",
|
||||||
lambda url: {"title": "Artist - Song (Official Music Video)",
|
lambda url, *a: {"title": "Artist - Song (Official Music Video)",
|
||||||
"uploader": "SomeVEVO", "upload_date": "20110101"})
|
"uploader": "SomeVEVO", "upload_date": "20110101"})
|
||||||
audio = _FakeAudio({"artist": ["Real Artist"], "title": ["Song"]})
|
audio = _FakeAudio({"artist": ["Real Artist"], "title": ["Song"]})
|
||||||
monkeypatch.setattr(mf, "_open_audio", lambda path: (audio, None))
|
monkeypatch.setattr(mf, "_open_audio", lambda path: (audio, None))
|
||||||
@@ -100,7 +100,7 @@ def test_repair_file_skips_music_video(monkeypatch):
|
|||||||
def test_repair_file_fills_missing_but_never_clobbers(monkeypatch):
|
def test_repair_file_fills_missing_but_never_clobbers(monkeypatch):
|
||||||
# Source artist is a channel name; existing artist must be kept.
|
# Source artist is a channel name; existing artist must be kept.
|
||||||
monkeypatch.setattr(mf, "run_yt_dlp_get_metadata",
|
monkeypatch.setattr(mf, "run_yt_dlp_get_metadata",
|
||||||
lambda url: {"artist": "SomeChannelVEVO", "title": "Channel Decorated Title",
|
lambda url, *a: {"artist": "SomeChannelVEVO", "title": "Channel Decorated Title",
|
||||||
"album": "Real Album", "release_year": 2019})
|
"album": "Real Album", "release_year": 2019})
|
||||||
audio = _FakeAudio({"artist": ["Correct Artist"], "title": ["Clean Title"]})
|
audio = _FakeAudio({"artist": ["Correct Artist"], "title": ["Clean Title"]})
|
||||||
monkeypatch.setattr(mf, "_open_audio", lambda path: (audio, None))
|
monkeypatch.setattr(mf, "_open_audio", lambda path: (audio, None))
|
||||||
@@ -112,7 +112,7 @@ def test_repair_file_fills_missing_but_never_clobbers(monkeypatch):
|
|||||||
|
|
||||||
def test_repair_file_fills_missing_artist_when_absent(monkeypatch):
|
def test_repair_file_fills_missing_artist_when_absent(monkeypatch):
|
||||||
monkeypatch.setattr(mf, "run_yt_dlp_get_metadata",
|
monkeypatch.setattr(mf, "run_yt_dlp_get_metadata",
|
||||||
lambda url: {"artist": "Real Artist", "title": "T",
|
lambda url, *a: {"artist": "Real Artist", "title": "T",
|
||||||
"album": "Alb", "release_year": 2020})
|
"album": "Alb", "release_year": 2020})
|
||||||
audio = _FakeAudio({}) # nothing present -> fill artist + title too
|
audio = _FakeAudio({}) # nothing present -> fill artist + title too
|
||||||
monkeypatch.setattr(mf, "_open_audio", lambda path: (audio, None))
|
monkeypatch.setattr(mf, "_open_audio", lambda path: (audio, None))
|
||||||
@@ -123,7 +123,7 @@ def test_repair_file_fills_missing_artist_when_absent(monkeypatch):
|
|||||||
def test_repair_file_skips_bad_id(monkeypatch):
|
def test_repair_file_skips_bad_id(monkeypatch):
|
||||||
called = {"meta": False}
|
called = {"meta": False}
|
||||||
monkeypatch.setattr(mf, "run_yt_dlp_get_metadata",
|
monkeypatch.setattr(mf, "run_yt_dlp_get_metadata",
|
||||||
lambda url: called.update(meta=True) or {})
|
lambda url, *a: called.update(meta=True) or {})
|
||||||
# last bracket is a descriptor, not a real id
|
# last bracket is a descriptor, not a real id
|
||||||
assert mf.repair_file("X/youtube/Song [Official Video].opus", "youtube", dry_run=False) == []
|
assert mf.repair_file("X/youtube/Song [Official Video].opus", "youtube", dry_run=False) == []
|
||||||
assert called["meta"] is False # never hit the network
|
assert called["meta"] is False # never hit the network
|
||||||
@@ -132,11 +132,33 @@ def test_repair_file_skips_bad_id(monkeypatch):
|
|||||||
def test_repair_file_skips_unparseable(monkeypatch):
|
def test_repair_file_skips_unparseable(monkeypatch):
|
||||||
called = {"meta": False}
|
called = {"meta": False}
|
||||||
monkeypatch.setattr(mf, "run_yt_dlp_get_metadata",
|
monkeypatch.setattr(mf, "run_yt_dlp_get_metadata",
|
||||||
lambda url: called.update(meta=True) or {})
|
lambda url, *a: called.update(meta=True) or {})
|
||||||
assert mf.repair_file("X/youtube/no-id.opus", "youtube", dry_run=False) == []
|
assert mf.repair_file("X/youtube/no-id.opus", "youtube", dry_run=False) == []
|
||||||
assert called["meta"] is False
|
assert called["meta"] is False
|
||||||
|
|
||||||
|
|
||||||
|
def test_run_yt_dlp_get_metadata_passes_extra_args(monkeypatch):
|
||||||
|
captured = {}
|
||||||
|
|
||||||
|
class _R:
|
||||||
|
stdout = '{"title": "x"}'
|
||||||
|
monkeypatch.setattr(mf.subprocess, "run", lambda cmd, **k: captured.update(cmd=cmd) or _R())
|
||||||
|
mf.run_yt_dlp_get_metadata("http://u", ["--extractor-args", "youtube:player_skip=js"])
|
||||||
|
assert "youtube:player_skip=js" in captured["cmd"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_repair_uses_player_skip_fast_args(monkeypatch):
|
||||||
|
captured = {}
|
||||||
|
|
||||||
|
def fake_meta(url, extra_args=None):
|
||||||
|
captured["extra"] = extra_args
|
||||||
|
return {"album": "A", "release_year": 2020, "artist": "X", "title": "T"}
|
||||||
|
monkeypatch.setattr(mf, "run_yt_dlp_get_metadata", fake_meta)
|
||||||
|
monkeypatch.setattr(mf, "_open_audio", lambda p: (_FakeAudio({}), None))
|
||||||
|
mf.repair_file(f"X/youtube/T [{YT_ID}].opus", "youtube", dry_run=True)
|
||||||
|
assert captured["extra"] == mf._REPAIR_META_ARGS
|
||||||
|
|
||||||
|
|
||||||
# ---- repair_library (real temp tree, repair_file mocked) ----
|
# ---- repair_library (real temp tree, repair_file mocked) ----
|
||||||
def test_repair_library_scans_only_source_dirs(tmp_path, monkeypatch):
|
def test_repair_library_scans_only_source_dirs(tmp_path, monkeypatch):
|
||||||
root = tmp_path
|
root = tmp_path
|
||||||
|
|||||||
Reference in New Issue
Block a user