Each repaired file is an independent yt-dlp metadata round-trip, so repair is network-bound; run them concurrently via ThreadPoolExecutor. Adds --workers (default 8) to cap concurrency and a progress line every 100 files. At ~50k tracks this turns a ~day-long sequential run into hours. Lower --workers if YouTube rate-limits (429/403). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
370 lines
17 KiB
Python
370 lines
17 KiB
Python
import server.mf # noqa: F401 — loads musicfetch, registers musicfetch_core
|
|
import musicfetch_core as mf
|
|
|
|
# Sample video id reused by the tests in this module when building file names and URLs.
YT_ID = "dQw4w9WgXcQ" # valid 11-char YouTube id
|
|
|
|
|
|
# ---- _is_source_dir ----
|
|
def test_is_source_dir():
    """`_is_source_dir` is True for download-source folder names and False otherwise."""
    accepted = ("youtube", "soundcloud", "downloads")
    rejected = (
        "Discovery",  # Lidarr album folder
        "Random Access Memories",
        "",
    )
    for folder in accepted:
        assert mf._is_source_dir(folder) is True
    for folder in rejected:
        assert mf._is_source_dir(folder) is False
|
|
|
|
|
|
# ---- _parse_track_file ----
|
|
def test_parse_track_file():
    """`_parse_track_file` yields (title, id) for 'Title [id].ext' names, else None."""
    parse = mf._parse_track_file

    parsed_cases = {
        "Under My Skin [nGSNF2l44Zc].opus": ("Under My Skin", "nGSNF2l44Zc"),
        "Ignomon [2202690443].m4a": ("Ignomon", "2202690443"),
        # greedy title: real id is the LAST bracket
        "WHO GON' SLIDE [Official Music Video] [AxjP9s6J3uY].opus": (
            "WHO GON' SLIDE [Official Music Video]",
            "AxjP9s6J3uY",
        ),
    }
    for filename, expected in parsed_cases.items():
        assert parse(filename) == expected

    # Names without a trailing bracketed id are not parsed at all.
    for filename in ("no-id-here.opus", "cover.jpg"):
        assert parse(filename) is None
|
|
|
|
|
|
# ---- _repair_id_ok ----
|
|
def test_repair_id_ok():
    """`_repair_id_ok` accepts plausible ids per source and rejects everything else."""
    accepted = [
        ("youtube", YT_ID),
        ("soundcloud", "2202690443"),
    ]
    rejected = [
        ("youtube", "Official Video"),  # space, wrong length
        ("youtube", "Cover"),
        ("soundcloud", "abc"),
        ("bandcamp", "x"),
    ]
    for source, track_id in accepted:
        assert mf._repair_id_ok(source, track_id) is True
    for source, track_id in rejected:
        assert mf._repair_id_ok(source, track_id) is False
|
|
|
|
|
|
# ---- _valid_year ----
|
|
def test_valid_year():
    """`_valid_year` returns the release year as a string, or '' when none is usable."""
    cases = [
        ({"release_year": 2001}, "2001"),
        ({"release_date": "1976-09-10"}, "1976"),
        ({"upload_date": "20110101"}, ""),  # upload date ignored
        ({"release_year": 6577}, ""),  # out of range
        ({}, ""),
    ]
    for info, expected in cases:
        assert mf._valid_year(info) == expected
|
|
|
|
|
|
# ---- _repair_probe_url ----
|
|
def test_repair_probe_url():
    """`_repair_probe_url` builds the per-source lookup URL; unknown sources give None."""
    youtube_url = mf._repair_probe_url("youtube", YT_ID)
    assert youtube_url == f"https://music.youtube.com/watch?v={YT_ID}"

    soundcloud_url = mf._repair_probe_url("soundcloud", "123")
    assert soundcloud_url == "https://api.soundcloud.com/tracks/123"

    unsupported = mf._repair_probe_url("bandcamp", "x")
    assert unsupported is None
|
|
|
|
|
|
# ---- repair_file (fake audio + mocked metadata) ----
|
|
class _FakeAudio(dict):
|
|
def __init__(self, initial):
|
|
super().__init__(initial)
|
|
self.saved = False
|
|
|
|
def save(self):
|
|
self.saved = True
|
|
|
|
|
|
def test_repair_file_fixes_album_and_year(monkeypatch):
    """Missing album and date tags are filled from the stubbed metadata and saved."""
    source_meta = {
        "artist": "Daft Punk",
        "title": "Aerodynamic",
        "album": "Discovery",
        "release_year": 2001,
    }

    def fake_metadata(url, *extra):
        # Fresh dict per call, as the stubbed lookup would hand out.
        return dict(source_meta)

    tags = _FakeAudio({"artist": ["Daft Punk"], "title": ["Aerodynamic"]})  # album/date missing

    def fake_open(path):
        return tags, None

    monkeypatch.setattr(mf, "run_yt_dlp_get_metadata", fake_metadata)
    monkeypatch.setattr(mf, "_open_audio", fake_open)

    track_path = f"X/youtube/Aerodynamic [{YT_ID}].opus"
    result = mf.repair_file(track_path, "youtube", dry_run=False)

    assert set(result) == {"album=Discovery", "date=2001"}
    assert tags["album"] == ["Discovery"]
    assert tags["date"] == ["2001"]
    assert tags.saved is True
|
|
|
|
|
|
def test_repair_file_dry_run_writes_nothing(monkeypatch):
    """With dry_run=True changes are reported but the tag object stays empty and unsaved."""

    def fake_metadata(url, *extra):
        return {"artist": "A", "title": "T", "album": "Alb", "release_year": 2020}

    tags = _FakeAudio({})

    def fake_open(path):
        return tags, None

    monkeypatch.setattr(mf, "run_yt_dlp_get_metadata", fake_metadata)
    monkeypatch.setattr(mf, "_open_audio", fake_open)

    track_path = f"X/youtube/T [{YT_ID}].opus"
    result = mf.repair_file(track_path, "youtube", dry_run=True)

    assert result
    assert tags == {}
    assert tags.saved is False
|
|
|
|
|
|
def test_repair_file_skips_music_video(monkeypatch):
    """Metadata with no album and no release year leaves the existing tags alone."""
    # No album AND no valid release year -> treat as a video, leave tags alone.
    video_meta = {
        "title": "Artist - Song (Official Music Video)",
        "uploader": "SomeVEVO",
        "upload_date": "20110101",
    }

    def fake_metadata(url, *extra):
        return dict(video_meta)

    tags = _FakeAudio({"artist": ["Real Artist"], "title": ["Song"]})

    def fake_open(path):
        return tags, None

    monkeypatch.setattr(mf, "run_yt_dlp_get_metadata", fake_metadata)
    monkeypatch.setattr(mf, "_open_audio", fake_open)

    result = mf.repair_file(f"X/youtube/Song [{YT_ID}].opus", "youtube", dry_run=False)

    assert result == []
    assert tags == {"artist": ["Real Artist"], "title": ["Song"]}  # untouched
|
|
|
|
|
|
def test_repair_file_fills_missing_but_never_clobbers(monkeypatch):
    """Album/date are added while already-present artist and title tags are kept."""
    # Source artist is a channel name; existing artist must be kept.
    channel_meta = {
        "artist": "SomeChannelVEVO",
        "title": "Channel Decorated Title",
        "album": "Real Album",
        "release_year": 2019,
    }

    def fake_metadata(url, *extra):
        return dict(channel_meta)

    tags = _FakeAudio({"artist": ["Correct Artist"], "title": ["Clean Title"]})

    def fake_open(path):
        return tags, None

    monkeypatch.setattr(mf, "run_yt_dlp_get_metadata", fake_metadata)
    monkeypatch.setattr(mf, "_open_audio", fake_open)

    result = mf.repair_file(f"X/youtube/x [{YT_ID}].opus", "youtube", dry_run=False)

    assert set(result) == {"album=Real Album", "date=2019"}
    assert tags["artist"] == ["Correct Artist"]  # NOT overwritten with channel
    assert tags["title"] == ["Clean Title"]  # NOT overwritten with decorated title
|
|
|
|
|
|
def test_repair_file_fills_missing_artist_when_absent(monkeypatch):
    """Starting from no tags at all, artist and title are reported alongside album/date."""

    def fake_metadata(url, *extra):
        return {"artist": "Real Artist", "title": "T", "album": "Alb", "release_year": 2020}

    tags = _FakeAudio({})  # nothing present -> fill artist + title too

    def fake_open(path):
        return tags, None

    monkeypatch.setattr(mf, "run_yt_dlp_get_metadata", fake_metadata)
    monkeypatch.setattr(mf, "_open_audio", fake_open)

    result = mf.repair_file(f"X/youtube/T [{YT_ID}].opus", "youtube", dry_run=False)

    expected = {"album=Alb", "date=2020", "artist=Real Artist", "title=T"}
    assert set(result) == expected
|
|
|
|
|
|
def test_repair_file_skips_bad_id(monkeypatch):
    """A bracketed descriptor that is not a real id yields [] without a metadata lookup."""
    lookups = {"meta": False}

    def fake_metadata(url, *extra):
        lookups["meta"] = True
        return {}

    monkeypatch.setattr(mf, "run_yt_dlp_get_metadata", fake_metadata)

    # last bracket is a descriptor, not a real id
    result = mf.repair_file("X/youtube/Song [Official Video].opus", "youtube", dry_run=False)

    assert result == []
    assert lookups["meta"] is False  # never hit the network
|
|
|
|
|
|
def test_repair_file_skips_unparseable(monkeypatch):
    """A file name with no bracketed id yields [] without a metadata lookup."""
    lookups = {"meta": False}

    def fake_metadata(url, *extra):
        lookups["meta"] = True
        return {}

    monkeypatch.setattr(mf, "run_yt_dlp_get_metadata", fake_metadata)

    result = mf.repair_file("X/youtube/no-id.opus", "youtube", dry_run=False)

    assert result == []
    assert lookups["meta"] is False
|
|
|
|
|
|
def test_run_yt_dlp_get_metadata_passes_extra_args(monkeypatch):
    """Extra arguments given to `run_yt_dlp_get_metadata` show up in the subprocess command."""
    recorded = {}

    class _Completed:
        # Minimal result object: only a JSON stdout payload is provided.
        stdout = '{"title": "x"}'

    def fake_run(cmd, **k):
        recorded["cmd"] = cmd
        return _Completed()

    monkeypatch.setattr(mf.subprocess, "run", fake_run)

    extra = ["--extractor-args", "youtube:player_skip=js"]
    mf.run_yt_dlp_get_metadata("http://u", extra)

    assert "youtube:player_skip=js" in recorded["cmd"]
|
|
|
|
|
|
def test_repair_uses_player_skip_fast_args(monkeypatch):
    """`repair_file` hands `_REPAIR_META_ARGS` to the metadata lookup as extra args."""
    recorded = {}

    def fake_meta(url, extra_args=None):
        recorded["extra"] = extra_args
        return {"album": "A", "release_year": 2020, "artist": "X", "title": "T"}

    def fake_open(p):
        # A brand-new empty tag object on every open.
        return _FakeAudio({}), None

    monkeypatch.setattr(mf, "run_yt_dlp_get_metadata", fake_meta)
    monkeypatch.setattr(mf, "_open_audio", fake_open)

    mf.repair_file(f"X/youtube/T [{YT_ID}].opus", "youtube", dry_run=True)

    assert recorded["extra"] == mf._REPAIR_META_ARGS
|
|
|
|
|
|
# ---- repair_library (real temp tree, repair_file mocked) ----
|
|
def test_repair_library_scans_only_source_dirs(tmp_path, monkeypatch):
    """`repair_library` visits files under source folders and skips the album folder."""
    layout = [
        ("Daft Punk", "youtube", f"Aerodynamic [{YT_ID}].opus"),
        ("Daft Punk", "Discovery", "Aerodynamic.flac"),  # Lidarr album -> skip
        ("Ephixa", "soundcloud", "Ignomon [123].m4a"),
    ]
    for artist, folder, filename in layout:
        directory = tmp_path / artist / folder
        directory.mkdir(parents=True)
        (directory / filename).write_text("x")

    calls = []

    def fake_repair(path, source, dry_run):
        calls.append((source, path))
        return ["album=X"]

    monkeypatch.setattr(mf, "repair_file", fake_repair)

    scanned, changed = mf.repair_library(str(tmp_path), dry_run=False)

    assert (scanned, changed) == (2, 2)
    sources_seen = sorted(source for source, _ in calls)
    assert sources_seen == ["soundcloud", "youtube"]  # album folder skipped
|
|
|
|
|
|
def test_repair_library_missing_root():
    """A root directory that does not exist produces the (0, 0) result."""
    outcome = mf.repair_library("/no/such/dir", dry_run=False)
    assert outcome == (0, 0)
|
|
|
|
|
|
def test_repair_library_exclude_skips_folders(tmp_path, monkeypatch):
    """Folders named in `exclude` are not scanned, at artist level or source level."""
    layout = [
        ("Daft Punk", "youtube", f"A [{YT_ID}].opus"),
        ("Unsorted", "youtube", f"B [{YT_ID}].opus"),  # excluded artist folder
        ("Ephixa", "playlists", f"C [{YT_ID}].opus"),  # excluded source folder
    ]
    for artist, folder, filename in layout:
        directory = tmp_path / artist / folder
        directory.mkdir(parents=True)
        (directory / filename).write_text("x")

    visited_paths = []

    def fake_repair(path, source, dry_run):
        visited_paths.append(path)
        return ["x"]

    monkeypatch.setattr(mf, "repair_file", fake_repair)

    scanned, _ = mf.repair_library(
        str(tmp_path), dry_run=False, exclude=["unsorted", "playlists"]
    )

    assert scanned == 1
    assert visited_paths and "Daft Punk" in visited_paths[0]
|
|
|
|
|
|
# ---- offline retag-from-path ----
|
|
def test_title_from_filename():
    """`_title_from_filename` drops the extension and the trailing bracketed id."""
    cases = [
        (f"Song [{YT_ID}].opus", "Song"),
        (
            "STARDUST (Official Music Video) [3nsYNXtALhA].opus",
            "STARDUST (Official Music Video)",
        ),
        ("no brackets.mp3", "no brackets"),
    ]
    for filename, expected in cases:
        assert mf._title_from_filename(filename) == expected
|
|
|
|
|
|
def test_strip_decorations():
    """`_strip_decorations` removes video/lyrics markers but keeps a feature credit."""
    cases = [
        ("STARDUST (Official Music Video)", "STARDUST"),
        ("Away From You (Lyrics)", "Away From You"),
        ("More Than a Feeling (Official HD Video)", "More Than a Feeling"),
        # real info like a feature credit is kept
        (
            "WHO GON' SLIDE (Feat. Shakewell) [Official Music Video]",
            "WHO GON' SLIDE (Feat. Shakewell)",
        ),
    ]
    for decorated, expected in cases:
        assert mf._strip_decorations(decorated) == expected
|
|
|
|
|
|
def test_derive_from_filename():
    """`_derive_from_filename` returns (artist, title) from a file name plus its folder."""
    derive = mf._derive_from_filename

    # plain title -> folder is the artist
    plain = derive(f"Aerodynamic [{YT_ID}].opus", "Daft Punk")
    assert plain == ("Daft Punk", "Aerodynamic")

    # decorated music video filed under the artist
    decorated = derive("STARDUST (Official Music Video) [3nsYNXtALhA].opus", "1nonly")
    assert decorated == ("1nonly", "STARDUST")

    # 'Artist - Title' name wins over a channel folder
    dashed = derive(
        "BLCKLGHT - Away From You (Lyrics) [QapF4b1jYw8].opus", "7clouds Techno"
    )
    assert dashed == ("BLCKLGHT", "Away From You")
|
|
|
|
|
|
def test_retag_file_from_path_fixes_clobbered_tags(monkeypatch):
    """Channel-name artist and decorated title tags are rewritten from the file path."""
    tags = _FakeAudio(
        {"artist": ["7clouds Techno"], "title": ["BLCKLGHT - Away From You (Lyrics)"]}
    )

    def fake_open(path):
        return tags, None

    monkeypatch.setattr(mf, "_open_audio", fake_open)

    track_path = "X/7clouds Techno/youtube/BLCKLGHT - Away From You (Lyrics) [QapF4b1jYw8].opus"
    result = mf.retag_file_from_path(track_path, "7clouds Techno", dry_run=False)

    assert set(result) == {"artist=BLCKLGHT", "title=Away From You"}
    assert tags["artist"] == ["BLCKLGHT"]
    assert tags["title"] == ["Away From You"]
    assert tags.saved is True
|
|
|
|
|
|
def test_retag_file_from_path_dry_run(monkeypatch):
    """With dry_run=True changes are reported but the tags are neither edited nor saved."""
    original_tags = {"artist": ["wrong"], "title": ["wrong"]}
    tags = _FakeAudio({"artist": ["wrong"], "title": ["wrong"]})

    def fake_open(path):
        return tags, None

    monkeypatch.setattr(mf, "_open_audio", fake_open)

    track_path = f"X/Daft Punk/youtube/Aerodynamic [{YT_ID}].opus"
    result = mf.retag_file_from_path(track_path, "Daft Punk", dry_run=True)

    assert result
    assert tags == original_tags
    assert tags.saved is False
|
|
|
|
|
|
def test_retag_library_walks_source_files(tmp_path, monkeypatch):
    """`retag_library_from_path` retags the source-folder file and skips the album folder."""
    source_dir = tmp_path / "Daft Punk" / "youtube"
    source_dir.mkdir(parents=True)
    (source_dir / f"Aerodynamic [{YT_ID}].opus").write_text("x")

    album_dir = tmp_path / "Daft Punk" / "Discovery"  # album folder -> skip
    album_dir.mkdir(parents=True)
    (album_dir / "x.flac").write_text("x")

    artists_seen = []

    def fake_retag(path, artist, dry_run):
        artists_seen.append(artist)
        return ["artist=x"]

    monkeypatch.setattr(mf, "retag_file_from_path", fake_retag)

    scanned, changed = mf.retag_library_from_path(str(tmp_path), dry_run=False)

    assert (scanned, changed) == (1, 1)
    assert artists_seen == ["Daft Punk"]
|
|
|
|
|
|
# ---- bogus-tag recovery (old-code NA / Unknown breakage) ----
|
|
def test_is_bogus():
    """`_is_bogus` flags empty / NA / Unknown-style values and passes real names."""
    placeholders = (
        "",
        "NA",
        "na",
        "N/A",
        "Unknown",
        "Unknown Album",
        "unknown artist",
        " NA ",
    )
    real_values = ("Cochise", "Solid", "Brother Stoon", "Discovery")

    for value in placeholders:
        assert mf._is_bogus(value) is True, value
    for value in real_values:
        assert mf._is_bogus(value) is False, value
|
|
|
|
|
|
def test_repair_file_overwrites_bogus_title(monkeypatch):
    """An existing 'NA' title tag is replaced by the title from the stubbed metadata."""
    source_meta = {
        "artist": "Audioslave",
        "title": "Cochise",
        "album": "Audioslave",
        "release_year": 2002,
    }

    def fake_metadata(url, *extra):
        return dict(source_meta)

    tags = _FakeAudio({"artist": ["Audioslave"], "title": ["NA"]})  # bogus title

    def fake_open(path):
        return tags, None

    monkeypatch.setattr(mf, "run_yt_dlp_get_metadata", fake_metadata)
    monkeypatch.setattr(mf, "_open_audio", fake_open)

    track_path = f"X/youtube/Brother Stoon [{YT_ID}].opus"
    result = mf.repair_file(track_path, "youtube", dry_run=False)

    assert "title=Cochise" in result
    assert tags["title"] == ["Cochise"]
|
|
|
|
|
|
def test_repair_file_overwrites_bogus_artist(monkeypatch):
    """An 'NA' artist tag is replaced while the good existing title is left as is."""
    source_meta = {
        "artist": "Real Artist",
        "title": "Real Title",
        "album": "Alb",
        "release_year": 2020,
    }

    def fake_metadata(url, *extra):
        return dict(source_meta)

    tags = _FakeAudio({"artist": ["NA"], "title": ["Good Title"]})  # bogus artist, good title

    def fake_open(path):
        return tags, None

    monkeypatch.setattr(mf, "run_yt_dlp_get_metadata", fake_metadata)
    monkeypatch.setattr(mf, "_open_audio", fake_open)

    track_path = f"X/youtube/Good Title [{YT_ID}].opus"
    result = mf.repair_file(track_path, "youtube", dry_run=False)

    assert "artist=Real Artist" in result
    assert tags["artist"] == ["Real Artist"]
    assert tags["title"] == ["Good Title"]  # good title untouched
|
|
|
|
|
|
def test_repair_file_normalizes_na_album_when_source_has_none(monkeypatch):
    """A literal 'NA' album tag becomes 'Unknown Album' when the metadata has no album."""

    # Music video: no source album/year, but album tag is the literal 'NA' -> Unknown Album.
    def fake_metadata(url, *extra):
        return {"title": "Some Live Thing", "uploader": "Chan"}

    tags = _FakeAudio({"artist": ["X"], "title": ["Y"], "album": ["NA"]})

    def fake_open(path):
        return tags, None

    monkeypatch.setattr(mf, "run_yt_dlp_get_metadata", fake_metadata)
    monkeypatch.setattr(mf, "_open_audio", fake_open)

    result = mf.repair_file(f"X/youtube/Y [{YT_ID}].opus", "youtube", dry_run=False)

    assert "album=Unknown Album" in result
    assert tags["album"] == ["Unknown Album"]
|
|
|
|
|
|
def test_repair_file_renames_bogus_filename(tmp_path, monkeypatch):
    """A real 'NA [id].opus' file on disk ends up renamed after the recovered title."""
    source_dir = tmp_path / "Audioslave" / "youtube"
    source_dir.mkdir(parents=True)
    bogus_file = source_dir / f"NA [{YT_ID}].opus"
    bogus_file.write_text("x")

    source_meta = {
        "artist": "Audioslave",
        "title": "Cochise",
        "album": "Audioslave",
        "release_year": 2002,
    }

    def fake_metadata(url, *extra):
        return dict(source_meta)

    tags = _FakeAudio({"artist": ["Audioslave"], "title": ["NA"]})

    def fake_open(path):
        return tags, None

    monkeypatch.setattr(mf, "run_yt_dlp_get_metadata", fake_metadata)
    monkeypatch.setattr(mf, "_open_audio", fake_open)

    result = mf.repair_file(str(bogus_file), "youtube", dry_run=False)

    renamed_file = source_dir / f"Cochise [{YT_ID}].opus"
    assert renamed_file.exists()
    assert not bogus_file.exists()
    # Either an explicit rename entry or the title change counts as reported.
    assert any(
        "rename" in entry.lower() or entry.startswith("title=") for entry in result
    )
|
|
|
|
|
|
def test_repair_file_dry_run_does_not_rename(tmp_path, monkeypatch):
    """With dry_run=True the 'NA [id].opus' file stays put and no renamed file appears."""
    source_dir = tmp_path / "Audioslave" / "youtube"
    source_dir.mkdir(parents=True)
    bogus_file = source_dir / f"NA [{YT_ID}].opus"
    bogus_file.write_text("x")

    def fake_metadata(url, *extra):
        return {
            "artist": "Audioslave",
            "title": "Cochise",
            "album": "Audioslave",
            "release_year": 2002,
        }

    def fake_open(path):
        # A new tag object per open, carrying only the bogus title.
        return _FakeAudio({"title": ["NA"]}), None

    monkeypatch.setattr(mf, "run_yt_dlp_get_metadata", fake_metadata)
    monkeypatch.setattr(mf, "_open_audio", fake_open)

    mf.repair_file(str(bogus_file), "youtube", dry_run=True)

    assert bogus_file.exists()  # untouched in dry-run
    assert not (source_dir / f"Cochise [{YT_ID}].opus").exists()
|
|
|
|
|
|
def test_fs_safe_replaces_slash():
    """The result of `_fs_safe` contains no '/' character."""
    sanitized = mf._fs_safe("AC/DC Live")
    assert "/" not in sanitized
|
|
|
|
|
|
# ---- parallel repair ----
|
|
def test_repair_library_parallel_visits_all(tmp_path, monkeypatch):
    """With workers=8 every one of the 50 files is repaired and counted exactly once."""
    import threading

    total = 50
    for index in range(total):
        source_dir = tmp_path / f"Artist{index}" / "youtube"
        source_dir.mkdir(parents=True)
        (source_dir / f"T{index} [{YT_ID}].opus").write_text("x")

    visited_paths = set()
    guard = threading.Lock()

    def fake(path, source, dry_run):
        # The stub may be called from several threads; serialize the set update.
        with guard:
            visited_paths.add(path)
        return ["album=X"]

    monkeypatch.setattr(mf, "repair_file", fake)

    scanned, changed = mf.repair_library(str(tmp_path), dry_run=False, workers=8)

    assert (scanned, changed) == (total, total)
    assert len(visited_paths) == total
|
|
|
|
|
|
def test_repair_library_default_workers_still_works(tmp_path, monkeypatch):
    """`repair_library` called without `workers` still repairs a one-file library.

    Builds a single source-folder track, stubs `repair_file` to always report
    one change, and expects the (scanned, changed) result to be (1, 1).
    """
    source_dir = tmp_path / "A" / "youtube"
    source_dir.mkdir(parents=True)
    (source_dir / f"T [{YT_ID}].opus").write_text("x")

    # Parameter names match how repair_file is invoked in this file
    # (path, source, dry_run=...), so the stub works whether repair_library
    # passes dry_run positionally or by keyword.
    def fake_repair(path, source, dry_run):
        return ["x"]

    monkeypatch.setattr(mf, "repair_file", fake_repair)

    assert mf.repair_library(str(tmp_path), dry_run=False) == (1, 1)
|