Merge fix/repair-music-videos: conservative --repair + first-artist single folder
This commit is contained in:
14
README.md
14
README.md
@@ -125,10 +125,16 @@ export LIDARR_API_KEY="your-lidarr-api-key"
|
|||||||
|
|
||||||
`--repair` walks `<root>/<artist>/<source>/` (the `youtube`/`soundcloud`/… download
|
`--repair` walks `<root>/<artist>/<source>/` (the `youtube`/`soundcloud`/… download
|
||||||
folders — Lidarr album folders are skipped), re-fetches authoritative metadata for each
|
folders — Lidarr album folders are skipped), re-fetches authoritative metadata for each
|
||||||
file using the `[id]` in its filename, and fixes tags (album, year, artist, title). Useful
|
file using the `[id]` in its filename, and fixes tags. Useful when downloads landed with
|
||||||
when downloads landed with missing album or wrong year. It re-queries the source over the
|
missing album or wrong year.
|
||||||
network, so run it occasionally, not constantly. Requires `mutagen` (a yt-dlp dependency,
|
|
||||||
usually already present). CLI-only — not exposed via the REST API.
|
It is deliberately **conservative**: it overwrites **album** and **year** (the usual
|
||||||
|
breakage), but only *fills in* a missing **artist**/**title** — it never overwrites existing
|
||||||
|
artist/title with channel names or decorated video titles. Files whose source returns no real
|
||||||
|
music metadata (no album and no release year — e.g. plain music videos) are left untouched.
|
||||||
|
|
||||||
|
It re-queries the source over the network, so run it occasionally, not constantly. Requires
|
||||||
|
`mutagen` (a yt-dlp dependency, usually already present). CLI-only — not exposed via the REST API.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Preview what would change (writes nothing)
|
# Preview what would change (writes nothing)
|
||||||
|
|||||||
67
musicfetch
67
musicfetch
@@ -760,7 +760,9 @@ def download_single(url: str, root: str, quality: str, dry_run: bool) -> dict:
|
|||||||
artist = get_artist_from_metadata(meta) if meta else "Unknown Artist"
|
artist = get_artist_from_metadata(meta) if meta else "Unknown Artist"
|
||||||
title = (meta or {}).get("title", "")
|
title = (meta or {}).get("title", "")
|
||||||
source = _sanitize_source((meta or {}).get("extractor", "")) if meta else "downloads"
|
source = _sanitize_source((meta or {}).get("extractor", "")) if meta else "downloads"
|
||||||
target = os.path.join(root, artist, source)
|
# First artist only for the folder (matches the search/playlist paths).
|
||||||
|
artist_dir = artist.split(",")[0].strip() or "Unknown Artist"
|
||||||
|
target = os.path.join(root, artist_dir, source)
|
||||||
ok = yt_download(url, target, quality, dry_run)
|
ok = yt_download(url, target, quality, dry_run)
|
||||||
return {"title": title, "artist": artist, "ok": ok}
|
return {"title": title, "artist": artist, "ok": ok}
|
||||||
|
|
||||||
@@ -827,18 +829,24 @@ def _repair_probe_url(source: str, vid: str):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def _desired_tags(meta: dict) -> dict:
|
def _repair_id_ok(source: str, vid: str) -> bool:
|
||||||
"""Authoritative tags from yt-dlp metadata (omit empties)."""
|
"""True if the parsed id matches the source's id format (avoids querying
|
||||||
year = (str(meta.get("release_year") or "")
|
junk ids pulled from bracketed descriptors like '[Official Video]')."""
|
||||||
or (meta.get("release_date") or "")[:4]
|
if source == "youtube":
|
||||||
or (meta.get("upload_date") or "")[:4])
|
return bool(re.fullmatch(r"[A-Za-z0-9_-]{11}", vid))
|
||||||
fields = {
|
if source == "soundcloud":
|
||||||
"artist": get_artist_from_metadata(meta),
|
return vid.isdigit()
|
||||||
"title": meta.get("title", ""),
|
return False
|
||||||
"album": meta.get("album", ""),
|
|
||||||
"date": year,
|
|
||||||
}
|
def _valid_year(meta: dict) -> str:
|
||||||
return {k: v for k, v in fields.items() if v and v != "Unknown Artist"}
|
"""A plausible release year from metadata, or '' . Uses release info only —
|
||||||
|
NOT upload_date, which is the upload year, not the song's year."""
|
||||||
|
for v in (meta.get("release_year"), (meta.get("release_date") or "")[:4]):
|
||||||
|
s = str(v or "")
|
||||||
|
if s.isdigit() and 1000 <= int(s) <= 2100:
|
||||||
|
return s
|
||||||
|
return ""
|
||||||
|
|
||||||
|
|
||||||
def _open_audio(path: str):
|
def _open_audio(path: str):
|
||||||
@@ -868,12 +876,19 @@ def _read_tag(audio, key_map, field: str) -> str:
|
|||||||
|
|
||||||
|
|
||||||
def repair_file(path: str, source: str, dry_run: bool) -> list[str]:
|
def repair_file(path: str, source: str, dry_run: bool) -> list[str]:
|
||||||
"""Re-tag one file from source metadata. Returns the list of changed fields."""
|
"""Re-tag one file from source metadata. Conservative: fixes album/year
|
||||||
|
(the common breakage) and only fills MISSING artist/title — never clobbers
|
||||||
|
existing tags with channel names or decorated music-video titles. Files whose
|
||||||
|
source has no real music metadata (no album/year, e.g. music videos) are left
|
||||||
|
untouched. Returns the list of changed fields."""
|
||||||
parsed = _parse_track_file(os.path.basename(path))
|
parsed = _parse_track_file(os.path.basename(path))
|
||||||
if not parsed:
|
if not parsed:
|
||||||
dbg(f"skip (no id): {path}")
|
dbg(f"skip (no id): {path}")
|
||||||
return []
|
return []
|
||||||
_, vid = parsed
|
_, vid = parsed
|
||||||
|
if not _repair_id_ok(source, vid):
|
||||||
|
dbg(f"skip (bad {source} id '{vid}'): {path}")
|
||||||
|
return []
|
||||||
url = _repair_probe_url(source, vid)
|
url = _repair_probe_url(source, vid)
|
||||||
if not url:
|
if not url:
|
||||||
dbg(f"skip (source '{source}' not re-queryable): {path}")
|
dbg(f"skip (source '{source}' not re-queryable): {path}")
|
||||||
@@ -882,7 +897,13 @@ def repair_file(path: str, source: str, dry_run: bool) -> list[str]:
|
|||||||
if not meta:
|
if not meta:
|
||||||
dbg(f"skip (no metadata): {path}")
|
dbg(f"skip (no metadata): {path}")
|
||||||
return []
|
return []
|
||||||
desired = _desired_tags(meta)
|
|
||||||
|
album = (meta.get("album") or "").strip()
|
||||||
|
year = _valid_year(meta)
|
||||||
|
if not album and not year:
|
||||||
|
dbg(f"skip (no music metadata, likely a video): {path}")
|
||||||
|
return []
|
||||||
|
|
||||||
try:
|
try:
|
||||||
audio, key_map = _open_audio(path)
|
audio, key_map = _open_audio(path)
|
||||||
except Exception as e: # noqa: BLE001
|
except Exception as e: # noqa: BLE001
|
||||||
@@ -890,8 +911,22 @@ def repair_file(path: str, source: str, dry_run: bool) -> list[str]:
|
|||||||
return []
|
return []
|
||||||
if audio is None:
|
if audio is None:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
# album/year are authoritative (overwrite); artist/title fill-missing only.
|
||||||
|
updates = {}
|
||||||
|
if album:
|
||||||
|
updates["album"] = album
|
||||||
|
if year:
|
||||||
|
updates["date"] = year
|
||||||
|
artist = get_artist_from_metadata(meta)
|
||||||
|
if artist and artist != "Unknown Artist" and not _read_tag(audio, key_map, "artist"):
|
||||||
|
updates["artist"] = artist
|
||||||
|
title = meta.get("title", "")
|
||||||
|
if title and not _read_tag(audio, key_map, "title"):
|
||||||
|
updates["title"] = title
|
||||||
|
|
||||||
changed = []
|
changed = []
|
||||||
for field, value in desired.items():
|
for field, value in updates.items():
|
||||||
if _read_tag(audio, key_map, field) != value:
|
if _read_tag(audio, key_map, field) != value:
|
||||||
changed.append(f"{field}={value}")
|
changed.append(f"{field}={value}")
|
||||||
if not dry_run:
|
if not dry_run:
|
||||||
|
|||||||
@@ -1,13 +1,15 @@
|
|||||||
import server.mf # noqa: F401 — loads musicfetch, registers musicfetch_core
|
import server.mf # noqa: F401 — loads musicfetch, registers musicfetch_core
|
||||||
import musicfetch_core as mf
|
import musicfetch_core as mf
|
||||||
|
|
||||||
|
YT_ID = "dQw4w9WgXcQ" # valid 11-char YouTube id
|
||||||
|
|
||||||
|
|
||||||
# ---- _is_source_dir ----
|
# ---- _is_source_dir ----
|
||||||
def test_is_source_dir():
|
def test_is_source_dir():
|
||||||
assert mf._is_source_dir("youtube") is True
|
assert mf._is_source_dir("youtube") is True
|
||||||
assert mf._is_source_dir("soundcloud") is True
|
assert mf._is_source_dir("soundcloud") is True
|
||||||
assert mf._is_source_dir("downloads") is True
|
assert mf._is_source_dir("downloads") is True
|
||||||
assert mf._is_source_dir("Discovery") is False # Lidarr album folder
|
assert mf._is_source_dir("Discovery") is False # Lidarr album folder
|
||||||
assert mf._is_source_dir("Random Access Memories") is False
|
assert mf._is_source_dir("Random Access Memories") is False
|
||||||
assert mf._is_source_dir("") is False
|
assert mf._is_source_dir("") is False
|
||||||
|
|
||||||
@@ -16,38 +18,39 @@ def test_is_source_dir():
|
|||||||
def test_parse_track_file():
|
def test_parse_track_file():
|
||||||
assert mf._parse_track_file("Under My Skin [nGSNF2l44Zc].opus") == ("Under My Skin", "nGSNF2l44Zc")
|
assert mf._parse_track_file("Under My Skin [nGSNF2l44Zc].opus") == ("Under My Skin", "nGSNF2l44Zc")
|
||||||
assert mf._parse_track_file("Ignomon [2202690443].m4a") == ("Ignomon", "2202690443")
|
assert mf._parse_track_file("Ignomon [2202690443].m4a") == ("Ignomon", "2202690443")
|
||||||
|
# greedy title: real id is the LAST bracket
|
||||||
|
assert mf._parse_track_file("WHO GON' SLIDE [Official Music Video] [AxjP9s6J3uY].opus") \
|
||||||
|
== ("WHO GON' SLIDE [Official Music Video]", "AxjP9s6J3uY")
|
||||||
assert mf._parse_track_file("no-id-here.opus") is None
|
assert mf._parse_track_file("no-id-here.opus") is None
|
||||||
assert mf._parse_track_file("cover.jpg") is None
|
assert mf._parse_track_file("cover.jpg") is None
|
||||||
|
|
||||||
|
|
||||||
|
# ---- _repair_id_ok ----
|
||||||
|
def test_repair_id_ok():
|
||||||
|
assert mf._repair_id_ok("youtube", YT_ID) is True
|
||||||
|
assert mf._repair_id_ok("youtube", "Official Video") is False # space, wrong length
|
||||||
|
assert mf._repair_id_ok("youtube", "Cover") is False
|
||||||
|
assert mf._repair_id_ok("soundcloud", "2202690443") is True
|
||||||
|
assert mf._repair_id_ok("soundcloud", "abc") is False
|
||||||
|
assert mf._repair_id_ok("bandcamp", "x") is False
|
||||||
|
|
||||||
|
|
||||||
|
# ---- _valid_year ----
|
||||||
|
def test_valid_year():
|
||||||
|
assert mf._valid_year({"release_year": 2001}) == "2001"
|
||||||
|
assert mf._valid_year({"release_date": "1976-09-10"}) == "1976"
|
||||||
|
assert mf._valid_year({"upload_date": "20110101"}) == "" # upload date ignored
|
||||||
|
assert mf._valid_year({"release_year": 6577}) == "" # out of range
|
||||||
|
assert mf._valid_year({}) == ""
|
||||||
|
|
||||||
|
|
||||||
# ---- _repair_probe_url ----
|
# ---- _repair_probe_url ----
|
||||||
def test_repair_probe_url():
|
def test_repair_probe_url():
|
||||||
assert mf._repair_probe_url("youtube", "vid") == "https://music.youtube.com/watch?v=vid"
|
assert mf._repair_probe_url("youtube", YT_ID) == f"https://music.youtube.com/watch?v={YT_ID}"
|
||||||
assert mf._repair_probe_url("soundcloud", "123") == "https://api.soundcloud.com/tracks/123"
|
assert mf._repair_probe_url("soundcloud", "123") == "https://api.soundcloud.com/tracks/123"
|
||||||
assert mf._repair_probe_url("bandcamp", "x") is None
|
assert mf._repair_probe_url("bandcamp", "x") is None
|
||||||
|
|
||||||
|
|
||||||
# ---- _desired_tags ----
|
|
||||||
def test_desired_tags_full():
|
|
||||||
meta = {"artist": "Daft Punk", "title": "Aerodynamic", "album": "Discovery", "release_year": 2001}
|
|
||||||
assert mf._desired_tags(meta) == {"artist": "Daft Punk", "title": "Aerodynamic",
|
|
||||||
"album": "Discovery", "date": "2001"}
|
|
||||||
|
|
||||||
|
|
||||||
def test_desired_tags_year_fallbacks_and_omits_empty():
|
|
||||||
meta = {"title": "T", "uploader": "Chan", "upload_date": "20230102"} # no album, no release_year
|
|
||||||
out = mf._desired_tags(meta)
|
|
||||||
assert out["date"] == "2023"
|
|
||||||
assert out["title"] == "T"
|
|
||||||
assert out["artist"] == "Chan"
|
|
||||||
assert "album" not in out # omitted when empty
|
|
||||||
|
|
||||||
|
|
||||||
def test_desired_tags_drops_unknown_artist():
|
|
||||||
meta = {"title": "T"} # get_artist_from_metadata -> "Unknown Artist"
|
|
||||||
assert "artist" not in mf._desired_tags(meta)
|
|
||||||
|
|
||||||
|
|
||||||
# ---- repair_file (fake audio + mocked metadata) ----
|
# ---- repair_file (fake audio + mocked metadata) ----
|
||||||
class _FakeAudio(dict):
|
class _FakeAudio(dict):
|
||||||
def __init__(self, initial):
|
def __init__(self, initial):
|
||||||
@@ -58,13 +61,13 @@ class _FakeAudio(dict):
|
|||||||
self.saved = True
|
self.saved = True
|
||||||
|
|
||||||
|
|
||||||
def test_repair_file_writes_changed_fields(monkeypatch):
|
def test_repair_file_fixes_album_and_year(monkeypatch):
|
||||||
monkeypatch.setattr(mf, "run_yt_dlp_get_metadata",
|
monkeypatch.setattr(mf, "run_yt_dlp_get_metadata",
|
||||||
lambda url: {"artist": "Daft Punk", "title": "Aerodynamic",
|
lambda url: {"artist": "Daft Punk", "title": "Aerodynamic",
|
||||||
"album": "Discovery", "release_year": 2001})
|
"album": "Discovery", "release_year": 2001})
|
||||||
audio = _FakeAudio({"artist": ["Daft Punk"], "title": ["Aerodynamic"]}) # album/date missing
|
audio = _FakeAudio({"artist": ["Daft Punk"], "title": ["Aerodynamic"]}) # album/date missing
|
||||||
monkeypatch.setattr(mf, "_open_audio", lambda path: (audio, None))
|
monkeypatch.setattr(mf, "_open_audio", lambda path: (audio, None))
|
||||||
changed = mf.repair_file("X/youtube/Aerodynamic [vid].opus", "youtube", dry_run=False)
|
changed = mf.repair_file(f"X/youtube/Aerodynamic [{YT_ID}].opus", "youtube", dry_run=False)
|
||||||
assert set(changed) == {"album=Discovery", "date=2001"}
|
assert set(changed) == {"album=Discovery", "date=2001"}
|
||||||
assert audio["album"] == ["Discovery"]
|
assert audio["album"] == ["Discovery"]
|
||||||
assert audio["date"] == ["2001"]
|
assert audio["date"] == ["2001"]
|
||||||
@@ -76,30 +79,69 @@ def test_repair_file_dry_run_writes_nothing(monkeypatch):
|
|||||||
lambda url: {"artist": "A", "title": "T", "album": "Alb", "release_year": 2020})
|
lambda url: {"artist": "A", "title": "T", "album": "Alb", "release_year": 2020})
|
||||||
audio = _FakeAudio({})
|
audio = _FakeAudio({})
|
||||||
monkeypatch.setattr(mf, "_open_audio", lambda path: (audio, None))
|
monkeypatch.setattr(mf, "_open_audio", lambda path: (audio, None))
|
||||||
changed = mf.repair_file("X/youtube/T [vid].opus", "youtube", dry_run=True)
|
changed = mf.repair_file(f"X/youtube/T [{YT_ID}].opus", "youtube", dry_run=True)
|
||||||
assert changed # reports would-change
|
assert changed
|
||||||
assert audio == {} # nothing written
|
assert audio == {}
|
||||||
assert audio.saved is False
|
assert audio.saved is False
|
||||||
|
|
||||||
|
|
||||||
|
def test_repair_file_skips_music_video(monkeypatch):
|
||||||
|
# No album AND no valid release year -> treat as a video, leave tags alone.
|
||||||
|
monkeypatch.setattr(mf, "run_yt_dlp_get_metadata",
|
||||||
|
lambda url: {"title": "Artist - Song (Official Music Video)",
|
||||||
|
"uploader": "SomeVEVO", "upload_date": "20110101"})
|
||||||
|
audio = _FakeAudio({"artist": ["Real Artist"], "title": ["Song"]})
|
||||||
|
monkeypatch.setattr(mf, "_open_audio", lambda path: (audio, None))
|
||||||
|
changed = mf.repair_file(f"X/youtube/Song [{YT_ID}].opus", "youtube", dry_run=False)
|
||||||
|
assert changed == []
|
||||||
|
assert audio == {"artist": ["Real Artist"], "title": ["Song"]} # untouched
|
||||||
|
|
||||||
|
|
||||||
|
def test_repair_file_fills_missing_but_never_clobbers(monkeypatch):
|
||||||
|
# Source artist is a channel name; existing artist must be kept.
|
||||||
|
monkeypatch.setattr(mf, "run_yt_dlp_get_metadata",
|
||||||
|
lambda url: {"artist": "SomeChannelVEVO", "title": "Channel Decorated Title",
|
||||||
|
"album": "Real Album", "release_year": 2019})
|
||||||
|
audio = _FakeAudio({"artist": ["Correct Artist"], "title": ["Clean Title"]})
|
||||||
|
monkeypatch.setattr(mf, "_open_audio", lambda path: (audio, None))
|
||||||
|
changed = mf.repair_file(f"X/youtube/x [{YT_ID}].opus", "youtube", dry_run=False)
|
||||||
|
assert set(changed) == {"album=Real Album", "date=2019"}
|
||||||
|
assert audio["artist"] == ["Correct Artist"] # NOT overwritten with channel
|
||||||
|
assert audio["title"] == ["Clean Title"] # NOT overwritten with decorated title
|
||||||
|
|
||||||
|
|
||||||
|
def test_repair_file_fills_missing_artist_when_absent(monkeypatch):
|
||||||
|
monkeypatch.setattr(mf, "run_yt_dlp_get_metadata",
|
||||||
|
lambda url: {"artist": "Real Artist", "title": "T",
|
||||||
|
"album": "Alb", "release_year": 2020})
|
||||||
|
audio = _FakeAudio({}) # nothing present -> fill artist + title too
|
||||||
|
monkeypatch.setattr(mf, "_open_audio", lambda path: (audio, None))
|
||||||
|
changed = mf.repair_file(f"X/youtube/T [{YT_ID}].opus", "youtube", dry_run=False)
|
||||||
|
assert set(changed) == {"album=Alb", "date=2020", "artist=Real Artist", "title=T"}
|
||||||
|
|
||||||
|
|
||||||
|
def test_repair_file_skips_bad_id(monkeypatch):
|
||||||
|
called = {"meta": False}
|
||||||
|
monkeypatch.setattr(mf, "run_yt_dlp_get_metadata",
|
||||||
|
lambda url: called.update(meta=True) or {})
|
||||||
|
# last bracket is a descriptor, not a real id
|
||||||
|
assert mf.repair_file("X/youtube/Song [Official Video].opus", "youtube", dry_run=False) == []
|
||||||
|
assert called["meta"] is False # never hit the network
|
||||||
|
|
||||||
|
|
||||||
def test_repair_file_skips_unparseable(monkeypatch):
|
def test_repair_file_skips_unparseable(monkeypatch):
|
||||||
called = {"meta": False}
|
called = {"meta": False}
|
||||||
monkeypatch.setattr(mf, "run_yt_dlp_get_metadata",
|
monkeypatch.setattr(mf, "run_yt_dlp_get_metadata",
|
||||||
lambda url: called.update(meta=True) or {})
|
lambda url: called.update(meta=True) or {})
|
||||||
assert mf.repair_file("X/youtube/no-id.opus", "youtube", dry_run=False) == []
|
assert mf.repair_file("X/youtube/no-id.opus", "youtube", dry_run=False) == []
|
||||||
assert called["meta"] is False # never hit the network
|
assert called["meta"] is False
|
||||||
|
|
||||||
|
|
||||||
def test_repair_file_skips_unqueryable_source(monkeypatch):
|
|
||||||
monkeypatch.setattr(mf, "run_yt_dlp_get_metadata", lambda url: {"title": "x"})
|
|
||||||
assert mf.repair_file("X/bandcamp/T [id].m4a", "bandcamp", dry_run=False) == []
|
|
||||||
|
|
||||||
|
|
||||||
# ---- repair_library (real temp tree, repair_file mocked) ----
|
# ---- repair_library (real temp tree, repair_file mocked) ----
|
||||||
def test_repair_library_scans_only_source_dirs(tmp_path, monkeypatch):
|
def test_repair_library_scans_only_source_dirs(tmp_path, monkeypatch):
|
||||||
root = tmp_path
|
root = tmp_path
|
||||||
(root / "Daft Punk" / "youtube").mkdir(parents=True)
|
(root / "Daft Punk" / "youtube").mkdir(parents=True)
|
||||||
(root / "Daft Punk" / "youtube" / "Aerodynamic [vid].opus").write_text("x")
|
(root / "Daft Punk" / "youtube" / f"Aerodynamic [{YT_ID}].opus").write_text("x")
|
||||||
(root / "Daft Punk" / "Discovery").mkdir(parents=True) # Lidarr album -> skip
|
(root / "Daft Punk" / "Discovery").mkdir(parents=True) # Lidarr album -> skip
|
||||||
(root / "Daft Punk" / "Discovery" / "Aerodynamic.flac").write_text("x")
|
(root / "Daft Punk" / "Discovery" / "Aerodynamic.flac").write_text("x")
|
||||||
(root / "Ephixa" / "soundcloud").mkdir(parents=True)
|
(root / "Ephixa" / "soundcloud").mkdir(parents=True)
|
||||||
@@ -110,9 +152,8 @@ def test_repair_library_scans_only_source_dirs(tmp_path, monkeypatch):
|
|||||||
lambda path, source, dry_run: visited.append((source, path)) or ["album=X"])
|
lambda path, source, dry_run: visited.append((source, path)) or ["album=X"])
|
||||||
scanned, changed = mf.repair_library(str(root), dry_run=False)
|
scanned, changed = mf.repair_library(str(root), dry_run=False)
|
||||||
assert scanned == 2 and changed == 2
|
assert scanned == 2 and changed == 2
|
||||||
sources = sorted(s for s, _ in visited)
|
assert sorted(s for s, _ in visited) == ["soundcloud", "youtube"] # album folder skipped
|
||||||
assert sources == ["soundcloud", "youtube"] # Discovery album folder skipped
|
|
||||||
|
|
||||||
|
|
||||||
def test_repair_library_missing_root(monkeypatch):
|
def test_repair_library_missing_root():
|
||||||
assert mf.repair_library("/no/such/dir", dry_run=False) == (0, 0)
|
assert mf.repair_library("/no/such/dir", dry_run=False) == (0, 0)
|
||||||
|
|||||||
Reference in New Issue
Block a user