diff --git a/.gitignore b/.gitignore index 7a60b85..77dc02c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ __pycache__/ *.pyc + +server/log.txt diff --git a/README.md b/README.md index c3f5ff5..523b3f6 100644 --- a/README.md +++ b/README.md @@ -131,9 +131,11 @@ file using the `[id]` in its filename, and fixes tags. Useful when downloads lan missing album or wrong year. It is deliberately **conservative**: it overwrites **album** and **year** (the usual -breakage), but only *fills in* a missing **artist**/**title** — it never overwrites existing -artist/title with channel names or decorated video titles. Files whose source returns no real -music metadata (no album and no release year — e.g. plain music videos) are left untouched. +breakage), and fills in **artist**/**title** when they are missing *or* a known-bogus +placeholder (`NA`, `Unknown Album`, `Unknown Artist` — left behind by older buggy tagging) — +but it never overwrites a genuine existing artist/title with a channel name or decorated video +title. A bogus `NA [].` filename is renamed to the recovered title, and a literal +`NA` album with no source album is normalised to `Unknown Album`. It re-queries the source over the network, so run it occasionally, not constantly. Requires `mutagen` (a yt-dlp dependency, usually already present). CLI-only — not exposed via the REST API. diff --git a/musicfetch b/musicfetch index f2c64c6..2438c7d 100755 --- a/musicfetch +++ b/musicfetch @@ -18,7 +18,8 @@ from dataclasses import dataclass, field from typing import Optional import requests -from requests.exceptions import RequestException +from requests.exceptions import ConnectionError as ReqConnectionError +from requests.exceptions import RequestException, Timeout from urllib.parse import urlparse, parse_qs # Optional deps — degrade gracefully if missing. @@ -236,11 +237,22 @@ def lidarr_search(query: str, limit: int) -> list[Hit]: return _fallback_lookup(query, limit, artist_first=True) +def _log_lidarr_failure(label: str, e: Exception) -> None: + """A connection/timeout error means Lidarr is unreachable — the silent + YouTube fallback that follows is easy to mistake for "Lidarr had no match", + so surface it loudly. Ordinary HTTP errors stay debug-only.""" + if isinstance(e, (ReqConnectionError, Timeout)): + err(f"Lidarr unreachable ({label} at {LIDARR_URL}): {e}. " + f"Falling back to YouTube.") + else: + dbg(f"{label} failed: {e}") + + def _lidarr_album_candidates(term: str) -> list[Hit]: try: return [_album_to_hit(a) for a in lidarr_get("/api/v1/album/lookup", params={"term": term})] except RequestException as e: - dbg(f"album/lookup failed: {e}") + _log_lidarr_failure("album/lookup", e) return [] @@ -248,7 +260,7 @@ def _lidarr_artist_candidates(term: str) -> list[Hit]: try: return [_artist_to_hit(a) for a in lidarr_get("/api/v1/artist/lookup", params={"term": term})] except RequestException as e: - dbg(f"artist/lookup failed: {e}") + _log_lidarr_failure("artist/lookup", e) return [] @@ -605,22 +617,34 @@ def yt_download(url_or_query: str, target_folder: Optional[str], quality: str, d cmd += ["-o", outtmpl] else: cmd += ["-P", target_folder] - # Override tags from the chosen hit so they don't rely on scraped titles. + # Override embedded tags from the chosen hit. Inject literals via a + # seed-then-replace pair: --parse-metadata first copies an always-present + # field into meta_ (so the tag exists even when the source lacks it, + # e.g. YouTube videos with no album), then --replace-in-metadata overwrites + # it with the literal value. This dodges yt-dlp's output-template trap where + # a bare-word FROM (e.g. "Cochise") matches field_to_template's r'[a-zA-Z_]+$' + # and is read as a *field name* -> "NA". --replace-in-metadata args are + # literal, so single-word values and parens survive intact. + def _force_tag(field: str, value: str) -> list[str]: + repl = value.replace("\\", r"\\") # backslash is special in re.sub repl + return ["--parse-metadata", f"%(title,id)s:%(meta_{field})s", + "--replace-in-metadata", f"meta_{field}", "^.*$", repl] + if hit: - if hit.artist: - # First artist only; anchored ^.*$ replaces the whole field exactly once - # (a bare .* matches twice and doubles the value). - primary_artist = hit.artist.split(",")[0].strip() - cmd += ["--replace-in-metadata", "artist", "^.*$", primary_artist] - if hit.album: - cmd += ["--parse-metadata", f"{hit.album}:%(album)s"] + primary_artist = hit.artist.split(",")[0].strip() if hit.artist else "" + if primary_artist: + cmd += _force_tag("artist", primary_artist) if hit.title: - cmd += ["--parse-metadata", f"{hit.title}:%(title)s"] + cmd += _force_tag("title", hit.title) + if hit.album: + cmd += _force_tag("album", hit.album) if hit.year: cmd += ["--parse-metadata", f"{hit.year}:%(release_year)s"] - # Always embed an album: the resolved/native album if present, else a - # placeholder so players (e.g. Plexamp) don't choke on a blank album. - cmd += ["--parse-metadata", "%(album|Unknown Album)s:%(meta_album)s"] + # When the hit carried no album, still embed one: the resolved/native album + # if present, else a placeholder so players (e.g. Plexamp) don't choke on a + # blank album. (A hit album is already forced above and must not be clobbered.) + if not (hit and hit.album): + cmd += ["--parse-metadata", "%(album|Unknown Album)s:%(meta_album)s"] cmd.append(url_or_query) dest = outtmpl or target_folder @@ -881,12 +905,53 @@ def _read_tag(audio, key_map, field: str) -> str: return str(val[0]) if isinstance(val, list) else str(val) +# Placeholder tag values the old tagging bug left behind (yt-dlp's "NA" missing +# marker, and the "Unknown *" fallbacks). Treated as empty so repair overwrites +# them rather than mistaking them for a real, present tag. +_BOGUS_TAGS = {"", "na", "n/a", "unknown", "unknown album", "unknown artist"} + + +def _is_bogus(value: str) -> bool: + return (value or "").strip().casefold() in _BOGUS_TAGS + + +def _fs_safe(name: str) -> str: + """Filesystem-safe filename stem: mirror yt-dlp's default '/'->'⧸' so the + path stays a single segment, and drop NULs.""" + return name.replace("/", "⧸").replace("\0", "").strip() + + +def _maybe_rename_bogus(path: str, title: str, dry_run: bool) -> tuple[str, Optional[str]]: + """When the filename stem is a placeholder (e.g. 'NA []'), rename to + ' [<id>].<ext>'. Returns (current_path, change_note_or_None).""" + fname = os.path.basename(path) + parsed = _parse_track_file(fname) + if not parsed: + return path, None + stem_title, vid = parsed + if not _is_bogus(stem_title) or _is_bogus(title): + return path, None + ext = fname.rsplit(".", 1)[-1] + new_name = f"{_fs_safe(title)} [{vid}].{ext}" + new_path = os.path.join(os.path.dirname(path), new_name) + if new_path == path or not new_name: + return path, None + note = f"renamed -> {new_name}" + if dry_run: + print(f"[dry-run] would rename {fname} -> {new_name}") + return path, note + os.rename(path, new_path) + print(f"renamed {fname} -> {new_name}") + return new_path, note + + def repair_file(path: str, source: str, dry_run: bool) -> list[str]: - """Re-tag one file from source metadata. Conservative: fixes album/year - (the common breakage) and only fills MISSING artist/title — never clobbers - existing tags with channel names or decorated music-video titles. Files whose - source has no real music metadata (no album/year, e.g. music videos) are left - untouched. Returns the list of changed fields.""" + """Re-tag one file from source metadata. album/year are authoritative + (overwrite); artist/title are filled when MISSING *or* a known-bogus + placeholder ('NA', 'Unknown …') — the old tagging bug wrote those — but a + genuine existing tag is never clobbered with a channel name or decorated + music-video title. A bogus 'NA [<id>]' filename is renamed to the recovered + title. Returns the list of changed fields.""" parsed = _parse_track_file(os.path.basename(path)) if not parsed: dbg(f"skip (no id): {path}") @@ -904,12 +969,6 @@ def repair_file(path: str, source: str, dry_run: bool) -> list[str]: dbg(f"skip (no metadata): {path}") return [] - album = (meta.get("album") or "").strip() - year = _valid_year(meta) - if not album and not year: - dbg(f"skip (no music metadata, likely a video): {path}") - return [] - try: audio, key_map = _open_audio(path) except Exception as e: # noqa: BLE001 @@ -918,18 +977,28 @@ def repair_file(path: str, source: str, dry_run: bool) -> list[str]: if audio is None: return [] - # album/year are authoritative (overwrite); artist/title fill-missing only. + album = (meta.get("album") or "").strip() + year = _valid_year(meta) + cur_artist = _read_tag(audio, key_map, "artist") + cur_title = _read_tag(audio, key_map, "title") + cur_album = _read_tag(audio, key_map, "album") + meta_artist = get_artist_from_metadata(meta) + meta_title = (meta.get("title") or "").strip() + updates = {} if album: updates["album"] = album + elif cur_album and _is_bogus(cur_album) and cur_album.strip().casefold() != "unknown album": + # No source album, but the tag is a literal 'NA' — normalise it so no + # file keeps the placeholder (a blank album is left blank, as before). + updates["album"] = "Unknown Album" if year: updates["date"] = year - artist = get_artist_from_metadata(meta) - if artist and artist != "Unknown Artist" and not _read_tag(audio, key_map, "artist"): - updates["artist"] = artist - title = meta.get("title", "") - if title and not _read_tag(audio, key_map, "title"): - updates["title"] = title + # Fill artist/title when missing OR bogus; never overwrite a genuine value. + if meta_artist and not _is_bogus(meta_artist) and _is_bogus(cur_artist): + updates["artist"] = meta_artist + if meta_title and not _is_bogus(meta_title) and _is_bogus(cur_title): + updates["title"] = meta_title changed = [] for field, value in updates.items(): @@ -942,6 +1011,12 @@ def repair_file(path: str, source: str, dry_run: bool) -> list[str]: if changed: prefix = "[dry-run] would set" if dry_run else "set" print(f"{prefix} [{', '.join(changed)}] on {path}") + + # Repair a placeholder filename using the final (recovered) title. + final_title = updates.get("title") or cur_title + _, rename_note = _maybe_rename_bogus(path, final_title, dry_run) + if rename_note: + changed.append(rename_note) return changed diff --git a/server/Dockerfile b/server/Dockerfile index 7ae60c0..d623684 100644 --- a/server/Dockerfile +++ b/server/Dockerfile @@ -1,7 +1,20 @@ FROM python:3.12-slim +# ffmpeg for audio extraction/embedding; deno is the JS runtime yt-dlp needs +# for YouTube (without it: "No supported JavaScript runtime" -> missing formats +# / HTTP 403). yt-dlp auto-detects deno on PATH. RUN apt-get update \ - && apt-get install -y --no-install-recommends ffmpeg \ + && apt-get install -y --no-install-recommends ffmpeg ca-certificates curl unzip \ + && arch="$(uname -m)" \ + && case "$arch" in \ + x86_64) deno_arch=x86_64-unknown-linux-gnu ;; \ + aarch64) deno_arch=aarch64-unknown-linux-gnu ;; \ + *) echo "unsupported arch: $arch" >&2; exit 1 ;; \ + esac \ + && curl -fsSL "https://github.com/denoland/deno/releases/latest/download/deno-${deno_arch}.zip" -o /tmp/deno.zip \ + && unzip /tmp/deno.zip -d /usr/local/bin \ + && rm /tmp/deno.zip \ + && apt-get purge -y --auto-remove curl unzip \ && rm -rf /var/lib/apt/lists/* WORKDIR /app diff --git a/tests/test_lidarr_search.py b/tests/test_lidarr_search.py index c4a5fd6..1deb952 100644 --- a/tests/test_lidarr_search.py +++ b/tests/test_lidarr_search.py @@ -81,3 +81,30 @@ def test_last_resort_universal_search(monkeypatch): monkeypatch.setattr(mf, "lidarr_get", fake_get) hits = mf.lidarr_search("Daft Punk - Discovery", 10) assert hits and hits[0].album == "Discovery" + + +def test_unreachable_lidarr_warns_loudly(monkeypatch, capsys): + # A connection error must surface on stderr (not silent dbg) so the + # YouTube fallback isn't mistaken for "Lidarr had no match". + monkeypatch.setattr(mf, "API_KEY", "testkey") + monkeypatch.setattr(mf, "DEBUG", False) + + def boom(path, params=None, timeout=15): + raise mf.ReqConnectionError("Name or service not known") + monkeypatch.setattr(mf, "lidarr_get", boom) + + hits = mf._lidarr_album_candidates("anything") + assert hits == [] + assert "Lidarr unreachable" in capsys.readouterr().err + + +def test_http_error_stays_quiet(monkeypatch, capsys): + monkeypatch.setattr(mf, "API_KEY", "testkey") + monkeypatch.setattr(mf, "DEBUG", False) + + def boom(path, params=None, timeout=15): + raise mf.RequestException("500 Server Error") + monkeypatch.setattr(mf, "lidarr_get", boom) + + assert mf._lidarr_album_candidates("anything") == [] + assert "Lidarr unreachable" not in capsys.readouterr().err diff --git a/tests/test_playlist.py b/tests/test_playlist.py index 7c700d2..2ddbac7 100644 --- a/tests/test_playlist.py +++ b/tests/test_playlist.py @@ -110,3 +110,25 @@ def test_yt_download_always_sets_album_default(monkeypatch): monkeypatch.setattr(mf.subprocess, "run", lambda cmd, **k: captured.update(cmd=cmd) or _CP("")) mf.yt_download("u", "/tmp/x", "best", False) assert "%(album|Unknown Album)s:%(meta_album)s" in captured["cmd"] + + +def test_yt_download_single_word_tags_injected_literally(monkeypatch): + # Regression: `--parse-metadata "Cochise:%(title)s"` makes yt-dlp treat the + # bare word 'Cochise' as a FIELD name (field_to_template's r'[a-zA-Z_]+$'), + # producing 'NA'. Single-word album/title must reach yt-dlp as literals. + captured = {} + monkeypatch.setattr(mf.os, "makedirs", lambda *a, **k: None) + monkeypatch.setattr(mf.subprocess, "run", lambda cmd, **k: captured.update(cmd=cmd) or _CP("")) + hit = mf.Hit(source="youtube", kind="track", title="Cochise", + artist="Audioslave", album="Solid", payload={"videoId": "x"}) + mf.yt_download("u", "/tmp/x", "best", False, hit=hit) + cmd = captured["cmd"] + joined = " ".join(cmd) + # The buggy bare-word parse-metadata FROM must be gone. + assert "Solid:%(album)s" not in joined + assert "Cochise:%(title)s" not in joined + # Literal values must be passed as literal args (immune to template parsing). + assert "Solid" in cmd + assert "Cochise" in cmd + # A hit album must not be clobbered by the Unknown-Album default. + assert "%(album|Unknown Album)s:%(meta_album)s" not in cmd diff --git a/tests/test_repair.py b/tests/test_repair.py index a690a43..a8d115a 100644 --- a/tests/test_repair.py +++ b/tests/test_repair.py @@ -260,3 +260,79 @@ def test_retag_library_walks_source_files(tmp_path, monkeypatch): scanned, changed = mf.retag_library_from_path(str(root), dry_run=False) assert (scanned, changed) == (1, 1) assert visited == ["Daft Punk"] + + +# ---- bogus-tag recovery (old-code NA / Unknown breakage) ---- +def test_is_bogus(): + for v in ("", "NA", "na", "N/A", "Unknown", "Unknown Album", "unknown artist", " NA "): + assert mf._is_bogus(v) is True, v + for v in ("Cochise", "Solid", "Brother Stoon", "Discovery"): + assert mf._is_bogus(v) is False, v + + +def test_repair_file_overwrites_bogus_title(monkeypatch): + monkeypatch.setattr(mf, "run_yt_dlp_get_metadata", + lambda url, *a: {"artist": "Audioslave", "title": "Cochise", + "album": "Audioslave", "release_year": 2002}) + audio = _FakeAudio({"artist": ["Audioslave"], "title": ["NA"]}) # bogus title + monkeypatch.setattr(mf, "_open_audio", lambda path: (audio, None)) + changed = mf.repair_file(f"X/youtube/Brother Stoon [{YT_ID}].opus", "youtube", dry_run=False) + assert "title=Cochise" in changed + assert audio["title"] == ["Cochise"] + + +def test_repair_file_overwrites_bogus_artist(monkeypatch): + monkeypatch.setattr(mf, "run_yt_dlp_get_metadata", + lambda url, *a: {"artist": "Real Artist", "title": "Real Title", + "album": "Alb", "release_year": 2020}) + audio = _FakeAudio({"artist": ["NA"], "title": ["Good Title"]}) # bogus artist, good title + monkeypatch.setattr(mf, "_open_audio", lambda path: (audio, None)) + changed = mf.repair_file(f"X/youtube/Good Title [{YT_ID}].opus", "youtube", dry_run=False) + assert "artist=Real Artist" in changed + assert audio["artist"] == ["Real Artist"] + assert audio["title"] == ["Good Title"] # good title untouched + + +def test_repair_file_normalizes_na_album_when_source_has_none(monkeypatch): + # Music video: no source album/year, but album tag is the literal 'NA' -> Unknown Album. + monkeypatch.setattr(mf, "run_yt_dlp_get_metadata", + lambda url, *a: {"title": "Some Live Thing", "uploader": "Chan"}) + audio = _FakeAudio({"artist": ["X"], "title": ["Y"], "album": ["NA"]}) + monkeypatch.setattr(mf, "_open_audio", lambda path: (audio, None)) + changed = mf.repair_file(f"X/youtube/Y [{YT_ID}].opus", "youtube", dry_run=False) + assert "album=Unknown Album" in changed + assert audio["album"] == ["Unknown Album"] + + +def test_repair_file_renames_bogus_filename(tmp_path, monkeypatch): + d = tmp_path / "Audioslave" / "youtube" + d.mkdir(parents=True) + f = d / f"NA [{YT_ID}].opus" + f.write_text("x") + monkeypatch.setattr(mf, "run_yt_dlp_get_metadata", + lambda url, *a: {"artist": "Audioslave", "title": "Cochise", + "album": "Audioslave", "release_year": 2002}) + audio = _FakeAudio({"artist": ["Audioslave"], "title": ["NA"]}) + monkeypatch.setattr(mf, "_open_audio", lambda path: (audio, None)) + changed = mf.repair_file(str(f), "youtube", dry_run=False) + assert (d / f"Cochise [{YT_ID}].opus").exists() + assert not f.exists() + assert any("rename" in c.lower() or c.startswith("title=") for c in changed) + + +def test_repair_file_dry_run_does_not_rename(tmp_path, monkeypatch): + d = tmp_path / "Audioslave" / "youtube" + d.mkdir(parents=True) + f = d / f"NA [{YT_ID}].opus" + f.write_text("x") + monkeypatch.setattr(mf, "run_yt_dlp_get_metadata", + lambda url, *a: {"artist": "Audioslave", "title": "Cochise", + "album": "Audioslave", "release_year": 2002}) + monkeypatch.setattr(mf, "_open_audio", lambda path: (_FakeAudio({"title": ["NA"]}), None)) + mf.repair_file(str(f), "youtube", dry_run=True) + assert f.exists() # untouched in dry-run + assert not (d / f"Cochise [{YT_ID}].opus").exists() + + +def test_fs_safe_replaces_slash(): + assert "/" not in mf._fs_safe("AC/DC Live")