fix: reliable YouTube tagging, loud Lidarr failures, deno runtime, repair recovery

Root cause of bad album/title tags: yt-dlp's --parse-metadata reads a
single-word FROM (matching field_to_template's ^[a-zA-Z_]+$) as a *field
name*, so literal one-word titles/albums like "Cochise" became "NA". Inject
literals via seed-then-replace into meta_<tag> instead (--parse-metadata to
create the field, --replace-in-metadata with literal args to set it), which
is immune to template parsing and also creates tags the source lacks.

- yt_download: literal-safe meta_artist/title/album; hit album no longer
  clobbered by the Unknown-Album default; artist tag now created when missing.
- lidarr_search: connection/timeout errors surface via err() ("Lidarr
  unreachable … falling back to YouTube") instead of silent dbg(), so the
  YouTube fallback isn't mistaken for "no Lidarr match".
- Dockerfile: install deno (arch-aware) — the JS runtime yt-dlp needs for
  YouTube; without it: "No supported JavaScript runtime" / HTTP 403.
- repair: treat NA/Unknown placeholders as bogus and overwrite title/artist
  from source (was fill-missing-only); normalise literal "NA" album to
  "Unknown Album"; rename bogus "NA [<id>]" filenames to the recovered title.
- README updated; .gitignore excludes server/log.txt.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-13 11:09:27 -07:00
parent 33ca743a34
commit 0347a638cf
7 changed files with 254 additions and 37 deletions

2
.gitignore vendored
View File

@@ -1,2 +1,4 @@
__pycache__/
*.pyc
server/log.txt

View File

@@ -131,9 +131,11 @@ file using the `[id]` in its filename, and fixes tags. Useful when downloads lan
missing album or wrong year.
It is deliberately **conservative**: it overwrites **album** and **year** (the usual
breakage), but only *fills in* a missing **artist**/**title** — it never overwrites existing
artist/title with channel names or decorated video titles. Files whose source returns no real
music metadata (no album and no release year — e.g. plain music videos) are left untouched.
breakage), and fills in **artist**/**title** when they are missing *or* a known-bogus
placeholder (`NA`, `Unknown Album`, `Unknown Artist` — left behind by older buggy tagging) —
but it never overwrites a genuine existing artist/title with a channel name or decorated video
title. A bogus `NA [<id>].<ext>` filename is renamed to the recovered title, and a literal
`NA` album with no source album is normalised to `Unknown Album`.
It re-queries the source over the network, so run it occasionally, not constantly. Requires
`mutagen` (a yt-dlp dependency, usually already present). CLI-only — not exposed via the REST API.

View File

@@ -18,7 +18,8 @@ from dataclasses import dataclass, field
from typing import Optional
import requests
from requests.exceptions import RequestException
from requests.exceptions import ConnectionError as ReqConnectionError
from requests.exceptions import RequestException, Timeout
from urllib.parse import urlparse, parse_qs
# Optional deps — degrade gracefully if missing.
@@ -236,11 +237,22 @@ def lidarr_search(query: str, limit: int) -> list[Hit]:
return _fallback_lookup(query, limit, artist_first=True)
def _log_lidarr_failure(label: str, e: Exception) -> None:
"""A connection/timeout error means Lidarr is unreachable — the silent
YouTube fallback that follows is easy to mistake for "Lidarr had no match",
so surface it loudly. Ordinary HTTP errors stay debug-only."""
if isinstance(e, (ReqConnectionError, Timeout)):
err(f"Lidarr unreachable ({label} at {LIDARR_URL}): {e}. "
f"Falling back to YouTube.")
else:
dbg(f"{label} failed: {e}")
def _lidarr_album_candidates(term: str) -> list[Hit]:
try:
return [_album_to_hit(a) for a in lidarr_get("/api/v1/album/lookup", params={"term": term})]
except RequestException as e:
dbg(f"album/lookup failed: {e}")
_log_lidarr_failure("album/lookup", e)
return []
@@ -248,7 +260,7 @@ def _lidarr_artist_candidates(term: str) -> list[Hit]:
try:
return [_artist_to_hit(a) for a in lidarr_get("/api/v1/artist/lookup", params={"term": term})]
except RequestException as e:
dbg(f"artist/lookup failed: {e}")
_log_lidarr_failure("artist/lookup", e)
return []
@@ -605,22 +617,34 @@ def yt_download(url_or_query: str, target_folder: Optional[str], quality: str, d
cmd += ["-o", outtmpl]
else:
cmd += ["-P", target_folder]
# Override tags from the chosen hit so they don't rely on scraped titles.
# Override embedded tags from the chosen hit. Inject literals via a
# seed-then-replace pair: --parse-metadata first copies an always-present
# field into meta_<tag> (so the tag exists even when the source lacks it,
# e.g. YouTube videos with no album), then --replace-in-metadata overwrites
# it with the literal value. This dodges yt-dlp's output-template trap where
# a bare-word FROM (e.g. "Cochise") matches field_to_template's r'[a-zA-Z_]+$'
# and is read as a *field name* -> "NA". --replace-in-metadata args are
# literal, so single-word values and parens survive intact.
def _force_tag(field: str, value: str) -> list[str]:
repl = value.replace("\\", r"\\") # backslash is special in re.sub repl
return ["--parse-metadata", f"%(title,id)s:%(meta_{field})s",
"--replace-in-metadata", f"meta_{field}", "^.*$", repl]
if hit:
if hit.artist:
# First artist only; anchored ^.*$ replaces the whole field exactly once
# (a bare .* matches twice and doubles the value).
primary_artist = hit.artist.split(",")[0].strip()
cmd += ["--replace-in-metadata", "artist", "^.*$", primary_artist]
if hit.album:
cmd += ["--parse-metadata", f"{hit.album}:%(album)s"]
primary_artist = hit.artist.split(",")[0].strip() if hit.artist else ""
if primary_artist:
cmd += _force_tag("artist", primary_artist)
if hit.title:
cmd += ["--parse-metadata", f"{hit.title}:%(title)s"]
cmd += _force_tag("title", hit.title)
if hit.album:
cmd += _force_tag("album", hit.album)
if hit.year:
cmd += ["--parse-metadata", f"{hit.year}:%(release_year)s"]
# Always embed an album: the resolved/native album if present, else a
# placeholder so players (e.g. Plexamp) don't choke on a blank album.
cmd += ["--parse-metadata", "%(album|Unknown Album)s:%(meta_album)s"]
# When the hit carried no album, still embed one: the resolved/native album
# if present, else a placeholder so players (e.g. Plexamp) don't choke on a
# blank album. (A hit album is already forced above and must not be clobbered.)
if not (hit and hit.album):
cmd += ["--parse-metadata", "%(album|Unknown Album)s:%(meta_album)s"]
cmd.append(url_or_query)
dest = outtmpl or target_folder
@@ -881,12 +905,53 @@ def _read_tag(audio, key_map, field: str) -> str:
return str(val[0]) if isinstance(val, list) else str(val)
# Placeholder tag values the old tagging bug left behind (yt-dlp's "NA" missing
# marker, and the "Unknown *" fallbacks). Treated as empty so repair overwrites
# them rather than mistaking them for a real, present tag.
_BOGUS_TAGS = {"", "na", "n/a", "unknown", "unknown album", "unknown artist"}
def _is_bogus(value: str) -> bool:
return (value or "").strip().casefold() in _BOGUS_TAGS
def _fs_safe(name: str) -> str:
"""Filesystem-safe filename stem: mirror yt-dlp's default '/'->'' so the
path stays a single segment, and drop NULs."""
return name.replace("/", "").replace("\0", "").strip()
def _maybe_rename_bogus(path: str, title: str, dry_run: bool) -> tuple[str, Optional[str]]:
"""When the filename stem is a placeholder (e.g. 'NA [<id>]'), rename to
'<title> [<id>].<ext>'. Returns (current_path, change_note_or_None)."""
fname = os.path.basename(path)
parsed = _parse_track_file(fname)
if not parsed:
return path, None
stem_title, vid = parsed
if not _is_bogus(stem_title) or _is_bogus(title):
return path, None
ext = fname.rsplit(".", 1)[-1]
new_name = f"{_fs_safe(title)} [{vid}].{ext}"
new_path = os.path.join(os.path.dirname(path), new_name)
if new_path == path or not new_name:
return path, None
note = f"renamed -> {new_name}"
if dry_run:
print(f"[dry-run] would rename {fname} -> {new_name}")
return path, note
os.rename(path, new_path)
print(f"renamed {fname} -> {new_name}")
return new_path, note
def repair_file(path: str, source: str, dry_run: bool) -> list[str]:
"""Re-tag one file from source metadata. Conservative: fixes album/year
(the common breakage) and only fills MISSING artist/title — never clobbers
existing tags with channel names or decorated music-video titles. Files whose
source has no real music metadata (no album/year, e.g. music videos) are left
untouched. Returns the list of changed fields."""
"""Re-tag one file from source metadata. album/year are authoritative
(overwrite); artist/title are filled when MISSING *or* a known-bogus
placeholder ('NA', 'Unknown …') — the old tagging bug wrote those — but a
genuine existing tag is never clobbered with a channel name or decorated
music-video title. A bogus 'NA [<id>]' filename is renamed to the recovered
title. Returns the list of changed fields."""
parsed = _parse_track_file(os.path.basename(path))
if not parsed:
dbg(f"skip (no id): {path}")
@@ -904,12 +969,6 @@ def repair_file(path: str, source: str, dry_run: bool) -> list[str]:
dbg(f"skip (no metadata): {path}")
return []
album = (meta.get("album") or "").strip()
year = _valid_year(meta)
if not album and not year:
dbg(f"skip (no music metadata, likely a video): {path}")
return []
try:
audio, key_map = _open_audio(path)
except Exception as e: # noqa: BLE001
@@ -918,18 +977,28 @@ def repair_file(path: str, source: str, dry_run: bool) -> list[str]:
if audio is None:
return []
# album/year are authoritative (overwrite); artist/title fill-missing only.
album = (meta.get("album") or "").strip()
year = _valid_year(meta)
cur_artist = _read_tag(audio, key_map, "artist")
cur_title = _read_tag(audio, key_map, "title")
cur_album = _read_tag(audio, key_map, "album")
meta_artist = get_artist_from_metadata(meta)
meta_title = (meta.get("title") or "").strip()
updates = {}
if album:
updates["album"] = album
elif cur_album and _is_bogus(cur_album) and cur_album.strip().casefold() != "unknown album":
# No source album, but the tag is a literal 'NA' — normalise it so no
# file keeps the placeholder (a blank album is left blank, as before).
updates["album"] = "Unknown Album"
if year:
updates["date"] = year
artist = get_artist_from_metadata(meta)
if artist and artist != "Unknown Artist" and not _read_tag(audio, key_map, "artist"):
updates["artist"] = artist
title = meta.get("title", "")
if title and not _read_tag(audio, key_map, "title"):
updates["title"] = title
# Fill artist/title when missing OR bogus; never overwrite a genuine value.
if meta_artist and not _is_bogus(meta_artist) and _is_bogus(cur_artist):
updates["artist"] = meta_artist
if meta_title and not _is_bogus(meta_title) and _is_bogus(cur_title):
updates["title"] = meta_title
changed = []
for field, value in updates.items():
@@ -942,6 +1011,12 @@ def repair_file(path: str, source: str, dry_run: bool) -> list[str]:
if changed:
prefix = "[dry-run] would set" if dry_run else "set"
print(f"{prefix} [{', '.join(changed)}] on {path}")
# Repair a placeholder filename using the final (recovered) title.
final_title = updates.get("title") or cur_title
_, rename_note = _maybe_rename_bogus(path, final_title, dry_run)
if rename_note:
changed.append(rename_note)
return changed

View File

@@ -1,7 +1,20 @@
FROM python:3.12-slim
# ffmpeg for audio extraction/embedding; deno is the JS runtime yt-dlp needs
# for YouTube (without it: "No supported JavaScript runtime" -> missing formats
# / HTTP 403). yt-dlp auto-detects deno on PATH.
RUN apt-get update \
&& apt-get install -y --no-install-recommends ffmpeg \
&& apt-get install -y --no-install-recommends ffmpeg ca-certificates curl unzip \
&& arch="$(uname -m)" \
&& case "$arch" in \
x86_64) deno_arch=x86_64-unknown-linux-gnu ;; \
aarch64) deno_arch=aarch64-unknown-linux-gnu ;; \
*) echo "unsupported arch: $arch" >&2; exit 1 ;; \
esac \
&& curl -fsSL "https://github.com/denoland/deno/releases/latest/download/deno-${deno_arch}.zip" -o /tmp/deno.zip \
&& unzip /tmp/deno.zip -d /usr/local/bin \
&& rm /tmp/deno.zip \
&& apt-get purge -y --auto-remove curl unzip \
&& rm -rf /var/lib/apt/lists/*
WORKDIR /app

View File

@@ -81,3 +81,30 @@ def test_last_resort_universal_search(monkeypatch):
monkeypatch.setattr(mf, "lidarr_get", fake_get)
hits = mf.lidarr_search("Daft Punk - Discovery", 10)
assert hits and hits[0].album == "Discovery"
def test_unreachable_lidarr_warns_loudly(monkeypatch, capsys):
# A connection error must surface on stderr (not silent dbg) so the
# YouTube fallback isn't mistaken for "Lidarr had no match".
monkeypatch.setattr(mf, "API_KEY", "testkey")
monkeypatch.setattr(mf, "DEBUG", False)
def boom(path, params=None, timeout=15):
raise mf.ReqConnectionError("Name or service not known")
monkeypatch.setattr(mf, "lidarr_get", boom)
hits = mf._lidarr_album_candidates("anything")
assert hits == []
assert "Lidarr unreachable" in capsys.readouterr().err
def test_http_error_stays_quiet(monkeypatch, capsys):
monkeypatch.setattr(mf, "API_KEY", "testkey")
monkeypatch.setattr(mf, "DEBUG", False)
def boom(path, params=None, timeout=15):
raise mf.RequestException("500 Server Error")
monkeypatch.setattr(mf, "lidarr_get", boom)
assert mf._lidarr_album_candidates("anything") == []
assert "Lidarr unreachable" not in capsys.readouterr().err

View File

@@ -110,3 +110,25 @@ def test_yt_download_always_sets_album_default(monkeypatch):
monkeypatch.setattr(mf.subprocess, "run", lambda cmd, **k: captured.update(cmd=cmd) or _CP(""))
mf.yt_download("u", "/tmp/x", "best", False)
assert "%(album|Unknown Album)s:%(meta_album)s" in captured["cmd"]
def test_yt_download_single_word_tags_injected_literally(monkeypatch):
# Regression: `--parse-metadata "Cochise:%(title)s"` makes yt-dlp treat the
# bare word 'Cochise' as a FIELD name (field_to_template's r'[a-zA-Z_]+$'),
# producing 'NA'. Single-word album/title must reach yt-dlp as literals.
captured = {}
monkeypatch.setattr(mf.os, "makedirs", lambda *a, **k: None)
monkeypatch.setattr(mf.subprocess, "run", lambda cmd, **k: captured.update(cmd=cmd) or _CP(""))
hit = mf.Hit(source="youtube", kind="track", title="Cochise",
artist="Audioslave", album="Solid", payload={"videoId": "x"})
mf.yt_download("u", "/tmp/x", "best", False, hit=hit)
cmd = captured["cmd"]
joined = " ".join(cmd)
# The buggy bare-word parse-metadata FROM must be gone.
assert "Solid:%(album)s" not in joined
assert "Cochise:%(title)s" not in joined
# Literal values must be passed as literal args (immune to template parsing).
assert "Solid" in cmd
assert "Cochise" in cmd
# A hit album must not be clobbered by the Unknown-Album default.
assert "%(album|Unknown Album)s:%(meta_album)s" not in cmd

View File

@@ -260,3 +260,79 @@ def test_retag_library_walks_source_files(tmp_path, monkeypatch):
scanned, changed = mf.retag_library_from_path(str(root), dry_run=False)
assert (scanned, changed) == (1, 1)
assert visited == ["Daft Punk"]
# ---- bogus-tag recovery (old-code NA / Unknown breakage) ----
def test_is_bogus():
for v in ("", "NA", "na", "N/A", "Unknown", "Unknown Album", "unknown artist", " NA "):
assert mf._is_bogus(v) is True, v
for v in ("Cochise", "Solid", "Brother Stoon", "Discovery"):
assert mf._is_bogus(v) is False, v
def test_repair_file_overwrites_bogus_title(monkeypatch):
monkeypatch.setattr(mf, "run_yt_dlp_get_metadata",
lambda url, *a: {"artist": "Audioslave", "title": "Cochise",
"album": "Audioslave", "release_year": 2002})
audio = _FakeAudio({"artist": ["Audioslave"], "title": ["NA"]}) # bogus title
monkeypatch.setattr(mf, "_open_audio", lambda path: (audio, None))
changed = mf.repair_file(f"X/youtube/Brother Stoon [{YT_ID}].opus", "youtube", dry_run=False)
assert "title=Cochise" in changed
assert audio["title"] == ["Cochise"]
def test_repair_file_overwrites_bogus_artist(monkeypatch):
monkeypatch.setattr(mf, "run_yt_dlp_get_metadata",
lambda url, *a: {"artist": "Real Artist", "title": "Real Title",
"album": "Alb", "release_year": 2020})
audio = _FakeAudio({"artist": ["NA"], "title": ["Good Title"]}) # bogus artist, good title
monkeypatch.setattr(mf, "_open_audio", lambda path: (audio, None))
changed = mf.repair_file(f"X/youtube/Good Title [{YT_ID}].opus", "youtube", dry_run=False)
assert "artist=Real Artist" in changed
assert audio["artist"] == ["Real Artist"]
assert audio["title"] == ["Good Title"] # good title untouched
def test_repair_file_normalizes_na_album_when_source_has_none(monkeypatch):
# Music video: no source album/year, but album tag is the literal 'NA' -> Unknown Album.
monkeypatch.setattr(mf, "run_yt_dlp_get_metadata",
lambda url, *a: {"title": "Some Live Thing", "uploader": "Chan"})
audio = _FakeAudio({"artist": ["X"], "title": ["Y"], "album": ["NA"]})
monkeypatch.setattr(mf, "_open_audio", lambda path: (audio, None))
changed = mf.repair_file(f"X/youtube/Y [{YT_ID}].opus", "youtube", dry_run=False)
assert "album=Unknown Album" in changed
assert audio["album"] == ["Unknown Album"]
def test_repair_file_renames_bogus_filename(tmp_path, monkeypatch):
d = tmp_path / "Audioslave" / "youtube"
d.mkdir(parents=True)
f = d / f"NA [{YT_ID}].opus"
f.write_text("x")
monkeypatch.setattr(mf, "run_yt_dlp_get_metadata",
lambda url, *a: {"artist": "Audioslave", "title": "Cochise",
"album": "Audioslave", "release_year": 2002})
audio = _FakeAudio({"artist": ["Audioslave"], "title": ["NA"]})
monkeypatch.setattr(mf, "_open_audio", lambda path: (audio, None))
changed = mf.repair_file(str(f), "youtube", dry_run=False)
assert (d / f"Cochise [{YT_ID}].opus").exists()
assert not f.exists()
assert any("rename" in c.lower() or c.startswith("title=") for c in changed)
def test_repair_file_dry_run_does_not_rename(tmp_path, monkeypatch):
d = tmp_path / "Audioslave" / "youtube"
d.mkdir(parents=True)
f = d / f"NA [{YT_ID}].opus"
f.write_text("x")
monkeypatch.setattr(mf, "run_yt_dlp_get_metadata",
lambda url, *a: {"artist": "Audioslave", "title": "Cochise",
"album": "Audioslave", "release_year": 2002})
monkeypatch.setattr(mf, "_open_audio", lambda path: (_FakeAudio({"title": ["NA"]}), None))
mf.repair_file(str(f), "youtube", dry_run=True)
assert f.exists() # untouched in dry-run
assert not (d / f"Cochise [{YT_ID}].opus").exists()
def test_fs_safe_replaces_slash():
assert "/" not in mf._fs_safe("AC/DC Live")