feat(lidarr): MusicBrainz track-to-album resolver

Add musicbrainz_best_album() that resolves an artist+track pair to its
best studio album via the MusicBrainz search API, with a 1 req/sec
courtesy rate-limiter. Prefers plain studio albums over compilations,
singles, and live releases; falls back to any release group when no
studio album is found. Never raises — returns None on any failure.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-06-08 23:17:43 -07:00
parent b0e3ce6e6c
commit babbd84fda
2 changed files with 143 additions and 0 deletions

View File

@@ -12,6 +12,7 @@ import os
import re import re
import subprocess import subprocess
import sys import sys
import time
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Optional from typing import Optional
@@ -126,6 +127,69 @@ def _artist_to_hit(artist: dict) -> Hit:
) )
# Root of the MusicBrainz web service; endpoint paths are appended to it.
MUSICBRAINZ_URL = "https://musicbrainz.org/ws/2"
# Identifying User-Agent passed along with MusicBrainz requests.
MB_HEADERS = {
    "User-Agent": "musicfetch/2.0 (https://github.com/; personal music fetcher)",
}
# Timestamp of the most recent MusicBrainz call, maintained by _mb_rate_limit().
_mb_last_call = 0.0
def _mb_rate_limit():
    """Courtesy ~1 req/sec to MusicBrainz.

    Sleeps just long enough that at least one second separates this call
    from the previous one, then records the new call time in the
    module-level ``_mb_last_call``.

    The interval is measured with the monotonic clock, and the wait is
    clamped to [0, 1] seconds, so a wall-clock adjustment or a stale
    timestamp can never turn the courtesy pause into a long stall.
    """
    global _mb_last_call
    elapsed = time.monotonic() - _mb_last_call
    # Clamp: a negative or bogus elapsed value must not produce a sleep
    # longer than the one-second budget.
    wait = min(max(1.0 - elapsed, 0.0), 1.0)
    if wait > 0.0:
        time.sleep(wait)
    _mb_last_call = time.monotonic()
def _mb_artist_credit(credit) -> str:
    """First credited artist name only (ignore featured/secondary).

    Args:
        credit: The ``artist-credit`` value of a recording; expected to be a
            list of dicts, but any value is tolerated.

    Returns:
        The ``name`` of the first credit entry, falling back to the name of
        its nested ``artist`` dict. Returns "" when the input is not a
        non-empty list of dicts or no string name is present. Never raises
        and never returns None.
    """
    if not isinstance(credit, list) or not credit:
        return ""
    first = credit[0]
    if not isinstance(first, dict):
        return ""
    name = first.get("name")
    if not name:
        # Fall back to the nested artist object; guard against it being
        # null or some non-dict value.
        nested = first.get("artist")
        name = nested.get("name") if isinstance(nested, dict) else None
    return name if isinstance(name, str) else ""
def musicbrainz_best_album(artist: str, track: str, timeout: int = 8) -> Optional[dict]:
    """Resolve 'artist - track' to its best studio album via MusicBrainz.

    Queries the MusicBrainz recording search endpoint for the pair and
    considers every release group the returned recordings appear on. Plain
    studio albums (primary type "Album" with no secondary types) are
    preferred; when none is found, any release group is eligible. Within the
    chosen pool the earliest-dated entry wins, with undated entries last.

    Args:
        artist: Artist name to search for.
        track: Recording (track) title to search for.
        timeout: Timeout in seconds passed to the HTTP request.

    Returns:
        {album_title, artist, year, rg_mbid} or None when the request fails,
        the response is malformed, or nothing matches. Never raises.
    """

    def _phrase(term) -> str:
        # Inside a quoted Lucene phrase, backslash and double quote must be
        # escaped or a name containing them breaks the query.
        return str(term).replace("\\", "\\\\").replace('"', '\\"')

    query = f'artist:"{_phrase(artist)}" AND recording:"{_phrase(track)}"'
    try:
        _mb_rate_limit()
        resp = requests.get(
            f"{MUSICBRAINZ_URL}/recording",
            params={"query": query, "fmt": "json", "limit": 10},
            headers=MB_HEADERS, timeout=timeout,
        )
        resp.raise_for_status()
        data = resp.json()
    except Exception as e:  # noqa: BLE001 — degrade to fallback on any failure
        dbg(f"MusicBrainz lookup failed: {e}")
        return None

    # Parsing is guarded as well: an unexpected payload shape (null lists,
    # non-dict entries, non-string dates) must not break the "never raises"
    # contract.
    try:
        # candidate = (is_studio, date_sortkey, title, artist, year, mbid)
        candidates = []
        for rec in data.get("recordings") or []:
            rec_artist = _mb_artist_credit(rec.get("artist-credit"))
            for rel in rec.get("releases") or []:
                rg = rel.get("release-group") or {}
                title = rg.get("title") or rel.get("title") or ""
                if not title:
                    continue
                mbid = rg.get("id") or ""
                primary = rg.get("primary-type") or ""
                secondary = rg.get("secondary-types") or []
                date = str(rel.get("date") or rg.get("first-release-date") or "")
                is_studio = primary == "Album" and not secondary
                # "9999" pushes undated releases behind every dated one.
                candidates.append((is_studio, date or "9999", title, rec_artist, date[:4], mbid))
        if not candidates:
            return None
        pool = [c for c in candidates if c[0]] or candidates
        # min() returns the first minimal entry, i.e. the earliest date with
        # ties broken by response order.
        _, _, title, art, year, mbid = min(pool, key=lambda c: c[1])
    except Exception as e:  # noqa: BLE001 — malformed payload degrades to None
        dbg(f"MusicBrainz response malformed: {e}")
        return None

    dbg(f"MusicBrainz resolved '{artist} - {track}' -> '{title}' ({year}) mbid={mbid}")
    return {"album_title": title, "artist": art or artist, "year": year, "rg_mbid": mbid}
def _split_query(query: str) -> tuple[str, Optional[str]]: def _split_query(query: str) -> tuple[str, Optional[str]]:
"""Split a Shazam-style 'Artist - Track' on the first ' - '. """Split a Shazam-style 'Artist - Track' on the first ' - '.
Returns (artist, track) or (term, None) when there is no separator.""" Returns (artist, track) or (term, None) when there is no separator."""

79
tests/test_musicbrainz.py Normal file
View File

@@ -0,0 +1,79 @@
import server.mf # noqa: F401
import musicfetch_core as mf
class _FakeResp:
    """Minimal stand-in for an HTTP response that serves a canned JSON payload."""

    def __init__(self, payload):
        # Keep the canned body so json() can hand it back unchanged.
        self._payload = payload

    def raise_for_status(self):
        """Pretend the status was fine: does nothing and never raises."""
        return None

    def json(self):
        """Return the payload supplied at construction time."""
        return self._payload
# Canned recording-search response: one recording that appears on a single,
# a compilation and a plain studio album (in that order).
MB_PAYLOAD = {
    "recordings": [
        {
            "artist-credit": [{"name": "Daft Punk"}],
            "releases": [
                # Single release group.
                {
                    "date": "2001",
                    "release-group": {
                        "id": "single-mbid",
                        "title": "Harder, Better, Faster, Stronger",
                        "primary-type": "Single",
                        "secondary-types": [],
                    },
                },
                # Album with a "Compilation" secondary type.
                {
                    "date": "2002",
                    "release-group": {
                        "id": "comp-mbid",
                        "title": "Musique, Vol. 1",
                        "primary-type": "Album",
                        "secondary-types": ["Compilation"],
                    },
                },
                # Album with no secondary types.
                {
                    "date": "2001",
                    "release-group": {
                        "id": "48117b90-a16e-34ca-a514-19c702df1158",
                        "title": "Discovery",
                        "primary-type": "Album",
                        "secondary-types": [],
                    },
                },
            ],
        },
    ],
}
def test_picks_studio_album_over_single_and_comp(monkeypatch):
    """The plain studio album is chosen over the single and the compilation."""

    def fake_get(*args, **kwargs):
        return _FakeResp(MB_PAYLOAD)

    def no_sleep(*_):
        return None

    monkeypatch.setattr(mf.requests, "get", fake_get)
    monkeypatch.setattr(mf.time, "sleep", no_sleep)

    album = mf.musicbrainz_best_album("Daft Punk", "Harder Better Faster Stronger")

    assert album["album_title"] == "Discovery"
    assert album["artist"] == "Daft Punk"
    assert album["year"] == "2001"
    assert album["rg_mbid"] == "48117b90-a16e-34ca-a514-19c702df1158"
def test_returns_none_on_empty(monkeypatch):
    """A response with no recordings resolves to None."""

    def fake_get(*args, **kwargs):
        return _FakeResp({"recordings": []})

    def no_sleep(*_):
        return None

    monkeypatch.setattr(mf.requests, "get", fake_get)
    monkeypatch.setattr(mf.time, "sleep", no_sleep)

    resolved = mf.musicbrainz_best_album("Nobody", "Nothing")
    assert resolved is None
def test_returns_none_on_exception(monkeypatch):
    """A request-level failure is swallowed and reported as None."""

    def failing_get(*args, **kwargs):
        raise mf.requests.exceptions.RequestException("network down")

    def no_sleep(*_):
        return None

    monkeypatch.setattr(mf.requests, "get", failing_get)
    monkeypatch.setattr(mf.time, "sleep", no_sleep)

    resolved = mf.musicbrainz_best_album("Daft Punk", "Discovery")
    assert resolved is None
def test_falls_back_to_any_releasegroup_when_no_studio(monkeypatch):
    """With only a live album available, that release group is still returned."""
    live_release = {
        "date": "2010",
        "release-group": {
            "id": "live1",
            "title": "Live Thing",
            "primary-type": "Album",
            "secondary-types": ["Live"],
        },
    }
    recording = {"artist-credit": [{"name": "X"}], "releases": [live_release]}
    payload = {"recordings": [recording]}

    def fake_get(*args, **kwargs):
        return _FakeResp(payload)

    def no_sleep(*_):
        return None

    monkeypatch.setattr(mf.requests, "get", fake_get)
    monkeypatch.setattr(mf.time, "sleep", no_sleep)

    album = mf.musicbrainz_best_album("X", "Y")
    assert album["album_title"] == "Live Thing"
def test_first_artist_credit_only(monkeypatch):
    """Only the first credited artist is reported, not the secondary credit."""
    release = {
        "date": "2025",
        "release-group": {
            "id": "x",
            "title": "Album X",
            "primary-type": "Album",
            "secondary-types": [],
        },
    }
    recording = {
        "artist-credit": [{"name": "SLVMLORD"}, {"name": "Travis Bradley"}],
        "releases": [release],
    }
    payload = {"recordings": [recording]}

    def fake_get(*args, **kwargs):
        return _FakeResp(payload)

    def no_sleep(*_):
        return None

    monkeypatch.setattr(mf.requests, "get", fake_get)
    monkeypatch.setattr(mf.time, "sleep", no_sleep)

    album = mf.musicbrainz_best_album("SLVMLORD", "Under My Skin")
    assert album["artist"] == "SLVMLORD"