feat(lidarr): MusicBrainz track-to-album resolver

Add musicbrainz_best_album() that resolves an artist+track pair to its
best studio album via the MusicBrainz search API, with a 1 req/sec
courtesy rate-limiter. Prefers plain studio albums over compilations,
singles, and live releases; falls back to any release group when no
studio album is found. Never raises — returns None on any failure.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-06-08 23:17:43 -07:00
parent b0e3ce6e6c
commit babbd84fda
2 changed files with 143 additions and 0 deletions

View File

@@ -12,6 +12,7 @@ import os
import re
import subprocess
import sys
import time
from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass, field
from typing import Optional
@@ -126,6 +127,69 @@ def _artist_to_hit(artist: dict) -> Hit:
)
# Root of the MusicBrainz web service (version 2); endpoint names such as
# "/recording" are appended to it by the lookup code.
MUSICBRAINZ_URL: str = "https://musicbrainz.org/ws/2"

# HTTP headers sent with every MusicBrainz request. The User-Agent names this
# application so the service can attribute the traffic.
MB_HEADERS: dict = {
    "User-Agent": "musicfetch/2.0 (https://github.com/; personal music fetcher)",
}

# Timestamp of the most recent MusicBrainz request, maintained by
# _mb_rate_limit(). Starts at 0.0, i.e. "no request has been made yet".
_mb_last_call: float = 0.0
def _mb_rate_limit():
    """Courtesy ~1 req/sec to MusicBrainz.

    Sleeps for whatever remains of one second since the previous call, then
    records the current time in the module-level ``_mb_last_call``.

    The interval is measured with ``time.monotonic()`` rather than the wall
    clock: a wall-clock step backwards (NTP correction, manual change) would
    make the elapsed time negative and turn the courtesy pause into an
    arbitrarily long sleep.
    """
    global _mb_last_call
    elapsed = time.monotonic() - _mb_last_call
    if elapsed < 1.0:
        time.sleep(1.0 - elapsed)
    # Stamp *after* sleeping so the next caller measures from the moment the
    # request is actually about to be sent.
    _mb_last_call = time.monotonic()
def _mb_artist_credit(credit) -> str:
    """First credited artist name only (ignore featured/secondary).

    ``credit`` is expected to be a list of dicts. The name is taken from the
    first entry's ``"name"`` key, falling back to the ``"name"`` of its nested
    ``"artist"`` dict.

    Always returns a string: ``""`` is returned when ``credit`` is empty or
    not shaped as expected, or when the name found is null or not a string.
    """
    if not (isinstance(credit, list) and credit and isinstance(credit[0], dict)):
        return ""
    first = credit[0]
    name = first.get("name")
    if not name:
        nested = first.get("artist")
        # Guard the nested lookup: the "artist" value may be missing, null or
        # not a dict at all.
        name = nested.get("name") if isinstance(nested, dict) else None
    return name if isinstance(name, str) else ""
def _mb_escape(term) -> str:
    """Escape *term* for use inside a double-quoted Lucene phrase."""
    # Backslash first, otherwise the backslashes added for quotes get doubled.
    return str(term).replace("\\", "\\\\").replace('"', '\\"')


def _mb_candidates(data) -> list:
    """Flatten a recording-search response into candidate tuples.

    Each tuple is (is_studio, date_sortkey, title, artist, year, mbid).
    Recordings, releases and release groups that are not shaped as expected
    are skipped instead of aborting the whole lookup.
    """
    def dicts(value) -> list:
        # Only lists of dicts are meaningful here; anything else counts as empty.
        if not isinstance(value, list):
            return []
        return [item for item in value if isinstance(item, dict)]

    candidates = []
    for rec in dicts(data.get("recordings") if isinstance(data, dict) else None):
        rec_artist = _mb_artist_credit(rec.get("artist-credit"))
        for rel in dicts(rec.get("releases")):
            rg = rel.get("release-group")
            if not isinstance(rg, dict):
                rg = {}
            title = rg.get("title") or rel.get("title") or ""
            if not title:
                continue
            date = rel.get("date") or rg.get("first-release-date") or ""
            if not isinstance(date, str):
                date = ""
            # "Studio" = primary type Album with no secondary types
            # (so not a compilation, live album, etc.).
            is_studio = rg.get("primary-type") == "Album" and not rg.get("secondary-types")
            # Undated releases sort last via the "9999" key.
            candidates.append(
                (is_studio, date or "9999", title, rec_artist, date[:4], rg.get("id") or "")
            )
    return candidates


def musicbrainz_best_album(artist: str, track: str, timeout: int = 8) -> Optional[dict]:
    """Resolve 'artist - track' to its best studio album via MusicBrainz.

    Searches the ``/recording`` endpoint (up to 10 hits) and considers every
    release of every hit. Plain studio albums are preferred; when there are
    none, any titled release is eligible. Within the chosen pool the entry
    with the earliest date wins, with undated entries last.

    Args:
        artist: Artist name; quotes and backslashes are escaped for the query.
        track: Track title; escaped the same way.
        timeout: Timeout passed to ``requests.get``.

    Returns:
        ``{"album_title", "artist", "year", "rg_mbid"}`` or ``None``.
        ``artist`` is the first credited artist of the matched recording,
        falling back to the *artist* argument. ``year`` is the first four
        characters of the release date (``""`` if undated). ``rg_mbid`` is
        the release-group id (``""`` if absent).

    Never raises: request, HTTP, JSON and response-shape problems are logged
    through ``dbg`` and reported as ``None``.
    """
    # Unescaped quotes/backslashes in the input would terminate the quoted
    # phrase early and produce a malformed or different search query.
    query = f'artist:"{_mb_escape(artist)}" AND recording:"{_mb_escape(track)}"'
    try:
        _mb_rate_limit()
        resp = requests.get(
            f"{MUSICBRAINZ_URL}/recording",
            params={"query": query, "fmt": "json", "limit": 10},
            headers=MB_HEADERS, timeout=timeout,
        )
        resp.raise_for_status()
        data = resp.json()
    except Exception as e:  # noqa: BLE001 — degrade to fallback on any failure
        dbg(f"MusicBrainz lookup failed: {e}")
        return None

    try:
        candidates = _mb_candidates(data)
    except Exception as e:  # noqa: BLE001 — an odd payload must not break "never raises"
        dbg(f"MusicBrainz response parse failed: {e}")
        return None
    if not candidates:
        return None

    pool = [c for c in candidates if c[0]] or candidates
    # min() keeps the first of equally-dated entries, i.e. response order.
    _, _, title, art, year, mbid = min(pool, key=lambda c: c[1])
    dbg(f"MusicBrainz resolved '{artist} - {track}' -> '{title}' ({year}) mbid={mbid}")
    return {"album_title": title, "artist": art or artist, "year": year, "rg_mbid": mbid}
def _split_query(query: str) -> tuple[str, Optional[str]]:
"""Split a Shazam-style 'Artist - Track' on the first ' - '.
Returns (artist, track) or (term, None) when there is no separator."""

79
tests/test_musicbrainz.py Normal file
View File

@@ -0,0 +1,79 @@
import server.mf # noqa: F401
import musicfetch_core as mf
class _FakeResp:
    """Canned response object used in place of a real HTTP response.

    Offers only ``raise_for_status()`` and ``json()``.
    """

    def __init__(self, payload):
        # Stored as-is; json() hands back this very object.
        self._payload = payload

    def raise_for_status(self):
        """Do nothing, i.e. behave like a successful response."""
        return None

    def json(self):
        """Return the payload given to the constructor."""
        return self._payload
# Sample recording-search response: one recording credited to Daft Punk that
# appears on three releases — a single, a compilation and a plain album.
MB_PAYLOAD = {
    "recordings": [
        {
            "artist-credit": [{"name": "Daft Punk"}],
            "releases": [
                # Single release.
                {
                    "date": "2001",
                    "release-group": {
                        "id": "single-mbid",
                        "title": "Harder, Better, Faster, Stronger",
                        "primary-type": "Single",
                        "secondary-types": [],
                    },
                },
                # Album carrying the "Compilation" secondary type.
                {
                    "date": "2002",
                    "release-group": {
                        "id": "comp-mbid",
                        "title": "Musique, Vol. 1",
                        "primary-type": "Album",
                        "secondary-types": ["Compilation"],
                    },
                },
                # Album with no secondary types.
                {
                    "date": "2001",
                    "release-group": {
                        "id": "48117b90-a16e-34ca-a514-19c702df1158",
                        "title": "Discovery",
                        "primary-type": "Album",
                        "secondary-types": [],
                    },
                },
            ],
        },
    ],
}
def test_picks_studio_album_over_single_and_comp(monkeypatch):
    """With a single, a compilation and a plain album on offer, the album wins."""
    def fake_get(*args, **kwargs):
        return _FakeResp(MB_PAYLOAD)

    def no_sleep(*_):
        return None

    monkeypatch.setattr(mf.requests, "get", fake_get)
    monkeypatch.setattr(mf.time, "sleep", no_sleep)

    result = mf.musicbrainz_best_album("Daft Punk", "Harder Better Faster Stronger")

    expected = {
        "album_title": "Discovery",
        "artist": "Daft Punk",
        "year": "2001",
        "rg_mbid": "48117b90-a16e-34ca-a514-19c702df1158",
    }
    for key, want in expected.items():
        assert result[key] == want
def test_returns_none_on_empty(monkeypatch):
    """A response with no recordings resolves to None."""
    def fake_get(*args, **kwargs):
        return _FakeResp({"recordings": []})

    def no_sleep(*_):
        return None

    monkeypatch.setattr(mf.requests, "get", fake_get)
    monkeypatch.setattr(mf.time, "sleep", no_sleep)

    result = mf.musicbrainz_best_album("Nobody", "Nothing")
    assert result is None
def test_returns_none_on_exception(monkeypatch):
    """A request that raises is reported as None rather than propagating."""
    def raise_network_error(*args, **kwargs):
        raise mf.requests.exceptions.RequestException("network down")

    def no_sleep(*_):
        return None

    monkeypatch.setattr(mf.requests, "get", raise_network_error)
    monkeypatch.setattr(mf.time, "sleep", no_sleep)

    result = mf.musicbrainz_best_album("Daft Punk", "Discovery")
    assert result is None
def test_falls_back_to_any_releasegroup_when_no_studio(monkeypatch):
    """When the only release is a live album, that release is still returned."""
    live_release = {
        "date": "2010",
        "release-group": {
            "id": "live1",
            "title": "Live Thing",
            "primary-type": "Album",
            "secondary-types": ["Live"],
        },
    }
    payload = {
        "recordings": [
            {"artist-credit": [{"name": "X"}], "releases": [live_release]},
        ],
    }

    def fake_get(*args, **kwargs):
        return _FakeResp(payload)

    def no_sleep(*_):
        return None

    monkeypatch.setattr(mf.requests, "get", fake_get)
    monkeypatch.setattr(mf.time, "sleep", no_sleep)

    result = mf.musicbrainz_best_album("X", "Y")
    assert result["album_title"] == "Live Thing"
def test_first_artist_credit_only(monkeypatch):
    """Only the first of several credited artists is reported."""
    album_release = {
        "date": "2025",
        "release-group": {
            "id": "x",
            "title": "Album X",
            "primary-type": "Album",
            "secondary-types": [],
        },
    }
    payload = {
        "recordings": [
            {
                "artist-credit": [{"name": "SLVMLORD"}, {"name": "Travis Bradley"}],
                "releases": [album_release],
            },
        ],
    }

    def fake_get(*args, **kwargs):
        return _FakeResp(payload)

    def no_sleep(*_):
        return None

    monkeypatch.setattr(mf.requests, "get", fake_get)
    monkeypatch.setattr(mf.time, "sleep", no_sleep)

    result = mf.musicbrainz_best_album("SLVMLORD", "Under My Skin")
    assert result["artist"] == "SLVMLORD"