feat(lidarr): MusicBrainz track-to-album resolver
Add musicbrainz_best_album() that resolves an artist+track pair to its best studio album via the MusicBrainz search API, with a 1 req/sec courtesy rate-limiter. Prefers plain studio albums over compilations, singles, and live releases; falls back to any release group when no studio album is found. Never raises — returns None on any failure. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
64
musicfetch
64
musicfetch
@@ -12,6 +12,7 @@ import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Optional
|
||||
@@ -126,6 +127,69 @@ def _artist_to_hit(artist: dict) -> Hit:
|
||||
)
|
||||
|
||||
|
||||
# Base URL of the MusicBrainz web service; endpoint names are appended to it.
MUSICBRAINZ_URL = "https://musicbrainz.org/ws/2"

# Headers sent with every MusicBrainz request so the client identifies itself.
# NOTE(review): the contact URL looks like a placeholder — confirm it is intended.
MB_HEADERS = {
    "User-Agent": "musicfetch/2.0 (https://github.com/; personal music fetcher)",
}

# Time stamp of the most recent MusicBrainz call, maintained by
# _mb_rate_limit(); 0.0 until the first call has been made.
_mb_last_call = 0.0
|
||||
|
||||
|
||||
def _mb_rate_limit():
    """Pause so consecutive MusicBrainz calls are at least ~1 second apart.

    Compares the current time with the module-level ``_mb_last_call`` stamp,
    sleeps for whatever is left of the one-second window, then records a new
    stamp.

    The interval is measured with ``time.monotonic()`` instead of
    ``time.time()``: a wall-clock step backwards (NTP correction, manual
    change) would otherwise yield a negative elapsed time and an arbitrarily
    long sleep.
    """
    global _mb_last_call
    remaining = 1.0 - (time.monotonic() - _mb_last_call)
    if remaining > 0:
        # Never wait longer than the window itself, even if the stored stamp
        # is somehow ahead of the current monotonic reading.
        time.sleep(min(remaining, 1.0))
    _mb_last_call = time.monotonic()
|
||||
|
||||
|
||||
def _mb_artist_credit(credit) -> str:
    """Return the name of the first credited artist, or "" when unavailable.

    Only the first entry of *credit* is consulted, so featured or secondary
    artists are ignored. The entry's own ``"name"`` is preferred; the nested
    ``"artist"`` object's ``"name"`` is the fallback.

    Always returns a string: malformed input (not a list, empty list,
    non-dict entry, null or non-string names) yields "" instead of ``None``
    or an exception.
    """
    if not (isinstance(credit, list) and credit and isinstance(credit[0], dict)):
        return ""
    first = credit[0]
    name = first.get("name")
    if not name:
        nested = first.get("artist")
        name = nested.get("name") if isinstance(nested, dict) else None
    return name if isinstance(name, str) else ""
|
||||
|
||||
|
||||
def _mb_quote(term) -> str:
    """Escape *term* so it can sit inside a double-quoted Lucene phrase."""
    # Backslash first, otherwise the escapes added for quotes get re-escaped.
    return str(term).replace("\\", "\\\\").replace('"', '\\"')


def _mb_pick_release_group(data) -> Optional[tuple]:
    """Pick the best (title, artist, year, mbid) from a recording-search payload.

    Returns None when the payload holds no usable release. Entries of an
    unexpected type are skipped rather than raising.
    """
    if not isinstance(data, dict):
        return None

    # candidate = (is_studio, date_sortkey, title, artist, year, mbid)
    candidates = []
    for rec in data.get("recordings") or []:
        if not isinstance(rec, dict):
            continue
        rec_artist = _mb_artist_credit(rec.get("artist-credit"))
        for rel in rec.get("releases") or []:
            if not isinstance(rel, dict):
                continue
            rg = rel.get("release-group")
            if not isinstance(rg, dict):
                rg = {}
            title = rg.get("title") or rel.get("title") or ""
            if not title:
                continue
            date = rel.get("date") or rg.get("first-release-date") or ""
            if not isinstance(date, str):
                date = ""
            # "Studio" = primary type Album with no secondary types
            # (so no Compilation, Live, ...).
            is_studio = (rg.get("primary-type") or "") == "Album" and not rg.get("secondary-types")
            # Undated releases get the sort key "9999" so they order last.
            candidates.append(
                (is_studio, date or "9999", title, rec_artist, date[:4], rg.get("id") or "")
            )

    if not candidates:
        return None
    # Prefer studio albums; fall back to every candidate when there are none.
    pool = [c for c in candidates if c[0]] or candidates
    # Earliest date wins; min() keeps the first candidate on ties, matching a
    # stable sort followed by taking element 0.
    best = min(pool, key=lambda c: c[1])
    return best[2:]


def musicbrainz_best_album(artist: str, track: str, timeout: int = 8) -> Optional[dict]:
    """Resolve 'artist - track' to its best studio album via MusicBrainz.

    Searches the MusicBrainz ``recording`` endpoint (after the courtesy rate
    limiter) and picks, among the releases of the returned recordings, the
    earliest-dated plain studio album; when none exists, the earliest-dated
    release of any kind.

    Args:
        artist: Artist name; quotes and backslashes are escaped for the query.
        track: Track title; escaped the same way.
        timeout: Timeout in seconds passed to ``requests.get``.

    Returns:
        ``{"album_title", "artist", "year", "rg_mbid"}`` or None. ``artist``
        falls back to the *artist* argument when the payload names none,
        ``year`` is the first four characters of the release date ("" if
        undated) and ``rg_mbid`` is "" when the release group has no id.

    Never raises: network errors, HTTP errors, invalid JSON and malformed
    payloads are all logged through ``dbg`` and reported as None.
    """
    try:
        query = f'artist:"{_mb_quote(artist)}" AND recording:"{_mb_quote(track)}"'
        _mb_rate_limit()
        resp = requests.get(
            f"{MUSICBRAINZ_URL}/recording",
            params={"query": query, "fmt": "json", "limit": 10},
            headers=MB_HEADERS, timeout=timeout,
        )
        resp.raise_for_status()
        # Parsing stays under the same catch-all so an unexpected payload
        # shape cannot break the "never raises" contract.
        best = _mb_pick_release_group(resp.json())
    except Exception as e:  # noqa: BLE001 — degrade to fallback on any failure
        dbg(f"MusicBrainz lookup failed: {e}")
        return None

    if best is None:
        return None
    title, art, year, mbid = best
    dbg(f"MusicBrainz resolved '{artist} - {track}' -> '{title}' ({year}) mbid={mbid}")
    return {"album_title": title, "artist": art or artist, "year": year, "rg_mbid": mbid}
|
||||
|
||||
|
||||
def _split_query(query: str) -> tuple[str, Optional[str]]:
|
||||
"""Split a Shazam-style 'Artist - Track' on the first ' - '.
|
||||
Returns (artist, track) or (term, None) when there is no separator."""
|
||||
|
||||
79
tests/test_musicbrainz.py
Normal file
79
tests/test_musicbrainz.py
Normal file
@@ -0,0 +1,79 @@
|
||||
import server.mf # noqa: F401
|
||||
import musicfetch_core as mf
|
||||
|
||||
|
||||
class _FakeResp:
    """Minimal stand-in for an HTTP response object that always succeeds."""

    def __init__(self, payload):
        # Object handed back unchanged by json().
        self._data = payload

    def raise_for_status(self):
        """Do nothing: this fake never represents an HTTP error."""
        return None

    def json(self):
        """Return the payload supplied at construction."""
        return self._data
|
||||
|
||||
|
||||
# Canned recording-search payload: one recording that appears on a single,
# a compilation and a plain studio album.
MB_PAYLOAD = {
    "recordings": [
        {
            "artist-credit": [{"name": "Daft Punk"}],
            "releases": [
                # Single: same year as the album, but not primary type Album.
                {
                    "date": "2001",
                    "release-group": {
                        "id": "single-mbid",
                        "title": "Harder, Better, Faster, Stronger",
                        "primary-type": "Single",
                        "secondary-types": [],
                    },
                },
                # Compilation: primary type Album, but has a secondary type.
                {
                    "date": "2002",
                    "release-group": {
                        "id": "comp-mbid",
                        "title": "Musique, Vol. 1",
                        "primary-type": "Album",
                        "secondary-types": ["Compilation"],
                    },
                },
                # Plain studio album: Album with no secondary types.
                {
                    "date": "2001",
                    "release-group": {
                        "id": "48117b90-a16e-34ca-a514-19c702df1158",
                        "title": "Discovery",
                        "primary-type": "Album",
                        "secondary-types": [],
                    },
                },
            ],
        },
    ],
}
|
||||
|
||||
|
||||
def test_picks_studio_album_over_single_and_comp(monkeypatch):
    """The plain studio album wins over the single and the compilation."""

    def fake_get(*args, **kwargs):
        return _FakeResp(MB_PAYLOAD)

    # No real waiting and no real network during the test.
    monkeypatch.setattr(mf.time, "sleep", lambda *_: None)
    monkeypatch.setattr(mf.requests, "get", fake_get)

    album = mf.musicbrainz_best_album("Daft Punk", "Harder Better Faster Stronger")

    assert album["album_title"] == "Discovery"
    assert album["artist"] == "Daft Punk"
    assert album["year"] == "2001"
    assert album["rg_mbid"] == "48117b90-a16e-34ca-a514-19c702df1158"
|
||||
|
||||
|
||||
def test_returns_none_on_empty(monkeypatch):
    """A search that returns no recordings resolves to None."""

    def fake_get(*args, **kwargs):
        return _FakeResp({"recordings": []})

    monkeypatch.setattr(mf.time, "sleep", lambda *_: None)
    monkeypatch.setattr(mf.requests, "get", fake_get)

    result = mf.musicbrainz_best_album("Nobody", "Nothing")
    assert result is None
|
||||
|
||||
|
||||
def test_returns_none_on_exception(monkeypatch):
    """A failing HTTP call is swallowed and reported as None."""

    def raise_network_error(*args, **kwargs):
        raise mf.requests.exceptions.RequestException("network down")

    monkeypatch.setattr(mf.time, "sleep", lambda *_: None)
    monkeypatch.setattr(mf.requests, "get", raise_network_error)

    result = mf.musicbrainz_best_album("Daft Punk", "Discovery")
    assert result is None
|
||||
|
||||
|
||||
def test_falls_back_to_any_releasegroup_when_no_studio(monkeypatch):
    """With no plain studio album available, any release group is accepted."""
    live_release = {
        "date": "2010",
        "release-group": {
            "id": "live1",
            "title": "Live Thing",
            "primary-type": "Album",
            "secondary-types": ["Live"],
        },
    }
    payload = {
        "recordings": [
            {"artist-credit": [{"name": "X"}], "releases": [live_release]},
        ],
    }

    def fake_get(*args, **kwargs):
        return _FakeResp(payload)

    monkeypatch.setattr(mf.time, "sleep", lambda *_: None)
    monkeypatch.setattr(mf.requests, "get", fake_get)

    album = mf.musicbrainz_best_album("X", "Y")
    assert album["album_title"] == "Live Thing"
|
||||
|
||||
|
||||
def test_first_artist_credit_only(monkeypatch):
    """Only the first credited artist is reported; later credits are ignored."""
    studio_release = {
        "date": "2025",
        "release-group": {
            "id": "x",
            "title": "Album X",
            "primary-type": "Album",
            "secondary-types": [],
        },
    }
    payload = {
        "recordings": [
            {
                "artist-credit": [{"name": "SLVMLORD"}, {"name": "Travis Bradley"}],
                "releases": [studio_release],
            },
        ],
    }

    def fake_get(*args, **kwargs):
        return _FakeResp(payload)

    monkeypatch.setattr(mf.time, "sleep", lambda *_: None)
    monkeypatch.setattr(mf.requests, "get", fake_get)

    album = mf.musicbrainz_best_album("SLVMLORD", "Under My Skin")
    assert album["artist"] == "SLVMLORD"
|
||||
Reference in New Issue
Block a user