feat(lidarr): MusicBrainz track-to-album resolver
Add musicbrainz_best_album() that resolves an artist+track pair to its best studio album via the MusicBrainz search API, with a 1 req/sec courtesy rate-limiter. Prefers plain studio albums over compilations, singles, and live releases; falls back to any release group when no studio album is found. Never raises — returns None on any failure. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
64
musicfetch
64
musicfetch
@@ -12,6 +12,7 @@ import os
|
|||||||
import re
|
import re
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
|
import time
|
||||||
from concurrent.futures import ThreadPoolExecutor
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
@@ -126,6 +127,69 @@ def _artist_to_hit(artist: dict) -> Hit:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
MUSICBRAINZ_URL = "https://musicbrainz.org/ws/2"
|
||||||
|
MB_HEADERS = {"User-Agent": "musicfetch/2.0 (https://github.com/; personal music fetcher)"}
|
||||||
|
_mb_last_call = 0.0
|
||||||
|
|
||||||
|
|
||||||
|
def _mb_rate_limit():
|
||||||
|
"""Courtesy ~1 req/sec to MusicBrainz."""
|
||||||
|
global _mb_last_call
|
||||||
|
elapsed = time.time() - _mb_last_call
|
||||||
|
if elapsed < 1.0:
|
||||||
|
time.sleep(1.0 - elapsed)
|
||||||
|
_mb_last_call = time.time()
|
||||||
|
|
||||||
|
|
||||||
|
def _mb_artist_credit(credit) -> str:
|
||||||
|
"""First credited artist name only (ignore featured/secondary)."""
|
||||||
|
if credit and isinstance(credit, list) and isinstance(credit[0], dict):
|
||||||
|
return credit[0].get("name") or (credit[0].get("artist") or {}).get("name", "")
|
||||||
|
return ""
|
||||||
|
|
||||||
|
|
||||||
|
def _mb_phrase(term) -> str:
    """Escape *term* so it can sit inside a double-quoted Lucene phrase."""
    # Backslash first, otherwise the backslashes added for quotes get doubled.
    return str(term).replace("\\", "\\\\").replace('"', '\\"')


def _mb_candidates(data) -> list:
    """Flatten a recording-search response into candidate tuples.

    Each tuple is (is_studio, date_sortkey, title, artist, year, mbid).
    Entries that do not have the expected shape are skipped, not raised on.
    """
    recordings = data.get("recordings") if isinstance(data, dict) else None
    candidates = []
    for rec in recordings if isinstance(recordings, list) else []:
        if not isinstance(rec, dict):
            continue
        rec_artist = _mb_artist_credit(rec.get("artist-credit"))
        releases = rec.get("releases")
        for rel in releases if isinstance(releases, list) else []:
            if not isinstance(rel, dict):
                continue
            rg = rel.get("release-group")
            if not isinstance(rg, dict):
                rg = {}
            title = rg.get("title") or rel.get("title") or ""
            if not title or not isinstance(title, str):
                continue
            date = rel.get("date") or rg.get("first-release-date") or ""
            if not isinstance(date, str):
                date = ""
            # "Studio" = primary type Album with no secondary types
            # (so not a compilation, live album, soundtrack, ...).
            is_studio = rg.get("primary-type") == "Album" and not rg.get("secondary-types")
            # Undated releases sort after every dated one.
            candidates.append(
                (is_studio, date or "9999", title, rec_artist, date[:4], rg.get("id") or "")
            )
    return candidates


def musicbrainz_best_album(artist: str, track: str, timeout: int = 8) -> Optional[dict]:
    """Resolve 'artist - track' to its best studio album via MusicBrainz.

    Searches MusicBrainz recordings for the artist/track pair (after waiting
    on the courtesy rate limiter) and picks the earliest-dated release whose
    release group is a plain studio album. When no studio album is among the
    results, the earliest-dated release of any kind is used instead.

    Args:
        artist: Artist name; quotes and backslashes are escaped for the query.
        track: Track title; escaped the same way.
        timeout: Timeout in seconds passed to the HTTP request.

    Returns {album_title, artist, year, rg_mbid} or None. Never raises:
    request/HTTP/JSON errors and unexpectedly shaped responses all give None.
    ``artist`` in the result is the first credited artist of the matching
    recording, falling back to the *artist* argument; ``year`` and
    ``rg_mbid`` may be empty strings when MusicBrainz has no value.
    """
    query = f'artist:"{_mb_phrase(artist)}" AND recording:"{_mb_phrase(track)}"'
    try:
        _mb_rate_limit()
        resp = requests.get(
            f"{MUSICBRAINZ_URL}/recording",
            params={"query": query, "fmt": "json", "limit": 10},
            headers=MB_HEADERS, timeout=timeout,
        )
        resp.raise_for_status()
        data = resp.json()
    except Exception as e:  # noqa: BLE001 — degrade to fallback on any failure
        dbg(f"MusicBrainz lookup failed: {e}")
        return None

    candidates = _mb_candidates(data)
    if not candidates:
        return None
    pool = [c for c in candidates if c[0]] or candidates
    # Earliest date first; min() keeps the first of equally dated entries,
    # i.e. the one MusicBrainz ranked higher.
    _, _, title, art, year, mbid = min(pool, key=lambda c: c[1])
    dbg(f"MusicBrainz resolved '{artist} - {track}' -> '{title}' ({year}) mbid={mbid}")
    return {"album_title": title, "artist": art or artist, "year": year, "rg_mbid": mbid}
|
||||||
|
|
||||||
|
|
||||||
def _split_query(query: str) -> tuple[str, Optional[str]]:
|
def _split_query(query: str) -> tuple[str, Optional[str]]:
|
||||||
"""Split a Shazam-style 'Artist - Track' on the first ' - '.
|
"""Split a Shazam-style 'Artist - Track' on the first ' - '.
|
||||||
Returns (artist, track) or (term, None) when there is no separator."""
|
Returns (artist, track) or (term, None) when there is no separator."""
|
||||||
|
|||||||
79
tests/test_musicbrainz.py
Normal file
79
tests/test_musicbrainz.py
Normal file
@@ -0,0 +1,79 @@
|
|||||||
|
import server.mf # noqa: F401
|
||||||
|
import musicfetch_core as mf
|
||||||
|
|
||||||
|
|
||||||
|
class _FakeResp:
|
||||||
|
def __init__(self, payload):
|
||||||
|
self._payload = payload
|
||||||
|
def raise_for_status(self):
|
||||||
|
pass
|
||||||
|
def json(self):
|
||||||
|
return self._payload
|
||||||
|
|
||||||
|
|
||||||
|
def _mb_release(date, rg_id, title, primary_type, secondary_types=()):
    """Build one release entry (date plus nested release group) for a fake payload."""
    return {
        "date": date,
        "release-group": {
            "id": rg_id,
            "title": title,
            "primary-type": primary_type,
            "secondary-types": list(secondary_types),
        },
    }


# Canned search response: one Daft Punk recording that appears on a single,
# a compilation and a plain studio album.
MB_PAYLOAD = {
    "recordings": [
        {
            "artist-credit": [{"name": "Daft Punk"}],
            "releases": [
                _mb_release("2001", "single-mbid", "Harder, Better, Faster, Stronger", "Single"),
                _mb_release("2002", "comp-mbid", "Musique, Vol. 1", "Album", ["Compilation"]),
                _mb_release("2001", "48117b90-a16e-34ca-a514-19c702df1158", "Discovery", "Album"),
            ],
        }
    ]
}
|
||||||
|
|
||||||
|
|
||||||
|
def test_picks_studio_album_over_single_and_comp(monkeypatch):
    """The plain studio album wins over the single and the compilation."""

    def fake_get(*args, **kwargs):
        return _FakeResp(MB_PAYLOAD)

    def no_sleep(*_):
        return None

    monkeypatch.setattr(mf.requests, "get", fake_get)
    monkeypatch.setattr(mf.time, "sleep", no_sleep)

    result = mf.musicbrainz_best_album("Daft Punk", "Harder Better Faster Stronger")

    expected = {
        "album_title": "Discovery",
        "artist": "Daft Punk",
        "year": "2001",
        "rg_mbid": "48117b90-a16e-34ca-a514-19c702df1158",
    }
    # Compare only the keys under test, one lookup per key.
    assert {key: result[key] for key in expected} == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_returns_none_on_empty(monkeypatch):
    """A response with no recordings resolves to None."""
    empty_payload = {"recordings": []}

    def fake_get(*args, **kwargs):
        return _FakeResp(empty_payload)

    def no_sleep(*_):
        return None

    monkeypatch.setattr(mf.requests, "get", fake_get)
    monkeypatch.setattr(mf.time, "sleep", no_sleep)

    result = mf.musicbrainz_best_album("Nobody", "Nothing")
    assert result is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_returns_none_on_exception(monkeypatch):
    """A failing HTTP call is swallowed and reported as None."""

    def failing_get(*args, **kwargs):
        raise mf.requests.exceptions.RequestException("network down")

    def no_sleep(*_):
        return None

    monkeypatch.setattr(mf.requests, "get", failing_get)
    monkeypatch.setattr(mf.time, "sleep", no_sleep)

    result = mf.musicbrainz_best_album("Daft Punk", "Discovery")
    assert result is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_falls_back_to_any_releasegroup_when_no_studio(monkeypatch):
    """With only a live album on offer, that live album is returned."""
    live_release = {
        "date": "2010",
        "release-group": {
            "id": "live1",
            "title": "Live Thing",
            "primary-type": "Album",
            "secondary-types": ["Live"],
        },
    }
    recording = {"artist-credit": [{"name": "X"}], "releases": [live_release]}
    payload = {"recordings": [recording]}

    def fake_get(*args, **kwargs):
        return _FakeResp(payload)

    def no_sleep(*_):
        return None

    monkeypatch.setattr(mf.requests, "get", fake_get)
    monkeypatch.setattr(mf.time, "sleep", no_sleep)

    result = mf.musicbrainz_best_album("X", "Y")
    assert result["album_title"] == "Live Thing"
|
||||||
|
|
||||||
|
|
||||||
|
def test_first_artist_credit_only(monkeypatch):
    """Only the first credited artist ends up in the result."""
    credits = [{"name": "SLVMLORD"}, {"name": "Travis Bradley"}]
    release = {
        "date": "2025",
        "release-group": {
            "id": "x",
            "title": "Album X",
            "primary-type": "Album",
            "secondary-types": [],
        },
    }
    payload = {"recordings": [{"artist-credit": credits, "releases": [release]}]}

    def fake_get(*args, **kwargs):
        return _FakeResp(payload)

    def no_sleep(*_):
        return None

    monkeypatch.setattr(mf.requests, "get", fake_get)
    monkeypatch.setattr(mf.time, "sleep", no_sleep)

    result = mf.musicbrainz_best_album("SLVMLORD", "Under My Skin")
    assert result["artist"] == "SLVMLORD"
|
||||||
Reference in New Issue
Block a user