MUSICBRAINZ_URL = "https://musicbrainz.org/ws/2"
MB_HEADERS = {"User-Agent": "musicfetch/2.0 (https://github.com/; personal music fetcher)"}
# Monotonic-clock timestamp of the most recent MusicBrainz request.
_mb_last_call = 0.0


def _mb_rate_limit():
    """Courtesy ~1 req/sec to MusicBrainz.

    Sleeps for whatever is left of the one-second window since the previous
    call, then records the current time in the module-level ``_mb_last_call``.
    """
    global _mb_last_call
    # Monotonic clock: a wall-clock step backwards (NTP, manual change) must
    # not turn into a negative elapsed time and an arbitrarily long sleep.
    elapsed = time.monotonic() - _mb_last_call
    if elapsed < 1.0:
        time.sleep(1.0 - elapsed)
    _mb_last_call = time.monotonic()


def _mb_artist_credit(credit) -> str:
    """First credited artist name only (ignore featured/secondary).

    Returns "" when *credit* is not a non-empty list whose first item is a
    dict, or when no string name can be found on that first item.
    """
    if not (isinstance(credit, list) and credit and isinstance(credit[0], dict)):
        return ""
    first = credit[0]
    name = first.get("name")
    if not name:
        # Fall back to the nested artist object when the credit has no name.
        nested = first.get("artist")
        name = nested.get("name") if isinstance(nested, dict) else ""
    return name if isinstance(name, str) else ""


def _mb_phrase(text) -> str:
    """Escape *text* for use inside a double-quoted Lucene phrase."""
    # Backslash first, so the backslashes added for quotes are not doubled.
    return str(text).replace("\\", "\\\\").replace('"', '\\"')


def _mb_dicts(value) -> list:
    """Return the dict items of *value* if it is a list, else an empty list."""
    if not isinstance(value, list):
        return []
    return [item for item in value if isinstance(item, dict)]


def _mb_candidates(data) -> list:
    """Flatten a recording-search payload into candidate tuples.

    Each candidate is (is_studio, date_sortkey, title, artist, year, mbid).
    Malformed parts of the payload are skipped instead of raising.
    """
    candidates = []
    recordings = data.get("recordings") if isinstance(data, dict) else None
    for rec in _mb_dicts(recordings):
        rec_artist = _mb_artist_credit(rec.get("artist-credit"))
        for rel in _mb_dicts(rec.get("releases")):
            rg = rel.get("release-group")
            if not isinstance(rg, dict):
                rg = {}
            title = rg.get("title") or rel.get("title") or ""
            if not title:
                continue
            mbid = rg.get("id") or ""
            primary = rg.get("primary-type") or ""
            secondary = rg.get("secondary-types") or []
            date = rel.get("date") or rg.get("first-release-date") or ""
            if not isinstance(date, str):
                date = ""
            # "Studio" here means primary type Album with no secondary types
            # (so not Compilation, Live, ...).
            is_studio = primary == "Album" and not secondary
            # Undated releases get "9999" so they sort after every dated one.
            candidates.append((is_studio, date or "9999", title, rec_artist, date[:4], mbid))
    return candidates


def musicbrainz_best_album(artist: str, track: str, timeout: int = 8) -> Optional[dict]:
    """Resolve 'artist - track' to its best studio album via MusicBrainz.

    Runs a recording search and picks the earliest-dated release group among
    studio albums; when there are none, the earliest-dated release of any kind.

    Args:
        artist: Artist name; quotes and backslashes are escaped for the query.
        track: Recording title; escaped the same way.
        timeout: Timeout in seconds passed to the HTTP request.

    Returns:
        {album_title, artist, year, rg_mbid} or None when the lookup fails or
        yields no usable release. ``artist`` falls back to the *artist*
        argument when the recording has no credited name; ``year`` is the
        first four characters of the release date and may be "".

    Request, HTTP-status and JSON errors are caught and reported through
    ``dbg``; unexpected payload shapes are skipped rather than raised.
    """
    query = f'artist:"{_mb_phrase(artist)}" AND recording:"{_mb_phrase(track)}"'
    try:
        _mb_rate_limit()
        resp = requests.get(
            f"{MUSICBRAINZ_URL}/recording",
            params={"query": query, "fmt": "json", "limit": 10},
            headers=MB_HEADERS, timeout=timeout,
        )
        resp.raise_for_status()
        data = resp.json()
    except Exception as e:  # noqa: BLE001 — degrade to fallback on any failure
        dbg(f"MusicBrainz lookup failed: {e}")
        return None

    candidates = _mb_candidates(data)
    if not candidates:
        return None
    pool = [c for c in candidates if c[0]] or candidates
    # Earliest date wins; min() keeps the first of equally-dated candidates,
    # the same pick as a stable sort followed by taking element 0.
    _, _, title, art, year, mbid = min(pool, key=lambda c: c[1])
    dbg(f"MusicBrainz resolved '{artist} - {track}' -> '{title}' ({year}) mbid={mbid}")
    return {"album_title": title, "artist": art or artist, "year": year, "rg_mbid": mbid}
# NOTE(review): imported only for its side effects — presumably it makes
# `musicfetch_core` importable; confirm against server/mf.
import server.mf  # noqa: F401
import musicfetch_core as mf


class _FakeResp:
    """Stand-in for an HTTP response that serves a canned JSON payload."""

    def __init__(self, payload):
        self._payload = payload

    def raise_for_status(self):
        # Behaves like a successful response: nothing to raise.
        return None

    def json(self):
        return self._payload


# One recording whose releases cover a single, a compilation and a studio album.
_SINGLE_RELEASE = {
    "date": "2001",
    "release-group": {
        "id": "single-mbid",
        "title": "Harder, Better, Faster, Stronger",
        "primary-type": "Single",
        "secondary-types": [],
    },
}
_COMPILATION_RELEASE = {
    "date": "2002",
    "release-group": {
        "id": "comp-mbid",
        "title": "Musique, Vol. 1",
        "primary-type": "Album",
        "secondary-types": ["Compilation"],
    },
}
_STUDIO_RELEASE = {
    "date": "2001",
    "release-group": {
        "id": "48117b90-a16e-34ca-a514-19c702df1158",
        "title": "Discovery",
        "primary-type": "Album",
        "secondary-types": [],
    },
}

MB_PAYLOAD = {
    "recordings": [
        {
            "artist-credit": [{"name": "Daft Punk"}],
            "releases": [_SINGLE_RELEASE, _COMPILATION_RELEASE, _STUDIO_RELEASE],
        }
    ]
}


def _use_fake_get(monkeypatch, fake_get):
    """Route `requests.get` to *fake_get* and turn the rate-limit sleep into a no-op."""
    monkeypatch.setattr(mf.requests, "get", fake_get)
    monkeypatch.setattr(mf.time, "sleep", lambda *_: None)


def _serve(monkeypatch, payload):
    """Make every `requests.get` call answer with *payload*."""
    _use_fake_get(monkeypatch, lambda *a, **k: _FakeResp(payload))


def test_picks_studio_album_over_single_and_comp(monkeypatch):
    _serve(monkeypatch, MB_PAYLOAD)
    result = mf.musicbrainz_best_album("Daft Punk", "Harder Better Faster Stronger")
    expected = {
        "album_title": "Discovery",
        "artist": "Daft Punk",
        "year": "2001",
        "rg_mbid": "48117b90-a16e-34ca-a514-19c702df1158",
    }
    # Compare only the keys listed above, one lookup per key.
    assert {key: result[key] for key in expected} == expected


def test_returns_none_on_empty(monkeypatch):
    _serve(monkeypatch, {"recordings": []})
    result = mf.musicbrainz_best_album("Nobody", "Nothing")
    assert result is None


def test_returns_none_on_exception(monkeypatch):
    def boom(*a, **k):
        raise mf.requests.exceptions.RequestException("network down")

    _use_fake_get(monkeypatch, boom)
    result = mf.musicbrainz_best_album("Daft Punk", "Discovery")
    assert result is None


def test_falls_back_to_any_releasegroup_when_no_studio(monkeypatch):
    live_release = {
        "date": "2010",
        "release-group": {
            "id": "live1",
            "title": "Live Thing",
            "primary-type": "Album",
            "secondary-types": ["Live"],
        },
    }
    recording = {"artist-credit": [{"name": "X"}], "releases": [live_release]}
    _serve(monkeypatch, {"recordings": [recording]})
    result = mf.musicbrainz_best_album("X", "Y")
    assert result["album_title"] == "Live Thing"


def test_first_artist_credit_only(monkeypatch):
    album_release = {
        "date": "2025",
        "release-group": {
            "id": "x",
            "title": "Album X",
            "primary-type": "Album",
            "secondary-types": [],
        },
    }
    recording = {
        "artist-credit": [{"name": "SLVMLORD"}, {"name": "Travis Bradley"}],
        "releases": [album_release],
    }
    _serve(monkeypatch, {"recordings": [recording]})
    result = mf.musicbrainz_best_album("SLVMLORD", "Under My Skin")
    assert result["artist"] == "SLVMLORD"