diff --git a/musicfetch b/musicfetch index 5e3fbb3..1ecff2a 100755 --- a/musicfetch +++ b/musicfetch @@ -150,13 +150,15 @@ def _mb_artist_credit(credit) -> str: def musicbrainz_best_album(artist: str, track: str, timeout: int = 8) -> Optional[dict]: """Resolve 'artist - track' to its best studio album via MusicBrainz. - Returns {album_title, artist, year, rg_mbid} or None. Never raises.""" + Prefers a studio album credited to the track's own artist (not a Various + Artists compilation). Returns {album_title, artist, year, rg_mbid} or None. + Never raises.""" query = f'artist:"{artist}" AND recording:"{track}"' try: _mb_rate_limit() resp = requests.get( f"{MUSICBRAINZ_URL}/recording", - params={"query": query, "fmt": "json", "limit": 10}, + params={"query": query, "fmt": "json", "limit": 25}, headers=MB_HEADERS, timeout=timeout, ) resp.raise_for_status() @@ -165,7 +167,7 @@ def musicbrainz_best_album(artist: str, track: str, timeout: int = 8) -> Optiona dbg(f"MusicBrainz lookup failed: {e}") return None - # candidate = (is_studio, date_sortkey, title, artist, year, mbid) + # candidate = (own_studio, is_studio, date_sortkey, title, artist, year, mbid) candidates = [] for rec in data.get("recordings", []): rec_artist = _mb_artist_credit(rec.get("artist-credit")) @@ -177,15 +179,21 @@ def musicbrainz_best_album(artist: str, track: str, timeout: int = 8) -> Optiona mbid = rg.get("id") or "" primary = rg.get("primary-type") or "" secondary = rg.get("secondary-types") or [] + rel_artist = _mb_artist_credit(rel.get("artist-credit")) date = rel.get("date") or rg.get("first-release-date") or "" is_studio = primary == "Album" and not secondary - candidates.append((is_studio, date or "9999", title, rec_artist, date[:4], mbid)) + own_studio = is_studio and ( + not rel_artist or rel_artist.casefold() == rec_artist.casefold() + ) + candidates.append((own_studio, is_studio, date or "9999", title, rec_artist, date[:4], mbid)) if not candidates: return None - pool = [c for c in candidates if c[0]] or candidates - pool.sort(key=lambda c: c[1]) # earliest date first - _, _, title, art, year, mbid = pool[0] + pool = ([c for c in candidates if c[0]] + or [c for c in candidates if c[1]] + or candidates) + pool.sort(key=lambda c: c[2]) # earliest date first + _, _, _, title, art, year, mbid = pool[0] dbg(f"MusicBrainz resolved '{artist} - {track}' -> '{title}' ({year}) mbid={mbid}") return {"album_title": title, "artist": art or artist, "year": year, "rg_mbid": mbid} diff --git a/tests/test_musicbrainz.py b/tests/test_musicbrainz.py index 4cf3686..221b641 100644 --- a/tests/test_musicbrainz.py +++ b/tests/test_musicbrainz.py @@ -77,3 +77,20 @@ def test_first_artist_credit_only(monkeypatch): monkeypatch.setattr(mf.time, "sleep", lambda *_: None) out = mf.musicbrainz_best_album("SLVMLORD", "Under My Skin") assert out["artist"] == "SLVMLORD" + + +def test_prefers_own_artist_studio_over_various_artists(monkeypatch): + # A studio-looking VA compilation dated earlier must NOT beat the artist's own album. + payload = {"recordings": [{"artist-credit": [{"name": "Daft Punk"}], "releases": [ + {"date": "2001-10-26", "artist-credit": [{"name": "Various Artists"}], + "release-group": {"id": "va-mbid", "title": "All The Hits Now", + "primary-type": "Album", "secondary-types": []}}, + {"date": "2002", "artist-credit": [{"name": "Daft Punk"}], + "release-group": {"id": "48117b90-a16e-34ca-a514-19c702df1158", "title": "Discovery", + "primary-type": "Album", "secondary-types": []}}, + ]}]} + monkeypatch.setattr(mf.requests, "get", lambda *a, **k: _FakeResp(payload)) + monkeypatch.setattr(mf.time, "sleep", lambda *_: None) + out = mf.musicbrainz_best_album("Daft Punk", "Harder Better Faster Stronger") + assert out["album_title"] == "Discovery" + assert out["rg_mbid"] == "48117b90-a16e-34ca-a514-19c702df1158"