From f103b6c253abefb795f6799900daf4f97634e462 Mon Sep 17 00:00:00 2001 From: zebra Date: Tue, 9 Jun 2026 00:25:58 -0700 Subject: [PATCH 1/2] feat: multi-platform URL & playlist support via yt-dlp probe Generalize URL handling beyond YouTube to any yt-dlp-supported site (SoundCloud, Bandcamp, etc), single tracks and playlists/sets/albums. - probe_url(): one yt-dlp --flat-playlist probe classifies playlist vs track and returns per-entry Hits; YouTube playlists still use ytmusicapi. - _track_url(): YouTube tracks keep the music.youtube album-art URL; other platforms download via their native entry URL (no more videoId reconstruction). - Per-source folders: /// (soundcloud/bandcamp/youtube) instead of hardcoded youtube; download_single derives source from metadata. - download_hits() downloads pre-probed Hits; API probes once and passes hits into the job closure. Replaces YouTube-only is_playlist_url/expand_playlist. Co-Authored-By: Claude Opus 4.8 --- musicfetch | 129 +++++++++++++++++++++++------------ server/actions.py | 10 +-- server/app.py | 8 +-- server/mf.py | 6 +- tests/test_api_url.py | 22 ++++-- tests/test_mf_url_exports.py | 4 +- tests/test_multiplatform.py | 79 +++++++++++++++++++++ tests/test_playlist.py | 104 ++++++++++++++++++---------- 8 files changed, 260 insertions(+), 102 deletions(-) create mode 100644 tests/test_multiplatform.py diff --git a/musicfetch b/musicfetch index 05ef3a8..56e1a4e 100755 --- a/musicfetch +++ b/musicfetch @@ -625,12 +625,32 @@ def yt_download(url_or_query: str, target_folder: str, quality: str, dry_run: bo return subprocess.run(cmd).returncode == 0 +def _sanitize_source(name: str) -> str: + """Normalize a yt-dlp extractor key to a folder name ('Youtube'->'youtube').""" + clean = re.sub(r"[^a-z0-9]+", "", (name or "").lower()) + return clean or "downloads" + + +def _track_url(hit: Hit) -> str: + """Resolve the best download URL for a track Hit. YouTube tracks prefer the + music.youtube URL (correct album art); other platforms use their own URL.""" + p = hit.payload + extractor = p.get("extractor") + vid = p.get("videoId") + if vid and extractor in (None, "youtube"): + return f"https://music.youtube.com/watch?v={vid}" + if p.get("url"): + return p["url"] + if vid: + return f"https://music.youtube.com/watch?v={vid}" + return f"ytsearch1:{hit.artist} {hit.title}" + + def act_youtube(hit: Hit, root: str, quality: str, dry_run: bool): - vid = hit.payload.get("videoId") - # Prefer YouTube Music URL for correct album art / topic metadata. - url = f"https://music.youtube.com/watch?v={vid}" if vid else f"ytsearch1:{hit.artist} {hit.title}" + url = _track_url(hit) artist_dir = hit.artist.split(",")[0].strip() or "Unknown Artist" - target = os.path.join(root, artist_dir, "youtube") + source = hit.payload.get("extractor") or "youtube" + target = os.path.join(root, artist_dir, source) return yt_download(url, target, quality, dry_run, hit=hit) @@ -641,60 +661,77 @@ def _playlist_id(url: str) -> str: return parse_qs(urlparse(url).query).get("list", [""])[0] -def is_playlist_url(url: str) -> bool: - """True for a pure playlist URL (/playlist?list=… or list= without v=). +def _is_youtube_playlist_url(url: str) -> bool: + """True for a YouTube playlist URL (/playlist?list=… or list= without v=). A watch?v=…&list=… URL is treated as a single track, not a batch.""" if not is_url(url): return False parsed = urlparse(url) + if "youtube" not in parsed.netloc: + return False qs = parse_qs(parsed.query) if "/playlist" in parsed.path: return True return "list" in qs and "v" not in qs -def expand_playlist(url: str) -> tuple[str, list[Hit]]: - """Return (playlist_title, [track Hits]). Prefer ytmusicapi; fall back to - yt-dlp --flat-playlist. Returns ("", []) on failure.""" - pid = _playlist_id(url) - if YTMusic is not None and pid: - try: - pl = YTMusic().get_playlist(pid, limit=None) - hits = [] - for t in pl.get("tracks", []): - vid = t.get("videoId") - if not vid: - continue - alb = t.get("album") - album = alb.get("name", "") if isinstance(alb, dict) else (alb or "") - hits.append(Hit(source="youtube", kind="track", title=t.get("title", ""), - artist=_ytm_artists(t), album=album, - year=str(t.get("year") or ""), payload={"videoId": vid})) +def _ytmusic_playlist(pid: str) -> tuple[str, list[Hit]]: + """Expand a YouTube Music playlist via ytmusicapi. Returns ("", []) on failure.""" + try: + pl = YTMusic().get_playlist(pid, limit=None) + except Exception as e: # noqa: BLE001 + dbg(f"ytmusicapi playlist expand failed: {e}") + return "", [] + hits = [] + for t in pl.get("tracks", []): + vid = t.get("videoId") + if not vid: + continue + alb = t.get("album") + album = alb.get("name", "") if isinstance(alb, dict) else (alb or "") + hits.append(Hit(source="youtube", kind="track", title=t.get("title", ""), + artist=_ytm_artists(t), album=album, year=str(t.get("year") or ""), + payload={"videoId": vid, "extractor": "youtube"})) + return pl.get("title", ""), hits + + +def _entry_to_hit(entry: dict) -> Hit: + """Map a yt-dlp --flat-playlist entry to a track Hit (any platform).""" + source = _sanitize_source(entry.get("ie_key") or entry.get("extractor") or "") + vid = entry.get("id") + return Hit(source="youtube", kind="track", title=entry.get("title", ""), + artist=entry.get("uploader") or entry.get("channel") or "", + payload={"url": entry.get("url"), + "videoId": vid if source == "youtube" else None, + "extractor": source}) + + +def probe_url(url: str) -> tuple[str, str, list[Hit]]: + """Classify a URL via yt-dlp. Returns (kind, title, hits) where kind is + 'playlist' (hits populated) or 'track' (hits empty; caller downloads the URL). + YouTube playlists use ytmusicapi for richer metadata.""" + if _is_youtube_playlist_url(url) and YTMusic is not None: + pid = _playlist_id(url) + if pid: + title, hits = _ytmusic_playlist(pid) if hits: - return pl.get("title", ""), hits - except Exception as e: # noqa: BLE001 - dbg(f"ytmusicapi playlist expand failed: {e}") + return "playlist", title, hits try: result = subprocess.run(["yt-dlp", "--flat-playlist", "-J", url], capture_output=True, text=True, check=True) data = json.loads(result.stdout) except (subprocess.CalledProcessError, json.JSONDecodeError) as e: - err(f"yt-dlp playlist expand failed: {e}") - return "", [] - hits = [] - for entry in data.get("entries", []): - vid = entry.get("id") - if not vid: - continue - hits.append(Hit(source="youtube", kind="track", title=entry.get("title", ""), - artist=entry.get("uploader") or entry.get("channel") or "", - payload={"videoId": vid})) - return data.get("title", ""), hits + dbg(f"yt-dlp probe failed: {e}") + return "track", "", [] + if data.get("entries") is not None or data.get("_type") == "playlist": + hits = [_entry_to_hit(e) for e in data.get("entries", []) + if e.get("id") or e.get("url")] + return "playlist", data.get("title", ""), hits + return "track", data.get("title", ""), [] -def download_playlist(url: str, root: str, quality: str, dry_run: bool) -> tuple[int, int, str]: - """Download each playlist track via act_youtube. Returns (ok, total, title).""" - title, hits = expand_playlist(url) +def download_hits(hits: list[Hit], root: str, quality: str, dry_run: bool) -> tuple[int, int]: + """Download each track Hit via act_youtube. Returns (ok, total).""" ok = 0 for h in hits: try: @@ -702,15 +739,16 @@ def download_playlist(url: str, root: str, quality: str, dry_run: bool) -> tuple ok += 1 except Exception as e: # noqa: BLE001 — one bad track shouldn't abort the batch err(f"track failed ({h.title}): {e}") - return ok, len(hits), title + return ok, len(hits) def download_single(url: str, root: str, quality: str, dry_run: bool) -> dict: - """Download a single URL. Returns {title, artist, ok}.""" + """Download a single URL (any yt-dlp site). Returns {title, artist, ok}.""" meta = run_yt_dlp_get_metadata(url) artist = get_artist_from_metadata(meta) if meta else "Unknown Artist" title = (meta or {}).get("title", "") - target = os.path.join(root, artist, "youtube") + source = _sanitize_source((meta or {}).get("extractor", "")) if meta else "downloads" + target = os.path.join(root, artist, source) ok = yt_download(url, target, quality, dry_run) return {"title": title, "artist": artist, "ok": ok} @@ -737,8 +775,9 @@ def get_artist_from_metadata(meta: dict) -> str: def handle_url(url: str, root: str, quality: str, dry_run: bool): - if is_playlist_url(url): - ok, total, title = download_playlist(url, root, quality, dry_run) + kind, title, hits = probe_url(url) + if kind == "playlist": + ok, total = download_hits(hits, root, quality, dry_run) label = f" from '{title}'" if title else "" print(f"Downloaded {ok}/{total} tracks{label}") return diff --git a/server/actions.py b/server/actions.py index fc80446..d2215b3 100644 --- a/server/actions.py +++ b/server/actions.py @@ -81,11 +81,11 @@ def url_done_message(result: dict) -> str: return f"Downloaded '{title}'." if title else "Download complete." -def perform_url_fetch(url: str, quality: str, root: str) -> dict: - """Download a URL (playlist -> batch, else single). Raises if nothing - downloaded so the job is marked failed.""" - if mf.is_playlist_url(url): - ok, total, title = mf.download_playlist(url, root, quality, False) +def perform_url_fetch(url: str, kind: str, title: str, hits: list, quality: str, root: str) -> dict: + """Download a probed URL (playlist -> batch over pre-probed hits, else single). + Raises if nothing downloaded so the job is marked failed.""" + if kind == "playlist": + ok, total = mf.download_hits(hits, root, quality, False) if ok == 0: raise RuntimeError(f"No tracks downloaded from playlist '{title}'." if title else "No tracks downloaded from playlist.") diff --git a/server/app.py b/server/app.py index 7907d19..a3305cc 100644 --- a/server/app.py +++ b/server/app.py @@ -53,14 +53,14 @@ def fetch(q: str = Query(..., min_length=1), raise HTTPException(status_code=422, detail=f"Invalid quality '{quality}'.") if mf.is_url(q): - kind = "playlist" if mf.is_playlist_url(q) else "track" - syn = mf.Hit(source="youtube", kind=kind, title="", artist="") - job = jobs.create_job(hit=syn, message=actions.url_started_message(kind)) + kind, title, hits = mf.probe_url(q) + syn = mf.Hit(source="youtube", kind=kind, title=title, artist="") + job = jobs.create_job(hit=syn, message=actions.url_started_message(kind, title)) response = _job_public(job) done_msg = actions.playlist_done_message if kind == "playlist" else actions.url_done_message jobs.run_job( job.id, - lambda: actions.perform_url_fetch(q, quality, ROOT), + lambda: actions.perform_url_fetch(q, kind, title, hits, quality, ROOT), done_message=done_msg, fail_message="Download failed.", ) diff --git a/server/mf.py b/server/mf.py index c3eefef..be9b11d 100644 --- a/server/mf.py +++ b/server/mf.py @@ -25,10 +25,10 @@ act_lidarr_album = _mod.act_lidarr_album act_lidarr_artist = _mod.act_lidarr_artist QUALITY_CHOICES = _mod.QUALITY_CHOICES is_url = _mod.is_url -is_playlist_url = _mod.is_playlist_url -download_playlist = _mod.download_playlist +probe_url = _mod.probe_url +download_hits = _mod.download_hits download_single = _mod.download_single __all__ = ["Hit", "build_combined_hits", "pick", "act_youtube", "act_lidarr_album", "act_lidarr_artist", "QUALITY_CHOICES", - "is_url", "is_playlist_url", "download_playlist", "download_single"] + "is_url", "probe_url", "download_hits", "download_single"] diff --git a/tests/test_api_url.py b/tests/test_api_url.py index a2ab27e..24a0b2f 100644 --- a/tests/test_api_url.py +++ b/tests/test_api_url.py @@ -20,10 +20,17 @@ def _wait_done(client, auth, job_id, timeout=2.0): raise AssertionError("job never finished") +def _mk_hit(): + from server import mf + return mf.Hit(source="youtube", kind="track", title="t", artist="a", payload={"videoId": "1"}) + + def test_playlist_url_batch_job(client, auth, monkeypatch): - monkeypatch.setattr("server.app.mf.download_playlist", - lambda url, root, quality, dry_run: (2, 3, "My Mix")) - r = client.post("/fetch", params={"q": "https://music.youtube.com/playlist?list=PLx"}, headers=auth) + monkeypatch.setattr("server.app.mf.probe_url", + lambda url: ("playlist", "My Mix", [_mk_hit(), _mk_hit(), _mk_hit()])) + monkeypatch.setattr("server.app.mf.download_hits", + lambda hits, root, quality, dry_run: (2, 3)) + r = client.post("/fetch", params={"q": "https://soundcloud.com/dj/sets/mix"}, headers=auth) assert r.status_code == 200 body = r.json() assert body["status"] == "queued" @@ -35,17 +42,20 @@ def test_playlist_url_batch_job(client, auth, monkeypatch): def test_playlist_zero_success_fails(client, auth, monkeypatch): - monkeypatch.setattr("server.app.mf.download_playlist", - lambda url, root, quality, dry_run: (0, 3, "Dead Mix")) + monkeypatch.setattr("server.app.mf.probe_url", + lambda url: ("playlist", "Dead Mix", [_mk_hit()])) + monkeypatch.setattr("server.app.mf.download_hits", + lambda hits, root, quality, dry_run: (0, 3)) body = client.post("/fetch", params={"q": "https://www.youtube.com/playlist?list=PLy"}, headers=auth).json() done = _wait_done(client, auth, body["job_id"]) assert done["status"] == "failed" def test_single_video_url_download(client, auth, monkeypatch): + monkeypatch.setattr("server.app.mf.probe_url", lambda url: ("track", "Song", [])) monkeypatch.setattr("server.app.mf.download_single", lambda url, root, quality, dry_run: {"title": "Song", "artist": "A", "ok": True}) - body = client.post("/fetch", params={"q": "https://music.youtube.com/watch?v=abc"}, headers=auth).json() + body = client.post("/fetch", params={"q": "https://soundcloud.com/a/song"}, headers=auth).json() assert body["hit"]["kind"] == "track" done = _wait_done(client, auth, body["job_id"]) assert done["status"] == "done" diff --git a/tests/test_mf_url_exports.py b/tests/test_mf_url_exports.py index 6973ea0..8891769 100644 --- a/tests/test_mf_url_exports.py +++ b/tests/test_mf_url_exports.py @@ -3,6 +3,6 @@ import server.mf as smf def test_url_helpers_reexported(): assert callable(smf.is_url) - assert callable(smf.is_playlist_url) - assert callable(smf.download_playlist) + assert callable(smf.probe_url) + assert callable(smf.download_hits) assert callable(smf.download_single) diff --git a/tests/test_multiplatform.py b/tests/test_multiplatform.py new file mode 100644 index 0000000..01e7039 --- /dev/null +++ b/tests/test_multiplatform.py @@ -0,0 +1,79 @@ +import server.mf # noqa: F401 — loads musicfetch, registers musicfetch_core +import musicfetch_core as mf + + +# ---- _sanitize_source ---- +def test_sanitize_source(): + assert mf._sanitize_source("Youtube") == "youtube" + assert mf._sanitize_source("Soundcloud") == "soundcloud" + assert mf._sanitize_source("") == "downloads" + + +# ---- _entry_to_hit ---- +def test_entry_to_hit_soundcloud_keeps_url_no_videoid(): + h = mf._entry_to_hit({"id": "t1", "title": "Track", "uploader": "DJ", + "ie_key": "Soundcloud", "url": "https://soundcloud.com/dj/track"}) + assert h.payload["extractor"] == "soundcloud" + assert h.payload["url"] == "https://soundcloud.com/dj/track" + assert h.payload["videoId"] is None + assert h.artist == "DJ" + + +def test_entry_to_hit_youtube_keeps_videoid(): + h = mf._entry_to_hit({"id": "vid123", "title": "Song", "channel": "Chan", + "ie_key": "Youtube", "url": "https://youtube.com/watch?v=vid123"}) + assert h.payload["extractor"] == "youtube" + assert h.payload["videoId"] == "vid123" + + +# ---- _track_url ---- +def test_track_url_youtube_prefers_music_youtube(): + h = mf.Hit(source="youtube", kind="track", title="T", artist="A", + payload={"videoId": "vid", "extractor": "youtube", "url": "https://youtube.com/watch?v=vid"}) + assert mf._track_url(h) == "https://music.youtube.com/watch?v=vid" + + +def test_track_url_soundcloud_uses_native_url(): + h = mf.Hit(source="youtube", kind="track", title="T", artist="A", + payload={"videoId": None, "extractor": "soundcloud", "url": "https://soundcloud.com/a/t"}) + assert mf._track_url(h) == "https://soundcloud.com/a/t" + + +def test_track_url_ytmusic_search_hit_default_youtube(): + # ytmusicapi search hits carry only videoId (no extractor) -> music.youtube. + h = mf.Hit(source="youtube", kind="track", title="T", artist="A", payload={"videoId": "vid"}) + assert mf._track_url(h) == "https://music.youtube.com/watch?v=vid" + + +# ---- act_youtube routes to per-source folder ---- +def test_act_youtube_soundcloud_folder(monkeypatch): + captured = {} + monkeypatch.setattr(mf, "yt_download", + lambda url, target, quality, dry_run, hit=None: captured.update(url=url, target=target) or True) + h = mf.Hit(source="youtube", kind="track", title="T", artist="DJ, Other", + payload={"videoId": None, "extractor": "soundcloud", "url": "https://soundcloud.com/dj/t"}) + mf.act_youtube(h, "/media/music", "best", False) + assert captured["target"] == "/media/music/DJ/soundcloud" # first artist only + assert captured["url"] == "https://soundcloud.com/dj/t" + + +def test_act_youtube_youtube_folder(monkeypatch): + captured = {} + monkeypatch.setattr(mf, "yt_download", + lambda url, target, quality, dry_run, hit=None: captured.update(target=target) or True) + h = mf.Hit(source="youtube", kind="track", title="T", artist="A", + payload={"videoId": "vid", "extractor": "youtube"}) + mf.act_youtube(h, "/media/music", "best", False) + assert captured["target"] == "/media/music/A/youtube" + + +# ---- download_single per-source folder ---- +def test_download_single_bandcamp_folder(monkeypatch): + monkeypatch.setattr(mf, "run_yt_dlp_get_metadata", + lambda url: {"title": "Song", "artist": "Band", "extractor": "Bandcamp"}) + captured = {} + monkeypatch.setattr(mf, "yt_download", + lambda url, target, quality, dry_run, hit=None: captured.update(target=target) or True) + info = mf.download_single("https://band.bandcamp.com/track/song", "/media/music", "best", False) + assert captured["target"] == "/media/music/Band/bandcamp" + assert info == {"title": "Song", "artist": "Band", "ok": True} diff --git a/tests/test_playlist.py b/tests/test_playlist.py index c552316..6b302d3 100644 --- a/tests/test_playlist.py +++ b/tests/test_playlist.py @@ -1,74 +1,104 @@ -import server.mf # noqa: F401 +import json as _json + +import server.mf # noqa: F401 — loads musicfetch, registers musicfetch_core import musicfetch_core as mf -def test_pure_playlist_url_is_playlist(): - assert mf.is_playlist_url("https://music.youtube.com/playlist?list=PLabc") is True - assert mf.is_playlist_url("https://www.youtube.com/playlist?list=PLabc") is True - - -def test_watch_with_list_is_not_playlist(): - assert mf.is_playlist_url("https://www.youtube.com/watch?v=abc&list=PLx") is False - - -def test_plain_watch_is_not_playlist(): - assert mf.is_playlist_url("https://www.youtube.com/watch?v=abc") is False - - -def test_non_url_is_not_playlist(): - assert mf.is_playlist_url("Daft Punk - Discovery") is False - - class _CP: def __init__(self, stdout): self.stdout = stdout self.returncode = 0 -def test_expand_playlist_ytdlp_fallback(monkeypatch): - import json as _json +# ---- _is_youtube_playlist_url ---- +def test_youtube_playlist_url_true(): + assert mf._is_youtube_playlist_url("https://music.youtube.com/playlist?list=PLabc") is True + assert mf._is_youtube_playlist_url("https://www.youtube.com/playlist?list=PLabc") is True + + +def test_youtube_watch_with_list_is_not_playlist(): + assert mf._is_youtube_playlist_url("https://www.youtube.com/watch?v=abc&list=PLx") is False + + +def test_non_youtube_url_not_youtube_playlist(): + # SoundCloud sets are not matched here — probe_url handles them via yt-dlp. + assert mf._is_youtube_playlist_url("https://soundcloud.com/user/sets/mix") is False + + +# ---- probe_url ---- +def test_probe_url_youtube_playlist_uses_ytmusic(monkeypatch): + h = mf.Hit(source="youtube", kind="track", title="A", artist="X", + payload={"videoId": "1", "extractor": "youtube"}) + monkeypatch.setattr(mf, "_ytmusic_playlist", lambda pid: ("My YT Mix", [h])) + monkeypatch.setattr(mf, "YTMusic", object()) # non-None to enter ytmusic branch + kind, title, hits = mf.probe_url("https://music.youtube.com/playlist?list=PLx") + assert kind == "playlist" + assert title == "My YT Mix" + assert hits == [h] + + +def test_probe_url_generic_playlist_via_ytdlp(monkeypatch): monkeypatch.setattr(mf, "YTMusic", None) - payload = {"title": "My Mix", "entries": [ - {"id": "v1", "title": "Song One", "uploader": "Artist A"}, - {"id": "v2", "title": "Song Two", "channel": "Artist B"}, - {"id": None, "title": "skip"}, + payload = {"title": "SC Set", "_type": "playlist", "entries": [ + {"id": "t1", "title": "One", "uploader": "DJ", "ie_key": "Soundcloud", + "url": "https://soundcloud.com/dj/one"}, + {"id": None, "url": None, "title": "skip"}, ]} monkeypatch.setattr(mf.subprocess, "run", lambda *a, **k: _CP(_json.dumps(payload))) - title, hits = mf.expand_playlist("https://www.youtube.com/playlist?list=PLx") - assert title == "My Mix" - assert [h.payload["videoId"] for h in hits] == ["v1", "v2"] - assert hits[0].artist == "Artist A" + kind, title, hits = mf.probe_url("https://soundcloud.com/dj/sets/sc-set") + assert kind == "playlist" + assert title == "SC Set" + assert len(hits) == 1 + assert hits[0].payload["extractor"] == "soundcloud" + assert hits[0].payload["url"] == "https://soundcloud.com/dj/one" -def test_download_playlist_counts_ok_and_total(monkeypatch): +def test_probe_url_single_track(monkeypatch): + monkeypatch.setattr(mf, "YTMusic", None) + payload = {"title": "A Song", "extractor": "soundcloud"} # no entries -> single + monkeypatch.setattr(mf.subprocess, "run", lambda *a, **k: _CP(_json.dumps(payload))) + kind, title, hits = mf.probe_url("https://soundcloud.com/dj/one") + assert kind == "track" + assert title == "A Song" + assert hits == [] + + +def test_probe_url_failure_returns_track(monkeypatch): + monkeypatch.setattr(mf, "YTMusic", None) + + def boom(*a, **k): + raise mf.subprocess.CalledProcessError(1, "yt-dlp") + monkeypatch.setattr(mf.subprocess, "run", boom) + assert mf.probe_url("https://example.com/x") == ("track", "", []) + + +# ---- download_hits ---- +def test_download_hits_counts(monkeypatch): h1 = mf.Hit(source="youtube", kind="track", title="A", artist="X", payload={"videoId": "1"}) h2 = mf.Hit(source="youtube", kind="track", title="B", artist="Y", payload={"videoId": "2"}) h3 = mf.Hit(source="youtube", kind="track", title="C", artist="Z", payload={"videoId": "3"}) - monkeypatch.setattr(mf, "expand_playlist", lambda url: ("PL Title", [h1, h2, h3])) monkeypatch.setattr(mf, "act_youtube", lambda hit, root, quality, dry_run: hit.title != "B") - ok, total, title = mf.download_playlist("u", "/tmp", "best", False) - assert (ok, total, title) == (2, 3, "PL Title") + assert mf.download_hits([h1, h2, h3], "/tmp", "best", False) == (2, 3) -def test_download_playlist_track_exception_counts_as_failure(monkeypatch): +def test_download_hits_track_exception_is_failure(monkeypatch): h1 = mf.Hit(source="youtube", kind="track", title="A", artist="X", payload={"videoId": "1"}) h2 = mf.Hit(source="youtube", kind="track", title="B", artist="Y", payload={"videoId": "2"}) - monkeypatch.setattr(mf, "expand_playlist", lambda url: ("T", [h1, h2])) def fake_act(hit, root, quality, dry_run): if hit.title == "B": raise RuntimeError("boom") return True monkeypatch.setattr(mf, "act_youtube", fake_act) - ok, total, _ = mf.download_playlist("u", "/tmp", "best", False) - assert (ok, total) == (1, 2) + assert mf.download_hits([h1, h2], "/tmp", "best", False) == (1, 2) +# ---- yt_download bool ---- def test_yt_download_returns_true_on_zero_exit(monkeypatch): monkeypatch.setattr(mf.os, "makedirs", lambda *a, **k: None) monkeypatch.setattr(mf.subprocess, "run", lambda *a, **k: _CP("")) assert mf.yt_download("u", "/tmp/x", "best", False) is True -def test_yt_download_dry_run_returns_true(monkeypatch): +def test_yt_download_dry_run_returns_true(): assert mf.yt_download("u", "/tmp/x", "best", True) is True From 6730f1f1412a7b5d5ece82a35f49d2f10365c6e8 Mon Sep 17 00:00:00 2001 From: zebra Date: Tue, 9 Jun 2026 06:55:56 -0700 Subject: [PATCH 2/2] fix: route sparse-metadata playlist tracks by yt-dlp's own metadata SoundCloud sets (and similar) return flat-playlist entries without per-track artist/title. When a track Hit has no artist, download via an output template (-o /%(artist,uploader,channel)s//...) so yt-dlp places the file under the real artist instead of "Unknown Artist". yt_download gains an optional outtmpl mode. Co-Authored-By: Claude Opus 4.8 --- musicfetch | 32 ++++++++++++++++++++++---------- tests/test_multiplatform.py | 16 +++++++++++++++- 2 files changed, 37 insertions(+), 11 deletions(-) diff --git a/musicfetch b/musicfetch index 56e1a4e..8de5cc8 100755 --- a/musicfetch +++ b/musicfetch @@ -593,14 +593,18 @@ def _quality_args(quality: str) -> list[str]: return ["-f", "bestaudio/best", "-x"] -def yt_download(url_or_query: str, target_folder: str, quality: str, dry_run: bool, - hit: Optional[Hit] = None): +def yt_download(url_or_query: str, target_folder: Optional[str], quality: str, dry_run: bool, + hit: Optional[Hit] = None, outtmpl: Optional[str] = None): cmd = ["yt-dlp", *_quality_args(quality), "--embed-metadata", "--embed-thumbnail", - "--no-playlist", - "-P", target_folder] + "--no-playlist"] + # Either a fixed output dir (-P) or a metadata-driven output template (-o). + if outtmpl: + cmd += ["-o", outtmpl] + else: + cmd += ["-P", target_folder] # Override tags from the chosen hit so they don't rely on scraped titles. if hit: if hit.artist: @@ -616,12 +620,15 @@ def yt_download(url_or_query: str, target_folder: str, quality: str, dry_run: bo cmd += ["--parse-metadata", f"{hit.year}:%(release_year)s"] cmd.append(url_or_query) + dest = outtmpl or target_folder if dry_run: - print(f"[dry-run] mkdir -p {target_folder}") + if target_folder: + print(f"[dry-run] mkdir -p {target_folder}") print(f"[dry-run] {' '.join(cmd)}") return True - os.makedirs(target_folder, exist_ok=True) - print(f"Downloading via yt-dlp -> {target_folder}") + if target_folder: + os.makedirs(target_folder, exist_ok=True) + print(f"Downloading via yt-dlp -> {dest}") return subprocess.run(cmd).returncode == 0 @@ -648,10 +655,15 @@ def _track_url(hit: Hit) -> str: def act_youtube(hit: Hit, root: str, quality: str, dry_run: bool): url = _track_url(hit) - artist_dir = hit.artist.split(",")[0].strip() or "Unknown Artist" source = hit.payload.get("extractor") or "youtube" - target = os.path.join(root, artist_dir, source) - return yt_download(url, target, quality, dry_run, hit=hit) + artist_dir = hit.artist.split(",")[0].strip() + if artist_dir: + target = os.path.join(root, artist_dir, source) + return yt_download(url, target, quality, dry_run, hit=hit) + # Sparse playlist metadata (e.g. SoundCloud sets): let yt-dlp route the file + # by the track's own metadata so it lands under the real artist. + outtmpl = os.path.join(root, "%(artist,uploader,channel)s", source, "%(title)s [%(id)s].%(ext)s") + return yt_download(url, None, quality, dry_run, hit=hit, outtmpl=outtmpl) # --------------------------------------------------------------------------- diff --git a/tests/test_multiplatform.py b/tests/test_multiplatform.py index 01e7039..7d4910c 100644 --- a/tests/test_multiplatform.py +++ b/tests/test_multiplatform.py @@ -60,13 +60,27 @@ def test_act_youtube_soundcloud_folder(monkeypatch): def test_act_youtube_youtube_folder(monkeypatch): captured = {} monkeypatch.setattr(mf, "yt_download", - lambda url, target, quality, dry_run, hit=None: captured.update(target=target) or True) + lambda url, target, quality, dry_run, hit=None, outtmpl=None: + captured.update(target=target) or True) h = mf.Hit(source="youtube", kind="track", title="T", artist="A", payload={"videoId": "vid", "extractor": "youtube"}) mf.act_youtube(h, "/media/music", "best", False) assert captured["target"] == "/media/music/A/youtube" +def test_act_youtube_unknown_artist_uses_metadata_template(monkeypatch): + captured = {} + monkeypatch.setattr(mf, "yt_download", + lambda url, target, quality, dry_run, hit=None, outtmpl=None: + captured.update(target=target, outtmpl=outtmpl) or True) + h = mf.Hit(source="youtube", kind="track", title="", artist="", + payload={"videoId": None, "extractor": "soundcloud", "url": "https://soundcloud.com/a/t"}) + mf.act_youtube(h, "/media/music", "best", False) + assert captured["target"] is None + assert "%(artist,uploader,channel)s" in captured["outtmpl"] + assert captured["outtmpl"].endswith("/soundcloud/%(title)s [%(id)s].%(ext)s") + + # ---- download_single per-source folder ---- def test_download_single_bandcamp_folder(monkeypatch): monkeypatch.setattr(mf, "run_yt_dlp_get_metadata",