From 140bfef7c94b0ae027844dd698e368c11fc713b9 Mon Sep 17 00:00:00 2001 From: zebra Date: Sat, 13 Jun 2026 11:25:39 -0700 Subject: [PATCH] feat: yt-dlp cookie support + surface real failure reason; default workers 4 Bulk --repair on unauthenticated YouTube trips the bot-check (HTTP 429 "Sign in to confirm you're not a bot"), after which every call fails until the IP flag clears. Add cookie support so authenticated requests bypass it: - --cookies FILE / --cookies-from-browser BROWSER (and $YTDLP_COOKIES / $YTDLP_COOKIES_FROM_BROWSER for the API container), threaded into every yt-dlp invocation (search, probe, download, repair metadata fetch). - run_yt_dlp_get_metadata now logs yt-dlp's last stderr line (the actual 429 / bot-check / network reason) instead of a bare exit code. - Default --repair workers lowered 8 -> 4 (safe without cookies; raise with). - compose: optional YTDLP_COOKIES env + commented cookies mount. - README: how to obtain cookies (Chrome/Firefox, browser-read vs cookies.txt export); gitignore cookies.txt. Co-Authored-By: Claude Opus 4.8 --- .gitignore | 1 + README.md | 73 ++++++++++++++++++++++++++++++++++++--- musicfetch | 50 ++++++++++++++++++++++----- server/docker-compose.yml | 5 +++ tests/test_playlist.py | 10 ++++++ tests/test_repair.py | 42 ++++++++++++++++++++++ 6 files changed, 168 insertions(+), 13 deletions(-) diff --git a/.gitignore b/.gitignore index 77dc02c..c8c9347 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ __pycache__/ *.pyc server/log.txt +cookies.txt diff --git a/README.md b/README.md index a3a0da9..31a3ed3 100644 --- a/README.md +++ b/README.md @@ -95,7 +95,9 @@ export LIDARR_API_KEY="your-lidarr-api-key" | `-o`, `--root PATH` | Output root folder (default `/media/music`). | | `--search-all` | Search all albums when adding an artist to Lidarr. | | `--repair` | Re-tag existing downloads under `--root` from source metadata (see below). | -| `--workers N` | Parallel metadata fetches during `--repair` (default 8). 
| +| `--workers N` | Parallel metadata fetches during `--repair` (default 4). | +| `--cookies FILE` | yt-dlp `cookies.txt` for authenticated YouTube (avoids bot-check / rate limits). | +| `--cookies-from-browser BROWSER` | Load YouTube cookies from a local browser (e.g. `firefox`). | | `--retag-from-path` | Offline: re-tag artist/title from folder + filename (see below). | | `-x`, `--exclude NAME` | Folder under `--root` to skip during `--repair`/`--retag-from-path` (repeatable). | | `--debug` | Verbose output. | @@ -139,10 +141,71 @@ title. A bogus `NA [].` filename is renamed to the recovered title, and `NA` album with no source album is normalised to `Unknown Album`. Each file is its own yt-dlp network round-trip, so repair runs them in a thread pool; -`--workers N` (default 8) caps concurrency — lower it if YouTube starts rate-limiting -(HTTP 429/403), raise it to go faster on a large library. Progress prints every 100 files. -Requires `mutagen` (a yt-dlp dependency, usually already present). CLI-only — not exposed via -the REST API. +`--workers N` (default 4) caps concurrency. Progress prints every 100 files. Requires +`mutagen` (a yt-dlp dependency, usually already present). CLI-only — not exposed via the REST API. + +**Cookies (important for bulk repair).** Unauthenticated YouTube requests get throttled fast — +a large `--repair` (or even a `--dry-run`, which still fetches) will trip *"Sign in to confirm +you're not a bot"* (HTTP 429) and every subsequent call fails until the IP-level flag clears. +Pass authenticated cookies to avoid it: + +```bash +./musicfetch --repair --cookies /path/cookies.txt -o /media/music # exported cookies.txt +./musicfetch --repair --cookies-from-browser firefox -o /media/music # or read from a browser +``` + +With cookies you can raise `--workers`; without them keep it low (≤4) and expect occasional +throttling. Cookies also apply to normal fetches/downloads. 
The same can be set for the API +container via `$YTDLP_COOKIES` / `$YTDLP_COOKIES_FROM_BROWSER`. If you do get flagged, **stop** — +retrying extends it; wait ~30-60 min (429) or longer for a bot-check. + +#### Getting YouTube cookies + +> ⚠️ Use a **throwaway / secondary Google account**, not your main one — bulk automated +> requests can get the account flagged. You must be **logged in to YouTube** in the browser +> first. + +**Option A — read straight from the browser (simplest, host CLI only).** +`--cookies-from-browser` reads the browser's own cookie store, so there's nothing to export: + +```bash +./musicfetch --repair --cookies-from-browser firefox -o /media/music +./musicfetch --repair --cookies-from-browser chrome -o /media/music +``` + +- **Firefox:** works while open; just be logged in to YouTube. +- **Chrome / Chromium / Brave / Edge:** must be **fully quit** when you run this (Chrome locks + its cookie DB, and newer versions encrypt it — close the browser entirely first). On Linux a + running Chrome will usually fail with a "could not copy cookie database / locked" error. +- Specify a profile if not the default, e.g. `--cookies-from-browser "chrome:Profile 1"`. + +This only works where the browser lives (your host), **not** inside the Docker container. + +**Option B — export a `cookies.txt` (works anywhere, incl. the container/server).** +Use a Netscape-format cookie exporter, then point `--cookies` / `$YTDLP_COOKIES` at the file: + +1. Install a cookies exporter extension: + - Firefox: *"cookies.txt"* (a.k.a. *Export Cookies*). + - Chrome: *"Get cookies.txt LOCALLY"* (pick a **LOCALLY**-running one — avoid extensions that + upload your cookies anywhere). +2. Log in to YouTube (https://www.youtube.com), click the extension, **Export** → save `cookies.txt`. +3. 
Use it: + + ```bash + ./musicfetch --repair --cookies ~/cookies.txt -o /media/music + ``` + + For the API container, mount it and set the env var (see `server/docker-compose.yml`): + + ```yaml + environment: + YTDLP_COOKIES: "/cookies.txt" + volumes: + - /host/path/cookies.txt:/cookies.txt:ro + ``` + +Cookies expire — if YouTube starts rejecting them, re-export. Treat `cookies.txt` like a +password (it *is* your logged-in session); keep it out of git (`.gitignore` it). ```bash # Preview what would change (writes nothing) diff --git a/musicfetch b/musicfetch index 710d9a1..97cc9f2 100755 --- a/musicfetch +++ b/musicfetch @@ -47,6 +47,22 @@ HEADERS = {"X-Api-Key": API_KEY, "Content-Type": "application/json"} # Runtime flags, populated in main(). DEBUG = False +# yt-dlp cookies — authenticated requests bypass YouTube's bot-check ("Sign in +# to confirm you're not a bot") and lift rate limits, which is essential for +# bulk --repair. Set via CLI (--cookies / --cookies-from-browser) or env so the +# REST API container can supply them too. +COOKIES_FILE = os.environ.get("YTDLP_COOKIES", "") +COOKIES_FROM_BROWSER = os.environ.get("YTDLP_COOKIES_FROM_BROWSER", "") + + +def _cookie_args() -> list: + """yt-dlp cookie flags (file wins over browser); empty when neither is set.""" + if COOKIES_FILE: + return ["--cookies", COOKIES_FILE] + if COOKIES_FROM_BROWSER: + return ["--cookies-from-browser", COOKIES_FROM_BROWSER] + return [] + # Quality choices for --quality. 
QUALITY_CHOICES = ["best", "320", "m4a", "opus", "flac"] @@ -337,7 +353,7 @@ def _ytmusic_search(query: str, limit: int) -> list[Hit]: def _ytdlp_search(query: str, limit: int) -> list[Hit]: try: result = subprocess.run( - ["yt-dlp", "--flat-playlist", "-J", f"ytsearch{limit}:{query}"], + ["yt-dlp", *_cookie_args(), "--flat-playlist", "-J", f"ytsearch{limit}:{query}"], capture_output=True, text=True, check=True, ) data = json.loads(result.stdout) @@ -608,6 +624,7 @@ def _quality_args(quality: str) -> list[str]: def yt_download(url_or_query: str, target_folder: Optional[str], quality: str, dry_run: bool, hit: Optional[Hit] = None, outtmpl: Optional[str] = None): cmd = ["yt-dlp", + *_cookie_args(), *_quality_args(quality), "--embed-metadata", "--embed-thumbnail", @@ -756,7 +773,7 @@ def probe_url(url: str) -> tuple[str, str, list[Hit]]: if hits: return "playlist", title, hits try: - result = subprocess.run(["yt-dlp", "--flat-playlist", "-J", url], + result = subprocess.run(["yt-dlp", *_cookie_args(), "--flat-playlist", "-J", url], capture_output=True, text=True, check=True) data = json.loads(result.stdout) except (subprocess.CalledProcessError, json.JSONDecodeError) as e: @@ -795,12 +812,19 @@ def download_single(url: str, root: str, quality: str, dry_run: bool) -> dict: def run_yt_dlp_get_metadata(url: str, extra_args=None) -> Optional[dict]: - cmd = ["yt-dlp", "-j", "--no-playlist", *(extra_args or []), url] + cmd = ["yt-dlp", *_cookie_args(), "-j", "--no-playlist", *(extra_args or []), url] try: result = subprocess.run(cmd, capture_output=True, text=True, check=True) return json.loads(result.stdout) except (subprocess.CalledProcessError, json.JSONDecodeError) as e: - err(f"yt-dlp metadata extraction failed: {e}") + # Surface yt-dlp's own last stderr line (e.g. 429 / "not a bot") instead + # of a bare exit code — the actual reason is what you need to act on. 
+ detail = "" + stderr = getattr(e, "stderr", "") or "" + lines = [ln for ln in stderr.strip().splitlines() if ln.strip()] + if lines: + detail = f" — {lines[-1]}" + err(f"yt-dlp metadata extraction failed for {url}{detail}") return None @@ -1183,9 +1207,15 @@ def parse_args(): help="Search all albums when adding an artist to Lidarr.") p.add_argument("--repair", action="store_true", help="Re-tag existing downloads under --root from source metadata.") - p.add_argument("--workers", type=int, default=8, - help="Parallel yt-dlp metadata fetches during --repair (default 8; " - "lower if YouTube rate-limits).") + p.add_argument("--workers", type=int, default=4, + help="Parallel yt-dlp metadata fetches during --repair (default 4; " + "raise with cookies, lower if YouTube rate-limits).") + p.add_argument("--cookies", metavar="FILE", + help="Path to a yt-dlp cookies.txt (authenticated requests avoid " + "YouTube's bot-check / rate limits). Overrides $YTDLP_COOKIES.") + p.add_argument("--cookies-from-browser", metavar="BROWSER", + help="Load YouTube cookies from a local browser, e.g. firefox or " + "chrome. 
Overrides $YTDLP_COOKIES_FROM_BROWSER.") p.add_argument("--retag-from-path", action="store_true", help="Offline: re-tag artist/title from folder + filename " "(fixes tags damaged by a prior --repair).") @@ -1197,9 +1227,13 @@ def parse_args(): def main(): - global DEBUG + global DEBUG, COOKIES_FILE, COOKIES_FROM_BROWSER args = parse_args() DEBUG = args.debug + if args.cookies: + COOKIES_FILE = args.cookies + if args.cookies_from_browser: + COOKIES_FROM_BROWSER = args.cookies_from_browser query = " ".join(args.query).strip() if args.retag_from_path: diff --git a/server/docker-compose.yml b/server/docker-compose.yml index cd6ad34..fc6678d 100644 --- a/server/docker-compose.yml +++ b/server/docker-compose.yml @@ -13,5 +13,10 @@ services: MUSICFETCH_API_KEY: "${MUSICFETCH_API_KEY}" MUSICFETCH_ROOT: "/media/music" MUSICFETCH_PORT: "6769" + # Optional: authenticated YouTube cookies to avoid bot-check / rate limits. + # Mount a cookies.txt below and point this at it (in-container path). + YTDLP_COOKIES: "${YTDLP_COOKIES:-}" volumes: - /media/music:/media/music + # Uncomment and set host path to supply cookies (see YTDLP_COOKIES above): + # - /path/to/cookies.txt:/cookies.txt:ro diff --git a/tests/test_playlist.py b/tests/test_playlist.py index 2ddbac7..0820f7e 100644 --- a/tests/test_playlist.py +++ b/tests/test_playlist.py @@ -132,3 +132,13 @@ def test_yt_download_single_word_tags_injected_literally(monkeypatch): assert "Cochise" in cmd # A hit album must not be clobbered by the Unknown-Album default. 
assert "%(album|Unknown Album)s:%(meta_album)s" not in cmd + + +def test_yt_download_passes_cookies(monkeypatch): + captured = {} + monkeypatch.setattr(mf, "COOKIES_FILE", "/cookies.txt") + monkeypatch.setattr(mf, "COOKIES_FROM_BROWSER", "") + monkeypatch.setattr(mf.os, "makedirs", lambda *a, **k: None) + monkeypatch.setattr(mf.subprocess, "run", lambda cmd, **k: captured.update(cmd=cmd) or _CP("")) + mf.yt_download("u", "/tmp/x", "best", False) + assert "--cookies" in captured["cmd"] and "/cookies.txt" in captured["cmd"] diff --git a/tests/test_repair.py b/tests/test_repair.py index 41384f1..2819f19 100644 --- a/tests/test_repair.py +++ b/tests/test_repair.py @@ -367,3 +367,45 @@ def test_repair_library_default_workers_still_works(tmp_path, monkeypatch): (root / "A" / "youtube" / f"T [{YT_ID}].opus").write_text("x") monkeypatch.setattr(mf, "repair_file", lambda p, s, d: ["x"]) assert mf.repair_library(str(root), dry_run=False) == (1, 1) + + +# ---- cookies + error visibility ---- +def test_cookie_args_file_takes_precedence(monkeypatch): + monkeypatch.setattr(mf, "COOKIES_FILE", "/c.txt") + monkeypatch.setattr(mf, "COOKIES_FROM_BROWSER", "firefox") + assert mf._cookie_args() == ["--cookies", "/c.txt"] + + +def test_cookie_args_browser(monkeypatch): + monkeypatch.setattr(mf, "COOKIES_FILE", "") + monkeypatch.setattr(mf, "COOKIES_FROM_BROWSER", "firefox") + assert mf._cookie_args() == ["--cookies-from-browser", "firefox"] + + +def test_cookie_args_none(monkeypatch): + monkeypatch.setattr(mf, "COOKIES_FILE", "") + monkeypatch.setattr(mf, "COOKIES_FROM_BROWSER", "") + assert mf._cookie_args() == [] + + +def test_metadata_fetch_passes_cookies(monkeypatch): + captured = {} + + class _R: + stdout = '{"title": "x"}' + monkeypatch.setattr(mf, "COOKIES_FILE", "/cookies.txt") + monkeypatch.setattr(mf, "COOKIES_FROM_BROWSER", "") + monkeypatch.setattr(mf.subprocess, "run", lambda cmd, **k: captured.update(cmd=cmd) or _R()) + mf.run_yt_dlp_get_metadata("http://u") + assert 
"--cookies" in captured["cmd"] + assert "/cookies.txt" in captured["cmd"] + + +def test_metadata_fetch_logs_stderr(monkeypatch, capsys): + def boom(cmd, **k): + raise mf.subprocess.CalledProcessError( + 1, cmd, output="", stderr="WARNING: foo\nERROR: Sign in to confirm you're not a bot.") + monkeypatch.setattr(mf.subprocess, "run", boom) + assert mf.run_yt_dlp_get_metadata("http://u") is None + out = capsys.readouterr().err + assert "not a bot" in out # the actionable last stderr line surfaces