From b26e32192634ab0f9002f7247e017aaebaadd019 Mon Sep 17 00:00:00 2001
From: zebra
Date: Sat, 13 Jun 2026 23:39:08 -0700
Subject: [PATCH] feat: throttle yt-dlp requests to dodge YouTube rate-limiting

Bulk metadata fetches trip YouTube's per-session rate limit ("This content
isn't available, try again later"), failing even single-worker runs after a
burst. Add --sleep-requests between extraction calls (and a randomized
--sleep-interval before downloads), default 1s, tunable via --sleep /
$YTDLP_SLEEP (0 disables). Applied to metadata, search, probe, and download
yt-dlp invocations.

Co-Authored-By: Claude Opus 4.8
---
 musicfetch | 38 ++++++++++++++++++++++++++++++++++----
 1 file changed, 34 insertions(+), 4 deletions(-)

diff --git a/musicfetch b/musicfetch
index 28af6ec..8863e3c 100755
--- a/musicfetch
+++ b/musicfetch
@@ -54,6 +54,13 @@ DEBUG = False
 COOKIES_FILE = os.environ.get("YTDLP_COOKIES", "")
 COOKIES_FROM_BROWSER = os.environ.get("YTDLP_COOKIES_FROM_BROWSER", "")
 
+# Per-request throttle. YouTube rate-limits a session after a burst of metadata
+# fetches ("This content isn't available, try again later"), tripping even
+# single-worker runs after a while. Sleeping between requests (and randomly
+# before each download) keeps the session under the limit. Seconds; 0 disables.
+# Tunable via env or --sleep; default 1s mirrors yt-dlp's own `-t sleep` advice.
+SLEEP_REQUESTS = os.environ.get("YTDLP_SLEEP", "1")
+
 
 def _cookie_args() -> list:
     """yt-dlp cookie flags (file wins over browser); empty when neither is set."""
@@ -63,6 +70,22 @@ def _cookie_args() -> list:
         return ["--cookies-from-browser", COOKIES_FROM_BROWSER]
     return []
 
+
+def _sleep_args(download: bool = False) -> list:
+    """yt-dlp throttle flags; empty unless SLEEP_REQUESTS is a finite number > 0.
+    Sleeps between extraction HTTP requests; for downloads also adds a randomized
+    pre-download interval (secs..2*secs) to further space out hits to YouTube."""
+    try:
+        secs = float(SLEEP_REQUESTS)
+    except (TypeError, ValueError):
+        secs = 0.0
+    if not 0 < secs < float("inf"):  # also rejects NaN/inf: not usable sleep durations
+        return []
+    args = ["--sleep-requests", str(secs)]
+    if download:
+        args += ["--sleep-interval", str(secs), "--max-sleep-interval", str(secs * 2)]
+    return args
+
 
 # Quality choices for --quality.
 QUALITY_CHOICES = ["best", "320", "m4a", "opus", "flac"]
@@ -368,7 +391,7 @@ def _ytmusic_search(query: str, limit: int) -> list[Hit]:
 def _ytdlp_search(query: str, limit: int) -> list[Hit]:
     try:
         result = subprocess.run(
-            ["yt-dlp", *_cookie_args(), "--flat-playlist", "-J", f"ytsearch{limit}:{query}"],
+            ["yt-dlp", *_cookie_args(), *_sleep_args(), "--flat-playlist", "-J", f"ytsearch{limit}:{query}"],
             capture_output=True, text=True, check=True,
         )
         data = json.loads(result.stdout)
@@ -640,6 +663,7 @@ def yt_download(url_or_query: str, target_folder: Optional[str], quality: str, d
                 hit: Optional[Hit] = None, outtmpl: Optional[str] = None):
     cmd = ["yt-dlp",
            *_cookie_args(),
+           *_sleep_args(download=True),
            *_quality_args(quality),
            "--embed-metadata",
            "--embed-thumbnail",
@@ -827,7 +851,7 @@ def probe_url(url: str) -> tuple[str, str, list[Hit]]:
         if hits:
             return "playlist", title, hits
     try:
-        result = subprocess.run(["yt-dlp", *_cookie_args(), "--flat-playlist", "-J", url],
+        result = subprocess.run(["yt-dlp", *_cookie_args(), *_sleep_args(), "--flat-playlist", "-J", url],
                                 capture_output=True, text=True, check=True)
         data = json.loads(result.stdout)
     except (subprocess.CalledProcessError, json.JSONDecodeError) as e:
@@ -866,7 +890,7 @@ def download_single(url: str, root: str, quality: str, dry_run: bool) -> dict:
 
 
 def run_yt_dlp_get_metadata(url: str, extra_args=None) -> Optional[dict]:
-    cmd = ["yt-dlp", *_cookie_args(), "-j", "--no-playlist", *(extra_args or []), url]
+    cmd = ["yt-dlp", *_cookie_args(), *_sleep_args(), "-j", "--no-playlist", *(extra_args or []), url]
     try:
         result = subprocess.run(cmd, capture_output=True, text=True, check=True)
         return json.loads(result.stdout)
@@ -1304,6 +1328,10 @@ def parse_args():
     p.add_argument("--workers", type=int, default=4,
                    help="Parallel yt-dlp metadata fetches during --repair (default 4; "
                         "raise with cookies, lower if YouTube rate-limits).")
+    p.add_argument("--sleep", type=float, metavar="SECS",
+                   help="Seconds to sleep between yt-dlp requests (0 disables). "
+                        "Avoids YouTube rate-limiting on bulk runs. Overrides "
+                        "$YTDLP_SLEEP (default 1).")
     p.add_argument("--cookies", metavar="FILE",
                    help="Path to a yt-dlp cookies.txt (authenticated requests avoid "
                         "YouTube's bot-check / rate limits). Overrides $YTDLP_COOKIES.")
@@ -1341,9 +1369,11 @@ def _dispatch_chosen(chosen: Hit, hits: list[Hit], root: str, quality: str,
 
 
 def main():
-    global DEBUG, COOKIES_FILE, COOKIES_FROM_BROWSER
+    global DEBUG, COOKIES_FILE, COOKIES_FROM_BROWSER, SLEEP_REQUESTS
     args = parse_args()
     DEBUG = args.debug
+    if args.sleep is not None:
+        SLEEP_REQUESTS = args.sleep
     if args.cookies:
         COOKIES_FILE = args.cookies
     if args.cookies_from_browser: