feat: throttle yt-dlp requests to dodge YouTube rate-limiting

Bulk metadata fetches trip YouTube's per-session rate limit ("This
content isn't available, try again later"), failing even single-worker
runs after a burst. Add --sleep-requests between extraction calls (and a
randomized --sleep-interval before downloads), default 1s, tunable via
--sleep / $YTDLP_SLEEP (0 disables). Applied to metadata, search, probe,
and download yt-dlp invocations.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-13 23:39:08 -07:00
parent 47f3482192
commit b26e321926

View File

@@ -54,6 +54,13 @@ DEBUG = False
COOKIES_FILE = os.environ.get("YTDLP_COOKIES", "") COOKIES_FILE = os.environ.get("YTDLP_COOKIES", "")
COOKIES_FROM_BROWSER = os.environ.get("YTDLP_COOKIES_FROM_BROWSER", "") COOKIES_FROM_BROWSER = os.environ.get("YTDLP_COOKIES_FROM_BROWSER", "")
# Per-request throttle. YouTube rate-limits a session after a burst of metadata
# fetches ("This content isn't available, try again later"), tripping even
# single-worker runs after a while. Sleeping between requests (and randomly
# before each download) keeps the session under the limit. Seconds; 0 disables.
# Tunable via env or --sleep; default 1s mirrors yt-dlp's own `-t sleep` advice.
# Held unparsed: a str when read from the environment, a float once main()
# applies --sleep. _sleep_args() converts it with float() and treats a value
# that cannot be parsed as 0 (throttle off).
SLEEP_REQUESTS = os.environ.get("YTDLP_SLEEP", "1")
def _cookie_args() -> list: def _cookie_args() -> list:
"""yt-dlp cookie flags (file wins over browser); empty when neither is set.""" """yt-dlp cookie flags (file wins over browser); empty when neither is set."""
@@ -63,6 +70,22 @@ def _cookie_args() -> list:
return ["--cookies-from-browser", COOKIES_FROM_BROWSER] return ["--cookies-from-browser", COOKIES_FROM_BROWSER]
return [] return []
def _sleep_args(download: bool = False) -> list:
    """Build the yt-dlp throttle flags derived from SLEEP_REQUESTS.

    Args:
        download: When True, also add a randomized pre-download pause
            (``--sleep-interval secs`` up to ``--max-sleep-interval 2*secs``)
            on top of the per-request sleep.

    Returns:
        A list of command-line arguments to splice into a yt-dlp invocation,
        or an empty list when throttling is off: SLEEP_REQUESTS cannot be
        parsed as a number, is not finite (nan/inf), or is <= 0.
    """
    import math  # local import so the block does not depend on file-level imports

    try:
        secs = float(SLEEP_REQUESTS)
    except (TypeError, ValueError):
        # Unset/garbage value (e.g. None or "abc"): behave as "throttle off".
        secs = 0.0

    # float() happily parses "nan", "inf" and "infinity", and neither nan nor
    # inf compares <= 0, so they must be rejected explicitly; a non-finite
    # number is never a usable delay to hand to yt-dlp.
    if not math.isfinite(secs) or secs <= 0:
        return []

    args = ["--sleep-requests", str(secs)]
    if download:
        # Random wait in [secs, 2*secs] before each download.
        args += ["--sleep-interval", str(secs), "--max-sleep-interval", str(secs * 2)]
    return args
# Quality choices for --quality. # Quality choices for --quality.
QUALITY_CHOICES = ["best", "320", "m4a", "opus", "flac"] QUALITY_CHOICES = ["best", "320", "m4a", "opus", "flac"]
@@ -368,7 +391,7 @@ def _ytmusic_search(query: str, limit: int) -> list[Hit]:
def _ytdlp_search(query: str, limit: int) -> list[Hit]: def _ytdlp_search(query: str, limit: int) -> list[Hit]:
try: try:
result = subprocess.run( result = subprocess.run(
["yt-dlp", *_cookie_args(), "--flat-playlist", "-J", f"ytsearch{limit}:{query}"], ["yt-dlp", *_cookie_args(), *_sleep_args(), "--flat-playlist", "-J", f"ytsearch{limit}:{query}"],
capture_output=True, text=True, check=True, capture_output=True, text=True, check=True,
) )
data = json.loads(result.stdout) data = json.loads(result.stdout)
@@ -640,6 +663,7 @@ def yt_download(url_or_query: str, target_folder: Optional[str], quality: str, d
hit: Optional[Hit] = None, outtmpl: Optional[str] = None): hit: Optional[Hit] = None, outtmpl: Optional[str] = None):
cmd = ["yt-dlp", cmd = ["yt-dlp",
*_cookie_args(), *_cookie_args(),
*_sleep_args(download=True),
*_quality_args(quality), *_quality_args(quality),
"--embed-metadata", "--embed-metadata",
"--embed-thumbnail", "--embed-thumbnail",
@@ -827,7 +851,7 @@ def probe_url(url: str) -> tuple[str, str, list[Hit]]:
if hits: if hits:
return "playlist", title, hits return "playlist", title, hits
try: try:
result = subprocess.run(["yt-dlp", *_cookie_args(), "--flat-playlist", "-J", url], result = subprocess.run(["yt-dlp", *_cookie_args(), *_sleep_args(), "--flat-playlist", "-J", url],
capture_output=True, text=True, check=True) capture_output=True, text=True, check=True)
data = json.loads(result.stdout) data = json.loads(result.stdout)
except (subprocess.CalledProcessError, json.JSONDecodeError) as e: except (subprocess.CalledProcessError, json.JSONDecodeError) as e:
@@ -866,7 +890,7 @@ def download_single(url: str, root: str, quality: str, dry_run: bool) -> dict:
def run_yt_dlp_get_metadata(url: str, extra_args=None) -> Optional[dict]: def run_yt_dlp_get_metadata(url: str, extra_args=None) -> Optional[dict]:
cmd = ["yt-dlp", *_cookie_args(), "-j", "--no-playlist", *(extra_args or []), url] cmd = ["yt-dlp", *_cookie_args(), *_sleep_args(), "-j", "--no-playlist", *(extra_args or []), url]
try: try:
result = subprocess.run(cmd, capture_output=True, text=True, check=True) result = subprocess.run(cmd, capture_output=True, text=True, check=True)
return json.loads(result.stdout) return json.loads(result.stdout)
@@ -1304,6 +1328,10 @@ def parse_args():
p.add_argument("--workers", type=int, default=4, p.add_argument("--workers", type=int, default=4,
help="Parallel yt-dlp metadata fetches during --repair (default 4; " help="Parallel yt-dlp metadata fetches during --repair (default 4; "
"raise with cookies, lower if YouTube rate-limits).") "raise with cookies, lower if YouTube rate-limits).")
p.add_argument("--sleep", type=float, metavar="SECS",
help="Seconds to sleep between yt-dlp requests (0 disables). "
"Avoids YouTube rate-limiting on bulk runs. Overrides "
"$YTDLP_SLEEP (default 1).")
p.add_argument("--cookies", metavar="FILE", p.add_argument("--cookies", metavar="FILE",
help="Path to a yt-dlp cookies.txt (authenticated requests avoid " help="Path to a yt-dlp cookies.txt (authenticated requests avoid "
"YouTube's bot-check / rate limits). Overrides $YTDLP_COOKIES.") "YouTube's bot-check / rate limits). Overrides $YTDLP_COOKIES.")
@@ -1341,9 +1369,11 @@ def _dispatch_chosen(chosen: Hit, hits: list[Hit], root: str, quality: str,
def main(): def main():
global DEBUG, COOKIES_FILE, COOKIES_FROM_BROWSER global DEBUG, COOKIES_FILE, COOKIES_FROM_BROWSER, SLEEP_REQUESTS
args = parse_args() args = parse_args()
DEBUG = args.debug DEBUG = args.debug
if args.sleep is not None:
SLEEP_REQUESTS = args.sleep
if args.cookies: if args.cookies:
COOKIES_FILE = args.cookies COOKIES_FILE = args.cookies
if args.cookies_from_browser: if args.cookies_from_browser: