feat: throttle yt-dlp requests to dodge YouTube rate-limiting
Bulk metadata fetches trip YouTube's per-session rate limit ("This
content isn't available, try again later"), failing even single-worker
runs after a burst. Add --sleep-requests between extraction calls (and a
randomized --sleep-interval before downloads), default 1s, tunable via
--sleep / $YTDLP_SLEEP (0 disables). Applied to metadata, search, probe,
and download yt-dlp invocations.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
38
musicfetch
38
musicfetch
@@ -54,6 +54,13 @@ DEBUG = False
|
||||
COOKIES_FILE = os.environ.get("YTDLP_COOKIES", "")
|
||||
COOKIES_FROM_BROWSER = os.environ.get("YTDLP_COOKIES_FROM_BROWSER", "")
|
||||
|
||||
# Per-request throttle. YouTube rate-limits a session after a burst of metadata
|
||||
# fetches ("This content isn't available, try again later"), tripping even
|
||||
# single-worker runs after a while. Sleeping between requests (and randomly
|
||||
# before each download) keeps the session under the limit. Seconds; 0 disables.
|
||||
# Tunable via $YTDLP_SLEEP or --sleep; default 1s mirrors yt-dlp's own `-t sleep` advice.
|
||||
SLEEP_REQUESTS = os.environ.get("YTDLP_SLEEP", "1")
|
||||
|
||||
|
||||
def _cookie_args() -> list:
|
||||
"""yt-dlp cookie flags (file wins over browser); empty when neither is set."""
|
||||
@@ -63,6 +70,22 @@ def _cookie_args() -> list:
|
||||
return ["--cookies-from-browser", COOKIES_FROM_BROWSER]
|
||||
return []
|
||||
|
||||
|
||||
def _sleep_args(download: bool = False) -> list:
    """Build yt-dlp throttle flags from the module-level SLEEP_REQUESTS.

    Args:
        download: When true, also add a randomized pre-download pause of
            between ``secs`` and ``2 * secs`` seconds to further space out
            hits to YouTube.

    Returns:
        ``["--sleep-requests", secs]`` (plus ``--sleep-interval`` and
        ``--max-sleep-interval`` when ``download`` is true), or ``[]`` when
        throttling is disabled: SLEEP_REQUESTS is zero, negative,
        unparsable, or not a finite number.
    """
    try:
        secs = float(SLEEP_REQUESTS)
    except (TypeError, ValueError):
        secs = 0.0
    # float() happily parses "nan" and "inf". NaN compares false against
    # everything, so a plain `secs <= 0` guard would let it through and hand
    # yt-dlp a nonsensical `--sleep-requests nan`; the chained comparison
    # rejects NaN, infinity, zero and negatives in one step.
    if not 0 < secs < float("inf"):
        return []
    args = ["--sleep-requests", str(secs)]
    if download:
        args += ["--sleep-interval", str(secs), "--max-sleep-interval", str(secs * 2)]
    return args
|
||||
|
||||
# Quality choices for --quality.
|
||||
QUALITY_CHOICES = ["best", "320", "m4a", "opus", "flac"]
|
||||
|
||||
@@ -368,7 +391,7 @@ def _ytmusic_search(query: str, limit: int) -> list[Hit]:
|
||||
def _ytdlp_search(query: str, limit: int) -> list[Hit]:
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["yt-dlp", *_cookie_args(), "--flat-playlist", "-J", f"ytsearch{limit}:{query}"],
|
||||
["yt-dlp", *_cookie_args(), *_sleep_args(), "--flat-playlist", "-J", f"ytsearch{limit}:{query}"],
|
||||
capture_output=True, text=True, check=True,
|
||||
)
|
||||
data = json.loads(result.stdout)
|
||||
@@ -640,6 +663,7 @@ def yt_download(url_or_query: str, target_folder: Optional[str], quality: str, d
|
||||
hit: Optional[Hit] = None, outtmpl: Optional[str] = None):
|
||||
cmd = ["yt-dlp",
|
||||
*_cookie_args(),
|
||||
*_sleep_args(download=True),
|
||||
*_quality_args(quality),
|
||||
"--embed-metadata",
|
||||
"--embed-thumbnail",
|
||||
@@ -827,7 +851,7 @@ def probe_url(url: str) -> tuple[str, str, list[Hit]]:
|
||||
if hits:
|
||||
return "playlist", title, hits
|
||||
try:
|
||||
result = subprocess.run(["yt-dlp", *_cookie_args(), "--flat-playlist", "-J", url],
|
||||
result = subprocess.run(["yt-dlp", *_cookie_args(), *_sleep_args(), "--flat-playlist", "-J", url],
|
||||
capture_output=True, text=True, check=True)
|
||||
data = json.loads(result.stdout)
|
||||
except (subprocess.CalledProcessError, json.JSONDecodeError) as e:
|
||||
@@ -866,7 +890,7 @@ def download_single(url: str, root: str, quality: str, dry_run: bool) -> dict:
|
||||
|
||||
|
||||
def run_yt_dlp_get_metadata(url: str, extra_args=None) -> Optional[dict]:
|
||||
cmd = ["yt-dlp", *_cookie_args(), "-j", "--no-playlist", *(extra_args or []), url]
|
||||
cmd = ["yt-dlp", *_cookie_args(), *_sleep_args(), "-j", "--no-playlist", *(extra_args or []), url]
|
||||
try:
|
||||
result = subprocess.run(cmd, capture_output=True, text=True, check=True)
|
||||
return json.loads(result.stdout)
|
||||
@@ -1304,6 +1328,10 @@ def parse_args():
|
||||
p.add_argument("--workers", type=int, default=4,
|
||||
help="Parallel yt-dlp metadata fetches during --repair (default 4; "
|
||||
"raise with cookies, lower if YouTube rate-limits).")
|
||||
p.add_argument("--sleep", type=float, metavar="SECS",
|
||||
help="Seconds to sleep between yt-dlp requests (0 disables). "
|
||||
"Avoids YouTube rate-limiting on bulk runs. Overrides "
|
||||
"$YTDLP_SLEEP (default 1).")
|
||||
p.add_argument("--cookies", metavar="FILE",
|
||||
help="Path to a yt-dlp cookies.txt (authenticated requests avoid "
|
||||
"YouTube's bot-check / rate limits). Overrides $YTDLP_COOKIES.")
|
||||
@@ -1341,9 +1369,11 @@ def _dispatch_chosen(chosen: Hit, hits: list[Hit], root: str, quality: str,
|
||||
|
||||
|
||||
def main():
|
||||
global DEBUG, COOKIES_FILE, COOKIES_FROM_BROWSER
|
||||
global DEBUG, COOKIES_FILE, COOKIES_FROM_BROWSER, SLEEP_REQUESTS
|
||||
args = parse_args()
|
||||
DEBUG = args.debug
|
||||
if args.sleep is not None:
|
||||
SLEEP_REQUESTS = args.sleep
|
||||
if args.cookies:
|
||||
COOKIES_FILE = args.cookies
|
||||
if args.cookies_from_browser:
|
||||
|
||||
Reference in New Issue
Block a user