fix: _is_direct_url label-boundary host match (no notyoutube.com false positive)

Review finding: bare endswith routed look-alike hosts to the direct yt-dlp
path. Match on a domain-label boundary and drop the redundant _DIRECT_HOSTS.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-13 22:02:57 -07:00
parent 32acd038c8
commit b899d75930
2 changed files with 18 additions and 8 deletions

View File

@@ -746,21 +746,17 @@ def _is_youtube_playlist_url(url: str) -> bool:
return "list" in qs and "v" not in qs
# Registrable domains whose links yt-dlp handles well directly. Subdomains
# (music.youtube.com, api.soundcloud.com, ...) are accepted by the
# label-boundary match in _is_direct_url, so they need no entries of their own.
_DIRECT_DOMAINS = ("youtube.com", "youtu.be", "soundcloud.com")


def _is_direct_url(url: str) -> bool:
    """True for links yt-dlp downloads well directly (YouTube, SoundCloud).

    These skip Odesli resolution and use the existing handle_url path. Matches on
    a label boundary so look-alikes (notyoutube.com) don't slip through.

    Args:
        url: Candidate link; anything is_url() rejects yields False.

    Returns:
        True when the URL's host is one of _DIRECT_DOMAINS or a subdomain of one.
    """
    if not is_url(url):
        return False
    # hostname is None for URLs without a netloc; normalise to "" so the
    # comparisons below simply fail instead of raising.
    host = (urlparse(url).hostname or "").lower()
    if host.startswith("www."):
        host = host[4:]
    # Exact match, or the domain preceded by a dot. A bare endswith(domain)
    # would also accept "notyoutube.com" and "evilyoutu.be".
    return any(host == d or host.endswith("." + d) for d in _DIRECT_DOMAINS)
def _ytmusic_playlist(pid: str) -> tuple[str, list[Hit]]:

View File

@@ -88,6 +88,20 @@ def test_is_direct_url_youtube_playlist_true():
assert mf._is_direct_url("https://www.youtube.com/playlist?list=PLabc")
def test_is_direct_url_lookalike_hosts_false():
    """Hosts that only resemble a direct domain must not be treated as direct."""
    # Each host contains a direct domain as a substring but not on a
    # domain-label boundary (or not as the actual registrable suffix).
    lookalike_urls = (
        "https://notyoutube.com/watch?v=abc",
        "https://myyoutube.com/x",
        "https://evilyoutu.be/x",
        "https://youtube.com.evil.com/x",
    )
    for candidate in lookalike_urls:
        assert not mf._is_direct_url(candidate)
def test_is_direct_url_subdomains_true():
    """Genuine subdomains of the direct domains are still treated as direct."""
    subdomain_urls = (
        "https://m.youtube.com/watch?v=abc",
        "https://on.soundcloud.com/x",
        "https://api.soundcloud.com/tracks/1",
    )
    for candidate in subdomain_urls:
        assert mf._is_direct_url(candidate)
def _resolved(yt="https://music.youtube.com/watch?v=YYY"):
    """Build a sample mf.Resolved ("Bloom" by ODESZA) whose youtube_url is *yt*."""
    sample = mf.Resolved(
        title="Bloom",
        artist="ODESZA",
        thumb="https://img/cover.jpg",
        youtube_url=yt,
    )
    return sample