musicfetch/musicfetch

#!/usr/bin/env python3
"""MusicFetch v2 — fetch music via Lidarr (preferred) or YouTube Music (yt-dlp).

Accepts a free-form query ("artist", "title", "album", or combos like
"artist - title" / "artist - album") or a URL. Searches Lidarr and YouTube
Music concurrently, shows the top hits in an interactive picker, and acts on
the chosen hit. See README.md for full docs.
"""
import argparse
import json
import os
import re
import subprocess
import sys
from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass, field
from typing import Optional

import requests
from requests.exceptions import RequestException, Timeout

# Optional deps — degrade gracefully if missing.
# ytmusicapi backs the preferred YouTube Music search. When it is not
# installed YTMusic is None and youtube_search() uses the yt-dlp scrape instead.
try:
    from ytmusicapi import YTMusic
except ImportError:
    YTMusic = None

# rich draws the picker as a table. Table and Text are only bound when the
# import succeeds, so code must check `_console is not None` before using them.
try:
    from rich.console import Console
    from rich.table import Table
    from rich.text import Text
    _console = Console()
except ImportError:
    Console = None
    _console = None

# === CONFIGURATION ===
# Base URL of the Lidarr instance; any trailing "/" is stripped so that API
# paths (which start with "/") can be appended directly.
LIDARR_URL = os.environ.get("LIDARR_URL", "http://localhost:8686").rstrip("/")
# Lidarr API key. Empty when LIDARR_API_KEY is unset; lidarr_search() then
# skips Lidarr entirely.
API_KEY = os.environ.get("LIDARR_API_KEY", "")
# Default value of the -o/--root option.
DEFAULT_ROOT = os.environ.get("MUSICFETCH_ROOT", "/media/music")

# Headers sent with every Lidarr request; built once at import time from API_KEY.
HEADERS = {"X-Api-Key": API_KEY, "Content-Type": "application/json"}

# Runtime flags, populated in main().
DEBUG = False

# Quality choices for --quality.
QUALITY_CHOICES = ["best", "320", "m4a", "opus", "flac"]


def dbg(*a):
    """Print a ``[DEBUG]``-prefixed line to stdout, but only when DEBUG is set."""
    if not DEBUG:
        return
    print("[DEBUG]", *a)


def err(*a):
    """Write an ``[ERROR]``-prefixed, space-separated line to stderr."""
    parts = ["[ERROR]"]
    parts.extend(str(item) for item in a)
    print(" ".join(parts), file=sys.stderr)


# ---------------------------------------------------------------------------
# Hit model
# ---------------------------------------------------------------------------
@dataclass
class Hit:
    """A single search result from either backend, normalised for the picker.

    Only ``source`` and ``kind`` are required; every other field defaults to
    an empty value.
    """

    # Backend that produced the hit: "lidarr" | "youtube".
    source: str
    # Result type: "artist" | "album" | "track".
    kind: str
    # Track/album title (display).
    title: str = ""
    artist: str = ""
    album: str = ""
    year: str = ""
    thumbnail: str = ""
    # Raw backend data needed to act on the hit later.
    payload: dict = field(default_factory=dict)

    @property
    def display_title(self) -> str:
        """Return the title, else the album, else the artist."""
        for candidate in (self.title, self.album):
            if candidate:
                return candidate
        return self.artist


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def is_url(s: str) -> bool:
    """Return True when *s* starts with an ``http://`` or ``https://`` scheme."""
    return s.startswith(("http://", "https://"))


def lidarr_get(path, params=None, timeout=15):
    """GET *path* from the Lidarr API and return the decoded JSON body.

    The request goes to ``LIDARR_URL + path`` with the shared HEADERS.
    ``raise_for_status()`` is called, so a non-success HTTP status raises a
    requests exception, as do connection errors and timeouts.
    """
    url = f"{LIDARR_URL}{path}"
    response = requests.get(url, headers=HEADERS, params=params, timeout=timeout)
    response.raise_for_status()
    return response.json()


def lidarr_post(path, payload, timeout=15):
    """POST *payload* as JSON to the Lidarr API and return the decoded reply.

    Returns ``{}`` when the response has no body. A non-success HTTP status
    raises via ``raise_for_status()``.
    """
    url = f"{LIDARR_URL}{path}"
    response = requests.post(url, headers=HEADERS, json=payload, timeout=timeout)
    response.raise_for_status()
    if not response.content:
        return {}
    return response.json()


# ---------------------------------------------------------------------------
# Lidarr search
# ---------------------------------------------------------------------------
def _year_from_album(album: dict) -> str:
    rd = album.get("releaseDate") or album.get("firstReleaseDate") or ""
    return rd[:4] if rd else ""


def _album_to_hit(album: dict) -> Hit:
    """Wrap a Lidarr album record in an album-kind Hit.

    The artist name comes from the nested ``artist`` object when it has one,
    otherwise from a top-level ``artistName``. The untouched record is kept
    in ``payload["album"]``.
    """
    nested_artist = album.get("artist") or {}
    artist_name = nested_artist.get("artistName") or album.get("artistName") or ""
    album_title = album.get("title", "")
    return Hit(
        source="lidarr",
        kind="album",
        title=album_title,
        artist=artist_name,
        album=album_title,
        year=_year_from_album(album),
        payload={"album": album},
    )


def _artist_to_hit(artist: dict) -> Hit:
    """Wrap a Lidarr artist record in an artist-kind Hit.

    Both ``title`` and ``artist`` carry the artist name (``artistName``,
    falling back to ``title``). The raw record is kept in ``payload["artist"]``.
    """
    display_name = artist.get("artistName") or artist.get("title", "")
    return Hit(
        source="lidarr",
        kind="artist",
        title=display_name,
        artist=display_name,
        payload={"artist": artist},
    )


def lidarr_search(query: str, limit: int) -> list[Hit]:
    """Search Lidarr for *query* and return at most *limit* hits.

    The universal ``/api/v1/search`` endpoint is tried first. When it yields
    nothing usable, times out, or fails, the album and artist lookup
    endpoints are queried instead (albums first). Returns ``[]`` without
    contacting Lidarr when no API key is configured.
    """
    if not API_KEY:
        err("LIDARR_API_KEY not set — skipping Lidarr search.")
        return []

    found: list[Hit] = []
    try:
        results = lidarr_get("/api/v1/search", params={"term": query})
    except Timeout:
        err("Lidarr universal search timed out.")
    except RequestException as e:
        dbg(f"/api/v1/search unavailable ({e}); falling back to lookup endpoints.")
    else:
        # /search returns objects with 'foreignId' and either 'album' or 'artist'.
        for entry in results:
            album = entry.get("album")
            if album:
                found.append(_album_to_hit(album))
                continue
            artist = entry.get("artist")
            if artist:
                found.append(_artist_to_hit(artist))
        if found:
            return found[:limit]
        dbg("/api/v1/search returned nothing useful; trying lookup endpoints.")

    # Fallback: album lookup then artist lookup; each failure is independent.
    lookups = (
        ("album/lookup", _album_to_hit),
        ("artist/lookup", _artist_to_hit),
    )
    for label, convert in lookups:
        try:
            for record in lidarr_get(f"/api/v1/{label}", params={"term": query}):
                found.append(convert(record))
        except RequestException as e:
            dbg(f"{label} failed: {e}")
    return found[:limit]


# ---------------------------------------------------------------------------
# YouTube search (ytmusicapi preferred, yt-dlp scrape fallback)
# ---------------------------------------------------------------------------
def _ytm_thumb(item: dict) -> str:
    thumbs = item.get("thumbnails") or []
    return thumbs[-1]["url"] if thumbs else ""


def _ytm_artists(item: dict) -> str:
    arts = item.get("artists") or []
    return ", ".join(a.get("name", "") for a in arts if a.get("name"))


def youtube_search(query: str, limit: int) -> list[Hit]:
    """Search YouTube for *query*, preferring ytmusicapi over a yt-dlp scrape.

    yt-dlp is used when ytmusicapi is not installed or when its search
    raises anything at all.
    """
    if YTMusic is None:
        return _ytdlp_search(query, limit)
    try:
        return _ytmusic_search(query, limit)
    except Exception as e:  # ytmusicapi raises broadly
        dbg(f"ytmusicapi search failed ({e}); falling back to yt-dlp scrape.")
    return _ytdlp_search(query, limit)


def _ytmusic_search(query: str, limit: int) -> list[Hit]:
    """Return up to *limit* track hits from a ytmusicapi "songs" search.

    Results without a ``videoId`` are skipped. The album may arrive as a
    dict (its ``"name"`` is used) or as a plain value (used as-is, "" when
    falsy).
    """
    client = YTMusic()
    found: list[Hit] = []
    # Songs give us videoId + album + artist; that's the best download target.
    for song in client.search(query, filter="songs", limit=limit):
        video_id = song.get("videoId")
        if not video_id:
            continue

        raw_album = song.get("album")
        if isinstance(raw_album, dict):
            album_name = raw_album.get("name", "")
        else:
            album_name = raw_album or ""

        track = Hit(
            source="youtube",
            kind="track",
            title=song.get("title", ""),
            artist=_ytm_artists(song),
            album=album_name,
            year=str(song.get("year") or ""),
            thumbnail=_ytm_thumb(song),
            payload={"videoId": video_id},
        )
        found.append(track)
        if len(found) >= limit:
            break
    return found


def _ytdlp_search(query: str, limit: int) -> list[Hit]:
    """Search YouTube by running ``yt-dlp --flat-playlist -J ytsearchN:query``.

    Returns up to *limit* track hits built from the JSON ``entries`` list.
    The uploader (or channel) name stands in for the artist. Returns ``[]``
    and reports the problem via err() when yt-dlp cannot be started, exits
    non-zero, or prints something that is not JSON.
    """
    try:
        result = subprocess.run(
            ["yt-dlp", "--flat-playlist", "-J", f"ytsearch{limit}:{query}"],
            capture_output=True, text=True, check=True,
        )
        data = json.loads(result.stdout)
    except OSError as e:
        # Typically FileNotFoundError: yt-dlp is not installed or not on PATH.
        err(f"yt-dlp could not be run: {e}")
        return []
    except (subprocess.CalledProcessError, json.JSONDecodeError) as e:
        err(f"yt-dlp search failed: {e}")
        return []

    # Tolerate a non-object document and a null/missing "entries" value.
    entries = (data.get("entries") if isinstance(data, dict) else None) or []
    hits: list[Hit] = []
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        vid = entry.get("id")
        if not vid:
            continue
        hits.append(Hit(
            source="youtube",
            kind="track",
            title=entry.get("title") or "",
            artist=entry.get("uploader") or entry.get("channel") or "",
            year="",
            thumbnail="",
            payload={"videoId": vid},
        ))
    return hits[:limit]


# ---------------------------------------------------------------------------
# Picker / rendering
# ---------------------------------------------------------------------------
def _keywords(query: str) -> list[str]:
    return [w for w in re.split(r"[\s\-]+", query.lower()) if len(w) > 1]


def _ansi_bold_match(s: str, keywords: list[str]) -> str:
    if not s:
        return ""
    out = s
    for kw in keywords:
        out = re.sub(f"({re.escape(kw)})", "\033[1m\\1\033[0m", out, flags=re.IGNORECASE)
    return out


def _rich_match(s: str, keywords: list[str]):
    """Return a rich ``Text`` for *s* with every keyword occurrence in bold.

    Occurrences are located in a lower-cased copy of *s*, so keywords need
    to be lower-case to match. A falsy *s* is treated as "".
    """
    source = s or ""
    styled = Text(source)
    haystack = source.lower()
    for needle in keywords:
        width = len(needle)
        pos = haystack.find(needle)
        while pos != -1:
            styled.stylize("bold", pos, pos + width)
            # Resume after this match so occurrences never overlap.
            pos = haystack.find(needle, pos + width)
    return styled


def render_picker(hits: list[Hit], query: str, yt_first: bool) -> None:
    """Print the numbered list of *hits*, highlighting words from *query*.

    A rich table is used when rich is available; otherwise rows are printed
    as plain text with ANSI bold highlights. *yt_first* is accepted for
    interface compatibility and is not used here.
    """
    keywords = _keywords(query)

    def label(h: Hit) -> str:
        # Albums show the album name; everything else its display title.
        return h.album if h.kind == "album" else h.display_title

    if _console is not None:
        table = Table(show_lines=False, expand=False)
        table.add_column("#", justify="right", style="cyan")
        table.add_column("Src")
        table.add_column("Artist")
        table.add_column("Album / Title")
        table.add_column("Year")
        table.add_column("Type")
        for i, h in enumerate(hits, 1):
            src = "[green]LID[/]" if h.source == "lidarr" else "[red]YT[/]"
            table.add_row(
                str(i), src,
                _rich_match(h.artist, keywords),
                _rich_match(label(h), keywords),
                h.year, h.kind,
            )
        _console.print(table)
        return

    def cell(raw: str, width: int) -> str:
        # Pad using the visible length. The ANSI escape codes take no screen
        # space, so padding the highlighted string would shift the columns.
        raw = raw or ""
        return _ansi_bold_match(raw, keywords) + " " * max(0, width - len(raw))

    for i, h in enumerate(hits, 1):
        src = "LID" if h.source == "lidarr" else "YT "
        print(f"{i:>3}  {src}  {cell(h.artist, 30)} "
              f"{cell(label(h), 40)} {h.year:<6} {h.kind}")


def pick(hits: list[Hit], query: str, noninteractive: bool, yt_first: bool) -> Optional[Hit]:
    """Choose one hit, either automatically or by prompting the user.

    Non-interactive mode returns the first hit from the preferred source
    (YouTube when *yt_first*, otherwise Lidarr), or simply the first hit
    when that source produced none. Interactive mode shows the picker and
    reads a 1-based number until a valid one is entered.

    Returns None when there are no hits or the user quits (q, quit, empty
    line, EOF or Ctrl-C).
    """
    if not hits:
        return None
    if noninteractive:
        primary = "youtube" if yt_first else "lidarr"
        return next((h for h in hits if h.source == primary), hits[0])

    render_picker(hits, query, yt_first)
    while True:
        try:
            raw = input("Pick a number (q to quit): ").strip()
        except (EOFError, KeyboardInterrupt):
            print()
            return None
        if raw.lower() in ("q", "quit", ""):
            return None
        # isdecimal(), not isdigit(): the latter also accepts characters such
        # as "²" that int() rejects with ValueError.
        if raw.isdecimal():
            choice = int(raw)
            if 1 <= choice <= len(hits):
                return hits[choice - 1]
        print("Invalid choice.")


# ---------------------------------------------------------------------------
# Lidarr actions
# ---------------------------------------------------------------------------
def get_existing_artist(name: str) -> Optional[dict]:
    """Return the library artist whose name equals *name*, ignoring case.

    Returns None when no artist matches or when the library listing request
    fails (the failure is only logged in debug mode).
    """
    try:
        library = lidarr_get("/api/v1/artist", timeout=10)
    except RequestException as e:
        dbg(f"existing artist check failed: {e}")
        return None
    matches = (
        entry for entry in library
        if entry.get("artistName", "").lower() == name.lower()
    )
    return next(matches, None)


def get_default_metadata_profile_id() -> int:
    """Return the id of the first Lidarr metadata profile.

    Falls back to 1 when the profile list is empty or the request fails.
    """
    try:
        profiles = lidarr_get("/api/v1/metadataprofile", timeout=10)
    except RequestException as e:
        dbg(f"metadataprofile fetch failed: {e}")
        return 1
    return profiles[0]["id"] if profiles else 1


def add_artist(meta: dict, root: str, search_all: bool, dry_run: bool) -> Optional[dict]:
    """Add the artist described by *meta* to Lidarr.

    *meta* must provide an id (``foreignArtistId`` or ``id``) and a name
    (``artistName`` or ``title``). The artist is added as monitored under
    *root*; *search_all* controls whether Lidarr searches for missing albums.

    Returns Lidarr's response, or None when the required fields are missing
    or the request fails. With *dry_run* nothing is posted: the request is
    printed and a stand-in record with ``id`` -1 is returned.
    """
    foreign_id = meta.get("foreignArtistId") or meta.get("id")
    name = meta.get("artistName") or meta.get("title")
    if not (foreign_id and name):
        err("Missing foreignArtistId/artistName; cannot add artist.")
        return None

    add_options = {"searchForMissingAlbums": search_all, "monitor": "all"}
    payload = {
        "foreignArtistId": foreign_id,
        "artistName": name,
        "qualityProfileId": 1,
        "metadataProfileId": get_default_metadata_profile_id(),
        "rootFolderPath": root,
        "monitored": True,
        "addOptions": add_options,
    }

    if not dry_run:
        try:
            return lidarr_post("/api/v1/artist", payload)
        except RequestException as e:
            err(f"add_artist failed: {e}")
            return None

    print(f"[dry-run] POST /api/v1/artist  {json.dumps(payload)}")
    preview = {"id": -1, "artistName": name}
    preview.update(payload)
    return preview


def ensure_album_in_library(album: dict, root: str, search_all: bool, dry_run: bool) -> Optional[dict]:
    """Return a library album dict (with numeric id). Adds artist if needed.

    *album* is returned unchanged when it already looks like a library
    record: a positive integer ``id``, an ``artistId``, and a
    ``foreignAlbumId`` that does not start with "lookup". Otherwise the
    artist is located (or added) and the album is searched for among that
    artist's library albums, first by ``foreignAlbumId`` and then by
    case-insensitive title.

    Returns None when the artist cannot be added, the album listing fails,
    or no library album matches. An arbitrary album of the artist is never
    substituted, so the caller cannot end up grabbing the wrong release.
    With *dry_run* nothing is looked up after the artist step and a
    stand-in record is returned.
    """
    album_id = album.get("id")
    foreign_album_id = str(album.get("foreignAlbumId") or "")

    # Heuristic: lookup results carry a 0/None id; library albums carry real ids.
    already_in_library = (
        isinstance(album_id, int)
        and album_id > 0
        and not foreign_album_id.startswith("lookup")
        and bool(album.get("artistId"))
    )
    if already_in_library:
        return album

    artist_obj = album.get("artist") or {}
    artist_name = artist_obj.get("artistName") or album.get("artistName") or ""
    existing = get_existing_artist(artist_name) if artist_name else None
    if not existing:
        print(f"Adding artist '{artist_name}' to Lidarr...")
        # Without a nested artist object only the name is known; add_artist
        # then reports the missing foreign id itself.
        existing = add_artist(artist_obj or {"artistName": artist_name},
                              root, search_all, dry_run)
        if not existing:
            return None

    artist_id = existing.get("id")
    if dry_run:
        print(f"[dry-run] would resolve album '{album.get('title')}' under artist id {artist_id}")
        return {**album, "id": album.get("id") or -1, "artistId": artist_id}

    if artist_id is None:
        dbg("artist record has no id; cannot list its albums.")
        return None

    try:
        albums = lidarr_get("/api/v1/album", params={"artistId": artist_id}, timeout=15)
    except RequestException as e:
        dbg(f"album list fetch failed: {e}")
        return None

    # Prefer the stable foreign id; titles can differ in punctuation or casing.
    if foreign_album_id:
        for candidate in albums:
            if candidate.get("foreignAlbumId") == foreign_album_id:
                return candidate

    wanted_title = (album.get("title") or "").casefold()
    if wanted_title:
        for candidate in albums:
            if (candidate.get("title") or "").casefold() == wanted_title:
                return candidate

    dbg(f"album '{album.get('title')}' not found among {len(albums)} library albums.")
    return None


def release_available(album_id: int) -> bool:
    """Report whether Lidarr's release search returns anything for the album.

    Queries ``/api/v1/release`` with a 90-second timeout. A failed request
    counts as "no release".
    """
    try:
        releases = lidarr_get("/api/v1/release", params={"albumId": album_id}, timeout=90)
    except RequestException as e:
        dbg(f"release search failed: {e}")
        return False
    count = len(releases)
    dbg(f"interactive search returned {count} releases for album {album_id}")
    return count > 0


def trigger_album_search(album_id: int, dry_run: bool):
    """Post an ``AlbumSearch`` command for *album_id* to Lidarr.

    With *dry_run* the command is only printed. Request failures are not
    caught here and propagate to the caller.
    """
    if not dry_run:
        command = {"name": "AlbumSearch", "albumIds": [album_id]}
        lidarr_post("/api/v1/command", command)
        return
    print(f"[dry-run] POST /api/v1/command  AlbumSearch albumIds=[{album_id}]")


def act_lidarr_album(hit: Hit, root: str, search_all: bool, dry_run: bool) -> bool:
    """Returns True if Lidarr handled it; False to fall through to YouTube.

    The album in ``hit.payload["album"]`` is resolved to a library album.
    If the release search finds something for it, an AlbumSearch command is
    triggered. False is returned when the album cannot be resolved, no
    release is found, or the search command itself fails. With *dry_run*
    the intended actions are printed and True is returned.
    """
    album = hit.payload["album"]
    lib_album = ensure_album_in_library(album, root, search_all, dry_run)
    if not lib_album:
        err("Could not resolve album in Lidarr.")
        return False

    album_id = lib_album.get("id")
    if dry_run:
        print(f"[dry-run] would interactive-search album id {album_id}; "
              f"if no release found, fall through to YouTube.")
        trigger_album_search(album_id, dry_run)
        return True

    has_release = (isinstance(album_id, int) and album_id > 0
                   and release_available(album_id))
    if not has_release:
        print("No indexer release found in Lidarr — falling through to YouTube.")
        return False

    print(f"Indexer release available — triggering Lidarr grab for '{hit.album}'.")
    try:
        trigger_album_search(album_id, dry_run)
    except RequestException as e:
        # A failed command means Lidarr did not handle the request, so let
        # the caller use its YouTube fallback instead of crashing.
        err(f"Lidarr album search command failed: {e}")
        return False
    return True


def act_lidarr_artist(hit: Hit, root: str, search_all: bool, dry_run: bool) -> bool:
    """Add the artist in ``hit.payload["artist"]`` to Lidarr.

    Returns True when add_artist() returned a record, False otherwise.
    """
    artist_meta = hit.payload["artist"]
    print(f"Adding artist '{hit.artist}' to Lidarr...")
    return add_artist(artist_meta, root, search_all, dry_run) is not None


# ---------------------------------------------------------------------------
# YouTube download
# ---------------------------------------------------------------------------
def _quality_args(quality: str) -> list[str]:
    if quality == "best":
        # bestaudio, prefer mp3 320 only if extraction needs a container.
        return ["-f", "bestaudio/best", "-x", "--audio-quality", "0"]
    if quality == "320":
        return ["-f", "bestaudio/best", "-x", "--audio-format", "mp3", "--audio-quality", "0"]
    if quality in ("m4a", "opus", "flac"):
        return ["-f", "bestaudio/best", "-x", "--audio-format", quality, "--audio-quality", "0"]
    return ["-f", "bestaudio/best", "-x"]


def _literal_metadata_args(field_name: str, value: str) -> list[str]:
    """Build yt-dlp arguments that set *field_name* to the literal *value*."""
    # --parse-metadata takes FROM:TO. A FROM made only of letters/underscores
    # is read as a field NAME rather than as text, "%" starts a template
    # expression, and the first unescaped ":" ends FROM. So the value is
    # wrapped in "=" sentinels (never a bare field name; also keeps a trailing
    # backslash away from the separator), "%" is doubled and ":" escaped.
    # TO is a regex that strips the sentinels again.
    literal = value.replace("%", "%%").replace(":", "\\:")
    return ["--parse-metadata", f"={literal}=:(?s)^=(?P<{field_name}>.*)=$"]


def yt_download(url_or_query: str, target_folder: str, quality: str, dry_run: bool,
                hit: Optional[Hit] = None):
    """Download audio for *url_or_query* into *target_folder* using yt-dlp.

    Metadata and thumbnail are embedded and playlists are not expanded.
    When *hit* is given, its artist, album, title and year (each only if
    non-empty) override the corresponding yt-dlp metadata fields.

    With *dry_run* the directory creation and the yt-dlp command line are
    printed, shell-quoted, and nothing is executed. Otherwise the folder is
    created and yt-dlp is run. A missing yt-dlp binary or a non-zero exit
    status is reported via err(). Always returns None.
    """
    cmd = ["yt-dlp",
           *_quality_args(quality),
           "--embed-metadata",
           "--embed-thumbnail",
           "--no-playlist",
           "-P", target_folder]
    # Override tags from the chosen hit so they don't rely on scraped titles.
    if hit:
        overrides = (
            ("artist", hit.artist),
            ("album", hit.album),
            ("title", hit.title),
            ("release_year", hit.year),
        )
        for field_name, value in overrides:
            if value:
                cmd += _literal_metadata_args(field_name, value)
    cmd.append(url_or_query)

    if dry_run:
        import shlex  # only needed to render a copy-pasteable command line
        print(f"[dry-run] mkdir -p {shlex.quote(target_folder)}")
        print(f"[dry-run] {shlex.join(cmd)}")
        return

    os.makedirs(target_folder, exist_ok=True)
    print(f"Downloading via yt-dlp -> {target_folder}")
    try:
        completed = subprocess.run(cmd)
    except OSError as e:
        # Typically FileNotFoundError: yt-dlp is not installed or not on PATH.
        err(f"yt-dlp could not be run: {e}")
        return
    if completed.returncode != 0:
        err(f"yt-dlp exited with status {completed.returncode}.")


def act_youtube(hit: Hit, root: str, quality: str, dry_run: bool):
    """Download the YouTube hit into ``<root>/<first artist>/youtube``.

    The download target is the YouTube Music watch URL for the hit's
    ``videoId``, or a one-result ``ytsearch`` for "artist title" when the
    hit has no id. The folder is named after the first comma-separated
    artist, falling back to "Unknown Artist".
    """
    vid = hit.payload.get("videoId")
    # Prefer YouTube Music URL for correct album art / topic metadata.
    url = f"https://music.youtube.com/watch?v={vid}" if vid else f"ytsearch1:{hit.artist} {hit.title}"

    # The artist name comes from remote metadata. Keep it to one path
    # component: separators would nest directories ("AC/DC") and "." / ".."
    # would point outside the intended folder.
    primary_artist = hit.artist.split(",")[0]
    artist_dir = re.sub(r"[\\/\x00]+", "_", primary_artist).strip(" .") or "Unknown Artist"

    target = os.path.join(root, artist_dir, "youtube")
    yt_download(url, target, quality, dry_run, hit=hit)


# ---------------------------------------------------------------------------
# URL path
# ---------------------------------------------------------------------------
def run_yt_dlp_get_metadata(url: str) -> Optional[dict]:
    """Return the JSON metadata yt-dlp prints for *url* (``-j --no-playlist``).

    Returns None and reports the problem via err() when yt-dlp cannot be
    started, exits non-zero, or prints something that is not JSON.
    """
    try:
        result = subprocess.run(
            ["yt-dlp", "-j", "--no-playlist", url],
            capture_output=True, text=True, check=True,
        )
        return json.loads(result.stdout)
    except OSError as e:
        # Typically FileNotFoundError: yt-dlp is not installed or not on PATH.
        err(f"yt-dlp could not be run: {e}")
        return None
    except (subprocess.CalledProcessError, json.JSONDecodeError) as e:
        err(f"yt-dlp metadata extraction failed: {e}")
        return None


def get_artist_from_metadata(meta: dict) -> str:
    """Pick an artist name out of yt-dlp metadata.

    The first non-empty value among ``artist``, ``creator``, ``uploader``
    and ``channel`` wins. Failing that, the part of the title before the
    first " - " is used. "Unknown Artist" is returned when nothing usable
    is found, including when the title is not a string or the part before
    " - " is blank.
    """
    for key in ("artist", "creator", "uploader", "channel"):
        if meta.get(key):
            return meta[key]

    title = meta.get("title")
    if isinstance(title, str) and " - " in title:
        candidate = title.split(" - ", 1)[0].strip()
        if candidate:
            return candidate
    return "Unknown Artist"


def handle_url(url: str, root: str, quality: str, dry_run: bool):
    """Download *url* into ``<root>/<artist>/youtube``.

    The artist is taken from yt-dlp's metadata for the URL, or is
    "Unknown Artist" when the metadata cannot be fetched or names no artist.
    """
    meta = run_yt_dlp_get_metadata(url)
    artist = get_artist_from_metadata(meta) if meta else "Unknown Artist"

    # The artist name comes from remote metadata. Keep it to one path
    # component: separators would nest directories or, for an absolute path,
    # make os.path.join() discard *root*; "." / ".." would escape the folder.
    artist_dir = re.sub(r"[\\/\x00]+", "_", str(artist)).strip(" .") or "Unknown Artist"

    target = os.path.join(root, artist_dir, "youtube")
    yt_download(url, target, quality, dry_run)


# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
def build_combined_hits(query, limit, yt_first, lidarr_only, yt_only) -> list[Hit]:
    """Run the enabled searches concurrently and concatenate their hits.

    *yt_only* skips Lidarr and *lidarr_only* skips YouTube. The result lists
    YouTube hits first when *yt_first* is set, Lidarr hits first otherwise.
    """
    with ThreadPoolExecutor(max_workers=2) as pool:
        # Submit both jobs before waiting on either so they overlap.
        lidarr_job = pool.submit(lidarr_search, query, limit) if not yt_only else None
        youtube_job = pool.submit(youtube_search, query, limit) if not lidarr_only else None
        lidarr_hits = lidarr_job.result() if lidarr_job else []
        yt_hits = youtube_job.result() if youtube_job else []

    if yt_first:
        return yt_hits + lidarr_hits
    return lidarr_hits + yt_hits


def parse_args():
    """Build the musicfetch command-line parser and parse ``sys.argv``.

    Returns the populated ``argparse.Namespace``.
    """
    parser = argparse.ArgumentParser(
        prog="musicfetch",
        description="Fetch music via Lidarr (preferred) or YouTube Music.",
    )
    add = parser.add_argument

    # One or more words; main() joins them back into a single query string.
    add("query", nargs="+", help="Free-form query or a URL.")

    # Behaviour switches.
    add("-n", "--noninteractive", action="store_true",
        help="Auto-pick the top hit, no prompt.")
    add("-s", "--ytsearch", action="store_true",
        help="YouTube first instead of Lidarr first.")
    add("-d", "--dry-run", action="store_true",
        help="Show actions without executing them.")

    # Download and search tuning.
    add("-q", "--quality", choices=QUALITY_CHOICES, default="best",
        help="Audio quality/format (default: best).")
    add("--limit", type=int, default=10,
        help="Hits per source (default 10).")

    # Source selection; main() rejects the combination of both.
    add("--lidarr-only", action="store_true", help="Skip YouTube.")
    add("--yt-only", action="store_true", help="Skip Lidarr.")

    add("-o", "--root", default=DEFAULT_ROOT,
        help=f"Output root (default {DEFAULT_ROOT}).")
    add("--search-all", action="store_true",
        help="Search all albums when adding an artist to Lidarr.")
    add("--debug", action="store_true", help="Verbose output.")

    return parser.parse_args()


def main():
    """Command-line entry point: parse arguments, search, pick, and act.

    A URL query is downloaded directly. Any other query is searched on the
    enabled sources and one hit is picked. A YouTube hit is downloaded, a
    Lidarr artist hit is added to Lidarr, and a Lidarr album hit is handed
    to Lidarr with the top YouTube hit as fallback (unless --lidarr-only).
    Exits with status 1 on conflicting source flags or when nothing is found.
    """
    global DEBUG
    args = parse_args()
    DEBUG = args.debug
    query = " ".join(args.query).strip()

    if args.lidarr_only and args.yt_only:
        err("--lidarr-only and --yt-only are mutually exclusive.")
        sys.exit(1)

    if is_url(query):
        handle_url(query, args.root, args.quality, args.dry_run)
        return

    hits = build_combined_hits(query, args.limit, args.ytsearch,
                               args.lidarr_only, args.yt_only)
    if not hits:
        print("No hits found from any source.")
        sys.exit(1)

    chosen = pick(hits, query, args.noninteractive, args.ytsearch)
    if not chosen:
        print("Nothing selected.")
        return

    if chosen.source != "lidarr":
        act_youtube(chosen, args.root, args.quality, args.dry_run)
        return

    if chosen.kind != "album":
        act_lidarr_artist(chosen, args.root, args.search_all, args.dry_run)
        return

    handled = act_lidarr_album(chosen, args.root, args.search_all, args.dry_run)
    if handled or args.lidarr_only:
        return

    # Fall through to the top YouTube hit for the same query.
    yt_fallback = next((h for h in hits if h.source == "youtube"), None)
    if not yt_fallback:
        print("No YouTube fallback available.")
        return
    print("Using top YouTube hit as fallback.")
    act_youtube(yt_fallback, args.root, args.quality, args.dry_run)

# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()