#!/usr/bin/env python3
"""
PLAYLIST PIRATE v2.0
CSV → resolve → search → build → download
Pipeline:
resolve Parse CSV(s) into *-playlist.md tracking files
search Find YouTube URLs via yt-dlp (resumable, no API key)
build Generate static HTML pages with embedded players
download Download tracks as MP3 (opt-in)
Each step is discrete. Nothing runs automatically.
"""
import argparse
import csv
import json
import os
import random
import re
import sys
import time
import urllib.error
import urllib.parse
import urllib.request
from datetime import datetime
from pathlib import Path
from typing import List, Optional, Tuple
# ─── Dependency Check ─────────────────────────────────────────────────────────
# Import each third-party dependency up front so one missing package is
# reported with a single install hint instead of a mid-run ImportError.
# The successful imports (Console, Theme, ID3 tags) are reused below.
missing = []
try:
    import yt_dlp
except ImportError:
    missing.append("yt-dlp")
try:
    from rich.console import Console
    from rich.theme import Theme
except ImportError:
    missing.append("rich")
try:
    from mutagen.id3 import ID3, TIT2, TPE1, TSRC, TALB, ID3NoHeaderError
except ImportError:
    missing.append("mutagen")
if missing:
    print(f"[FATAL] Missing: {', '.join(missing)}")
    print("Install: pip install yt-dlp rich mutagen")
    sys.exit(1)
# ─── Terminal ─────────────────────────────────────────────────────────────────
# Shared Rich console: green-on-black theme used by all modules' output.
# Style names ("ok", "warn", ...) are referenced via markup and out().
console = Console(
    theme=Theme({
        "ok": "green",
        "accent": "bold bright_green",
        "dim": "dim green",
        "warn": "yellow",
        "err": "bold red",
    }),
    style="green on black",
    highlight=False,
)
# ASCII banner printed by boot(); DIVIDER is a 60-char horizontal rule.
LOGO = """\
██████╗ ██╗ █████╗ ██╗ ██╗██╗ ██╗███████╗████████╗
██╔══██╗██║ ██╔══██╗╚██╗ ██╔╝██║ ██║██╔════╝╚══██╔══╝
██████╔╝██║ ███████║ ╚████╔╝ ██║ ██║███████╗ ██║
██╔═══╝ ██║ ██╔══██║ ╚██╔╝ ██║ ██║╚════██║ ██║
██║ ███████╗██║ ██║ ██║ ███████╗██║███████║ ██║
╚═╝ ╚══════╝╚═╝ ╚═╝ ╚═╝ ╚══════╝╚═╝╚══════╝ ╚═╝
██████╗ ██╗██████╗ █████╗ ████████╗███████╗
██╔══██╗██║██╔══██╗██╔══██╗╚══██╔══╝██╔════╝
██████╔╝██║██████╔╝███████║ ██║ █████╗
██╔═══╝ ██║██╔══██╗██╔══██║ ██║ ██╔══╝
██║ ██║██║ ██║██║ ██║ ██║ ███████╗
╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝ ╚═╝ ╚══════╝"""
DIVIDER = "─" * 60
def boot(module: str):
    """Print the banner plus a module header at the start of every command."""
    header = f"PLAYLIST PIRATE v2.0 // {module.upper()} MODULE"
    console.print(f"\n[accent]{LOGO}[/accent]")
    console.print(f"[dim]{header}[/dim]")
    console.print(f"[dim]{DIVIDER}[/dim]\n")
    # Brief pause so the banner registers before output starts scrolling.
    time.sleep(0.2)
def out(msg: str, style: str = "ok"):
    """Print *msg* through the shared console using a theme style name."""
    console.print(msg, style=style)
# ─── Fire Spectrum ─────────────────────────────────────────────────────────────
# Each playlist gets a unique fire accent. Assigned by sorted alphabetical index.
# (primary, bright, deep, glow) CSS color strings per palette slot.
FIRE_SPECTRUM = [
    ("#ff3300", "#ff6633", "#cc2200", "rgba(255,51,0,0.18)"),   # fire red
    ("#ff6600", "#ff8833", "#cc4400", "rgba(255,102,0,0.18)"),  # orange
    ("#ff9900", "#ffbb44", "#cc7700", "rgba(255,153,0,0.18)"),  # amber-orange
    ("#ffcc00", "#ffdd55", "#cc9900", "rgba(255,204,0,0.18)"),  # gold
    ("#e8943a", "#f0ad60", "#b86820", "rgba(232,148,58,0.18)"), # fire amber
    ("#d4654a", "#e07d64", "#a04030", "rgba(212,101,74,0.18)"), # coral
    ("#cc3333", "#dd5555", "#992222", "rgba(204,51,51,0.18)"),  # crimson
    ("#ff4d6d", "#ff7090", "#cc2244", "rgba(255,77,109,0.18)"), # hot pink-red
    ("#f472b6", "#f79ed0", "#c04080", "rgba(244,114,182,0.18)"),# fairy pink
    ("#c558d9", "#d880e8", "#8830a0", "rgba(197,88,217,0.18)"), # orchid
    ("#a855f7", "#c084fc", "#6d28d9", "rgba(168,85,247,0.18)"), # violet
    ("#7c3aed", "#a06af0", "#4c1d95", "rgba(124,58,237,0.18)"), # indigo-violet
    ("#3fbfaf", "#66d0c4", "#288070", "rgba(63,191,175,0.18)"), # waterfall
    ("#2ac4b3", "#55d4c6", "#1a8077", "rgba(42,196,179,0.18)"), # teal
    ("#00b4d8", "#33c8e8", "#007a99", "rgba(0,180,216,0.18)"),  # sky blue
    ("#32dc8c", "#66e8aa", "#1a9955", "rgba(50,220,140,0.18)"), # neon green
    ("#00ff41", "#55ff77", "#00aa22", "rgba(0,255,65,0.18)"),   # phosphor
    ("#ff7f3f", "#ffa066", "#cc5500", "rgba(255,127,63,0.18)"), # paradise
    ("#ffcf40", "#ffdd77", "#cc9900", "rgba(255,207,64,0.18)"), # toucan
    ("#8b2020", "#bb4444", "#5a0f0f", "rgba(139,32,32,0.18)"),  # deep red
    ("#ff5500", "#ff7733", "#cc3300", "rgba(255,85,0,0.18)"),   # orange-red
]

def get_fire(idx: int) -> dict:
    """Return the accent palette for slot *idx*, wrapping around the spectrum."""
    primary, bright, deep, glow = FIRE_SPECTRUM[idx % len(FIRE_SPECTRUM)]
    return {"primary": primary, "bright": bright, "deep": deep, "glow": glow}
# ─── Data Model ───────────────────────────────────────────────────────────────
# Current tracking-line format:
#   - [<status>] <title> | <artists> | ISRC:<code|-> | SP:<spotify id|-> | <url>
LINE_RE = re.compile(
    r'^- \[( |x|-)\] (.+?) \| (.+?) \| ISRC:([A-Z0-9\-]{3,15}|-) \| SP:([A-Za-z0-9]+|-) \| (.+)$'
)
# Older format without the SP: field; kept so existing files still parse.
LINE_RE_LEGACY = re.compile(
    r'^- \[( |x|-)\] (.+?) \| (.+?) \| ISRC:([A-Z0-9\-]{3,15}|-) \| (.+)$'
)
# Status markers used inside the [ ] checkbox field.
PENDING = " "
DONE = "x"
NOT_FOUND = "-"
class Track:
    """One playlist entry, round-tripped through a markdown checkbox line."""

    def __init__(self, status, title, artists, isrc, url, album="", spotify_id="-"):
        self.status = status
        self.title = title.strip()
        self.artists = artists.strip()
        self.isrc = isrc.strip() if isrc else "-"
        self.url = url.strip()
        self.album = album.strip()
        self.spotify_id = spotify_id.strip() if spotify_id else "-"

    @property
    def needs_search(self):
        """True while the track is pending and no URL has been recorded yet."""
        return self.status == PENDING and self.url == "?"

    @property
    def needs_download(self):
        """True when the track is pending and a concrete URL is recorded."""
        if self.status != PENDING:
            return False
        return self.url not in ("?", "NOT_FOUND")

    @property
    def youtube_id(self):
        """Extract the 11-char video id from a youtu.be or watch?v= URL ('' if none)."""
        url = self.url
        if not url or url in ("?", "NOT_FOUND"):
            return ""
        for pattern in (r"youtu\.be/([A-Za-z0-9_\-]{11})",
                        r"[?&]v=([A-Za-z0-9_\-]{11})"):
            hit = re.search(pattern, url)
            if hit:
                return hit.group(1)
        return ""

    @property
    def search_query(self):
        """Title plus at most the first two artists — the search string."""
        lead = [name.strip() for name in self.artists.split(",")][:2]
        return f"{self.title} {', '.join(lead)}"

    def to_md(self):
        """Serialize back to the tracking-file line format (matches LINE_RE)."""
        fields = (
            f"- [{self.status}] {self.title}",
            self.artists,
            f"ISRC:{self.isrc}",
            f"SP:{self.spotify_id}",
            self.url,
        )
        return " | ".join(fields)
class Playlist:
    """A named set of Tracks plus the CSV source it came from.

    Serialized as a markdown tracking file:
      line 0: "# <name>"
      line 1: "source: <csv name> | updated: <timestamp>" metadata
      then one checkbox line per track (see Track.to_md / LINE_RE).
    """

    def __init__(self, name, source, tracks, slug=""):
        self.name = name
        self.source = source
        self.tracks = tracks
        self.slug = slug or _make_slug(name)

    @classmethod
    def from_md(cls, path: Path):
        """Load a playlist from a *-playlist.md tracking file."""
        text = path.read_text(encoding="utf-8")
        lines = text.splitlines()
        # Prefer the "# Title" heading; fall back to a prettified filename.
        name = path.stem.replace("-playlist", "").replace("-", " ").replace("_", " ").title()
        if lines and lines[0].startswith("#"):
            name = lines[0].lstrip("#").strip()
        source = ""
        if len(lines) > 1:
            m = re.search(r"source:\s*([^|]+)", lines[1])
            if m:
                source = m.group(1).strip()
        tracks = []
        for line in lines:
            # Try the current format first, then the legacy (no SP: field) one.
            m = LINE_RE.match(line.strip())
            if m:
                tracks.append(Track(m.group(1), m.group(2), m.group(3), m.group(4),
                                    m.group(6), spotify_id=m.group(5)))
            else:
                m = LINE_RE_LEGACY.match(line.strip())
                if m:
                    tracks.append(Track(m.group(1), m.group(2), m.group(3),
                                        m.group(4), m.group(5)))
        slug = _make_slug(path.stem.replace("-playlist", ""))
        return cls(name, source, tracks, slug)

    def to_md(self):
        """Render the tracking-file text (heading, metadata line, track lines).

        BUG FIX: `ts` was computed but never written, and the metadata line
        that from_md() parses ("source: ...") was emitted as an empty string,
        so the source was silently dropped on every save. Write it back in
        the shape from_md() expects.
        """
        ts = datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
        body = [f"# {self.name}", f"source: {self.source} | updated: {ts}", ""]
        body += [t.to_md() for t in self.tracks]
        return "\n".join(body) + "\n"

    def save(self, path: Path):
        """Write the markdown representation to *path* as UTF-8."""
        path.write_text(self.to_md(), encoding="utf-8")
# ─── CSV Parser ───────────────────────────────────────────────────────────────
# Candidate column headers (lowercased) for each field, tried in listed
# order by _find_col(), so exports from different tools all resolve.
TRACK_KEYS = ["track name", "title", "song name", "song", "name", "track"]
ARTIST_KEYS = ["artist name(s)", "artist names", "artist name", "artists", "artist"]
ISRC_KEYS = ["isrc"]
ALBUM_KEYS = ["album name", "album title", "album"]
SPOTIFY_KEYS = ["track uri", "spotify uri", "track id", "spotify id"]
def _find_col(headers, keys):
lower = {h.lower(): h for h in headers}
return next((lower[k] for k in keys if k in lower), None)
def _make_slug(name: str) -> str:
s = name.lower().replace(" ", "-")
s = re.sub(r"[^a-z0-9\-]", "", s)
s = re.sub(r"-{2,}", "-", s)
return s.strip("-")
def _clean_artists(raw: str) -> str:
parts = [a.strip() for a in raw.split(",") if a.strip()]
return ", ".join(parts[:2])
def parse_csv(path: Path) -> Playlist:
    """Parse a CSV export into a Playlist of PENDING tracks (url "?").

    Column headers are matched case-insensitively against the *_KEYS lists;
    track and artist columns are required, the rest are optional.

    Raises:
        ValueError: when the track/artist columns cannot be found.
    """
    with path.open(encoding="utf-8-sig", newline="") as f:
        reader = csv.DictReader(f)
        headers = list(reader.fieldnames or [])
        tc = _find_col(headers, TRACK_KEYS)
        ac = _find_col(headers, ARTIST_KEYS)
        ic = _find_col(headers, ISRC_KEYS)
        lc = _find_col(headers, ALBUM_KEYS)
        sc = _find_col(headers, SPOTIFY_KEYS)
        if not tc or not ac:
            raise ValueError(f"Cannot find track/artist columns.\nHeaders: {headers}")
        tracks = []
        for row in reader:
            # ROBUSTNESS FIX: DictReader fills missing cells of short rows
            # with None, which previously crashed on .strip(); coerce to "".
            title = (row.get(tc) or "").strip()
            artists = _clean_artists(row.get(ac) or "")
            isrc = (row.get(ic) or "").strip().upper() if ic else "-"
            album = (row.get(lc) or "").strip() if lc else ""
            spotify_id = "-"
            if sc:
                raw = (row.get(sc) or "").strip()
                # Accept full URI (spotify:track:ID) or bare ID
                m = re.match(r"spotify:track:([A-Za-z0-9]+)", raw)
                spotify_id = m.group(1) if m else (raw if re.match(r"^[A-Za-z0-9]{10,}$", raw) else "-")
            if not isrc:
                isrc = "-"
            if title:
                tracks.append(Track(PENDING, title, artists, isrc, "?",
                                    album=album, spotify_id=spotify_id))
    name = path.stem.replace("-", " ").replace("_", " ").title()
    return Playlist(name, path.name, tracks, _make_slug(path.stem))
# ─── Batch helpers ────────────────────────────────────────────────────────────
def resolve_inputs(inputs: List[str], suffix: str) -> List[Path]:
    """Expand inputs (files or directories) to a list of matching Path objects."""
    resolved: List[Path] = []
    for raw in inputs:
        candidate = Path(raw)
        if candidate.is_dir():
            # Directories contribute every matching file, in sorted order.
            resolved += sorted(candidate.glob(f"*{suffix}"))
        elif candidate.exists():
            # Explicit files are taken as-is, regardless of suffix.
            resolved.append(candidate)
        else:
            out(f"> [WARN] Not found: {raw}", "warn")
    return resolved
# ─── MusicBrainz ──────────────────────────────────────────────────────────────
# Identifying User-Agent sent on every MusicBrainz request.
MB_HEADERS = {"User-Agent": "PlaylistPirate/2.0 (spaces.exopraxist.org)"}
MB_INTERVAL = 1.2  # min seconds between requests — throttle enforced by _mb_get()
_last_mb = 0.0     # wall-clock time of the last request (module-level state)
# Sentinel distinguishing "request failed" from a legitimately empty result.
FETCH_FAILED = object()
# Preferred order when picking one external URL for an artist; lower index wins.
ARTIST_URL_PRIORITY = [
    "official homepage", "bandcamp", "soundcloud", "patreon",
    "linktree", "youtube", "instagram", "twitter", "facebook",
    "last.fm", "discogs", "wikidata", "wikipedia",
]
def _mb_get(url: str):
    """Rate-limited GET against MusicBrainz, returning parsed JSON.

    Waits so consecutive calls are at least MB_INTERVAL seconds apart.
    Returns {} on HTTP 404, FETCH_FAILED on any other failure (after a
    30s cool-down when the server answers 429 Too Many Requests).

    FIX: `urllib.error` was previously reachable only as a side effect of
    `import urllib.request`; it is now imported explicitly at the top of
    the file so this handler does not depend on an implementation detail.
    """
    global _last_mb
    elapsed = time.time() - _last_mb
    if elapsed < MB_INTERVAL:
        time.sleep(MB_INTERVAL - elapsed)
    _last_mb = time.time()
    try:
        req = urllib.request.Request(url, headers=MB_HEADERS)
        with urllib.request.urlopen(req, timeout=15) as r:
            return json.loads(r.read().decode("utf-8"))
    except urllib.error.HTTPError as e:
        if e.code == 404:
            return {}
        if e.code == 429:
            out(" MB rate limit — waiting 30s", "warn")
            time.sleep(30)
        return FETCH_FAILED
    except Exception:
        # Network errors, timeouts, bad JSON — caller decides how to proceed.
        return FETCH_FAILED
def mb_isrc_lookup(isrc: str):
    """Resolve an ISRC to a MusicBrainz recording URL and first artist id.

    Returns FETCH_FAILED when the underlying request failed; otherwise a
    dict with (possibly empty) "mb_recording_url" and "mb_artist_id".
    """
    data = _mb_get(f"https://musicbrainz.org/ws/2/isrc/{isrc}?inc=artist-credits&fmt=json")
    if data is FETCH_FAILED:
        return FETCH_FAILED
    result = {"mb_recording_url": "", "mb_artist_id": ""}
    recordings = data.get("recordings", [])
    if recordings:
        first = recordings[0]
        rec_id = first.get("id")
        if rec_id:
            result["mb_recording_url"] = f"https://musicbrainz.org/recording/{rec_id}"
        # Take the first credited artist only.
        for credit in first.get("artist-credit", []):
            if isinstance(credit, dict) and "artist" in credit:
                result["mb_artist_id"] = credit["artist"].get("id", "")
                break
    return result
def mb_artist_lookup(mb_artist_id: str):
    """Pick the best external URL for an artist from their MB url relations.

    "Best" = lowest index in ARTIST_URL_PRIORITY, matched against either the
    relation type or the URL string itself. Returns FETCH_FAILED on error;
    otherwise a dict with "artist_url" and "artist_url_type" (possibly empty).
    """
    data = _mb_get(f"https://musicbrainz.org/ws/2/artist/{mb_artist_id}?inc=url-rels&fmt=json")
    if data is FETCH_FAILED:
        return FETCH_FAILED
    result = {"artist_url": "", "artist_url_type": ""}
    best_rank = len(ARTIST_URL_PRIORITY) + 1  # worse than any real rank
    for rel in data.get("relations", []):
        rel_type = rel.get("type", "").lower()
        href = rel.get("url", {}).get("resource", "")
        if not href:
            continue
        for rank, wanted in enumerate(ARTIST_URL_PRIORITY):
            if wanted in rel_type or wanted in href:
                if rank < best_rank:
                    best_rank = rank
                    result["artist_url"] = href
                    result["artist_url_type"] = rel_type
                break  # first priority hit decides this relation's rank
    return result
# ─── Build cache ──────────────────────────────────────────────────────────────
def load_build_cache(cache_path: Path) -> dict:
    """Read the JSON build cache; a missing or corrupt file yields {}."""
    if not cache_path.exists():
        return {}
    try:
        return json.loads(cache_path.read_text("utf-8"))
    except Exception:
        # Deliberate best-effort: a bad cache just means a full rebuild.
        return {}
def save_build_cache(cache: dict, cache_path: Path):
    """Write the build cache as pretty-printed UTF-8 JSON."""
    payload = json.dumps(cache, indent=2, ensure_ascii=False)
    cache_path.write_text(payload, "utf-8")
# ─── HTML helpers ─────────────────────────────────────────────────────────────
# NOTE(review): this constant is empty — the <link> markup it presumably held
# appears to have been stripped from the file (SHARED_CSS references the
# 'Rambla' font family). TODO confirm intended contents before shipping.
GOOGLE_FONTS = (
    ''
)
def esc(s) -> str:
    """HTML-escape *s* for safe interpolation into generated markup.

    BUG FIX: the replacement targets had been collapsed to no-ops (the
    entity strings were decoded back to bare '&', '<', '>', and the '"'
    replacement even produced a stray triple-quote). Restored the four
    standard entities. '&' must be escaped first so it does not re-escape
    the entities introduced by the later replacements.
    """
    return (str(s)
            .replace("&", "&amp;")
            .replace("<", "&lt;")
            .replace(">", "&gt;")
            .replace('"', "&quot;"))
def ms_to_mmss(ms) -> str:
    """Format milliseconds as M:SS; return "" for non-numeric input."""
    try:
        total_seconds = int(ms) // 1000
    except Exception:
        return ""
    minutes, seconds = divmod(total_seconds, 60)
    return f"{minutes}:{seconds:02d}"
def ms_to_hhmmss(total_ms: int) -> str:
    """Format milliseconds as H:MM:SS, dropping the hour field when zero."""
    total_s = total_ms // 1000
    hours, remainder = divmod(total_s, 3600)
    minutes, seconds = divmod(remainder, 60)
    if hours:
        return f"{hours}:{minutes:02d}:{seconds:02d}"
    return f"{minutes}:{seconds:02d}"
# Base stylesheet shared by the generated pages. Doubled braces are literal
# CSS braces; the single-brace fields ({primary}, {bright}, {deep}, {glow})
# match get_fire()'s keys — presumably filled via str.format per playlist.
SHARED_CSS = """
:root {{
--bg-void: #04060b;
--text-warm: #e8d5b8;
--text-muted: #7a6f5e;
--fp: {primary};
--fb: {bright};
--fd: {deep};
--fg: {glow};
}}
* {{ box-sizing: border-box; margin: 0; padding: 0; }}
body {{
background: var(--bg-void);
color: var(--text-warm);
font-family: 'Rambla', sans-serif;
line-height: 1.5; min-height: 100vh;
}}
"""
def build_hub_html(playlists_meta: list) -> str:
"""playlists_meta: list of {slug, name, track_count, fire_idx}"""
total = sum(p["track_count"] for p in playlists_meta)
n = len(playlists_meta)
cards = []
for p in playlists_meta:
f = get_fire(p["fire_idx"])
cards.append(
f' \n'
f'
{n} playlists • {total:,} tracks