portable-net-tv/tools/porn-rotation.py

#!/usr/bin/env python3
"""Freshness-aware porn rotation for the VLC TV setup, organized into collections.

The shared watch log only tracks SxxEyy episodes; eporner/PMV files carry no
such marker, so nothing recorded what had already been watched and the queue
kept looping back to the same compilations. This tool keeps its own per-file
play log (basename -> last-played ISO timestamp) and builds a VLC queue from
only the files that have NOT been played within the last N days.

The whole `~/media/porn` library is one pool (the root plus its immediate
subdirs, one level deep — there is no `_new` vs archive split). **Collections**
are named, virtual playlists over that pool: each is an *include* filter (match
any of its substrings) on top of the candidate set — never an override of the
global SKIP floor (the operator's hard no's). A file can belong to several
collections; nothing is moved on disk. Freshness is tracked per *file* in a
single global play-log, so something watched under `goon` won't resurface under
`pmv` tomorrow.

Subcommands:
  queue        scan, restrict to --collection, drop anything played in the last
               --days, shuffle, enqueue --count into VLC (first via in_play, rest
               in_enqueue), and stamp each as played now.
  list         print the fresh candidates for --collection and exit.
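  collections  list the defined collections with their fresh/total counts.
  paths        print up to --count fresh file paths for --collection (shuffled,
               one per line); with --mark, also stamp them as played now.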
  mark         stamp specific files (by basename or path) as played now — used to
               seed the log with things already watched this session.

Collections ship as defaults below and can be extended/overridden without code
edits via a `"collections"` block in the config file (same schema as COLLECTIONS).

Play state: ~/.local/state/portable-net-tv/porn-plays.json
VLC creds:  ~/.config/portable-net-tv/config.json  (vlcHttp block)
"""
from __future__ import annotations

import argparse
import json
import random
import re
import sys
import urllib.parse
import urllib.request
from datetime import datetime, timedelta, timezone
from pathlib import Path

# Filesystem anchors, all resolved relative to the current user's home dir.
HOME = Path.home()
CONFIG = HOME.joinpath(".config", "portable-net-tv", "config.json")
STATE = HOME.joinpath(".local", "state", "portable-net-tv", "porn-plays.json")
PORN_ROOT = HOME.joinpath("media", "porn")
# Lowercase, dot-prefixed extensions that count as playable video.
VIDEO_EXTS = {
    ".avi",
    ".m4v",
    ".mkv",
    ".mov",
    ".mp4",
    ".webm",
    ".wmv",
}

# The hard floor: titles never surfaced regardless of freshness or collection.
# No-JOI rule (joi/cei/instruction); cuck/chastity excluded because the operator
# isn't into it and eporner's "chastity" tag skews heavily cuckold (caged /
# keyholder / chastity device are the same cluster).
#
# scan() tests each term as a plain substring of the lowercased filename, so
# terms must be lowercase here, and a short term also matches any longer word
# that contains it (e.g. "cei" is a substring of "receive").
SKIP_SUBSTRINGS = (
    "joi",
    "cei",
    "jerk off instruction",
    "cuck",
    "chastity",
    "keyholder",
    "caged",
)

# Eporner exports the same clip at several qualities/dupes; basenames carry the
# stable id in [brackets]. Group by it and keep one (the largest file).
#
# The pattern accepts any bracketed run of 6+ ASCII letters/digits, and scan()
# keys on the FIRST such match in the basename.
# NOTE(review): a bracketed tag such as "[Uncensored]" or "[1080p60]" also fits
# this pattern, which would make unrelated files share a dedupe key — confirm
# that library filenames do not carry such tags ahead of the id.
EPORNER_ID = re.compile(r"\[([0-9A-Za-z]{6,})\]")

# --- Collections -----------------------------------------------------------
# Each collection spec is a plain dict:
#   include  list of lowercase substrings, OR-matched against the lowercased
#            filename; a candidate joins the collection if it matches ANY
#            term. An empty list means "no include filter" (see "all", the
#            default collection -> the legacy whole-pool behavior).
#   exclude  (optional) substrings that drop a match from this collection only.
#   desc     (optional) label printed by the `collections` subcommand.
# NOTE(review): a `dirs` key (extra scan dirs) is listed in the spec shape
# below, but no code visible in this file reads it — confirm before relying
# on it.
Collection = dict  # {"include": [str], "exclude": [str]?, "dirs": [str]?, "desc": str?}

COLLECTIONS: dict[str, Collection] = {
    "all": {
        "include": [],
        "desc": "the whole library (no include filter)",
    },
    "goon": {
        "include": ["goon", "edge", "edging", "encouragement", "mantra", "trance"],
        "desc": "gooning / edging encouragement",
    },
    "pmv": {
        "include": ["pmv", "hmv", "music video", "cock hero"],
        "desc": "PMV / HMV music edits",
    },
    "sissy": {
        "include": ["sissy", "sissif", "trap", "femboy", "feminiz"],
        "desc": "sissy / trap / femboy",
    },
    "bbc": {
        "include": ["bbc", "interracial", "blacked", "split screen", "splitscreen"],
        "desc": "BBC / interracial / split-screen",
    },
    "futa": {
        "include": ["futa", "futanari", "dickgirl"],
        "desc": "futa / futanari",
    },
    "anime": {
        "include": [
            "hentai",
            "uncensored",
            "animated",
            "animation",
            "parody",
            "cartoon",
            "3d porn",
            "sfm",
            "anime",
        ],
        "desc": "hentai / 3D / game-parody animation",
    },
    "comp": {
        "include": ["compilation", "split screen", "splitscreen", " comp ", "cumpilation"],
        "desc": "compilations / split-screen montages",
    },
    # --- Thematic (well-represented across the full library; counts profiled
    # 2026-06-08 over ~1436 floor-clean files) ---
    "breeding": {
        "include": ["breed", "breeding", "creampie", "impregnat", "cum inside"],
        "desc": "breeding / creampie",
    },
    "gangbang": {
        "include": ["gangbang", "gang bang", "orgy", "threesome", "foursome", "bukkake"],
        "desc": "group / gangbang / orgy",
    },
    "milf": {
        "include": ["milf", "stepmom", "step mom", "mommy", "mature", "cougar"],
        "desc": "milf / mommy / mature",
    },
    "cosplay": {
        "include": ["cosplay"],
        "desc": "cosplay",
    },
    "asmr": {
        "include": ["asmr", "ear lick", "ear licking", "whisper", "moans in your ear"],
        "desc": "ASMR / ear / whisper",
    },
    # --- Franchise (the library skews game/anime parody; the two largest. Add
    # more by dropping a `"collections"` block in config.json — data, not code) ---
    "naruto": {
        "include": ["naruto", "hinata", "tsunade", "sakura", "kushina", "boruto"],
        "desc": "Naruto parody",
    },
    "overwatch": {
        "include": ["overwatch", "d.va", "dva", "widowmaker", "mercy", "tracer", "pharah"],
        "desc": "Overwatch parody",
    },
}


def merge_config_collections() -> None:
    """Let the config file extend/override the built-in collections (data, not code).

    Reads the optional ``"collections"`` object from CONFIG and copies every
    well-formed entry (a dict whose ``include`` is a list) into COLLECTIONS,
    replacing a built-in of the same name.

    Filenames are lowercased before matching, so ``include``/``exclude`` terms
    are lowercased here; otherwise a config term such as "PMV" could never
    match. Non-string terms are dropped, and an ``exclude`` that is not a list
    is discarded rather than being iterated character by character.

    A missing, unreadable, or malformed config is ignored silently: the
    built-in collections stay in effect.
    """
    try:
        cfg = json.loads(CONFIG.read_text())
    except (OSError, ValueError):
        return
    # Valid JSON is not necessarily an object (could be a list, string, ...).
    extra = cfg.get("collections") if isinstance(cfg, dict) else None
    if not isinstance(extra, dict):
        return
    for name, spec in extra.items():
        if not isinstance(spec, dict) or not isinstance(spec.get("include"), list):
            continue
        normalized = dict(spec)  # leave the parsed config object untouched
        normalized["include"] = [
            term.lower() for term in spec["include"] if isinstance(term, str)
        ]
        if isinstance(spec.get("exclude"), list):
            normalized["exclude"] = [
                term.lower() for term in spec["exclude"] if isinstance(term, str)
            ]
        else:
            normalized.pop("exclude", None)
        COLLECTIONS[name] = normalized


def load_vlc() -> tuple[str, int, str]:
    """Read the VLC HTTP settings from the config file's ``vlcHttp`` block.

    Returns ``(host, port, password)``. ``host`` falls back to 127.0.0.1 and
    ``port`` to 8080 when absent; ``password`` is mandatory, so a config
    without it raises KeyError. Errors from reading or parsing CONFIG
    propagate to the caller.
    """
    settings = json.loads(CONFIG.read_text())["vlcHttp"]
    host = settings.get("host", "127.0.0.1")
    port = int(settings.get("port", 8080))
    return host, port, settings["password"]


def load_log() -> dict[str, str]:
    """Load the per-file play log (basename -> last-played ISO timestamp).

    Returns an empty log when the state file is missing, is not valid JSON, or
    parses to something other than a JSON object (for example a list), so
    callers can always use ``.get`` and item assignment on the result.
    """
    try:
        data = json.loads(STATE.read_text())
    except (FileNotFoundError, ValueError):
        return {}
    # A syntactically valid file can still hold the wrong top-level type.
    return data if isinstance(data, dict) else {}


def save_log(log: dict[str, str]) -> None:
    """Persist the play log to STATE as pretty-printed, key-sorted JSON.

    The JSON is written to a sibling ``.tmp`` file and then renamed over STATE.
    load_log() treats an unparsable file as an empty log, so a write that was
    interrupted halfway would otherwise wipe the whole play history on the
    next run; the rename keeps the previous file intact until the new one is
    complete.
    """
    STATE.parent.mkdir(parents=True, exist_ok=True)
    tmp = STATE.with_name(STATE.name + ".tmp")
    tmp.write_text(json.dumps(log, indent=2, sort_keys=True))
    tmp.replace(STATE)


def now_iso() -> str:
    """Return the current time in UTC as a timezone-aware ISO-8601 string."""
    current = datetime.now(tz=timezone.utc)
    return current.isoformat()


def _is_partial(name: str) -> bool:
    low = name.lower()
    return low.endswith(".part") or low.endswith(".tmp") or ".mp4_" in low


def _size_or_negative(path: Path) -> int:
    """Return `path`'s size in bytes, or -1 when it cannot be stat'ed."""
    try:
        return path.stat().st_size
    except OSError:
        return -1


def scan(dirs: list[Path]) -> list[Path]:
    """Scan dirs (non-recursive) for playable videos, applying the global floor,
    skipping partials, and de-duplicating eporner re-exports by their [id].

    Entries of each directory are visited in sorted order. A file is kept when
    it is a regular file, has an extension in VIDEO_EXTS, is not flagged by
    _is_partial(), and its lowercased name contains none of SKIP_SUBSTRINGS.
    Files sharing an EPORNER_ID collapse into one result: the first one seen
    holds the position in the output, and a later, strictly larger file takes
    over that position.

    A directory that cannot be listed is reported on stderr and skipped, and a
    file that cannot be stat'ed during a size comparison simply loses it, so
    one bad entry does not abort the whole scan.
    """
    out: list[Path] = []
    seen: set[Path] = set()
    # eporner id -> index in `out` of the file currently kept for that id,
    # so a replacement does not need a linear search of `out`.
    slot_by_id: dict[str, int] = {}
    for d in dirs:
        if not d.is_dir():
            continue
        try:
            entries = sorted(d.iterdir())
        except OSError as exc:
            print(f"  ! cannot list {d}: {exc}", file=sys.stderr)
            continue
        for entry in entries:
            if not entry.is_file() or entry in seen:
                continue
            if entry.suffix.lower() not in VIDEO_EXTS:
                continue
            if _is_partial(entry.name):
                continue
            low = entry.name.lower()
            if any(s in low for s in SKIP_SUBSTRINGS):
                continue
            seen.add(entry)
            m = EPORNER_ID.search(entry.name)
            if m is None:
                out.append(entry)
                continue
            key = m.group(1)
            slot = slot_by_id.get(key)
            if slot is None:
                slot_by_id[key] = len(out)
                out.append(entry)
            elif _size_or_negative(entry) > _size_or_negative(out[slot]):
                out[slot] = entry  # keep the largest re-export
    return out


def library_dirs() -> list[Path]:
    """The whole porn library, one level deep: the root plus each immediate
    subdir (e.g. hentai/, ST:TNG-XXX/), skipping `.trickplay` thumb dirs.
    Collections are virtual filters over this single pool — there is no `_new`
    vs archive split; physical foldering doesn't gate what a collection sees.

    PORN_ROOT is always the first element, even when it does not exist (scan()
    ignores non-directories). Subdirectories follow in sorted order so the scan
    order — and therefore dedupe tie-breaks and listing order — is the same on
    every run instead of depending on raw directory-listing order.

    The `.trickplay` test is done on the directory *name*: `Path.suffix` is
    empty for a name that is only a leading-dot component, so a directory
    called exactly `.trickplay` would slip past a suffix comparison.
    """
    dirs = [PORN_ROOT]
    if PORN_ROOT.is_dir():
        dirs.extend(
            entry
            for entry in sorted(PORN_ROOT.iterdir())
            if entry.is_dir() and not entry.name.lower().endswith(".trickplay")
        )
    return dirs


def in_collection(name: str, filename: str) -> bool:
    """Decide whether `filename` belongs to the collection called `name`.

    An unknown collection name accepts everything. Otherwise the lowercased
    filename must contain at least one ``include`` term (an empty or missing
    ``include`` accepts all files) and none of the ``exclude`` terms.
    """
    spec = COLLECTIONS.get(name)
    if spec is None:
        return True

    lowered = filename.lower()

    def hits(terms: list) -> bool:
        return any(term in lowered for term in terms)

    wanted = spec.get("include") or []
    if wanted and not hits(wanted):
        return False
    unwanted = spec.get("exclude") or []
    return not hits(unwanted)


def candidates(name: str) -> list[Path]:
    """Return the scanned library files (floor-filtered and deduped by scan())
    that in_collection() accepts for collection `name`, in scan order."""
    members: list[Path] = []
    for path in scan(library_dirs()):
        if in_collection(name, path.name):
            members.append(path)
    return members


def is_fresh(name: str, log: dict[str, str], days: int) -> bool:
    """True if `name` was never played, or last played more than `days` ago.

    `log` maps basenames to ISO-8601 timestamps. An entry that cannot be
    parsed — a malformed string, or a non-string value left by a hand-edited
    or corrupted log — is treated as "never played" rather than raising.
    Timestamps without a UTC offset are interpreted as UTC.
    """
    ts = log.get(name)
    if ts is None:
        return True
    try:
        last = datetime.fromisoformat(ts)
    except (TypeError, ValueError):
        # TypeError: the stored value is not a string at all (e.g. a number).
        return True
    if last.tzinfo is None:
        last = last.replace(tzinfo=timezone.utc)
    return last < datetime.now(timezone.utc) - timedelta(days=days)


def vlc_cmd(host: str, port: int, pw: str, command: str, **params: str) -> bool:
    """Send one command to VLC's HTTP interface (`/requests/status.json`).

    `command` and any extra `params` are sent as percent-encoded query
    parameters; authentication is HTTP Basic with an empty user name and `pw`
    as the password. Returns True when the request completes, False (after
    printing a warning to stderr) when it fails with an OSError. The request
    times out after 10 seconds.
    """
    import base64

    query = urllib.parse.urlencode(
        {"command": command, **params}, quote_via=urllib.parse.quote
    )
    request = urllib.request.Request(
        f"http://{host}:{port}/requests/status.json?" + query
    )
    credentials = base64.b64encode(f":{pw}".encode()).decode()
    request.add_header("Authorization", f"Basic {credentials}")
    try:
        with urllib.request.urlopen(request, timeout=10):
            return True
    except OSError as exc:
        print(f"  ! VLC command {command} failed: {exc}", file=sys.stderr)
        return False


def file_uri(path: Path) -> str:
    """Return `path` as a ``file://`` URI: the path string is percent-encoded
    (``/`` is left as-is) and prefixed with ``file://``."""
    encoded = urllib.parse.quote(str(path))
    return f"file://{encoded}"


def cmd_list(args: argparse.Namespace) -> int:
    """`list` subcommand: print a fresh/total summary line for
    ``args.collection`` followed by the basename of every file not played
    within the last ``args.days`` days. Always returns 0."""
    played = load_log()
    pool = candidates(args.collection)
    unplayed = [item for item in pool if is_fresh(item.name, played, args.days)]
    summary = (
        f"[{args.collection}] {len(unplayed)} fresh / {len(pool)} total "
        f"(unplayed in last {args.days}d)"
    )
    print(summary)
    for item in unplayed:
        print(f"  {item.name}")
    return 0


def cmd_collections(args: argparse.Namespace) -> int:
    """`collections` subcommand: report fresh/total counts per collection.

    With ``args.json`` the rows are printed as a single JSON array of
    ``{"name", "desc", "fresh", "total"}`` objects; otherwise as an aligned
    text table. Always returns 0.

    The library is scanned once and freshness is evaluated once per file;
    every collection is then just a filter over that shared pool, instead of
    re-walking the disk for each collection.
    """
    log = load_log()
    pool = scan(library_dirs())
    fresh_pool = {f for f in pool if is_fresh(f.name, log, args.days)}
    rows = []
    for name, spec in COLLECTIONS.items():
        files = [f for f in pool if in_collection(name, f.name)]
        fresh = sum(1 for f in files if f in fresh_pool)
        rows.append((name, spec.get("desc", ""), fresh, len(files)))
    if args.json:
        print(json.dumps([
            {"name": n, "desc": d, "fresh": fr, "total": t} for n, d, fr, t in rows
        ]))
        return 0
    print(f"{'collection':<12} {'fresh':>6} {'total':>6}  description")
    for n, d, fr, t in rows:
        print(f"{n:<12} {fr:>6} {t:>6}  {d}")
    return 0


def cmd_paths(args: argparse.Namespace) -> int:
    """Machine-readable: print full fresh file paths for a collection, one per
    line — for the TVAnarchy app to enqueue. `--mark` stamps them played now so
    the shared freshness state advances even though playback happens app-side.

    The fresh files are shuffled and cut to the first ``args.count``. When
    ``args.mark`` is set and something was picked, the log is updated and
    saved before the paths are printed. Always returns 0.
    """
    log = load_log()
    fresh = [
        path
        for path in candidates(args.collection)
        if is_fresh(path.name, log, args.days)
    ]
    random.shuffle(fresh)
    chosen = fresh[: args.count]
    if args.mark and chosen:
        stamp = now_iso()
        log.update({path.name: stamp for path in chosen})
        save_log(log)
    for line in map(str, chosen):
        print(line)
    return 0


def cmd_mark(args: argparse.Namespace) -> int:
    """`mark` subcommand: stamp every entry of ``args.files`` as played now.

    Each argument may be a bare name or a path; only its final path component
    is used as the log key. All entries share one timestamp. Always returns 0.
    """
    log = load_log()
    stamp = now_iso()
    log.update({Path(item).name: stamp for item in args.files})
    save_log(log)
    print(f"marked {len(args.files)} file(s) played @ {stamp}")
    return 0


def cmd_queue(args: argparse.Namespace) -> int:
    """`queue` subcommand: push up to ``args.count`` fresh files into VLC.

    The fresh files of ``args.collection`` are shuffled and cut to
    ``args.count``. Unless ``args.no_clear`` is set, the VLC playlist is
    emptied first and playback is started with ``in_play``; everything else is
    appended with ``in_enqueue``. Only files VLC accepted are stamped as played.

    Returns 1 when the collection has no fresh files, or when files were picked
    but VLC accepted none of them; 0 otherwise.

    Accounting is based on successful commands: the summary counts files that
    were actually queued (failed ones stay fresh and remain "available"), and
    if the first ``in_play`` fails the next file is tried with ``in_play`` too,
    so a single bad request does not leave VLC with a loaded but idle playlist.
    """
    host, port, pw = load_vlc()
    log = load_log()
    files = candidates(args.collection)
    fresh = [f for f in files if is_fresh(f.name, log, args.days)]
    if not fresh:
        print(
            f"[{args.collection}] no files unplayed in the last {args.days}d "
            f"({len(files)} in collection) — widen --days, pick another "
            f"collection, or fetch more",
            file=sys.stderr,
        )
        return 1
    random.shuffle(fresh)
    pick = fresh[: args.count]
    if not args.no_clear:
        vlc_cmd(host, port, pw, "pl_empty")
    stamp = now_iso()
    queued = 0
    # With --no-clear nothing is force-started; otherwise keep using in_play
    # until one request succeeds.
    started = args.no_clear
    for f in pick:
        command = "in_enqueue" if started else "in_play"
        if not vlc_cmd(host, port, pw, command, input=file_uri(f)):
            continue
        started = True
        queued += 1
        log[f.name] = stamp
        print(f"  {'PLAY' if command == 'in_play' else 'queue'}  {f.name}")
    if queued:
        save_log(log)
    print(
        f"[{args.collection}] queued {queued} fresh file(s); "
        f"{len(fresh) - queued} more available"
    )
    # Picking nothing (e.g. --count 0) is not a failure; VLC rejecting
    # everything that was picked is.
    return 0 if queued or not pick else 1


def main() -> int:
    """Command-line entry point: build the parser, dispatch, return the exit code.

    Config-defined collections are merged first so they appear among the
    ``--collection`` choices. ``--days`` and ``--count`` must be non-negative
    integers: a negative count would otherwise be applied as a slice bound
    (``fresh[:-n]``) and silently return "all but the last n" files.
    """
    merge_config_collections()
    p = argparse.ArgumentParser(description="freshness-aware porn rotation for VLC")
    sub = p.add_subparsers(dest="cmd", required=True)

    def non_negative_int(text: str) -> int:
        # argparse turns ArgumentTypeError into a normal usage error.
        try:
            value = int(text)
        except ValueError:
            raise argparse.ArgumentTypeError(f"invalid int value: {text!r}") from None
        if value < 0:
            raise argparse.ArgumentTypeError(f"must be >= 0, got {value}")
        return value

    def add_collection_arg(parser: argparse.ArgumentParser) -> None:
        parser.add_argument(
            "-c",
            "--collection",
            default="all",
            choices=sorted(COLLECTIONS),
            help="which collection to draw from (default: all)",
        )

    def add_days_arg(parser: argparse.ArgumentParser) -> None:
        parser.add_argument("--days", type=non_negative_int, default=7)

    q = sub.add_parser("queue", help="enqueue fresh files into VLC")
    add_collection_arg(q)
    add_days_arg(q)
    q.add_argument("--count", type=non_negative_int, default=12)
    q.add_argument(
        "--no-clear",
        action="store_true",
        help="append to the current playlist instead of replacing it",
    )
    q.set_defaults(func=cmd_queue)

    ls = sub.add_parser("list", help="show fresh candidates for a collection")
    add_collection_arg(ls)
    add_days_arg(ls)
    ls.set_defaults(func=cmd_list)

    c = sub.add_parser("collections", help="list collections with fresh/total counts")
    add_days_arg(c)
    c.add_argument("--json", action="store_true", help="machine-readable output")
    c.set_defaults(func=cmd_collections)

    pa = sub.add_parser("paths", help="print fresh file paths for a collection (for the app)")
    add_collection_arg(pa)
    add_days_arg(pa)
    pa.add_argument("--count", type=non_negative_int, default=20)
    pa.add_argument("--mark", action="store_true", help="stamp the returned files played now")
    pa.set_defaults(func=cmd_paths)

    m = sub.add_parser("mark", help="stamp files as played now")
    m.add_argument("files", nargs="+")
    m.set_defaults(func=cmd_mark)

    args = p.parse_args()
    return args.func(args)


# Script entry: run the CLI and hand main()'s return value to the interpreter
# as the process exit status.
if __name__ == "__main__":
    sys.exit(main())