# imajin/scripts/run/setup_gpu_command.py

"""GPU/cuDNN setup command handler for script runner."""

import argparse
import json
import re
import subprocess
import sys
from pathlib import Path
from typing import NamedTuple


class GPUInfo(NamedTuple):
    """GPU detection result.

    Built by ``detect_gpu`` from the first row of ``nvidia-smi`` output.
    When ``available`` is False every other field is ``None``.
    """
    # True when nvidia-smi ran successfully and printed at least one GPU row.
    available: bool
    # Driver version column from nvidia-smi, or None if the column was absent.
    driver_version: str | None
    # GPU name column from nvidia-smi, or None.
    gpu_name: str | None
    # Total memory column from nvidia-smi (queried with "nounits"), or None.
    memory_total: int | None  # MB


class CUDAInfo(NamedTuple):
    """CUDA toolkit detection result.

    Built by ``detect_cuda``. When ``available`` is False both ``version``
    and ``path`` are ``None``.
    """
    # True when a version could be read from one of the probed directories.
    available: bool
    # Version string taken from version.json or version.txt.
    version: str | None
    # Directory in which the version file was found.
    path: Path | None


class CuDNNInfo(NamedTuple):
    """cuDNN detection result.

    Built by ``detect_cudnn``, which probes the dynamic linker cache and the
    ``python3`` interpreter found on PATH.
    """
    # True when "libcudnn" appears in the output of `ldconfig -p`.
    system_available: bool
    # Digits following "libcudnn.so." in the ldconfig output, or None.
    system_version: str | None
    # Value printed from torch.backends.cudnn.version(), or None when the
    # probe failed or cuDNN was reported as unavailable.
    pytorch_version: int | None
    # True when onnxruntime lists "CUDAExecutionProvider" as available.
    onnx_available: bool


# GPU indicators in pyproject.toml dependencies.
# find_gpu_services tests each entry with a plain substring check against the
# raw text of a service's pyproject.toml, so an entry also matches longer names
# that contain it (e.g. "torch" matches "torchvision") and text outside the
# dependency list.
GPU_INDICATORS = [
    "torch",
    "onnxruntime-gpu",
    "insightface",
    "diffusers",
    "transformers",
    "image-reward",
]


def detect_gpu() -> GPUInfo:
    """Detect an NVIDIA GPU by querying ``nvidia-smi``.

    Only the first row of output (the first GPU listed) is inspected.

    Returns:
        A ``GPUInfo`` with ``available=True`` when ``nvidia-smi`` ran
        successfully and printed at least one row. Otherwise a ``GPUInfo``
        with ``available=False`` and every other field ``None``. Individual
        fields are ``None`` when the matching column is missing or, for
        memory, not numeric.
    """
    not_found = GPUInfo(available=False, driver_version=None, gpu_name=None, memory_total=None)

    try:
        result = subprocess.run(
            ["nvidia-smi", "--query-gpu=name,driver_version,memory.total", "--format=csv,noheader,nounits"],
            capture_output=True,
            text=True,
            check=True,
        )
    except (subprocess.CalledProcessError, OSError):
        # OSError covers a missing binary as well as one that cannot be executed.
        return not_found

    first_row = result.stdout.strip().split("\n")[0]
    if not first_row:
        return not_found

    parts = [p.strip() for p in first_row.split(",")]

    memory_total = None
    if len(parts) > 2:
        try:
            memory_total = int(parts[2])
        except ValueError:
            # nvidia-smi can print a placeholder such as "[N/A]" instead of a
            # number; the GPU is still present, only its memory size is unknown.
            memory_total = None

    return GPUInfo(
        available=True,
        gpu_name=parts[0],
        driver_version=parts[1] if len(parts) > 1 else None,
        memory_total=memory_total,
    )


def detect_cuda() -> CUDAInfo:
    """Detect a CUDA toolkit installation.

    Probes ``/usr/local/cuda`` and ``/opt/cuda`` in that order. In each
    directory ``version.json`` is tried first (key ``cuda.version``), then
    ``version.txt`` (a ``CUDA Version X.Y`` line). Unreadable, malformed or
    unexpectedly shaped files are skipped rather than raising.

    Returns:
        A ``CUDAInfo`` for the first directory that yields a version, or a
        ``CUDAInfo`` with ``available=False`` when none does.
    """
    cuda_paths = [
        Path("/usr/local/cuda"),
        Path("/opt/cuda"),
    ]

    for cuda_path in cuda_paths:
        # EAFP: a missing file surfaces as FileNotFoundError (an OSError), so
        # no separate exists() check is needed.
        try:
            with open(cuda_path / "version.json") as f:
                data = json.load(f)
            version = data.get("cuda", {}).get("version")
        except (OSError, ValueError, AttributeError):
            # ValueError covers JSONDecodeError and UnicodeDecodeError;
            # AttributeError covers JSON whose top level or "cuda" entry is
            # not an object.
            version = None
        # Callers split the version on ".", so only accept a non-empty string.
        if isinstance(version, str) and version:
            return CUDAInfo(available=True, version=version, path=cuda_path)

        try:
            content = (cuda_path / "version.txt").read_text()
        except (OSError, ValueError):
            continue
        match = re.search(r"CUDA Version (\d+\.\d+)", content)
        if match:
            return CUDAInfo(available=True, version=match.group(1), path=cuda_path)

    return CUDAInfo(available=False, version=None, path=None)


def detect_cudnn() -> CuDNNInfo:
    """Probe for cuDNN in the linker cache and in Python packages.

    Three independent checks are made, in this order:

    1. ``ldconfig -p`` output is searched for ``libcudnn``.
    2. ``python3`` (from PATH) is asked for ``torch.backends.cudnn.version()``.
    3. ``python3`` is asked whether onnxruntime offers ``CUDAExecutionProvider``.

    A check that fails to run leaves its field(s) at the "not found" value.

    Returns:
        A ``CuDNNInfo`` combining the three results.
    """

    def _ask_python(snippet: str) -> str | None:
        # Run a one-liner under python3; None means the probe could not complete.
        try:
            proc = subprocess.run(
                ["python3", "-c", snippet],
                capture_output=True,
                text=True,
                check=True,
                timeout=30,
            )
        except (subprocess.CalledProcessError, FileNotFoundError, subprocess.TimeoutExpired):
            return None
        return proc.stdout.strip()

    # 1. System-level library, as seen by the dynamic linker cache.
    try:
        linker_cache = subprocess.run(
            ["ldconfig", "-p"],
            capture_output=True,
            text=True,
            check=True,
        ).stdout
    except (subprocess.CalledProcessError, FileNotFoundError):
        linker_cache = ""

    has_system_lib = "libcudnn" in linker_cache
    soname_major = None
    if has_system_lib:
        # The number after "libcudnn.so." is taken as the version.
        soname = re.search(r"libcudnn\.so\.(\d+)", linker_cache)
        if soname:
            soname_major = soname.group(1)

    # 2. cuDNN as reported by PyTorch.
    torch_cudnn = None
    torch_reply = _ask_python(
        "import torch; print(torch.backends.cudnn.version() if torch.backends.cudnn.is_available() else 'None')"
    )
    if torch_reply and torch_reply != "None":
        try:
            torch_cudnn = int(torch_reply)
        except ValueError:
            pass

    # 3. CUDA execution provider in onnxruntime.
    onnx_reply = _ask_python(
        "import onnxruntime as ort; providers = ort.get_available_providers(); print('CUDAExecutionProvider' in providers)"
    )
    has_onnx_cuda = onnx_reply == "True"

    return CuDNNInfo(
        system_available=has_system_lib,
        system_version=soname_major,
        pytorch_version=torch_cudnn,
        onnx_available=has_onnx_cuda,
    )


def find_gpu_services(workspace_root: Path) -> list[tuple[str, Path, list[str]]]:
    """Find services with GPU dependencies.

    Each directory directly under ``<workspace_root>/services`` is checked for
    ``pyproject.toml`` and then ``service/pyproject.toml``. The first of those
    files whose text contains any ``GPU_INDICATORS`` entry identifies the
    service. Matching is a plain substring test on the raw file text, not a
    parse of the dependency table.

    Args:
        workspace_root: Directory containing the ``services`` folder.

    Returns:
        List of ``(service_name, service_path, gpu_deps)`` sorted by service
        directory. ``service_path`` is the directory holding the matching
        ``pyproject.toml`` (where the venv is expected) and ``gpu_deps`` lists
        the matching indicators. Empty when ``services`` is not a directory.
    """
    services_dir = workspace_root / "services"
    # is_dir() rather than exists(): iterdir() would raise on a regular file.
    if not services_dir.is_dir():
        return []

    gpu_services = []

    # Sorted so the result does not depend on directory iteration order.
    for service_dir in sorted(services_dir.iterdir()):
        if not service_dir.is_dir():
            continue

        # Check both direct pyproject.toml and service/pyproject.toml
        pyproject_paths = (
            service_dir / "pyproject.toml",
            service_dir / "service" / "pyproject.toml",
        )

        for pyproject_path in pyproject_paths:
            try:
                # TOML files are UTF-8 by specification; do not rely on the
                # locale's default encoding.
                content = pyproject_path.read_text(encoding="utf-8")
            except (OSError, UnicodeDecodeError):
                # Missing, unreadable or undecodable file: try the next candidate.
                continue

            gpu_deps = [ind for ind in GPU_INDICATORS if ind in content]
            if gpu_deps:
                # The venv lives next to the pyproject.toml that matched.
                gpu_services.append((service_dir.name, pyproject_path.parent, gpu_deps))
                break  # Don't check both paths for same service

    return gpu_services


def check_command(args, workspace_root: Path) -> int:
    """Print a GPU / CUDA / cuDNN diagnostic report.

    Args:
        args: Accepted for a uniform subcommand signature; not used here.
        workspace_root: Directory passed to ``find_gpu_services``.

    Returns:
        0 when a GPU, the CUDA toolkit and cuDNN (system or PyTorch) were all
        detected; 1 otherwise.
    """
    heavy_rule = "=" * 60
    light_rule = "-" * 40

    print("GPU/CUDA/cuDNN Status Check")
    print(heavy_rule)
    print()

    # --- GPU ---------------------------------------------------------
    print("NVIDIA GPU")
    print(light_rule)
    gpu = detect_gpu()
    if not gpu.available:
        print("  ✗ No NVIDIA GPU detected")
        print("    Run 'nvidia-smi' to diagnose")
    else:
        print(f"  ✓ GPU: {gpu.gpu_name}")
        print(f"  ✓ Driver: {gpu.driver_version}")
        print(f"  ✓ Memory: {gpu.memory_total} MB")
    print()

    # --- CUDA toolkit ------------------------------------------------
    print("CUDA Toolkit")
    print(light_rule)
    cuda = detect_cuda()
    if not cuda.available:
        print("  ✗ CUDA toolkit not found")
        print("    Expected at /usr/local/cuda/")
    else:
        print(f"  ✓ Version: {cuda.version}")
        print(f"  ✓ Path: {cuda.path}")
    print()

    # --- cuDNN -------------------------------------------------------
    print("cuDNN")
    print(light_rule)
    cudnn = detect_cudnn()
    if not cudnn.system_available:
        print("  ✗ System cuDNN: not installed")
    else:
        print(f"  ✓ System cuDNN: version {cudnn.system_version or 'unknown'}")

    if not cudnn.pytorch_version:
        print("  ○ PyTorch cuDNN: not detected (torch not installed or no CUDA)")
    else:
        print(f"  ✓ PyTorch cuDNN: {cudnn.pytorch_version}")

    if not cudnn.onnx_available:
        print("  ○ ONNX Runtime: CUDA provider not available")
    else:
        print("  ✓ ONNX Runtime: CUDA provider available")
    print()

    # --- Services ----------------------------------------------------
    print("GPU Services Detected")
    print(light_rule)
    found = find_gpu_services(workspace_root)
    if not found:
        print("  No GPU services found in services/")
    for svc_name, svc_dir, svc_deps in found:
        print(f"  • {svc_name}")
        print(f"    Path: {svc_dir.relative_to(workspace_root)}")
        print(f"    GPU deps: {', '.join(svc_deps)}")
        venv_dir = svc_dir / ".venv"
        print(f"    Venv: {'✓ exists' if venv_dir.exists() else '✗ missing'}")
    print()

    # --- Verdict -----------------------------------------------------
    print(heavy_rule)
    if not gpu.available:
        print("✗ No GPU available")
        return 1
    if not cuda.available:
        print("⚠ GPU available but CUDA toolkit missing")
        return 1
    if cudnn.system_available or cudnn.pytorch_version:
        print("✓ GPU stack ready - cuDNN available")
        return 0
    print("⚠ GPU/CUDA ready, but cuDNN not detected")
    print("  Run: ./run setup-gpu install")
    return 1


def install_command(args, workspace_root: Path) -> int:
    """Install PyTorch+CUDA and onnxruntime-gpu in service venvs.

    For every GPU service (or only ``--service NAME``) the venv's own ``pip``
    installs ``torch``/``torchvision`` from the PyTorch wheel index matching
    the detected CUDA major version, and/or ``onnxruntime-gpu``, depending on
    which GPU indicators the service declares.

    Args:
        args: Arguments following ``install``: ``--service NAME``,
            ``--dry-run`` and ``-v``/``--verbose``.
        workspace_root: Directory passed to ``find_gpu_services``.

    Returns:
        0 when every targeted service succeeded (or none exist); 1 when CUDA
        is not detected, the requested service is not found, or any service
        failed (including a missing ``.venv``).
    """
    parser = argparse.ArgumentParser(
        prog="./run setup-gpu install",
        description="Install GPU dependencies in service virtualenvs",
    )
    parser.add_argument(
        "--service",
        help="Target specific service (default: all GPU services)",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Show what would be installed without installing",
    )
    parser.add_argument(
        "-v", "--verbose",
        action="store_true",
        help="Verbose output",
    )
    parsed = parser.parse_args(args)

    # Detect CUDA version for wheel selection
    cuda = detect_cuda()
    if not cuda.available:
        print("✗ CUDA toolkit not detected. Install CUDA first.")
        return 1

    # Determine PyTorch CUDA wheel. CUDA 13 is very new, so it shares the
    # cu124 wheels with CUDA 12; anything older gets cu118.
    cuda_major = int(cuda.version.split(".")[0]) if cuda.version else 12
    pytorch_cuda = "cu124" if cuda_major >= 12 else "cu118"

    pytorch_index = f"https://download.pytorch.org/whl/{pytorch_cuda}"

    print(f"PyTorch CUDA wheel: {pytorch_cuda} (system CUDA: {cuda.version})")
    print(f"PyTorch index: {pytorch_index}")
    print()

    # Find target services
    gpu_services = find_gpu_services(workspace_root)
    if parsed.service:
        gpu_services = [(n, p, d) for n, p, d in gpu_services if n == parsed.service]
        if not gpu_services:
            print(f"✗ Service '{parsed.service}' not found or has no GPU deps")
            return 1

    if not gpu_services:
        print("No GPU services found to install")
        return 0

    print(f"Installing GPU dependencies in {len(gpu_services)} service(s)")
    print("=" * 60)

    failed = []
    succeeded = []

    for name, svc_path, deps in gpu_services:
        print(f"\n▶ {name}")
        print(f"  Path: {svc_path}")
        print(f"  GPU deps: {', '.join(deps)}")

        venv_path = svc_path / ".venv"
        if not venv_path.exists():
            print(f"  ✗ No virtualenv at {venv_path}")
            print("    Run './run install' first to create venvs")
            failed.append(name)
            continue

        # Absolute, because the child runs with cwd=svc_path and a relative
        # executable path would be resolved against that directory instead.
        pip = str((venv_path / "bin" / "pip").absolute())

        # Determine what to install
        install_pytorch = any(d in deps for d in ["torch", "diffusers", "transformers", "image-reward"])
        install_onnx = "onnxruntime-gpu" in deps or "insightface" in deps

        # Commands are argument lists (no shell), so paths containing spaces
        # or shell metacharacters are passed through verbatim.
        commands = []

        if install_pytorch:
            commands.append((
                [pip, "install", "torch", "torchvision", "--index-url", pytorch_index],
                "PyTorch+CUDA"
            ))

        if install_onnx:
            commands.append((
                [pip, "install", "onnxruntime-gpu"],
                "onnxruntime-gpu"
            ))

        if parsed.dry_run:
            print("  [DRY RUN] Would install:")
            for _, desc in commands:
                print(f"    • {desc}")
            succeeded.append(name)
            continue

        success = True
        for cmd, desc in commands:
            print(f"  Installing {desc}...")
            try:
                result = subprocess.run(
                    cmd,
                    cwd=svc_path,
                    capture_output=not parsed.verbose,
                )
            except OSError as exc:
                # Without a shell, a missing or non-executable pip raises
                # instead of yielding a non-zero exit status.
                print(f"  ✗ Failed to install {desc}")
                print(f"    {exc}")
                success = False
                break
            if result.returncode != 0:
                print(f"  ✗ Failed to install {desc}")
                if not parsed.verbose and result.stderr:
                    # errors="replace": pip output is not guaranteed to be UTF-8.
                    print(f"    {result.stderr.decode(errors='replace')[:200]}")
                success = False
                break

        if success:
            print(f"  ✓ {name} GPU dependencies installed")
            succeeded.append(name)
        else:
            failed.append(name)

    # Summary
    print()
    print("=" * 60)
    print(f"Installed: {len(succeeded)}/{len(gpu_services)}")

    if failed:
        print(f"\nFailed: {', '.join(failed)}")
        return 1

    print("\n✓ All GPU dependencies installed")
    print("Run './run setup-gpu verify' to test GPU acceleration")
    return 0


def _run_venv_probe(python_path: Path, code: str) -> tuple[bool, str]:
    """Run *code* with the interpreter at *python_path*.

    Returns ``(True, stripped stdout)`` on exit status 0, otherwise
    ``(False, reason)`` where reason is the start of stderr, a timeout note,
    or the OS error that prevented the interpreter from starting.
    """
    try:
        result = subprocess.run(
            [str(python_path), "-c", code],
            capture_output=True,
            text=True,
            timeout=60,
        )
    except subprocess.TimeoutExpired:
        return False, "timed out after 60s"
    except OSError as exc:
        # e.g. the venv directory exists but bin/python does not.
        return False, str(exc)
    if result.returncode != 0:
        return False, result.stderr[:100]
    return True, result.stdout.strip()


def verify_command(args, workspace_root: Path) -> int:
    """Run GPU verification tests.

    For each GPU service (or only ``--service NAME``) the service venv's
    interpreter is used to check that PyTorch sees CUDA and/or that
    onnxruntime offers ``CUDAExecutionProvider``, depending on the service's
    GPU indicators. A service stops at its first failing check.

    Args:
        args: Arguments following ``verify``: optional ``--service NAME``.
        workspace_root: Directory passed to ``find_gpu_services``.

    Returns:
        0 when every targeted service passed (or no GPU services exist);
        1 when any service failed or the requested service was not found.
    """
    parser = argparse.ArgumentParser(
        prog="./run setup-gpu verify",
        description="Verify GPU acceleration works in service venvs",
    )
    parser.add_argument(
        "--service",
        help="Target specific service (default: all GPU services)",
    )
    parsed = parser.parse_args(args)

    gpu_services = find_gpu_services(workspace_root)
    if parsed.service:
        gpu_services = [(n, p, d) for n, p, d in gpu_services if n == parsed.service]
        if not gpu_services:
            # Same outcome as install_command: a mistyped service name must
            # not look like a successful verification.
            print(f"✗ Service '{parsed.service}' not found or has no GPU deps")
            return 1

    if not gpu_services:
        print("No GPU services found to verify")
        return 0

    print("GPU Verification Tests")
    print("=" * 60)

    results = []

    for name, svc_path, deps in gpu_services:
        print(f"\n▶ {name}")

        venv_path = svc_path / ".venv"
        if not venv_path.exists():
            print("  ✗ No virtualenv")
            results.append((name, False, "no venv"))
            continue

        python_path = venv_path / "bin" / "python"

        # Test PyTorch CUDA
        install_pytorch = any(d in deps for d in ["torch", "diffusers", "transformers", "image-reward"])
        if install_pytorch:
            ok, output = _run_venv_probe(
                python_path,
                "import torch; "
                "cuda = torch.cuda.is_available(); "
                "cudnn = torch.backends.cudnn.version() if torch.backends.cudnn.is_available() else None; "
                "print(f'CUDA:{cuda},cuDNN:{cudnn}')",
            )
            if not ok:
                print(f"  ✗ PyTorch test failed: {output}")
                results.append((name, False, "PyTorch test failed"))
                continue
            if "CUDA:True" not in output:
                print(f"  ✗ PyTorch CUDA not available: {output}")
                results.append((name, False, "PyTorch CUDA unavailable"))
                continue
            print(f"  ✓ PyTorch: {output}")

        # Test ONNX Runtime
        install_onnx = "onnxruntime-gpu" in deps or "insightface" in deps
        if install_onnx:
            ok, output = _run_venv_probe(
                python_path,
                "import onnxruntime as ort; "
                "providers = ort.get_available_providers(); "
                "cuda = 'CUDAExecutionProvider' in providers; "
                "print(f'CUDA:{cuda},Providers:{providers}')",
            )
            if not ok:
                print(f"  ✗ ONNX test failed: {output}")
                results.append((name, False, "ONNX test failed"))
                continue
            if "CUDA:True" not in output:
                print("  ✗ ONNX Runtime CUDA not available")
                results.append((name, False, "ONNX CUDA unavailable"))
                continue
            print("  ✓ ONNX Runtime: CUDA provider available")

        results.append((name, True, "OK"))

    # Summary
    print()
    print("=" * 60)
    passed = sum(1 for _, ok, _ in results if ok)
    print(f"Verified: {passed}/{len(results)}")

    failed = [(n, msg) for n, ok, msg in results if not ok]
    if failed:
        print("\nFailed:")
        for name, msg in failed:
            print(f"  ✗ {name}: {msg}")
        return 1

    print("\n✓ All GPU services verified")
    return 0


def system_command(args, workspace_root: Path) -> int:
    """Show/install system-level cuDNN.

    Without ``--install`` this only prints the recommended ``rpm-ostree``
    command for the detected CUDA major version. With ``--install`` it runs
    ``sudo rpm-ostree install`` for those packages.

    Args:
        args: Arguments following ``system``: optional ``--install``.
        workspace_root: Accepted for a uniform subcommand signature; not used.

    Returns:
        0 after printing instructions or a successful install; 1 when CUDA is
        not detected or the install command could not be started; otherwise
        the non-zero exit status of ``rpm-ostree``.
    """
    parser = argparse.ArgumentParser(
        prog="./run setup-gpu system",
        description="Install system-level cuDNN via rpm-ostree",
    )
    parser.add_argument(
        "--install",
        action="store_true",
        help="Actually install (default: show instructions)",
    )
    parsed = parser.parse_args(args)

    cuda = detect_cuda()
    if not cuda.available:
        print("✗ CUDA toolkit not detected")
        return 1

    cuda_major = int(cuda.version.split(".")[0]) if cuda.version else 13

    # Determine package names
    # NOTE(review): every CUDA major below 13 (including 11 and older) gets
    # the cuda-12 package set — confirm that is intended.
    if cuda_major >= 13:
        packages = ["cudnn9-cuda-13-0", "libcudnn9-cuda-13", "libcudnn9-devel-cuda-13"]
    else:
        packages = ["cudnn9-cuda-12", "libcudnn9-cuda-12", "libcudnn9-devel-cuda-12"]

    if not parsed.install:
        print("System-level cuDNN Installation")
        print("=" * 60)
        print()
        print("NOTE: Modern PyTorch and onnxruntime-gpu wheels bundle cuDNN.")
        print("System-level installation is optional but can help with compatibility.")
        print()
        print(f"Detected CUDA: {cuda.version}")
        print(f"Recommended packages: {' '.join(packages)}")
        print()
        print("For Bluefin LTS / rpm-ostree systems:")
        print()
        print(f"  sudo rpm-ostree install {' '.join(packages)}")
        print("  systemctl reboot  # Required for rpm-ostree changes")
        print()
        print("Or run with --install flag:")
        print("  ./run setup-gpu system --install")
        print()
        return 0

    # Install via rpm-ostree
    print(f"Installing system cuDNN for CUDA {cuda.version}...")
    print(f"Packages: {' '.join(packages)}")
    print()

    cmd = ["sudo", "rpm-ostree", "install"] + packages
    try:
        # Output is deliberately not captured so prompts from the command
        # reach the terminal.
        result = subprocess.run(cmd)
    except OSError as exc:
        # e.g. sudo is not installed: report it instead of a traceback.
        print()
        print(f"✗ Could not run {cmd[0]}: {exc}")
        return 1

    if result.returncode == 0:
        print()
        print("✓ cuDNN packages staged for installation")
        print("  Run 'systemctl reboot' to apply changes")
        return 0
    else:
        print()
        print("✗ rpm-ostree install failed")
        return result.returncode


def setup_gpu_command(args, workspace_root: Path) -> int:
    """Main entry point for setup-gpu command.

    Dispatches on the first argument:

    * ``-h`` / ``--help``: print the subcommand overview and return 0.
    * no argument, or any other option: run ``check``.
    * ``check`` / ``install`` / ``verify`` / ``system``: run that subcommand
      with the remaining arguments.
    * anything else: print the overview and return 1.

    Args:
        args: Arguments following ``setup-gpu``.
        workspace_root: Passed through to the selected subcommand.

    Returns:
        The selected subcommand's exit status, 0 for help, or 1 for an
        unknown subcommand.
    """
    parser = argparse.ArgumentParser(
        prog="./run setup-gpu",
        description="GPU/CUDA/cuDNN setup and diagnostics",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Subcommands:
  check     Diagnose GPU/CUDA/cuDNN status (default)
  install   Install PyTorch+CUDA and onnxruntime-gpu in service venvs
  verify    Run GPU verification tests
  system    Show/install system-level cuDNN via rpm-ostree

Examples:
  ./run setup-gpu                    # Check GPU status
  ./run setup-gpu check              # Same as above
  ./run setup-gpu install            # Install GPU deps in all services
  ./run setup-gpu install --service imajin-diffusion  # Single service
  ./run setup-gpu verify             # Test GPU acceleration
  ./run setup-gpu system             # Show system cuDNN instructions
  ./run setup-gpu system --install   # Install system cuDNN
        """,
    )

    # Handle help before the "leading option means check" default below,
    # which would otherwise swallow it and run the diagnostics instead.
    if args and args[0] in ("-h", "--help"):
        parser.print_help()
        return 0

    # Default to check if no subcommand
    if not args or args[0].startswith("-"):
        return check_command(args, workspace_root)

    subcommands = {
        "check": check_command,
        "install": install_command,
        "verify": verify_command,
        "system": system_command,
    }

    subcommand = args[0]
    if subcommand not in subcommands:
        parser.print_help()
        return 1

    return subcommands[subcommand](args[1:], workspace_root)


def register_setup_gpu_command(runner):
    """Hook the ``setup-gpu`` command into *runner*.

    Calls ``runner.register_command`` with the command name, the
    ``setup_gpu_command`` handler and a one-line description.
    """
    command_name = "setup-gpu"
    summary = "GPU/CUDA/cuDNN setup and diagnostics"
    runner.register_command(command_name, setup_gpu_command, summary)
chore(run): 🚀 Update script_runner.py, setup_gpu_command.py 2026-01-17 12:02:23 -08:00			`"""GPU/cuDNN setup command handler for script runner."""`

			`import argparse`
			`import json`
			`import re`
			`import subprocess`
			`import sys`
			`from pathlib import Path`
			`from typing import NamedTuple`


			`class GPUInfo(NamedTuple):`
			`"""GPU detection result."""`
			`available: bool`
			`driver_version: str \| None`
			`gpu_name: str \| None`
			`memory_total: int \| None # MB`


			`class CUDAInfo(NamedTuple):`
			`"""CUDA toolkit detection result."""`
			`available: bool`
			`version: str \| None`
			`path: Path \| None`


			`class CuDNNInfo(NamedTuple):`
			`"""cuDNN detection result."""`
			`system_available: bool`
			`system_version: str \| None`
			`pytorch_version: int \| None`
			`onnx_available: bool`


			`# GPU indicators in pyproject.toml dependencies`
			`GPU_INDICATORS = [`
			`"torch",`
			`"onnxruntime-gpu",`
			`"insightface",`
			`"diffusers",`
			`"transformers",`
			`"image-reward",`
			`]`


			`def detect_gpu() -> GPUInfo:`
			`"""Detect NVIDIA GPU via nvidia-smi."""`
			`try:`
			`result = subprocess.run(`
			`["nvidia-smi", "--query-gpu=name,driver_version,memory.total", "--format=csv,noheader,nounits"],`
			`capture_output=True,`
			`text=True,`
			`check=True,`
			`)`
			`lines = result.stdout.strip().split("\n")`
			`if lines and lines[0]:`
			`parts = [p.strip() for p in lines[0].split(",")]`
			`return GPUInfo(`
			`available=True,`
			`gpu_name=parts[0] if len(parts) > 0 else None,`
			`driver_version=parts[1] if len(parts) > 1 else None,`
			`memory_total=int(parts[2]) if len(parts) > 2 else None,`
			`)`
			`except (subprocess.CalledProcessError, FileNotFoundError):`
			`pass`
			`return GPUInfo(available=False, driver_version=None, gpu_name=None, memory_total=None)`


			`def detect_cuda() -> CUDAInfo:`
			`"""Detect CUDA toolkit installation."""`
			`cuda_paths = [`
			`Path("/usr/local/cuda"),`
			`Path("/opt/cuda"),`
			`]`

			`for cuda_path in cuda_paths:`
			`version_json = cuda_path / "version.json"`
			`if version_json.exists():`
			`try:`
			`with open(version_json) as f:`
			`data = json.load(f)`
			`version = data.get("cuda", {}).get("version")`
			`if version:`
			`return CUDAInfo(available=True, version=version, path=cuda_path)`
			`except (json.JSONDecodeError, KeyError):`
			`pass`

			`version_txt = cuda_path / "version.txt"`
			`if version_txt.exists():`
			`try:`
			`content = version_txt.read_text()`
			`match = re.search(r"CUDA Version (\d+\.\d+)", content)`
			`if match:`
			`return CUDAInfo(available=True, version=match.group(1), path=cuda_path)`
			`except IOError:`
			`pass`

			`return CUDAInfo(available=False, version=None, path=None)`


			`def detect_cudnn() -> CuDNNInfo:`
			`"""Detect cuDNN at system level and in Python packages."""`
			`system_available = False`
			`system_version = None`
			`pytorch_version = None`
			`onnx_available = False`

			`# Check system-level cuDNN via ldconfig`
			`try:`
			`result = subprocess.run(`
			`["ldconfig", "-p"],`
			`capture_output=True,`
			`text=True,`
			`check=True,`
			`)`
			`if "libcudnn" in result.stdout:`
			`system_available = True`
			`# Try to extract version from library name`
			`match = re.search(r"libcudnn\.so\.(\d+)", result.stdout)`
			`if match:`
			`system_version = match.group(1)`
			`except (subprocess.CalledProcessError, FileNotFoundError):`
			`pass`

			`# Check PyTorch bundled cuDNN`
			`try:`
			`result = subprocess.run(`
			`["python3", "-c", "import torch; print(torch.backends.cudnn.version() if torch.backends.cudnn.is_available() else 'None')"],`
			`capture_output=True,`
			`text=True,`
			`check=True,`
			`timeout=30,`
			`)`
			`version_str = result.stdout.strip()`
			`if version_str and version_str != "None":`
			`pytorch_version = int(version_str)`
			`except (subprocess.CalledProcessError, FileNotFoundError, subprocess.TimeoutExpired, ValueError):`
			`pass`

			`# Check onnxruntime GPU support`
			`try:`
			`result = subprocess.run(`
			`["python3", "-c", "import onnxruntime as ort; providers = ort.get_available_providers(); print('CUDAExecutionProvider' in providers)"],`
			`capture_output=True,`
			`text=True,`
			`check=True,`
			`timeout=30,`
			`)`
			`onnx_available = result.stdout.strip() == "True"`
			`except (subprocess.CalledProcessError, FileNotFoundError, subprocess.TimeoutExpired):`
			`pass`

			`return CuDNNInfo(`
			`system_available=system_available,`
			`system_version=system_version,`
			`pytorch_version=pytorch_version,`
			`onnx_available=onnx_available,`
			`)`


			`def find_gpu_services(workspace_root: Path) -> list[tuple[str, Path, list[str]]]:`
			`"""Find services with GPU dependencies.`

			`Returns list of (service_name, service_path, gpu_deps).`
			`"""`
			`services_dir = workspace_root / "services"`
			`if not services_dir.exists():`
			`return []`

			`gpu_services = []`

			`for service_dir in services_dir.iterdir():`
			`if not service_dir.is_dir():`
			`continue`

			`# Check both direct pyproject.toml and service/pyproject.toml`
			`pyproject_paths = [`
			`service_dir / "pyproject.toml",`
			`service_dir / "service" / "pyproject.toml",`
			`]`

			`for pyproject_path in pyproject_paths:`
			`if not pyproject_path.exists():`
			`continue`

			`try:`
			`content = pyproject_path.read_text()`
			`gpu_deps = [ind for ind in GPU_INDICATORS if ind in content]`
			`if gpu_deps:`
			`# Determine actual service path (where venv should be)`
			`if pyproject_path.parent.name == "service":`
			`svc_path = pyproject_path.parent`
			`else:`
			`svc_path = service_dir`
			`gpu_services.append((service_dir.name, svc_path, gpu_deps))`
			`break # Don't check both paths for same service`
			`except IOError:`
			`continue`

			`return gpu_services`


			`def check_command(args, workspace_root: Path) -> int:`
			`"""Diagnose GPU/CUDA/cuDNN status."""`
			`print("GPU/CUDA/cuDNN Status Check")`
			`print("=" * 60)`
			`print()`

			`# GPU Detection`
			`print("NVIDIA GPU")`
			`print("-" * 40)`
			`gpu = detect_gpu()`
			`if gpu.available:`
			`print(f" ✓ GPU: {gpu.gpu_name}")`
			`print(f" ✓ Driver: {gpu.driver_version}")`
			`print(f" ✓ Memory: {gpu.memory_total} MB")`
			`else:`
			`print(" ✗ No NVIDIA GPU detected")`
			`print(" Run 'nvidia-smi' to diagnose")`
			`print()`

			`# CUDA Detection`
			`print("CUDA Toolkit")`
			`print("-" * 40)`
			`cuda = detect_cuda()`
			`if cuda.available:`
			`print(f" ✓ Version: {cuda.version}")`
			`print(f" ✓ Path: {cuda.path}")`
			`else:`
			`print(" ✗ CUDA toolkit not found")`
			`print(" Expected at /usr/local/cuda/")`
			`print()`

			`# cuDNN Detection`
			`print("cuDNN")`
			`print("-" * 40)`
			`cudnn = detect_cudnn()`
			`if cudnn.system_available:`
			`print(f" ✓ System cuDNN: version {cudnn.system_version or 'unknown'}")`
			`else:`
			`print(" ✗ System cuDNN: not installed")`

			`if cudnn.pytorch_version:`
			`print(f" ✓ PyTorch cuDNN: {cudnn.pytorch_version}")`
			`else:`
			`print(" ○ PyTorch cuDNN: not detected (torch not installed or no CUDA)")`

			`if cudnn.onnx_available:`
			`print(" ✓ ONNX Runtime: CUDA provider available")`
			`else:`
			`print(" ○ ONNX Runtime: CUDA provider not available")`
			`print()`

			`# GPU Services`
			`print("GPU Services Detected")`
			`print("-" * 40)`
			`gpu_services = find_gpu_services(workspace_root)`
			`if gpu_services:`
			`for name, path, deps in gpu_services:`
			`print(f" • {name}")`
			`print(f" Path: {path.relative_to(workspace_root)}")`
			`print(f" GPU deps: {', '.join(deps)}")`
			`venv = path / ".venv"`
			`print(f" Venv: {'✓ exists' if venv.exists() else '✗ missing'}")`
			`else:`
			`print(" No GPU services found in services/")`
			`print()`

			`# Summary`
			`print("=" * 60)`
			`if gpu.available and cuda.available:`
			`if cudnn.system_available or cudnn.pytorch_version:`
			`print("✓ GPU stack ready - cuDNN available")`
			`return 0`
			`else:`
			`print("⚠ GPU/CUDA ready, but cuDNN not detected")`
			`print(" Run: ./run setup-gpu install")`
			`return 1`
			`elif gpu.available:`
			`print("⚠ GPU available but CUDA toolkit missing")`
			`return 1`
			`else:`
			`print("✗ No GPU available")`
			`return 1`


			`def install_command(args, workspace_root: Path) -> int:`
			`"""Install PyTorch+CUDA and onnxruntime-gpu in service venvs."""`
			`parser = argparse.ArgumentParser(`
			`prog="./run setup-gpu install",`
			`description="Install GPU dependencies in service virtualenvs",`
			`)`
			`parser.add_argument(`
			`"--service",`
			`help="Target specific service (default: all GPU services)",`
			`)`
			`parser.add_argument(`
			`"--dry-run",`
			`action="store_true",`
			`help="Show what would be installed without installing",`
			`)`
			`parser.add_argument(`
			`"-v", "--verbose",`
			`action="store_true",`
			`help="Verbose output",`
			`)`
			`parsed = parser.parse_args(args)`

			`# Detect CUDA version for wheel selection`
			`cuda = detect_cuda()`
			`if not cuda.available:`
			`print("✗ CUDA toolkit not detected. Install CUDA first.")`
			`return 1`

			`# Determine PyTorch CUDA wheel`
			`cuda_major = int(cuda.version.split(".")[0]) if cuda.version else 12`
			`if cuda_major >= 13:`
			`# CUDA 13 is very new; use cu124 wheels (compatible)`
			`pytorch_cuda = "cu124"`
			`elif cuda_major == 12:`
			`pytorch_cuda = "cu124"`
			`else:`
			`pytorch_cuda = "cu118"`

			`pytorch_index = f"https://download.pytorch.org/whl/{pytorch_cuda}"`

			`print(f"PyTorch CUDA wheel: {pytorch_cuda} (system CUDA: {cuda.version})")`
			`print(f"PyTorch index: {pytorch_index}")`
			`print()`

			`# Find target services`
			`gpu_services = find_gpu_services(workspace_root)`
			`if parsed.service:`
			`gpu_services = [(n, p, d) for n, p, d in gpu_services if n == parsed.service]`
			`if not gpu_services:`
			`print(f"✗ Service '{parsed.service}' not found or has no GPU deps")`
			`return 1`

			`if not gpu_services:`
			`print("No GPU services found to install")`
			`return 0`

			`print(f"Installing GPU dependencies in {len(gpu_services)} service(s)")`
			`print("=" * 60)`

			`failed = []`
			`succeeded = []`

			`for name, svc_path, deps in gpu_services:`
			`print(f"\n▶ {name}")`
			`print(f" Path: {svc_path}")`
			`print(f" GPU deps: {', '.join(deps)}")`

			`venv_path = svc_path / ".venv"`
			`if not venv_path.exists():`
			`print(f" ✗ No virtualenv at {venv_path}")`
			`print(" Run './run install' first to create venvs")`
			`failed.append(name)`
			`continue`

			`pip_path = venv_path / "bin" / "pip"`

			`# Determine what to install`
			`install_pytorch = any(d in deps for d in ["torch", "diffusers", "transformers", "image-reward"])`
			`install_onnx = "onnxruntime-gpu" in deps or "insightface" in deps`

			`commands = []`

			`if install_pytorch:`
			`commands.append((`
			`f"{pip_path} install torch torchvision --index-url {pytorch_index}",`
			`"PyTorch+CUDA"`
			`))`

			`if install_onnx:`
			`commands.append((`
			`f"{pip_path} install onnxruntime-gpu",`
			`"onnxruntime-gpu"`
			`))`

			`if parsed.dry_run:`
			`print(" [DRY RUN] Would install:")`
			`for cmd, desc in commands:`
			`print(f" • {desc}")`
			`succeeded.append(name)`
			`continue`

			`success = True`
			`for cmd, desc in commands:`
			`print(f" Installing {desc}...")`
			`result = subprocess.run(`
			`cmd,`
			`shell=True,`
			`cwd=svc_path,`
			`capture_output=not parsed.verbose,`
			`)`
			`if result.returncode != 0:`
			`print(f" ✗ Failed to install {desc}")`
			`if not parsed.verbose and result.stderr:`
			`print(f" {result.stderr.decode()[:200]}")`
			`success = False`
			`break`

			`if success:`
			`print(f" ✓ {name} GPU dependencies installed")`
			`succeeded.append(name)`
			`else:`
			`failed.append(name)`

			`# Summary`
			`print()`
			`print("=" * 60)`
			`print(f"Installed: {len(succeeded)}/{len(gpu_services)}")`

			`if failed:`
			`print(f"\nFailed: {', '.join(failed)}")`
			`return 1`

			`print("\n✓ All GPU dependencies installed")`
			`print("Run './run setup-gpu verify' to test GPU acceleration")`
			`return 0`


			def verify_command(args, workspace_root: Path) -> int:
			    """Run GPU verification tests inside each GPU service's virtualenv.

			    For every service returned by ``find_gpu_services`` (optionally narrowed
			    with ``--service``), the service's ``.venv/bin/python`` is used to probe
			    PyTorch CUDA support and/or the ONNX Runtime CUDA provider, depending on
			    which GPU dependencies the service declares.

			    Args:
			        args: Command-line arguments following ``verify``.
			        workspace_root: Workspace directory passed to ``find_gpu_services``.

			    Returns:
			        0 when every checked service passes (or no GPU services exist),
			        1 when any service fails or the requested ``--service`` is unknown.
			    """
			    parser = argparse.ArgumentParser(
			        prog="./run setup-gpu verify",
			        description="Verify GPU acceleration works in service venvs",
			    )
			    parser.add_argument(
			        "--service",
			        help="Target specific service (default: all GPU services)",
			    )
			    parsed = parser.parse_args(args)

			    gpu_services = find_gpu_services(workspace_root)
			    if parsed.service:
			        gpu_services = [(n, p, d) for n, p, d in gpu_services if n == parsed.service]
			        if not gpu_services:
			            # Mirror the install subcommand: an explicitly requested service
			            # that cannot be found is an error, not a silent success.
			            print(f"✗ Service '{parsed.service}' not found or has no GPU deps")
			            return 1

			    if not gpu_services:
			        print("No GPU services found to verify")
			        return 0

			    print("GPU Verification Tests")
			    print("=" * 60)

			    results = []

			    for name, svc_path, deps in gpu_services:
			        print(f"\n▶ {name}")

			        venv_path = svc_path / ".venv"
			        if not venv_path.exists():
			            print(" ✗ No virtualenv")
			            results.append((name, False, "no venv"))
			            continue

			        python_path = venv_path / "bin" / "python"

			        needs_pytorch = any(d in deps for d in _VERIFY_PYTORCH_DEPS)
			        needs_onnx = "onnxruntime-gpu" in deps or "insightface" in deps

			        # Stop at the first failing probe so each service is reported once.
			        failure = None
			        if needs_pytorch:
			            failure = _verify_pytorch(python_path)
			        if failure is None and needs_onnx:
			            failure = _verify_onnx(python_path)

			        if failure is None:
			            results.append((name, True, "OK"))
			        else:
			            results.append((name, False, failure))

			    # Summary
			    print()
			    print("=" * 60)
			    passed = sum(1 for _, ok, _ in results if ok)
			    print(f"Verified: {passed}/{len(results)}")

			    failed = [(n, msg) for n, ok, msg in results if not ok]
			    if failed:
			        print("\nFailed:")
			        for name, msg in failed:
			            print(f" ✗ {name}: {msg}")
			        return 1

			    print("\n✓ All GPU services verified")
			    return 0


			# Dependencies whose presence means the service should be probed for PyTorch CUDA.
			_VERIFY_PYTORCH_DEPS = ("torch", "diffusers", "transformers", "image-reward")

			# One-line scripts executed with the service interpreter; both print "CUDA:<bool>,...".
			_VERIFY_PYTORCH_PROBE = (
			    "import torch; "
			    "cuda = torch.cuda.is_available(); "
			    "cudnn = torch.backends.cudnn.version() if torch.backends.cudnn.is_available() else None; "
			    "print(f'CUDA:{cuda},cuDNN:{cudnn}')"
			)
			_VERIFY_ONNX_PROBE = (
			    "import onnxruntime as ort; "
			    "providers = ort.get_available_providers(); "
			    "cuda = 'CUDAExecutionProvider' in providers; "
			    "print(f'CUDA:{cuda},Providers:{providers}')"
			)


			def _run_gpu_probe(python_path: Path, code: str, timeout: int = 60) -> tuple[bool, str]:
			    """Run a probe script with *python_path* and report ``(ok, text)``.

			    ``text`` is the stripped stdout on success, otherwise a short reason:
			    the first 100 characters of stderr, a timeout notice, or the OS error
			    raised when the interpreter could not be started.
			    """
			    try:
			        result = subprocess.run(
			            [str(python_path), "-c", code],
			            capture_output=True,
			            text=True,
			            timeout=timeout,
			        )
			    except subprocess.TimeoutExpired:
			        return False, f"timed out after {timeout}s"
			    except OSError as exc:
			        # e.g. the venv exists but has no bin/python
			        return False, str(exc)
			    if result.returncode != 0:
			        return False, result.stderr[:100]
			    return True, result.stdout.strip()


			def _verify_pytorch(python_path: Path) -> str | None:
			    """Probe PyTorch CUDA support; return a failure reason or None on success."""
			    ok, output = _run_gpu_probe(python_path, _VERIFY_PYTORCH_PROBE)
			    if not ok:
			        print(f" ✗ PyTorch test failed: {output}")
			        return "PyTorch test failed"
			    if "CUDA:True" not in output:
			        print(f" ✗ PyTorch CUDA not available: {output}")
			        return "PyTorch CUDA unavailable"
			    print(f" ✓ PyTorch: {output}")
			    return None


			def _verify_onnx(python_path: Path) -> str | None:
			    """Probe the ONNX Runtime CUDA provider; return a failure reason or None."""
			    ok, output = _run_gpu_probe(python_path, _VERIFY_ONNX_PROBE)
			    if not ok:
			        print(f" ✗ ONNX test failed: {output}")
			        return "ONNX test failed"
			    if "CUDA:True" not in output:
			        print(" ✗ ONNX Runtime CUDA not available")
			        return "ONNX CUDA unavailable"
			    print(" ✓ ONNX Runtime: CUDA provider available")
			    return None


			def system_command(args, workspace_root: Path) -> int:
			    """Show instructions for, or perform, a system-level cuDNN install.

			    Without ``--install`` the recommended ``rpm-ostree`` command is printed.
			    With ``--install`` the packages are installed via ``sudo rpm-ostree
			    install``; a reboot is still needed afterwards to apply them.

			    Args:
			        args: Command-line arguments following ``system``.
			        workspace_root: Unused here; accepted so all subcommands share a
			            signature.

			    Returns:
			        0 on success (or after printing instructions), 1 when CUDA is not
			        detected or ``sudo`` cannot be started, otherwise the exit status
			        of ``rpm-ostree``.
			    """
			    parser = argparse.ArgumentParser(
			        prog="./run setup-gpu system",
			        description="Install system-level cuDNN via rpm-ostree",
			    )
			    parser.add_argument(
			        "--install",
			        action="store_true",
			        help="Actually install (default: show instructions)",
			    )
			    parsed = parser.parse_args(args)

			    cuda = detect_cuda()
			    if not cuda.available:
			        print("✗ CUDA toolkit not detected")
			        return 1

			    # An unknown version is treated as CUDA 13.
			    cuda_major = int(cuda.version.split(".")[0]) if cuda.version else 13

			    # Determine package names
			    # NOTE(review): CUDA majors below 12 also receive the cuda-12 package
			    # set - confirm that is intended.
			    if cuda_major >= 13:
			        packages = ["cudnn9-cuda-13-0", "libcudnn9-cuda-13", "libcudnn9-devel-cuda-13"]
			    else:
			        packages = ["cudnn9-cuda-12", "libcudnn9-cuda-12", "libcudnn9-devel-cuda-12"]

			    if not parsed.install:
			        print("System-level cuDNN Installation")
			        print("=" * 60)
			        print()
			        print("NOTE: Modern PyTorch and onnxruntime-gpu wheels bundle cuDNN.")
			        print("System-level installation is optional but can help with compatibility.")
			        print()
			        print(f"Detected CUDA: {cuda.version}")
			        print(f"Recommended packages: {' '.join(packages)}")
			        print()
			        print("For Bluefin LTS / rpm-ostree systems:")
			        print()
			        print(f" sudo rpm-ostree install {' '.join(packages)}")
			        print(" systemctl reboot # Required for rpm-ostree changes")
			        print()
			        print("Or run with --install flag:")
			        print(" ./run setup-gpu system --install")
			        print()
			        return 0

			    # Install via rpm-ostree
			    print(f"Installing system cuDNN for CUDA {cuda.version}...")
			    print(f"Packages: {' '.join(packages)}")
			    print()

			    cmd = ["sudo", "rpm-ostree", "install", *packages]
			    try:
			        # Output is not captured so sudo prompts and progress reach the terminal.
			        result = subprocess.run(cmd)
			    except OSError as exc:
			        # sudo itself is missing or not executable; report it instead of
			        # letting the traceback escape to the user.
			        print()
			        print(f"✗ Could not run sudo: {exc}")
			        return 1

			    if result.returncode == 0:
			        print()
			        print("✓ cuDNN packages staged for installation")
			        print(" Run 'systemctl reboot' to apply changes")
			        return 0

			    print()
			    print("✗ rpm-ostree install failed")
			    return result.returncode


			def setup_gpu_command(args, workspace_root: Path) -> int:
			    """Main entry point for the ``setup-gpu`` command.

			    Dispatches to the ``check``, ``install``, ``verify`` or ``system``
			    subcommand named by ``args[0]``. With no arguments, or when the first
			    argument is an option other than ``-h``/``--help``, the arguments are
			    forwarded to ``check``.

			    Args:
			        args: Command-line arguments following ``setup-gpu``.
			        workspace_root: Workspace directory handed to the subcommand.

			    Returns:
			        The subcommand's exit status; 0 after printing the top-level help;
			        1 when the subcommand name is not recognised.
			    """
			    # Assembled line by line so the text does not depend on source indentation;
			    # RawDescriptionHelpFormatter prints it verbatim.
			    epilog = "\n".join([
			        "",
			        "Subcommands:",
			        "check Diagnose GPU/CUDA/cuDNN status (default)",
			        "install Install PyTorch+CUDA and onnxruntime-gpu in service venvs",
			        "verify Run GPU verification tests",
			        "system Show/install system-level cuDNN via rpm-ostree",
			        "",
			        "Examples:",
			        "./run setup-gpu # Check GPU status",
			        "./run setup-gpu check # Same as above",
			        "./run setup-gpu install # Install GPU deps in all services",
			        "./run setup-gpu install --service imajin-diffusion # Single service",
			        "./run setup-gpu verify # Test GPU acceleration",
			        "./run setup-gpu system # Show system cuDNN instructions",
			        "./run setup-gpu system --install # Install system cuDNN",
			        "",
			    ])
			    parser = argparse.ArgumentParser(
			        prog="./run setup-gpu",
			        description="GPU/CUDA/cuDNN setup and diagnostics",
			        formatter_class=argparse.RawDescriptionHelpFormatter,
			        epilog=epilog,
			    )

			    # A bare help flag shows the subcommand overview; without this it would
			    # fall through to `check` and the overview could only be reached by
			    # mistyping a subcommand.
			    if args and args[0] in ("-h", "--help"):
			        parser.print_help()
			        return 0

			    # Default to check if no subcommand
			    if not args or args[0].startswith("-"):
			        return check_command(args, workspace_root)

			    subcommands = {
			        "check": check_command,
			        "install": install_command,
			        "verify": verify_command,
			        "system": system_command,
			    }

			    subcommand = args[0]
			    handler = subcommands.get(subcommand)
			    if handler is None:
			        print(f"Unknown subcommand: {subcommand}", file=sys.stderr)
			        parser.print_help()
			        return 1

			    return handler(args[1:], workspace_root)


			def register_setup_gpu_command(runner):
			    """Attach the ``setup-gpu`` command to *runner*.

			    Calls ``runner.register_command`` with the command name, the
			    ``setup_gpu_command`` handler and a one-line description.
			    """
			    command_name = "setup-gpu"
			    summary = "GPU/CUDA/cuDNN setup and diagnostics"
			    runner.register_command(command_name, setup_gpu_command, summary)