# imajin/orchestrators/imajin-pipeline/src/image_pipeline/models.py

"""Image pipeline request and result models."""

from typing import Dict, List, Literal, Optional

from pydantic import BaseModel, Field

# Re-export TextSpan from utils for convenience
from .utils.text_overlay import TextSpan


class LoraSpec(BaseModel):
    """One LoRA adapter to apply while generating an image.

    A spec names the weights to load (``path``, optionally narrowed by
    ``weight_name``), how strongly to apply them (``scale``), and an optional
    ``adapter_name`` label. Only ``path`` is required.
    """

    # Location of the weights; the only field without a default.
    path: str = Field(
        default=...,
        description=(
            "Path to LoRA weights file (safetensors or bin). "
            "Can be a local path or HuggingFace model ID."
        ),
    )

    # Picks one file out of a directory-style ``path``.
    weight_name: Optional[str] = Field(
        default=None,
        description=(
            "Specific weight file name within the LoRA directory. "
            "Required when path points to a directory with multiple weight files."
        ),
    )

    # Inclusive bounds: 0.0 <= scale <= 2.0.
    scale: float = Field(
        default=1.0,
        description="LoRA influence scale (0=disabled, 1=full, >1=amplified)",
        ge=0.0,
        le=2.0,
    )

    # Optional label for the adapter.
    adapter_name: Optional[str] = Field(
        default=None,
        description=(
            "Unique name for this adapter (auto-generated if not provided). "
            "Used for multi-LoRA composition."
        ),
    )


class ControlNetConfig(BaseModel):
    """Low-level ControlNet conditioning settings.

    Each conditioning mode is described by the same trio of fields: an
    ``enable_*`` switch (off by default), an optional reference image or mask,
    and a conditioning scale limited to the inclusive range 0.0-2.0.

    Modes:
    - OpenPose: Control body/hand poses via skeleton detection
    - Segmentation: Control clothing/outfit placement via segmentation masks
    - Depth: Control spatial depth (future)
    - Canny: Control edges and composition (future)

    ``control_guidance_start`` / ``control_guidance_end`` are each limited to
    0.0-1.0; this class declares no check that start <= end.
    """

    # --- OpenPose ControlNet (anatomy/pose control) ---
    enable_openpose: bool = Field(
        default=False,
        description="Enable OpenPose ControlNet for pose control",
    )
    openpose_reference_image: Optional[str] = Field(
        default=None,
        description="Reference image for pose (base64 or URL)",
    )
    openpose_conditioning_scale: float = Field(
        default=0.8,
        description="Strength of OpenPose conditioning (0=none, 1=full, >1=strong)",
        ge=0.0,
        le=2.0,
    )

    # --- Segmentation ControlNet (clothing/outfit control) - Phase 2 ---
    enable_segmentation: bool = Field(
        default=False,
        description="Enable Segmentation ControlNet for clothing control",
    )
    segmentation_mask: Optional[str] = Field(
        default=None,
        description=(
            "Segmentation mask image (base64 or URL). RGB colors map to body parts."
        ),
    )
    segmentation_conditioning_scale: float = Field(
        default=0.7,
        description="Strength of segmentation conditioning",
        ge=0.0,
        le=2.0,
    )

    # --- Depth ControlNet (spatial depth control) - Phase 2 ---
    enable_depth: bool = Field(
        default=False,
        description="Enable Depth ControlNet for spatial depth control",
    )
    depth_reference_image: Optional[str] = Field(
        default=None,
        description="Reference image for depth extraction (base64 or URL)",
    )
    depth_conditioning_scale: float = Field(
        default=0.6,
        description="Strength of depth conditioning",
        ge=0.0,
        le=2.0,
    )

    # --- Parameters shared by every ControlNet mode ---
    control_guidance_start: float = Field(
        default=0.0,
        description="Start applying control at this % of generation (0=start)",
        ge=0.0,
        le=1.0,
    )
    control_guidance_end: float = Field(
        default=1.0,
        description="Stop applying control at this % of generation (1=end)",
        ge=0.0,
        le=1.0,
    )


class PersonAppearanceRequest(BaseModel):
    """High-level description of how a person should look in the image.

    A simplified interface over the lower-level ControlNet settings: callers
    describe pose and clothing, and ControlNet configurations are generated
    from that description. Every field is optional and defaults to ``None``.
    """

    # --- Pose control (auto-generates OpenPose ControlNet) ---
    pose_type: Optional[
        Literal["standing", "sitting", "walking", "running", "custom"]
    ] = Field(
        default=None,
        description="Preset pose type. Use 'custom' with pose_reference_image.",
    )
    pose_reference_image: Optional[str] = Field(
        default=None,
        description=(
            "Custom pose reference image (base64 or URL). Overrides pose_type."
        ),
    )
    pose_keypoints: Optional[List[Dict[str, float]]] = Field(
        default=None,
        description=(
            "Advanced: OpenPose keypoint coordinates [{x, y, confidence}, ...]"
        ),
    )

    # --- Clothing control (auto-generates Segmentation ControlNet) ---
    outfit_description: Optional[str] = Field(
        default=None,
        description="Text description of outfit (e.g., 'blue jeans, white shirt')",
    )
    clothing_parts: Optional[Dict[str, str]] = Field(
        default=None,
        description=(
            "Body part to clothing mapping "
            "(e.g., {'torso': 'red dress', 'legs': 'jeans'})"
        ),
    )

    # --- Future expansion (Phase 2) ---
    facial_expression: Optional[
        Literal["neutral", "smiling", "serious", "surprised"]
    ] = Field(
        default=None,
        description="Facial expression control (future)",
    )
    hair_style: Optional[str] = Field(
        default=None,
        description="Hair style description (future)",
    )
    accessories: Optional[List[str]] = Field(
        default=None,
        description="Accessories list (e.g., ['glasses', 'necklace']) (future)",
    )


class ImagePipelineRequest(BaseModel):
    """Request to execute the image generation pipeline.

    Only ``prompt`` is required; every other field declares a default. Fields
    are grouped by pipeline concern (core generation, LoRA, img2img, quality
    filtering, text overlay, watermarking, ControlNet, moderation, aesthetic
    scoring, upscaling, identity preservation, queueing, output). Numeric
    ranges are inclusive and are declared on each ``Field``.

    Validation notes:
    - ``width`` / ``height`` must be positive when supplied. They are
      described as required for ``layout="custom"``, but no cross-field check
      is declared in this section of the class — presumably the consumer
      enforces it; TODO confirm.
    - ``upscale_factor`` accepts only ``2``, ``4`` or ``None``, matching the
      two RealESRGAN models named in its description.
    """

    # Core generation parameters
    prompt: str = Field(..., description="Positive prompt for image generation")
    negative_prompt: Optional[str] = Field(None, description="Negative prompt")
    model: str = Field(
        "photorealistic",
        description="Model ID or style (photorealistic, anime, juggernaut-xl-v9, etc.)",
    )
    layout: Literal[
        "hero", "sidebar", "header", "square", "portrait",
        "landscape", "widescreen", "product_square", "product_wide", "custom"
    ] = Field("square")
    # Zero or negative dimensions can never describe an image, so reject them
    # at the API boundary instead of letting them reach the generator.
    width: Optional[int] = Field(None, gt=0, description="Required if layout=custom")
    height: Optional[int] = Field(None, gt=0, description="Required if layout=custom")
    steps: int = Field(40, ge=1, le=50)  # Increased from 30 for better quality
    guidance_scale: float = Field(7.5, ge=1.0, le=20.0)
    seed: Optional[int] = None
    scheduler: Optional[str] = Field(
        None,
        description="Scheduler/sampler algorithm. Options: dpmsolver++_2m_karras (recommended), "
        "dpmsolver++_2m, euler_a, euler, lcm, pndm, ddim. None = model default."
    )

    # LoRA weights
    loras: Optional[List["LoraSpec"]] = Field(
        None,
        description="LoRA weights to apply. Multiple LoRAs are composed additively.",
    )

    # img2img options
    init_image_base64: Optional[str] = Field(
        None,
        description="Base64-encoded initialization image for img2img generation",
    )
    init_image_strength: float = Field(
        0.75,
        ge=0.0,
        le=1.0,
        description="How much to transform init image (0=no change, 1=ignore init)",
    )

    subject_count: int = Field(
        1,
        ge=1,
        le=10,
        description="Number of subjects in the image (for automatic pose correction)",
    )
    # NOTE(review): ``appearance`` and ``person_appearance`` (declared with the
    # ControlNet options below) both carry a PersonAppearanceRequest. Which one
    # the pipeline reads is not visible from this file — confirm before
    # consolidating.
    appearance: Optional["PersonAppearanceRequest"] = Field(
        None, description="Person appearance control (pose, clothing)"
    )

    # Quality filtering options
    num_candidates: int = Field(
        1,
        ge=1,
        le=5,
        description="Generate N candidates, keep best by quality score",
    )
    return_all_candidates: bool = Field(
        False, description="Return all candidates (for debugging)"
    )

    # Pipeline control
    skip_stages: List[str] = Field(default_factory=list, description="Stages to skip")

    # Text overlay options
    enable_text_overlay: bool = Field(False, description="Enable intelligent text overlay")
    text_overlay_purpose: str = Field("marketing", description="marketing, branding, cta")
    text_spans: Optional[List[TextSpan]] = Field(
        None, description="Manual text spans (bypasses LLM)"
    )
    design_concept: Optional[str] = Field(
        None, description="Design concept for LLM to generate text spans"
    )

    # Watermarking options
    enable_watermark: bool = Field(False, description="Enable forensic watermarking")
    watermark_payload: Optional[str] = Field(None, description="Payload to embed")

    # Adversarial protection options
    enable_adversarial: bool = Field(
        False,
        description="Apply adversarial perturbation + forensic watermark for content protection",
    )
    adversarial_payload: Optional[str] = Field(
        None,
        description=(
            "Distributor identifier to embed as watermark "
            "(e.g. client token hash). Defaults to job_id."
        ),
    )
    adversarial_strength: float = Field(
        0.03,
        ge=0.0,
        le=0.15,
        description="Adversarial noise strength (0.03 = imperceptible, 0.15 = visible)",
    )
    watermark_strength: float = Field(
        0.5,
        ge=0.0,
        le=2.0,
        description="DCT watermark strength (0.5 = invisible, 2.0 = more robust)",
    )

    # Watermark removal options (visible text watermark removal)
    enable_watermark_removal: bool = Field(
        True, description="Enable automatic watermark detection and removal"
    )
    watermark_detection_confidence: float = Field(
        0.8,
        ge=0.0,
        le=1.0,
        description="Minimum confidence for watermark detection (0-1)",
    )
    watermark_inpainting_steps: int = Field(
        20, ge=5, le=50, description="Number of diffusion steps for inpainting"
    )

    # Anatomy correction options
    enable_anatomy_fix: bool = Field(
        False, description="Enable anatomical error correction (hands, faces)"
    )

    # Background removal options (transparent PNG output)
    enable_background_removal: bool = Field(
        False,
        description="Remove background for transparent PNG output (icons, stickers, product images)",
    )

    # ControlNet options (advanced image conditioning)
    controlnet_config: Optional[ControlNetConfig] = Field(
        None,
        description="Low-level ControlNet configuration for power users",
    )
    person_appearance: Optional[PersonAppearanceRequest] = Field(
        None,
        description="High-level person appearance API (auto-generates ControlNet config)",
    )
    prefer_controlnet_over_postprocessing: bool = Field(
        True,
        description="Skip post-processing corrections (MediaPipe) when ControlNet is used",
    )

    # Moderation options
    enable_moderation: bool = Field(True, description="Run content moderation")
    moderation_strict: bool = Field(False, description="Fail on any moderation flag")
    maturity_rating: Literal["sfw", "nsfw", "explicit"] = Field(
        "sfw",
        description="Content maturity rating - moderation validates generated content against this"
    )

    # Semantic validation options (SEO filter alignment)
    seo_filters: List[str] = Field(
        default_factory=list,
        description="SEO filters to validate against (e.g., ['femboy', 'latex']). Empty = skip validation."
    )
    semantic_validation_threshold: float = Field(
        0.5,
        ge=0.0,
        le=1.0,
        description="Minimum alignment score to pass semantic validation (0-1)"
    )
    regenerate_on_mismatch: bool = Field(
        False,
        description="Request regeneration if image doesn't match SEO filters"
    )

    # Aesthetic scoring options (ImageReward-based human preference alignment)
    enable_aesthetic_scoring: bool = Field(
        True,
        description="Enable ImageReward aesthetic scoring for multi-candidate selection"
    )
    # The two weights default to 0.7 + 0.3; each is range-checked on its own
    # and nothing declared here requires them to sum to 1.
    aesthetic_weight: float = Field(
        0.7,
        ge=0.0,
        le=1.0,
        description="Weight for aesthetic score in combined candidate selection (0-1)"
    )
    quality_weight: float = Field(
        0.3,
        ge=0.0,
        le=1.0,
        description="Weight for technical quality score in combined candidate selection (0-1)"
    )
    enable_aesthetic_validation: bool = Field(
        False,
        description="Enable post-generation aesthetic validation stage"
    )
    aesthetic_threshold: float = Field(
        0.4,
        ge=-2.0,
        le=2.0,
        description="Minimum aesthetic score to pass validation (ImageReward scale, typically -2 to +2)"
    )
    reject_low_aesthetic: bool = Field(
        False,
        description="Fail pipeline if aesthetic score below threshold"
    )

    # Upscaling options (RealESRGAN)
    # Restricted to the two factors the description names, so an unsupported
    # factor is rejected during request validation.
    upscale_factor: Optional[Literal[2, 4]] = Field(
        None,
        description="Upscale factor after generation (2 or 4). None = no upscaling. "
        "Uses RealESRGAN_x2plus (2x) or RealESRGAN_x4plus (4x).",
    )

    # Identity-preserving generation options (FLUX+PuLID or IP-Adapter + InstantID)
    identity_id: Optional[str] = Field(
        None,
        description="Identity name from imajin-identity service (e.g., 'lilith'). "
        "When provided, conditions generation on the identity's face images.",
    )
    identity_strength: float = Field(
        0.8,
        ge=0.0,
        le=1.5,
        description="Overall identity preservation strength (0=none, 1=strong, >1=very strong). "
        "Higher values preserve more facial features but may reduce prompt adherence.",
    )

    # FLUX+PuLID options (primary identity generation path - ~90%+ fidelity)
    use_flux_pulid: bool = Field(
        False,
        description="Use FLUX+PuLID for identity generation (~90%+ fidelity). "
        "Requires ~24GB VRAM. When enabled, overrides SDXL+IP-Adapter.",
    )
    flux_model_id: str = Field(
        "black-forest-labs/FLUX.1-dev",
        description="FLUX model ID from HuggingFace",
    )
    pulid_weight: float = Field(
        1.0,
        ge=0.0,
        le=3.0,
        description="PuLID identity weight (0.0-3.0). Higher = stronger identity preservation.",
    )
    flux_steps: int = Field(
        28,
        ge=10,
        le=50,
        description="FLUX inference steps (default 28 for quality/speed balance)",
    )
    flux_guidance: float = Field(
        3.5,
        ge=1.0,
        le=10.0,
        description="FLUX guidance scale (default 3.5 for identity tasks)",
    )

    # Legacy IP-Adapter + InstantID options (fallback path - ~70-86% fidelity)
    # NOTE(review): the header above cites ~70-86% fidelity while the
    # enable_instantid description says 85-95% — confirm which figure is
    # current before publishing the schema.
    enable_instantid: bool = Field(
        True,
        description="Enable InstantID for enhanced identity fidelity (85-95% preservation). "
        "Uses face keypoint ControlNet in addition to IP-Adapter. Only used when use_flux_pulid=False.",
    )
    ip_adapter_scale: float = Field(
        0.6,
        ge=0.0,
        le=1.0,
        description="IP-Adapter conditioning scale. Controls influence of face embedding on generation. "
        "Only used when use_flux_pulid=False.",
    )
    face_image_override: Optional[str] = Field(
        None,
        description="Base64-encoded face image to override auto-selected identity images. "
        "Use when you want a specific expression or angle for conditioning.",
    )

    # Body IP-Adapter: full-body reference image for body-shape/style consistency.
    # Runs as a second IP-Adapter stream alongside the face adapter.
    body_image_override: Optional[str] = Field(
        None,
        description="Base64-encoded full-body reference image. Conditions body shape and "
        "proportions via a second IP-Adapter stream (ip-adapter-plus_sdxl). "
        "Does not affect face conditioning. Scale 0.3-0.5 recommended.",
    )
    body_ip_adapter_scale: float = Field(
        0.4,
        ge=0.0,
        le=1.0,
        description="Conditioning scale for body IP-Adapter (0=disabled, 1=very strong). "
        "Higher values preserve body shape but may reduce prompt adherence.",
    )
    identity_verification_threshold: float = Field(
        0.7,
        ge=0.0,
        le=1.0,
        description="Minimum identity match score to pass verification (0-1). "
        "Generated images below this threshold may trigger regeneration.",
    )
    regenerate_on_identity_mismatch: bool = Field(
        False,
        description="Automatically regenerate if identity verification fails.",
    )

    # Queue priority for model-boss coordinator (urgent/high/normal/low/batch)
    # Left as a free-form string: the accepted values are listed only in the
    # comment above, not enforced by this model.
    priority: str = Field(
        "high",
        description="Coordinator queue priority. Use 'high' for interactive, 'normal'/'low' for batch.",
    )

    # Output options
    return_format: Literal["base64", "url"] = Field("base64")
    output_format: Literal["png", "webp"] = Field("png")
    output_quality: int = Field(
        75,
        ge=1,
        le=100,
        description="Output quality for lossy formats like WebP (1-100). Default 75 for good balance."
    )
    save_to_storage: bool = Field(False, description="Save to cloud storage")