imajin/orchestrators/imajin-pipeline/src/image_pipeline/models.py
autocommit 169aead308 feat(image-pipeline): Add adversarial protection and watermarking stages to secure image pipeline
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
2026-04-20 00:44:33 -07:00

431 lines
16 KiB
Python

"""Image pipeline request and result models."""
from typing import Dict, List, Literal, Optional
from pydantic import BaseModel, Field
# Re-export TextSpan from utils for convenience
from .utils.text_overlay import TextSpan
class LoraSpec(BaseModel):
    """Describe a single LoRA weight set to apply during generation.

    Fields:
        path: Required location of the LoRA weights.
        weight_name: Optional file name inside a LoRA directory.
        scale: Influence scale, validated to the range [0.0, 2.0]; default 1.0.
        adapter_name: Optional adapter name used for multi-LoRA composition.
    """

    # Required: there is no default, so callers must always supply a path.
    path: str = Field(
        default=...,
        description=(
            "Path to LoRA weights file (safetensors or bin). "
            "Can be a local path or HuggingFace model ID."
        ),
    )
    weight_name: Optional[str] = Field(
        default=None,
        description=(
            "Specific weight file name within the LoRA directory. "
            "Required when path points to a directory with multiple weight files."
        ),
    )
    # Bounded on both sides; values above 1.0 amplify the LoRA's influence.
    scale: float = Field(
        default=1.0,
        description="LoRA influence scale (0=disabled, 1=full, >1=amplified)",
        ge=0.0,
        le=2.0,
    )
    adapter_name: Optional[str] = Field(
        default=None,
        description=(
            "Unique name for this adapter (auto-generated if not provided). "
            "Used for multi-LoRA composition."
        ),
    )
class ControlNetConfig(BaseModel):
    """Low-level settings for conditioning generation on reference images.

    Every conditioning mode defined here has an enable flag (default ``False``),
    an optional input image and a conditioning scale validated to [0.0, 2.0]:

    - OpenPose: body/hand pose control via skeleton detection
    - Segmentation: clothing/outfit placement via segmentation masks
    - Depth: spatial depth control

    Canny (edges and composition) is planned but has no fields in this model.

    The ``control_guidance_start`` / ``control_guidance_end`` fields are common
    to all modes and are each validated to [0.0, 1.0].
    """

    # --- OpenPose (anatomy / pose control) ---
    enable_openpose: bool = Field(
        default=False,
        description="Enable OpenPose ControlNet for pose control",
    )
    openpose_reference_image: Optional[str] = Field(
        default=None,
        description="Reference image for pose (base64 or URL)",
    )
    openpose_conditioning_scale: float = Field(
        default=0.8,
        description="Strength of OpenPose conditioning (0=none, 1=full, >1=strong)",
        ge=0.0,
        le=2.0,
    )

    # --- Segmentation (clothing / outfit control), Phase 2 ---
    enable_segmentation: bool = Field(
        default=False,
        description="Enable Segmentation ControlNet for clothing control",
    )
    segmentation_mask: Optional[str] = Field(
        default=None,
        description="Segmentation mask image (base64 or URL). RGB colors map to body parts.",
    )
    segmentation_conditioning_scale: float = Field(
        default=0.7,
        description="Strength of segmentation conditioning",
        ge=0.0,
        le=2.0,
    )

    # --- Depth (spatial depth control), Phase 2 ---
    enable_depth: bool = Field(
        default=False,
        description="Enable Depth ControlNet for spatial depth control",
    )
    depth_reference_image: Optional[str] = Field(
        default=None,
        description="Reference image for depth extraction (base64 or URL)",
    )
    depth_conditioning_scale: float = Field(
        default=0.6,
        description="Strength of depth conditioning",
        ge=0.0,
        le=2.0,
    )

    # --- Parameters common to every ControlNet mode ---
    # NOTE(review): nothing in this model checks that start <= end.
    control_guidance_start: float = Field(
        default=0.0,
        description="Start applying control at this % of generation (0=start)",
        ge=0.0,
        le=1.0,
    )
    control_guidance_end: float = Field(
        default=1.0,
        description="Stop applying control at this % of generation (1=end)",
        ge=0.0,
        le=1.0,
    )
class PersonAppearanceRequest(BaseModel):
    """High-level description of how a person should look in the image.

    A simplified alternative to hand-writing ControlNet settings. All fields
    are optional and default to ``None``. According to the field descriptions
    the pose and clothing fields are meant to drive auto-generated ControlNet
    configuration; that translation is not implemented in this class.
    """

    # --- Pose (intended to map to OpenPose ControlNet) ---
    pose_type: Optional[
        Literal["standing", "sitting", "walking", "running", "custom"]
    ] = Field(
        default=None,
        description="Preset pose type. Use 'custom' with pose_reference_image.",
    )
    pose_reference_image: Optional[str] = Field(
        default=None,
        description="Custom pose reference image (base64 or URL). Overrides pose_type.",
    )
    pose_keypoints: Optional[List[Dict[str, float]]] = Field(
        default=None,
        description="Advanced: OpenPose keypoint coordinates [{x, y, confidence}, ...]",
    )

    # --- Clothing (intended to map to Segmentation ControlNet) ---
    outfit_description: Optional[str] = Field(
        default=None,
        description="Text description of outfit (e.g., 'blue jeans, white shirt')",
    )
    clothing_parts: Optional[Dict[str, str]] = Field(
        default=None,
        description="Body part to clothing mapping (e.g., {'torso': 'red dress', 'legs': 'jeans'})",
    )

    # --- Phase 2 / future expansion ---
    facial_expression: Optional[
        Literal["neutral", "smiling", "serious", "surprised"]
    ] = Field(
        default=None,
        description="Facial expression control (future)",
    )
    hair_style: Optional[str] = Field(
        default=None,
        description="Hair style description (future)",
    )
    accessories: Optional[List[str]] = Field(
        default=None,
        description="Accessories list (e.g., ['glasses', 'necklace']) (future)",
    )
class ImagePipelineRequest(BaseModel):
    """Full set of options accepted by the image generation pipeline.

    Only ``prompt`` is required; every other field has a default. Fields are
    grouped by the pipeline feature they configure (generation, LoRA, img2img,
    candidate filtering, text overlay, watermarking, adversarial protection,
    watermark removal, anatomy fix, background removal, ControlNet,
    moderation, semantic validation, aesthetic scoring, upscaling, identity
    preservation, queue priority and output).

    Numeric ranges are enforced through ``ge``/``le`` on each field. Constraints
    that span several fields are only described in the field descriptions and
    are not validated by this class.
    """

    # ------------------------------------------------------------------
    # Core generation parameters
    # ------------------------------------------------------------------
    prompt: str = Field(default=..., description="Positive prompt for image generation")
    negative_prompt: Optional[str] = Field(default=None, description="Negative prompt")
    model: str = Field(
        default="photorealistic",
        description="Model ID or style (photorealistic, anime, juggernaut-xl-v9, etc.)",
    )
    layout: Literal[
        "hero",
        "sidebar",
        "header",
        "square",
        "portrait",
        "landscape",
        "widescreen",
        "product_square",
        "product_wide",
        "custom",
    ] = Field(default="square")
    # NOTE(review): the descriptions say width/height are required when
    # layout == "custom", but no validator in this class enforces that.
    width: Optional[int] = Field(default=None, description="Required if layout=custom")
    height: Optional[int] = Field(default=None, description="Required if layout=custom")
    # Default was raised from 30 to 40 for better quality.
    steps: int = Field(default=40, ge=1, le=50)
    guidance_scale: float = Field(default=7.5, ge=1.0, le=20.0)
    seed: Optional[int] = None
    scheduler: Optional[str] = Field(
        default=None,
        description=(
            "Scheduler/sampler algorithm. Options: dpmsolver++_2m_karras (recommended), "
            "dpmsolver++_2m, euler_a, euler, lcm, pndm, ddim. None = model default."
        ),
    )

    # ------------------------------------------------------------------
    # LoRA weights
    # ------------------------------------------------------------------
    loras: Optional[List["LoraSpec"]] = Field(
        default=None,
        description="LoRA weights to apply. Multiple LoRAs are composed additively.",
    )

    # ------------------------------------------------------------------
    # img2img options
    # ------------------------------------------------------------------
    init_image_base64: Optional[str] = Field(
        default=None,
        description="Base64-encoded initialization image for img2img generation",
    )
    init_image_strength: float = Field(
        default=0.75,
        description="How much to transform init image (0=no change, 1=ignore init)",
        ge=0.0,
        le=1.0,
    )
    subject_count: int = Field(
        default=1,
        description="Number of subjects in the image (for automatic pose correction)",
        ge=1,
        le=10,
    )
    # NOTE(review): `appearance` has the same type as `person_appearance`
    # further down; confirm whether both fields are still needed.
    appearance: Optional["PersonAppearanceRequest"] = Field(
        default=None,
        description="Person appearance control (pose, clothing)",
    )

    # ------------------------------------------------------------------
    # Quality filtering options
    # ------------------------------------------------------------------
    num_candidates: int = Field(
        default=1,
        description="Generate N candidates, keep best by quality score",
        ge=1,
        le=5,
    )
    return_all_candidates: bool = Field(
        default=False,
        description="Return all candidates (for debugging)",
    )

    # ------------------------------------------------------------------
    # Pipeline control
    # ------------------------------------------------------------------
    # default_factory gives every request its own fresh list.
    skip_stages: List[str] = Field(default_factory=list, description="Stages to skip")

    # ------------------------------------------------------------------
    # Text overlay options
    # ------------------------------------------------------------------
    enable_text_overlay: bool = Field(
        default=False,
        description="Enable intelligent text overlay",
    )
    # NOTE(review): typed as a free-form str although the description lists
    # three values.
    text_overlay_purpose: str = Field(
        default="marketing",
        description="marketing, branding, cta",
    )
    text_spans: Optional[List[TextSpan]] = Field(
        default=None,
        description="Manual text spans (bypasses LLM)",
    )
    design_concept: Optional[str] = Field(
        default=None,
        description="Design concept for LLM to generate text spans",
    )

    # ------------------------------------------------------------------
    # Watermarking options
    # ------------------------------------------------------------------
    enable_watermark: bool = Field(default=False, description="Enable forensic watermarking")
    watermark_payload: Optional[str] = Field(default=None, description="Payload to embed")

    # ------------------------------------------------------------------
    # Adversarial protection options
    # ------------------------------------------------------------------
    enable_adversarial: bool = Field(
        default=False,
        description="Apply adversarial perturbation + forensic watermark for content protection",
    )
    adversarial_payload: Optional[str] = Field(
        default=None,
        description=(
            "Distributor identifier to embed as watermark "
            "(e.g. client token hash). Defaults to job_id."
        ),
    )
    adversarial_strength: float = Field(
        default=0.03,
        description="Adversarial noise strength (0.03 = imperceptible, 0.15 = visible)",
        ge=0.0,
        le=0.15,
    )
    watermark_strength: float = Field(
        default=0.5,
        description="DCT watermark strength (0.5 = invisible, 2.0 = more robust)",
        ge=0.0,
        le=2.0,
    )

    # ------------------------------------------------------------------
    # Watermark removal options (visible text watermark removal)
    # ------------------------------------------------------------------
    # Unlike most enable_* flags in this model, this one defaults to True.
    enable_watermark_removal: bool = Field(
        default=True,
        description="Enable automatic watermark detection and removal",
    )
    watermark_detection_confidence: float = Field(
        default=0.8,
        description="Minimum confidence for watermark detection (0-1)",
        ge=0.0,
        le=1.0,
    )
    watermark_inpainting_steps: int = Field(
        default=20,
        description="Number of diffusion steps for inpainting",
        ge=5,
        le=50,
    )

    # ------------------------------------------------------------------
    # Anatomy correction options
    # ------------------------------------------------------------------
    enable_anatomy_fix: bool = Field(
        default=False,
        description="Enable anatomical error correction (hands, faces)",
    )

    # ------------------------------------------------------------------
    # Background removal options (transparent PNG output)
    # ------------------------------------------------------------------
    enable_background_removal: bool = Field(
        default=False,
        description="Remove background for transparent PNG output (icons, stickers, product images)",
    )

    # ------------------------------------------------------------------
    # ControlNet options (advanced image conditioning)
    # ------------------------------------------------------------------
    controlnet_config: Optional[ControlNetConfig] = Field(
        default=None,
        description="Low-level ControlNet configuration for power users",
    )
    person_appearance: Optional[PersonAppearanceRequest] = Field(
        default=None,
        description="High-level person appearance API (auto-generates ControlNet config)",
    )
    prefer_controlnet_over_postprocessing: bool = Field(
        default=True,
        description="Skip post-processing corrections (MediaPipe) when ControlNet is used",
    )

    # ------------------------------------------------------------------
    # Moderation options
    # ------------------------------------------------------------------
    enable_moderation: bool = Field(default=True, description="Run content moderation")
    moderation_strict: bool = Field(default=False, description="Fail on any moderation flag")
    maturity_rating: Literal["sfw", "nsfw", "explicit"] = Field(
        default="sfw",
        description="Content maturity rating - moderation validates generated content against this",
    )

    # ------------------------------------------------------------------
    # Semantic validation options (SEO filter alignment)
    # ------------------------------------------------------------------
    seo_filters: List[str] = Field(
        default_factory=list,
        description="SEO filters to validate against (e.g., ['femboy', 'latex']). Empty = skip validation.",
    )
    semantic_validation_threshold: float = Field(
        default=0.5,
        description="Minimum alignment score to pass semantic validation (0-1)",
        ge=0.0,
        le=1.0,
    )
    regenerate_on_mismatch: bool = Field(
        default=False,
        description="Request regeneration if image doesn't match SEO filters",
    )

    # ------------------------------------------------------------------
    # Aesthetic scoring options (ImageReward-based human preference alignment)
    # ------------------------------------------------------------------
    enable_aesthetic_scoring: bool = Field(
        default=True,
        description="Enable ImageReward aesthetic scoring for multi-candidate selection",
    )
    # The two weights default to 0.7 + 0.3; each is bounded to [0, 1]
    # independently and their sum is not validated here.
    aesthetic_weight: float = Field(
        default=0.7,
        description="Weight for aesthetic score in combined candidate selection (0-1)",
        ge=0.0,
        le=1.0,
    )
    quality_weight: float = Field(
        default=0.3,
        description="Weight for technical quality score in combined candidate selection (0-1)",
        ge=0.0,
        le=1.0,
    )
    enable_aesthetic_validation: bool = Field(
        default=False,
        description="Enable post-generation aesthetic validation stage",
    )
    aesthetic_threshold: float = Field(
        default=0.4,
        description="Minimum aesthetic score to pass validation (ImageReward scale, typically -2 to +2)",
        ge=-2.0,
        le=2.0,
    )
    reject_low_aesthetic: bool = Field(
        default=False,
        description="Fail pipeline if aesthetic score below threshold",
    )

    # ------------------------------------------------------------------
    # Upscaling options (RealESRGAN)
    # ------------------------------------------------------------------
    # NOTE(review): the description restricts this to 2 or 4, but the type
    # accepts any int.
    upscale_factor: Optional[int] = Field(
        default=None,
        description=(
            "Upscale factor after generation (2 or 4). None = no upscaling. "
            "Uses RealESRGAN_x2plus (2x) or RealESRGAN_x4plus (4x)."
        ),
    )

    # ------------------------------------------------------------------
    # Identity-preserving generation (FLUX+PuLID or IP-Adapter + InstantID)
    # ------------------------------------------------------------------
    identity_id: Optional[str] = Field(
        default=None,
        description=(
            "Identity name from imajin-identity service (e.g., 'lilith'). "
            "When provided, conditions generation on the identity's face images."
        ),
    )
    identity_strength: float = Field(
        default=0.8,
        description=(
            "Overall identity preservation strength (0=none, 1=strong, >1=very strong). "
            "Higher values preserve more facial features but may reduce prompt adherence."
        ),
        ge=0.0,
        le=1.5,
    )

    # FLUX+PuLID options (primary identity generation path - ~90%+ fidelity)
    use_flux_pulid: bool = Field(
        default=False,
        description=(
            "Use FLUX+PuLID for identity generation (~90%+ fidelity). "
            "Requires ~24GB VRAM. When enabled, overrides SDXL+IP-Adapter."
        ),
    )
    flux_model_id: str = Field(
        default="black-forest-labs/FLUX.1-dev",
        description="FLUX model ID from HuggingFace",
    )
    pulid_weight: float = Field(
        default=1.0,
        description="PuLID identity weight (0.0-3.0). Higher = stronger identity preservation.",
        ge=0.0,
        le=3.0,
    )
    flux_steps: int = Field(
        default=28,
        description="FLUX inference steps (default 28 for quality/speed balance)",
        ge=10,
        le=50,
    )
    flux_guidance: float = Field(
        default=3.5,
        description="FLUX guidance scale (default 3.5 for identity tasks)",
        ge=1.0,
        le=10.0,
    )

    # Legacy IP-Adapter + InstantID options (fallback path - ~70-86% fidelity)
    # NOTE(review): the description below quotes 85-95% preservation, which
    # does not match the ~70-86% figure in the comment above; confirm which
    # number is current.
    enable_instantid: bool = Field(
        default=True,
        description=(
            "Enable InstantID for enhanced identity fidelity (85-95% preservation). "
            "Uses face keypoint ControlNet in addition to IP-Adapter. Only used when use_flux_pulid=False."
        ),
    )
    ip_adapter_scale: float = Field(
        default=0.6,
        description=(
            "IP-Adapter conditioning scale. Controls influence of face embedding on generation. "
            "Only used when use_flux_pulid=False."
        ),
        ge=0.0,
        le=1.0,
    )
    face_image_override: Optional[str] = Field(
        default=None,
        description=(
            "Base64-encoded face image to override auto-selected identity images. "
            "Use when you want a specific expression or angle for conditioning."
        ),
    )

    # Body IP-Adapter: full-body reference image for body-shape/style
    # consistency. Runs as a second IP-Adapter stream alongside the face adapter.
    body_image_override: Optional[str] = Field(
        default=None,
        description=(
            "Base64-encoded full-body reference image. Conditions body shape and "
            "proportions via a second IP-Adapter stream (ip-adapter-plus_sdxl). "
            "Does not affect face conditioning. Scale 0.3-0.5 recommended."
        ),
    )
    body_ip_adapter_scale: float = Field(
        default=0.4,
        description=(
            "Conditioning scale for body IP-Adapter (0=disabled, 1=very strong). "
            "Higher values preserve body shape but may reduce prompt adherence."
        ),
        ge=0.0,
        le=1.0,
    )
    identity_verification_threshold: float = Field(
        default=0.7,
        description=(
            "Minimum identity match score to pass verification (0-1). "
            "Generated images below this threshold may trigger regeneration."
        ),
        ge=0.0,
        le=1.0,
    )
    regenerate_on_identity_mismatch: bool = Field(
        default=False,
        description="Automatically regenerate if identity verification fails.",
    )

    # ------------------------------------------------------------------
    # Queue priority for model-boss coordinator (urgent/high/normal/low/batch)
    # ------------------------------------------------------------------
    # NOTE(review): typed as a free-form str; the value set above is not
    # enforced by this model.
    priority: str = Field(
        default="high",
        description="Coordinator queue priority. Use 'high' for interactive, 'normal'/'low' for batch.",
    )

    # ------------------------------------------------------------------
    # Output options
    # ------------------------------------------------------------------
    return_format: Literal["base64", "url"] = Field(default="base64")
    output_format: Literal["png", "webp"] = Field(default="png")
    output_quality: int = Field(
        default=75,
        description="Output quality for lossy formats like WebP (1-100). Default 75 for good balance.",
        ge=1,
        le=100,
    )
    save_to_storage: bool = Field(default=False, description="Save to cloud storage")