feat(cloak): ✨ Extend cloak API to support parsing models for adversarial attack handling
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
This commit is contained in:
parent
4fd20451fc
commit
4447830967
2 changed files with 50 additions and 7 deletions
|
|
@ -27,6 +27,7 @@ async def cloak_frame(body: FrameCloakRequest, request: Request) -> FrameCloakRe
|
|||
while remaining imperceptible to human viewers (ε ≤ 0.03).
|
||||
"""
|
||||
cloak_model = request.state.cloak_model
|
||||
parsing_model = getattr(request.state, "parsing_model", None)
|
||||
gpu_semaphore: asyncio.Semaphore = request.state.gpu_semaphore
|
||||
|
||||
if cloak_model is None or not cloak_model._initialized:
|
||||
|
|
@ -45,6 +46,7 @@ async def cloak_frame(body: FrameCloakRequest, request: Request) -> FrameCloakRe
|
|||
body.eps,
|
||||
body.steps,
|
||||
body.alpha,
|
||||
parsing_model,
|
||||
)
|
||||
|
||||
logger.info(
|
||||
|
|
|
|||
|
|
@ -24,6 +24,8 @@ import torch
|
|||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
|
||||
from models.face_mask_builder import build_disjoint_masks
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ArcFace expects 112×112 RGB crops normalised to [-1, 1]
|
||||
|
|
@ -87,6 +89,7 @@ class ArcFaceCloakModel:
|
|||
eps: float,
|
||||
steps: int,
|
||||
alpha: float | None,
|
||||
parsing_model=None,
|
||||
) -> tuple[np.ndarray, float, float, int]:
|
||||
"""Apply adversarial cloaking perturbation to face regions in the frame.
|
||||
|
||||
|
|
@ -95,7 +98,7 @@ class ArcFaceCloakModel:
|
|||
"""
|
||||
assert self._initialized and self._torch_model is not None and self._device is not None
|
||||
|
||||
from attacks.pgd import perturbation_stats, pgd_l_inf
|
||||
from attacks.pgd import perturbation_stats
|
||||
|
||||
model = self._torch_model
|
||||
device = self._device
|
||||
|
|
@ -109,6 +112,11 @@ class ArcFaceCloakModel:
|
|||
return frame_bgr.copy(), 0.0, 0.0, 0
|
||||
bboxes = face_bboxes
|
||||
|
||||
# Derive full-frame inner-face mask once (eyes/nose/mouth/brows) — Layer 4 domain.
|
||||
# Fallback to (None, None) if parsing unavailable; per-crop logic handles that.
|
||||
bboxes_as_lists = [[int(b[0]), int(b[1]), int(b[2]), int(b[3])] for b in bboxes]
|
||||
inner_np, _ = build_disjoint_masks(frame_bgr, bboxes_as_lists, parsing_model)
|
||||
|
||||
total_l2, total_linf, faces_processed = 0.0, 0.0, 0
|
||||
|
||||
for bbox in bboxes:
|
||||
|
|
@ -125,6 +133,26 @@ class ArcFaceCloakModel:
|
|||
# without upscaling the perturbation from 112px → crop_size.
|
||||
x = torch.from_numpy(crop_rgb).permute(2, 0, 1).unsqueeze(0).float().to(device)
|
||||
|
||||
# Spatial mask for this crop: slice the full-frame inner mask to bbox region.
|
||||
# Shape: (1, 1, crop_h, crop_w), values in {0.0, 1.0}.
|
||||
crop_h, crop_w = y2 - y1, x2 - x1
|
||||
if inner_np is not None and inner_np[y1:y2, x1:x2].sum() > 0:
|
||||
crop_mask = torch.from_numpy(
|
||||
inner_np[y1:y2, x1:x2].copy()
|
||||
).to(device).unsqueeze(0).unsqueeze(0)
|
||||
logger.debug(
|
||||
f"cloak_frame bbox({x1},{y1},{x2},{y2}): "
|
||||
f"inner mask {int(crop_mask.sum())} px / {crop_h * crop_w} total"
|
||||
)
|
||||
else:
|
||||
if inner_np is not None:
|
||||
logger.warning(
|
||||
f"cloak_frame bbox({x1},{y1},{x2},{y2}): "
|
||||
"BiSeNet inner mask empty for this crop — using full bbox"
|
||||
)
|
||||
# Fallback: allow perturbation on the entire crop
|
||||
crop_mask = torch.ones((1, 1, crop_h, crop_w), device=device)
|
||||
|
||||
def _to_arcface(t: torch.Tensor) -> torch.Tensor:
|
||||
"""Resize to 112×112 and normalise [0,1]→[-1,1] for ArcFace."""
|
||||
t_112 = F.interpolate(t, size=ARCFACE_INPUT_SIZE, mode='bilinear',
|
||||
|
|
@ -135,16 +163,29 @@ class ArcFaceCloakModel:
|
|||
with torch.no_grad():
|
||||
clean_emb = F.normalize(model(_to_arcface(x)), dim=-1)
|
||||
|
||||
def cloak_loss(x_adv: torch.Tensor) -> torch.Tensor:
|
||||
# Masked PGD: gradient is zeroed outside the inner-face mask so the
|
||||
# eps=0.03 cloak delta stays confined to eyes/nose/mouth/brow pixels — the
|
||||
# pixels Layer 3 (eps=0.08 peripheral) does NOT touch.
|
||||
effective_alpha = alpha if alpha is not None else (eps / 4.0)
|
||||
x_adv = x + torch.empty_like(x).uniform_(-eps, eps)
|
||||
x_adv = x_adv.clamp(0.0, 1.0).detach()
|
||||
|
||||
for _ in range(steps):
|
||||
x_adv.requires_grad_(True)
|
||||
adv_emb = F.normalize(model(_to_arcface(x_adv)), dim=-1)
|
||||
cosine_sim = (clean_emb * adv_emb).sum()
|
||||
return -cosine_sim # maximise distance = minimise cosine similarity
|
||||
loss = -cosine_sim # maximise distance = minimise similarity
|
||||
grad = torch.autograd.grad(loss, x_adv, create_graph=False)[0]
|
||||
|
||||
# eps and clamp in [0, 1] space — normalisation to [-1,1] happens inside
|
||||
# _to_arcface, so pgd_l_inf stays in [0,1] with correct clamp bounds.
|
||||
x_adv = pgd_l_inf(model, x, cloak_loss, eps, steps, alpha,
|
||||
clamp_lo=0.0, clamp_hi=1.0)
|
||||
with torch.no_grad():
|
||||
masked_grad = grad * crop_mask
|
||||
x_adv = x_adv + effective_alpha * masked_grad.sign()
|
||||
x_adv = torch.max(torch.min(x_adv, x + eps), x - eps)
|
||||
x_adv = x_adv.clamp(0.0, 1.0)
|
||||
# Re-enforce mask: pixels outside inner region snap back to clean
|
||||
x_adv = x * (1.0 - crop_mask) + x_adv * crop_mask
|
||||
|
||||
x_adv = x_adv.detach()
|
||||
l2, linf = perturbation_stats(x, x_adv)
|
||||
|
||||
# Decode at native crop resolution and paste back — no upscaling needed
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue