# claire/tests/test_chat_nl.py

"""Tests for the natural-language fallback in the Claire chat.

We never hit the real Anthropic API: tests either inject a `MagicMock()`
client into `nl.interpret` or monkeypatch `claire.web.chat.nl.interpret` at
the route boundary.
"""

from __future__ import annotations

from pathlib import Path
from types import SimpleNamespace
from unittest.mock import MagicMock

import pytest
from fastapi.testclient import TestClient

from claire.domain import ChatScope
from claire.web.chat import nl
from claire.web.chat.commands import ScopeCtx


# ---------------------------------------------------------------------------
# Helpers — fabricate Anthropic-shaped message objects without importing the
# SDK. The interpret() loop reads `.stop_reason` and iterates `.content`,
# whose blocks expose `.type`, `.name`, `.input`, `.text`.
# ---------------------------------------------------------------------------


def _tool_use_block(name: str, input_payload: dict) -> SimpleNamespace:
    return SimpleNamespace(type="tool_use", name=name, input=input_payload, text=None)


def _text_block(text: str) -> SimpleNamespace:
    return SimpleNamespace(type="text", text=text, name=None, input=None)


def _fake_message(blocks: list[SimpleNamespace], stop_reason: str = "tool_use"):
    return SimpleNamespace(stop_reason=stop_reason, content=blocks)


def _fake_client(message) -> MagicMock:
    client = MagicMock()
    client.messages.create.return_value = message
    return client


# ---------------------------------------------------------------------------
# Unit tests for nl.interpret / tool_to_slash
# ---------------------------------------------------------------------------


def test_interpret_returns_proposed_action_with_slash() -> None:
    """interpret() turns a text + ``status`` tool_use reply into ``/status``."""
    scope_ctx = ScopeCtx(scope=ChatScope.ORCHESTRATOR, scope_ref=None)
    reply = _fake_message(
        [
            _text_block("This looks like a status query."),
            _tool_use_block("status", {}),
        ]
    )
    stub = _fake_client(reply)

    proposed = nl.interpret("how are things looking?", scope_ctx, client=stub)

    assert proposed.slash == "/status"
    assert "status query" in proposed.explanation
    assert 0.0 <= proposed.confidence <= 1.0

    # Inspect the request: tool_choice, the tool schema, and the cache marker
    # on the first system block should all have been passed along.
    sent = stub.messages.create.call_args.kwargs
    assert sent["tool_choice"] == {"type": "any"}
    assert "status" in (tool["name"] for tool in sent["tools"])
    first_system_block = sent["system"][0]
    assert first_system_block.get("cache_control") == {"type": "ephemeral"}


def test_interpret_nl_unavailable_without_api_key(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """With no API key and no injected client, interpret() raises NLUnavailable."""
    scope_ctx = ScopeCtx(scope=ChatScope.ORCHESTRATOR, scope_ref=None)

    monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
    # Reset the module-level singleton so the client has to be re-resolved.
    monkeypatch.setattr(nl, "_client_singleton", None)

    with pytest.raises(nl.NLUnavailable):
        nl.interpret("anything at all", scope_ctx)


def test_interpret_raises_when_model_picks_no_tool() -> None:
    """A text-only reply with stop_reason ``end_turn`` raises NLUnavailable."""
    text_only = _fake_message([_text_block("I'm not sure.")], stop_reason="end_turn")
    scope_ctx = ScopeCtx(scope=ChatScope.ORCHESTRATOR, scope_ref=None)
    stub = _fake_client(text_only)

    with pytest.raises(nl.NLUnavailable):
        nl.interpret("...", scope_ctx, client=stub)


def test_interpret_wraps_sdk_exception() -> None:
    """An error raised by ``messages.create`` surfaces as NLUnavailable."""
    # Dotted key: make the nested create() call raise when invoked.
    failing = MagicMock(**{"messages.create.side_effect": RuntimeError("boom")})
    scope_ctx = ScopeCtx(scope=ChatScope.ORCHESTRATOR, scope_ref=None)

    with pytest.raises(nl.NLUnavailable):
        nl.interpret("anything", scope_ctx, client=failing)


def test_tool_to_slash_task_new_quotes_values() -> None:
    """task_new renders a ``/task new`` command with a quoted multi-word title."""
    payload = {"title": "fix the bug", "priority": 1, "project": "alpha"}

    rendered = nl.tool_to_slash("task_new", payload)

    assert rendered.startswith("/task new ")
    # The title must be quoted so shlex.split sees one token.
    for fragment in ("'fix the bug'", "--priority 1", "--project alpha"):
        assert fragment in rendered


def test_tool_to_slash_assign() -> None:
    """assign renders the task ref followed by the session ref."""
    refs = {"task_ref": "abc12345", "session_ref": "def67890"}
    rendered = nl.tool_to_slash("assign", refs)
    assert rendered == "/assign abc12345 def67890"


def test_tool_to_slash_help_pull_status() -> None:
    """Tools called with an empty payload render as the bare slash command."""
    expected_by_tool = {
        "help": "/help",
        "pull": "/pull",
        "status": "/status",
    }
    for tool, expected in expected_by_tool.items():
        assert nl.tool_to_slash(tool, {}) == expected


def test_tool_to_slash_project_new_optional_flags() -> None:
    """project_new is bare with only a name and gains flags when they are given."""
    name_only = nl.tool_to_slash("project_new", {"name": "alpha"})
    assert name_only == "/project new alpha"

    everything = {"name": "alpha", "goal": "ship it", "owner": "natalie"}
    with_flags = nl.tool_to_slash("project_new", everything)
    for fragment in ("/project new alpha", "--goal 'ship it'", "--owner natalie"):
        assert fragment in with_flags


def test_tool_to_slash_rejects_invalid_input() -> None:
    """An empty ``task_new`` title is rejected with NLUnavailable."""
    # `task_new` requires non-empty title (min_length=1).
    blank_title = {"title": ""}
    with pytest.raises(nl.NLUnavailable):
        nl.tool_to_slash("task_new", blank_title)


# ---------------------------------------------------------------------------
# HTTP-level tests — monkeypatch nl.interpret to skip the real model call.
# ---------------------------------------------------------------------------


@pytest.fixture
def client(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> TestClient:
    """Provide a TestClient for a fresh app with XDG dirs under ``tmp_path``.

    ``XDG_DATA_HOME`` and ``XDG_CONFIG_HOME`` are set to per-test
    subdirectories before the app is created.
    """
    xdg_dirs = (("XDG_DATA_HOME", "data"), ("XDG_CONFIG_HOME", "config"))
    for env_var, subdir in xdg_dirs:
        monkeypatch.setenv(env_var, str(tmp_path / subdir))

    # Imported only after the environment has been patched.
    from claire.web.app import create_app

    app = create_app()
    return TestClient(app)


def test_json_api_posts_preview_for_bare_text(
    client: TestClient, monkeypatch: pytest.MonkeyPatch
) -> None:
    """Bare text in project scope yields an NL preview plus a second reply."""
    # NL fallback runs in project/session scopes. Orchestrator scope relays
    # to a managed Claude session instead (see test_orchestrator_turn.py).
    def canned_interpret(
        text: str, ctx: ScopeCtx, *, client=None
    ) -> nl.ProposedAction:
        # Always propose /status; the real model is never consulted.
        proposal = nl.ProposedAction(
            slash="/status", explanation="fleet check", confidence=1.0,
        )
        return proposal

    monkeypatch.setattr("claire.web.chat.nl.interpret", canned_interpret)
    client.post("/api/v1/projects", json={"name": "alpha"})

    chat_request = {"scope": "project", "scope_ref": "alpha", "body": "any news?"}
    response = client.post("/api/v1/chat", json=chat_request)
    assert response.status_code == 201, response.text

    replies = response.json()["replies"]
    assert len(replies) == 2
    preview_meta = replies[0]["meta"]
    assert preview_meta["kind"] == "nl_preview"
    assert preview_meta["slash"] == "/status"
    assert "project(s)" in replies[1]["body"]


def test_json_route_dispatches_after_preview(
    client: TestClient, monkeypatch: pytest.MonkeyPatch
) -> None:
    """The JSON chat route returns the NL preview followed by a second reply.

    ``nl.interpret`` is replaced with a stub that always proposes ``/status``,
    so no model call is made. The second reply's body is expected to mention
    both ``project(s)`` and ``session(s)``.
    """
    # The old HTMX /chat/post route was deleted in R6. JSON API covers the
    # same flow: NL preview at replies[0], auto-dispatched result at replies[1].
    def fake_interpret(text: str, ctx: ScopeCtx, *, client=None) -> nl.ProposedAction:
        return nl.ProposedAction(
            slash="/status", explanation="fleet check", confidence=1.0,
        )

    monkeypatch.setattr("claire.web.chat.nl.interpret", fake_interpret)
    client.post("/api/v1/projects", json={"name": "alpha"})
    r = client.post(
        "/api/v1/chat",
        json={"scope": "project", "scope_ref": "alpha", "body": "any news?"},
    )
    # Attach the response body so a wrong status code is diagnosable from the
    # failure output alone.
    assert r.status_code == 201, r.text
    payload = r.json()
    assert payload["replies"][0]["meta"]["kind"] == "nl_preview"
    status_body = payload["replies"][1]["body"]
    assert "project(s)" in status_body
    assert "session(s)" in status_body