202 lines
7.3 KiB
Python
202 lines
7.3 KiB
Python
"""Tests for the natural-language fallback in the Claire chat.
|
|
|
|
We never hit the real Anthropic API: tests inject a `MagicMock()` client into
`nl.interpret` or monkeypatch `claire.web.chat.nl.interpret` at the route
boundary.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
from types import SimpleNamespace
|
|
from unittest.mock import MagicMock
|
|
|
|
import pytest
|
|
from fastapi.testclient import TestClient
|
|
|
|
from claire.domain import ChatScope
|
|
from claire.web.chat import nl
|
|
from claire.web.chat.commands import ScopeCtx
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Helpers — fabricate Anthropic-shaped message objects without importing the
|
|
# SDK. The interpret() loop reads `.stop_reason` and iterates `.content`,
|
|
# whose blocks expose `.type`, `.name`, `.input`, `.text`.
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def _tool_use_block(name: str, input_payload: dict) -> SimpleNamespace:
|
|
return SimpleNamespace(type="tool_use", name=name, input=input_payload, text=None)
|
|
|
|
|
|
def _text_block(text: str) -> SimpleNamespace:
|
|
return SimpleNamespace(type="text", text=text, name=None, input=None)
|
|
|
|
|
|
def _fake_message(blocks: list[SimpleNamespace], stop_reason: str = "tool_use"):
|
|
return SimpleNamespace(stop_reason=stop_reason, content=blocks)
|
|
|
|
|
|
def _fake_client(message) -> MagicMock:
|
|
client = MagicMock()
|
|
client.messages.create.return_value = message
|
|
return client
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Unit tests for nl.interpret / tool_to_slash
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_interpret_returns_proposed_action_with_slash() -> None:
    """A text + tool_use reply is turned into a proposed `/status` action."""
    explanation_block = _text_block("This looks like a status query.")
    tool_block = _tool_use_block("status", {})
    stub = _fake_client(_fake_message([explanation_block, tool_block]))
    scope_ctx = ScopeCtx(scope=ChatScope.ORCHESTRATOR, scope_ref=None)

    proposed = nl.interpret("how are things looking?", scope_ctx, client=stub)

    assert proposed.slash == "/status"
    assert "status query" in proposed.explanation
    assert 0.0 <= proposed.confidence <= 1.0

    # The request itself must carry the forced tool choice, the tool schema,
    # and an ephemeral cache marker on the first system block.
    sent = stub.messages.create.call_args.kwargs
    assert sent["tool_choice"] == {"type": "any"}
    assert any(tool["name"] == "status" for tool in sent["tools"])
    first_system_block = sent["system"][0]
    assert first_system_block.get("cache_control") == {"type": "ephemeral"}
|
|
|
|
|
|
def test_interpret_nl_unavailable_without_api_key(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Without an API key and without an injected client, interpret raises."""
    monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
    # Clear the module-level cached client so it has to be resolved again.
    monkeypatch.setattr(nl, "_client_singleton", None)

    scope_ctx = ScopeCtx(scope=ChatScope.ORCHESTRATOR, scope_ref=None)
    expected_error = nl.NLUnavailable
    with pytest.raises(expected_error):
        nl.interpret("anything at all", scope_ctx)
|
|
|
|
|
|
def test_interpret_raises_when_model_picks_no_tool() -> None:
    """A text-only reply that stops with "end_turn" raises NLUnavailable."""
    text_only_reply = _fake_message(
        [_text_block("I'm not sure.")],
        stop_reason="end_turn",
    )
    stub = _fake_client(text_only_reply)
    scope_ctx = ScopeCtx(scope=ChatScope.ORCHESTRATOR, scope_ref=None)

    with pytest.raises(nl.NLUnavailable):
        nl.interpret("...", scope_ctx, client=stub)
|
|
|
|
|
|
def test_interpret_wraps_sdk_exception() -> None:
    """A RuntimeError from `messages.create` surfaces as NLUnavailable."""
    # Dotted keyword names configure the child mock's side effect.
    failing_client = MagicMock(
        **{"messages.create.side_effect": RuntimeError("boom")}
    )
    scope_ctx = ScopeCtx(scope=ChatScope.ORCHESTRATOR, scope_ref=None)

    with pytest.raises(nl.NLUnavailable):
        nl.interpret("anything", scope_ctx, client=failing_client)
|
|
|
|
|
|
def test_tool_to_slash_task_new_quotes_values() -> None:
    """`task_new` renders `/task new` with a quoted title and its flags.

    Besides checking the literal text, the slash string is tokenized with
    `shlex.split` to confirm the multi-word title comes back as one token,
    which is the reason the title is quoted in the first place.
    """
    import shlex

    slash = nl.tool_to_slash(
        "task_new",
        {"title": "fix the bug", "priority": 1, "project": "alpha"},
    )

    assert slash.startswith("/task new ")
    assert "'fix the bug'" in slash
    assert "--priority 1" in slash
    assert "--project alpha" in slash
    # The title must be quoted so shlex.split sees one token; verify the
    # tokenization itself rather than only the presence of quote characters.
    assert "fix the bug" in shlex.split(slash)
|
|
|
|
|
|
def test_tool_to_slash_assign() -> None:
    """`assign` renders as `/assign <task_ref> <session_ref>`."""
    refs = {"task_ref": "abc12345", "session_ref": "def67890"}
    rendered = nl.tool_to_slash("assign", refs)
    assert rendered == "/assign abc12345 def67890"
|
|
|
|
|
|
def test_tool_to_slash_help_pull_status() -> None:
    """Argument-less tools render as a bare slash followed by the tool name."""
    for tool_name in ("help", "pull", "status"):
        assert nl.tool_to_slash(tool_name, {}) == f"/{tool_name}"
|
|
|
|
|
|
def test_tool_to_slash_project_new_optional_flags() -> None:
    """`project_new` omits flags when only a name is given and adds them otherwise."""
    name_only = nl.tool_to_slash("project_new", {"name": "alpha"})
    assert name_only == "/project new alpha"

    every_field = {"name": "alpha", "goal": "ship it", "owner": "natalie"}
    with_flags = nl.tool_to_slash("project_new", every_field)
    expected_fragments = (
        "/project new alpha",
        "--goal 'ship it'",
        "--owner natalie",
    )
    for fragment in expected_fragments:
        assert fragment in with_flags
|
|
|
|
|
|
def test_tool_to_slash_rejects_invalid_input() -> None:
    """An empty `task_new` title is rejected with NLUnavailable."""
    # `task_new` requires non-empty title (min_length=1).
    empty_title_payload = {"title": ""}
    with pytest.raises(nl.NLUnavailable):
        nl.tool_to_slash("task_new", empty_title_payload)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# HTTP-level tests — monkeypatch nl.interpret to skip the real model call.
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.fixture
def client(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> TestClient:
    """Provide a TestClient for a fresh app with XDG dirs under `tmp_path`."""
    xdg_dirs = (("XDG_DATA_HOME", "data"), ("XDG_CONFIG_HOME", "config"))
    for env_var, subdir in xdg_dirs:
        monkeypatch.setenv(env_var, str(tmp_path / subdir))

    # NOTE(review): the import stays after the env patching, presumably so the
    # app only ever sees the temporary directories - confirm before moving it.
    from claire.web.app import create_app

    app = create_app()
    return TestClient(app)
|
|
|
|
|
|
def test_json_api_posts_preview_for_bare_text(
    client: TestClient, monkeypatch: pytest.MonkeyPatch
) -> None:
    """Bare text in project scope yields an NL preview plus a dispatched reply."""

    # NL fallback runs in project/session scopes. Orchestrator scope relays
    # to a managed Claude session instead (see test_orchestrator_turn.py).
    def fake_interpret(text: str, ctx: ScopeCtx, *, client=None) -> nl.ProposedAction:
        canned = {"slash": "/status", "explanation": "fleet check", "confidence": 1.0}
        return nl.ProposedAction(**canned)

    monkeypatch.setattr("claire.web.chat.nl.interpret", fake_interpret)
    client.post("/api/v1/projects", json={"name": "alpha"})

    chat_request = {"scope": "project", "scope_ref": "alpha", "body": "any news?"}
    r = client.post("/api/v1/chat", json=chat_request)
    assert r.status_code == 201, r.text

    replies = r.json()["replies"]
    assert len(replies) == 2
    preview, dispatched = replies
    assert preview["meta"]["kind"] == "nl_preview"
    assert preview["meta"]["slash"] == "/status"
    assert "project(s)" in dispatched["body"]
|
|
|
|
|
|
def test_json_route_dispatches_after_preview(
    client: TestClient, monkeypatch: pytest.MonkeyPatch
) -> None:
    """The JSON chat route returns the NL preview followed by the dispatched result.

    `nl.interpret` is replaced with a stub that always proposes `/status`, so
    no model call is made. The second reply is expected to mention both
    "project(s)" and "session(s)".
    """

    # The old HTMX /chat/post route was deleted in R6. JSON API covers the
    # same flow: NL preview at replies[0], auto-dispatched result at replies[1].
    def fake_interpret(text: str, ctx: ScopeCtx, *, client=None) -> nl.ProposedAction:
        return nl.ProposedAction(
            slash="/status", explanation="fleet check", confidence=1.0,
        )

    monkeypatch.setattr("claire.web.chat.nl.interpret", fake_interpret)
    client.post("/api/v1/projects", json={"name": "alpha"})
    r = client.post(
        "/api/v1/chat",
        json={"scope": "project", "scope_ref": "alpha", "body": "any news?"},
    )
    # Include the response body in the failure message, matching the sibling
    # JSON API test, so an unexpected status is diagnosable from the report.
    assert r.status_code == 201, r.text
    payload = r.json()
    assert payload["replies"][0]["meta"]["kind"] == "nl_preview"
    assert "project(s)" in payload["replies"][1]["body"]
    assert "session(s)" in payload["replies"][1]["body"]
|