deepseek-cursor-proxy/tests/test_protocol.py

"""End-to-end protocol tests against an in-process fake DeepSeek.

Each test boots the proxy in-process against a small fake upstream and walks
a real HTTP request scenario. The strict variant rejects with HTTP 400 — the
same error real DeepSeek emits — whenever an assistant message that
participated in a tool-calling turn lacks `reasoning_content`. So if the
proxy is protocol-compliant, every turn succeeds; if not, the upstream
short-circuits and the test fails fast.

This file is the ground truth for "does the proxy speak DeepSeek correctly?"
"""

from __future__ import annotations

from copy import deepcopy
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
import json
import threading
import time
import unittest
from typing import Any
from urllib.error import HTTPError
from urllib.request import Request, urlopen

from deepseek_cursor_proxy.config import ProxyConfig
from deepseek_cursor_proxy.reasoning_store import ReasoningStore
from deepseek_cursor_proxy.server import DeepSeekProxyHandler, DeepSeekProxyServer


# Canonical fake-DeepSeek reasoning/answer text reused across tests.
THINKING_1_1 = "Thinking 1.1 - need to look up the date."
THINKING_1_2 = "Thinking 1.2 - I have the date, now I need the weather."
THINKING_1_3 = "Thinking 1.3 - tool results suffice for the answer."
THINKING_2_1 = "Thinking 2.1 - a brand new user turn."
ANSWER_1 = "Answer 1: Tomorrow is sunny on 2026-04-24."
ANSWER_2 = "Answer 2: Acknowledged follow-up."
CALL_ID_1 = "call_get_date"
CALL_ID_2 = "call_get_weather"

TOOLS = [
    {
        "type": "function",
        "function": {
            "name": "get_date",
            "parameters": {"type": "object", "properties": {}},
        },
    },
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "parameters": {
                "type": "object",
                "properties": {"date": {"type": "string"}},
                "required": ["date"],
            },
        },
    },
]


# ---------------------------------------------------------------------------
# Fake upstreams
# ---------------------------------------------------------------------------


def _completion(
    *,
    chat_id: str,
    finish_reason: str,
    content: str = "",
    reasoning: str | None = None,
    tool_calls: list[dict[str, Any]] | None = None,
) -> dict[str, Any]:
    message: dict[str, Any] = {"role": "assistant", "content": content}
    if reasoning is not None:
        message["reasoning_content"] = reasoning
    if tool_calls is not None:
        message["tool_calls"] = tool_calls
    return {
        "id": chat_id,
        "object": "chat.completion",
        "created": 1,
        "model": "deepseek-v4-pro",
        "choices": [{"index": 0, "finish_reason": finish_reason, "message": message}],
    }


class StrictFakeDeepSeek(BaseHTTPRequestHandler):
    """DeepSeek protocol contract: tool-turn assistants must carry
    `reasoning_content` (string). Returns canned canonical responses keyed
    on the request shape; rejects with 400 otherwise."""

    requests: list[dict[str, Any]] = []
    auth_headers: list[str] = []

    def log_message(self, fmt: str, *args: Any) -> None:
        return

    def do_POST(self) -> None:
        length = int(self.headers.get("Content-Length") or 0)
        payload = json.loads(self.rfile.read(length).decode("utf-8"))
        self.__class__.requests.append(payload)
        self.__class__.auth_headers.append(self.headers.get("Authorization", ""))

        messages = payload.get("messages") or []
        for index, message in enumerate(messages):
            if not isinstance(message, dict) or message.get("role") != "assistant":
                continue
            if _is_tool_turn_assistant(messages, index) and not isinstance(
                message.get("reasoning_content"), str
            ):
                return self._send(
                    400,
                    {
                        "error": {
                            "message": (
                                "The reasoning_content in the thinking mode "
                                "must be passed back to the API."
                            ),
                            "type": "invalid_request_error",
                            "code": "invalid_request_error",
                            "missing_index": index,
                        }
                    },
                )

        last_user = _last_index(messages, "user")
        last_tool = _last_index(messages, "tool")
        last_assistant = _last_index(messages, "assistant")
        if last_user == 0 and last_assistant == -1 and last_tool == -1:
            return self._send(
                200,
                _completion(
                    chat_id="chatcmpl-1-1",
                    finish_reason="tool_calls",
                    reasoning=THINKING_1_1,
                    tool_calls=[
                        {
                            "id": CALL_ID_1,
                            "type": "function",
                            "function": {"name": "get_date", "arguments": "{}"},
                        }
                    ],
                ),
            )
        if last_user > 0 and last_user > last_tool:
            return self._send(
                200,
                _completion(
                    chat_id="chatcmpl-2-1",
                    finish_reason="stop",
                    content=ANSWER_2,
                    reasoning=THINKING_2_1,
                ),
            )
        if (
            last_tool != -1
            and messages[last_tool].get("tool_call_id") == CALL_ID_1
            and last_assistant < last_tool
        ):
            return self._send(
                200,
                _completion(
                    chat_id="chatcmpl-1-2",
                    finish_reason="tool_calls",
                    reasoning=THINKING_1_2,
                    tool_calls=[
                        {
                            "id": CALL_ID_2,
                            "type": "function",
                            "function": {
                                "name": "get_weather",
                                "arguments": '{"date":"2026-04-24"}',
                            },
                        }
                    ],
                ),
            )
        if last_tool != -1 and messages[last_tool].get("tool_call_id") == CALL_ID_2:
            return self._send(
                200,
                _completion(
                    chat_id="chatcmpl-1-3",
                    finish_reason="stop",
                    content=ANSWER_1,
                    reasoning=THINKING_1_3,
                ),
            )
        return self._send(
            400,
            {
                "error": {
                    "message": (
                        "test fake: unexpected shape: "
                        f"roles={[m.get('role') for m in messages]}"
                    )
                }
            },
        )

    def _send(self, status: int, body: dict[str, Any]) -> None:
        encoded = json.dumps(body).encode("utf-8")
        self.send_response(status)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(encoded)))
        self.end_headers()
        self.wfile.write(encoded)


def _is_tool_turn_assistant(messages: list[dict[str, Any]], index: int) -> bool:
    message = messages[index]
    if message.get("tool_calls"):
        return True
    for prior in reversed(messages[:index]):
        role = prior.get("role")
        if role == "tool":
            return True
        if role in {"user", "system"}:
            return False
    return False


def _last_index(messages: list[dict[str, Any]], role: str) -> int:
    for i in range(len(messages) - 1, -1, -1):
        if messages[i].get("role") == role:
            return i
    return -1


# ---------------------------------------------------------------------------
# HTTP fixtures
# ---------------------------------------------------------------------------


class _Fixture:
    def __init__(self, server: ThreadingHTTPServer) -> None:
        self.server = server
        self.thread = threading.Thread(target=server.serve_forever, daemon=True)
        self.thread.start()

    @property
    def url(self) -> str:
        host, port = self.server.server_address
        return f"http://{host}:{port}"

    def close(self) -> None:
        self.server.shutdown()
        self.server.server_close()
        self.thread.join(timeout=5)


def _start_strict_upstream() -> _Fixture:
    StrictFakeDeepSeek.requests = []
    StrictFakeDeepSeek.auth_headers = []
    return _Fixture(ThreadingHTTPServer(("127.0.0.1", 0), StrictFakeDeepSeek))


def _start_proxy(
    upstream_url: str,
    store: ReasoningStore,
    **config_overrides: Any,
) -> _Fixture:
    proxy = DeepSeekProxyServer(("127.0.0.1", 0), DeepSeekProxyHandler)
    proxy.config = ProxyConfig(
        upstream_base_url=upstream_url,
        upstream_model="deepseek-v4-pro",
        ngrok=False,
        verbose=False,
        cors=False,
        **config_overrides,
    )
    proxy.reasoning_store = store
    return _Fixture(proxy)


def _post(
    url: str,
    payload: dict[str, Any],
    authorization: str = "Bearer sk-test",
) -> tuple[int, dict[str, Any]]:
    request = Request(
        url,
        data=json.dumps(payload).encode("utf-8"),
        method="POST",
        headers={"Authorization": authorization, "Content-Type": "application/json"},
    )
    try:
        with urlopen(request, timeout=10) as response:
            return response.status, json.loads(response.read().decode("utf-8"))
    except HTTPError as exc:
        return exc.code, json.loads(exc.read().decode("utf-8"))


def _drop_reasoning(message: dict[str, Any]) -> dict[str, Any]:
    cleaned = deepcopy(message)
    cleaned.pop("reasoning_content", None)
    return cleaned


# ---------------------------------------------------------------------------
# Test suites
# ---------------------------------------------------------------------------


class _StrictUpstreamCase(unittest.TestCase):
    """Common setup: strict fake DeepSeek + proxy."""

    config_overrides: dict[str, Any] = {}

    def setUp(self) -> None:
        self.upstream = _start_strict_upstream()
        self.store = ReasoningStore(":memory:")
        self.proxy = _start_proxy(
            self.upstream.url, self.store, **self.config_overrides
        )

    def tearDown(self) -> None:
        self.proxy.close()
        self.upstream.close()
        self.store.close()


class CanonicalLoopTests(_StrictUpstreamCase):
    def test_canonical_four_turn_tool_call_loop(self) -> None:
        """Cursor strips reasoning_content from history; the proxy must
        patch every prior assistant message that participated in the tool
        chain so the strict upstream accepts each turn."""
        # Turn 1.1: bare user message.
        status, response_1_1 = _post(
            f"{self.proxy.url}/v1/chat/completions",
            {
                "model": "deepseek-v4-pro",
                "messages": [
                    {"role": "user", "content": "What's the weather tomorrow?"}
                ],
                "tools": TOOLS,
            },
        )
        self.assertEqual(status, 200, response_1_1)
        first = response_1_1["choices"][0]["message"]
        self.assertEqual(first["reasoning_content"], THINKING_1_1)
        self.assertEqual(first["tool_calls"][0]["id"], CALL_ID_1)

        # Turn 1.2: append tool result; Cursor drops reasoning.
        status, response_1_2 = _post(
            f"{self.proxy.url}/v1/chat/completions",
            {
                "model": "deepseek-v4-pro",
                "messages": [
                    {"role": "user", "content": "What's the weather tomorrow?"},
                    _drop_reasoning(first),
                    {
                        "role": "tool",
                        "tool_call_id": CALL_ID_1,
                        "content": "2026-04-24",
                    },
                ],
                "tools": TOOLS,
            },
        )
        self.assertEqual(status, 200, response_1_2)
        second = response_1_2["choices"][0]["message"]
        self.assertEqual(second["tool_calls"][0]["id"], CALL_ID_2)
        upstream_1_2 = StrictFakeDeepSeek.requests[1]["messages"]
        self.assertEqual(upstream_1_2[1]["reasoning_content"], THINKING_1_1)

        # Turn 1.3: both prior assistants need patching.
        status, response_1_3 = _post(
            f"{self.proxy.url}/v1/chat/completions",
            {
                "model": "deepseek-v4-pro",
                "messages": [
                    {"role": "user", "content": "What's the weather tomorrow?"},
                    _drop_reasoning(first),
                    {
                        "role": "tool",
                        "tool_call_id": CALL_ID_1,
                        "content": "2026-04-24",
                    },
                    _drop_reasoning(second),
                    {"role": "tool", "tool_call_id": CALL_ID_2, "content": "sunny"},
                ],
                "tools": TOOLS,
            },
        )
        self.assertEqual(status, 200, response_1_3)
        third = response_1_3["choices"][0]["message"]
        self.assertIn(ANSWER_1, third["content"])
        upstream_1_3 = StrictFakeDeepSeek.requests[2]["messages"]
        self.assertEqual(upstream_1_3[1]["reasoning_content"], THINKING_1_1)
        self.assertEqual(upstream_1_3[3]["reasoning_content"], THINKING_1_2)

        # Turn 2.1: brand new user turn; the prior final assistant also
        # needs patching since DeepSeek treats it as part of the tool turn.
        status, response_2_1 = _post(
            f"{self.proxy.url}/v1/chat/completions",
            {
                "model": "deepseek-v4-pro",
                "messages": [
                    {"role": "user", "content": "What's the weather tomorrow?"},
                    _drop_reasoning(first),
                    {
                        "role": "tool",
                        "tool_call_id": CALL_ID_1,
                        "content": "2026-04-24",
                    },
                    _drop_reasoning(second),
                    {"role": "tool", "tool_call_id": CALL_ID_2, "content": "sunny"},
                    _drop_reasoning(third),
                    {"role": "user", "content": "Thanks. What about Saturday?"},
                ],
                "tools": TOOLS,
            },
        )
        self.assertEqual(status, 200, response_2_1)
        self.assertIn(ANSWER_2, response_2_1["choices"][0]["message"]["content"])
        upstream_2_1 = StrictFakeDeepSeek.requests[3]["messages"]
        self.assertEqual(upstream_2_1[1]["reasoning_content"], THINKING_1_1)
        self.assertEqual(upstream_2_1[3]["reasoning_content"], THINKING_1_2)
        self.assertEqual(upstream_2_1[5]["reasoning_content"], THINKING_1_3)

    def test_authorization_namespace_isolation(self) -> None:
        """A second user with the same conversation prefix must NOT see
        cached reasoning from the first user."""
        # Prime cache as user A.
        _post(
            f"{self.proxy.url}/v1/chat/completions",
            {
                "model": "deepseek-v4-pro",
                "messages": [
                    {"role": "user", "content": "What's the weather tomorrow?"}
                ],
                "tools": TOOLS,
            },
            authorization="Bearer sk-USER-A",
        )

        # User B replays a tool history with the exact same shape.
        status, _ = _post(
            f"{self.proxy.url}/v1/chat/completions",
            {
                "model": "deepseek-v4-pro",
                "messages": [
                    {"role": "user", "content": "What's the weather tomorrow?"},
                    {
                        "role": "assistant",
                        "content": "",
                        "tool_calls": [
                            {
                                "id": CALL_ID_1,
                                "type": "function",
                                "function": {"name": "get_date", "arguments": "{}"},
                            }
                        ],
                    },
                    {
                        "role": "tool",
                        "tool_call_id": CALL_ID_1,
                        "content": "2026-04-24",
                    },
                ],
                "tools": TOOLS,
            },
            authorization="Bearer sk-USER-B",
        )
        self.assertEqual(status, 200)
        sent = StrictFakeDeepSeek.requests[-1]
        leaked = any(
            m.get("role") == "assistant" and m.get("reasoning_content") == THINKING_1_1
            for m in sent["messages"]
        )
        self.assertFalse(leaked)


class StrictRejectModeTests(_StrictUpstreamCase):
    config_overrides = {"missing_reasoning_strategy": "reject"}

    def test_returns_409_without_calling_upstream(self) -> None:
        status, payload = _post(
            f"{self.proxy.url}/v1/chat/completions",
            {
                "model": "deepseek-v4-pro",
                "messages": [
                    {"role": "user", "content": "go"},
                    {
                        "role": "assistant",
                        "content": "",
                        "tool_calls": [
                            {
                                "id": CALL_ID_1,
                                "type": "function",
                                "function": {"name": "get_date", "arguments": "{}"},
                            }
                        ],
                    },
                    {
                        "role": "tool",
                        "tool_call_id": CALL_ID_1,
                        "content": "2026-04-24",
                    },
                ],
            },
        )
        self.assertEqual(status, 409, payload)
        self.assertEqual(payload["error"]["missing_reasoning_messages"], 1)
        self.assertEqual(StrictFakeDeepSeek.requests, [])


class ThinkingDisabledTests(_StrictUpstreamCase):
    config_overrides = {"thinking": "disabled"}

    def test_disabled_does_not_inject_reasoning(self) -> None:
        _post(
            f"{self.proxy.url}/v1/chat/completions",
            {
                "model": "deepseek-v4-pro",
                "messages": [
                    {"role": "user", "content": "ping"},
                    {
                        "role": "assistant",
                        "content": "",
                        "reasoning_content": "Should be discarded by proxy.",
                        "tool_calls": [
                            {
                                "id": CALL_ID_1,
                                "type": "function",
                                "function": {"name": "get_date", "arguments": "{}"},
                            }
                        ],
                    },
                    {
                        "role": "tool",
                        "tool_call_id": CALL_ID_1,
                        "content": "2026-04-24",
                    },
                ],
            },
        )
        sent = StrictFakeDeepSeek.requests[-1]
        self.assertEqual(sent["thinking"], {"type": "disabled"})
        self.assertNotIn("reasoning_content", sent["messages"][1])


class RecoveryTests(_StrictUpstreamCase):
    def test_cold_cache_recovers_to_latest_user_with_notice(self) -> None:
        """Stale tool history with no cached reasoning: proxy keeps only
        the latest user message + recovery system message and prefixes a
        user-facing notice into the response."""
        status, response = _post(
            f"{self.proxy.url}/v1/chat/completions",
            {
                "model": "deepseek-v4-pro",
                "messages": [
                    {"role": "system", "content": "Be brief."},
                    {"role": "user", "content": "old work"},
                    {
                        "role": "assistant",
                        "content": "",
                        "tool_calls": [
                            {
                                "id": CALL_ID_1,
                                "type": "function",
                                "function": {"name": "get_date", "arguments": "{}"},
                            }
                        ],
                    },
                    {
                        "role": "tool",
                        "tool_call_id": CALL_ID_1,
                        "content": "2026-04-24",
                    },
                    {"role": "user", "content": "Thanks. What about Saturday?"},
                ],
            },
        )
        self.assertEqual(status, 200, response)
        sent = StrictFakeDeepSeek.requests[-1]
        self.assertEqual(
            [m["role"] for m in sent["messages"]], ["system", "system", "user"]
        )
        self.assertEqual(
            sent["messages"][-1]["content"], "Thanks. What about Saturday?"
        )
        self.assertIn(
            "[deepseek-cursor-proxy] Refreshed reasoning",
            response["choices"][0]["message"]["content"],
        )

    def test_recovery_notice_is_stripped_before_upstream_replay(self) -> None:
        """When Cursor echoes a previous response (which carried the proxy's
        recovery notice) back as assistant content, the notice serves as a
        boundary marker for the proxy but must not be replayed upstream as
        if DeepSeek had written it."""
        # Trigger initial recovery so the response carries the notice.
        status, first = _post(
            f"{self.proxy.url}/v1/chat/completions",
            {
                "model": "deepseek-v4-pro",
                "messages": [
                    {"role": "user", "content": "old work"},
                    {
                        "role": "assistant",
                        "content": "",
                        "tool_calls": [
                            {
                                "id": CALL_ID_1,
                                "type": "function",
                                "function": {"name": "get_date", "arguments": "{}"},
                            }
                        ],
                    },
                    {
                        "role": "tool",
                        "tool_call_id": CALL_ID_1,
                        "content": "2026-04-24",
                    },
                    {"role": "user", "content": "Thanks. What about Saturday?"},
                ],
            },
        )
        self.assertEqual(status, 200)

        # Cursor faithfully echoes the response (including the notice prefix).
        echoed = _drop_reasoning(first["choices"][0]["message"])
        status, _ = _post(
            f"{self.proxy.url}/v1/chat/completions",
            {
                "model": "deepseek-v4-pro",
                "messages": [
                    {"role": "user", "content": "old work"},
                    {
                        "role": "assistant",
                        "content": "",
                        "tool_calls": [
                            {
                                "id": CALL_ID_1,
                                "type": "function",
                                "function": {"name": "get_date", "arguments": "{}"},
                            }
                        ],
                    },
                    {
                        "role": "tool",
                        "tool_call_id": CALL_ID_1,
                        "content": "2026-04-24",
                    },
                    {"role": "user", "content": "Thanks. What about Saturday?"},
                    echoed,
                    {"role": "user", "content": "And Sunday?"},
                ],
            },
        )
        self.assertEqual(status, 200)
        sent = StrictFakeDeepSeek.requests[-1]
        for message in sent["messages"]:
            if message.get("role") != "assistant":
                continue
            self.assertNotIn("deepseek-cursor-proxy", message.get("content", ""))


# ---------------------------------------------------------------------------
# Streaming behaviour
# ---------------------------------------------------------------------------


def _sse_chunks(*chunks: dict[str, Any]) -> bytes:
    out = b""
    for chunk in chunks:
        out += f"data: {json.dumps(chunk)}\n\n".encode("utf-8")
    out += b"data: [DONE]\n\n"
    return out


class _StreamingThenJsonHandler(BaseHTTPRequestHandler):
    """First request streams a tool call (with reasoning); subsequent
    non-streaming requests echo a final answer if the assistant message
    carries the previously-streamed reasoning_content."""

    requests: list[dict[str, Any]] = []

    def log_message(self, fmt: str, *args: Any) -> None:
        return

    def do_POST(self) -> None:
        length = int(self.headers.get("Content-Length") or 0)
        payload = json.loads(self.rfile.read(length).decode("utf-8"))
        self.__class__.requests.append(payload)

        if payload.get("stream"):
            self.send_response(200)
            self.send_header("Content-Type", "text/event-stream")
            self.end_headers()
            self.wfile.write(
                _sse_chunks(
                    {
                        "id": "stream-1",
                        "object": "chat.completion.chunk",
                        "created": 1,
                        "model": "deepseek-v4-pro",
                        "choices": [
                            {
                                "index": 0,
                                "delta": {
                                    "role": "assistant",
                                    "reasoning_content": THINKING_1_1,
                                    "tool_calls": [
                                        {
                                            "index": 0,
                                            "id": CALL_ID_1,
                                            "type": "function",
                                            "function": {
                                                "name": "get_date",
                                                "arguments": "{}",
                                            },
                                        }
                                    ],
                                },
                                "finish_reason": None,
                            }
                        ],
                    },
                    {
                        "id": "stream-1",
                        "object": "chat.completion.chunk",
                        "created": 1,
                        "model": "deepseek-v4-pro",
                        "choices": [
                            {"index": 0, "delta": {}, "finish_reason": "tool_calls"}
                        ],
                    },
                )
            )
            self.wfile.flush()
            return

        # Non-streaming follow-up: enforce that proxy patched the prior
        # streamed reasoning_content into history.
        assistant = payload["messages"][1]
        if assistant.get("reasoning_content") != THINKING_1_1:
            self._send(400, {"error": {"message": "missing streamed reasoning"}})
            return
        self._send(
            200,
            _completion(
                chat_id="follow-up",
                finish_reason="stop",
                content="follow-up accepted",
                reasoning="post-tool reasoning",
            ),
        )

    def _send(self, status: int, body: dict[str, Any]) -> None:
        encoded = json.dumps(body).encode("utf-8")
        self.send_response(status)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(encoded)))
        self.end_headers()
        self.wfile.write(encoded)


class StreamingThenNonStreamingTests(unittest.TestCase):
    def setUp(self) -> None:
        _StreamingThenJsonHandler.requests = []
        self.upstream = _Fixture(
            ThreadingHTTPServer(("127.0.0.1", 0), _StreamingThenJsonHandler)
        )
        self.store = ReasoningStore(":memory:")
        self.proxy = _start_proxy(self.upstream.url, self.store)

    def tearDown(self) -> None:
        self.proxy.close()
        self.upstream.close()
        self.store.close()

    def test_streamed_reasoning_is_replayed_in_next_non_streaming_request(
        self,
    ) -> None:
        """Cursor often streams Turn 1.1 then issues Turn 1.2 as a non-stream
        POST. The proxy must repair reasoning_content from the streaming
        cache before forwarding the follow-up."""
        # Stream a tool call.
        request = Request(
            f"{self.proxy.url}/v1/chat/completions",
            data=json.dumps(
                {
                    "model": "deepseek-v4-pro",
                    "stream": True,
                    "messages": [{"role": "user", "content": "go"}],
                    "tools": [TOOLS[0]],
                }
            ).encode("utf-8"),
            method="POST",
            headers={
                "Authorization": "Bearer sk-test",
                "Content-Type": "application/json",
            },
        )
        with urlopen(request, timeout=5) as response:
            self.assertIn("data: [DONE]", response.read().decode("utf-8"))

        # Non-streaming follow-up (Cursor strips reasoning_content).
        status, payload = _post(
            f"{self.proxy.url}/v1/chat/completions",
            {
                "model": "deepseek-v4-pro",
                "messages": [
                    {"role": "user", "content": "go"},
                    {
                        "role": "assistant",
                        "content": "",
                        "tool_calls": [
                            {
                                "id": CALL_ID_1,
                                "type": "function",
                                "function": {
                                    "name": "get_date",
                                    "arguments": "{}",
                                },
                            }
                        ],
                    },
                    {
                        "role": "tool",
                        "tool_call_id": CALL_ID_1,
                        "content": "2026-04-24",
                    },
                ],
                "tools": [TOOLS[0]],
            },
        )
        self.assertEqual(status, 200, payload)
        sent = _StreamingThenJsonHandler.requests[-1]
        self.assertEqual(sent["messages"][1]["reasoning_content"], THINKING_1_1)


class _ReasoningStreamHandler(BaseHTTPRequestHandler):
    """Streams reasoning_content tokens followed by a content answer."""

    def log_message(self, fmt: str, *args: Any) -> None:
        return

    def do_POST(self) -> None:
        length = int(self.headers.get("Content-Length") or 0)
        self.rfile.read(length)
        self.send_response(200)
        self.send_header("Content-Type", "text/event-stream")
        self.end_headers()
        self.wfile.write(
            _sse_chunks(
                {
                    "id": "stream-r",
                    "object": "chat.completion.chunk",
                    "created": 1,
                    "model": "deepseek-v4-pro",
                    "choices": [
                        {
                            "index": 0,
                            "delta": {
                                "role": "assistant",
                                "reasoning_content": "Need ",
                            },
                            "finish_reason": None,
                        }
                    ],
                },
                {
                    "id": "stream-r",
                    "object": "chat.completion.chunk",
                    "created": 1,
                    "model": "deepseek-v4-pro",
                    "choices": [
                        {
                            "index": 0,
                            "delta": {"reasoning_content": "context."},
                            "finish_reason": None,
                        }
                    ],
                },
                {
                    "id": "stream-r",
                    "object": "chat.completion.chunk",
                    "created": 1,
                    "model": "deepseek-v4-pro",
                    "choices": [
                        {
                            "index": 0,
                            "delta": {"content": "Final."},
                            "finish_reason": None,
                        }
                    ],
                },
                {
                    "id": "stream-r",
                    "object": "chat.completion.chunk",
                    "created": 1,
                    "model": "deepseek-v4-pro",
                    "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
                },
            )
        )
        self.wfile.flush()


class StreamingDisplayTests(unittest.TestCase):
    def setUp(self) -> None:
        self.upstream = _Fixture(
            ThreadingHTTPServer(("127.0.0.1", 0), _ReasoningStreamHandler)
        )
        self.store = ReasoningStore(":memory:")
        self.proxy = _start_proxy(self.upstream.url, self.store)

    def tearDown(self) -> None:
        self.proxy.close()
        self.upstream.close()
        self.store.close()

    def test_streaming_response_mirrors_reasoning_into_details_block(self) -> None:
        request = Request(
            f"{self.proxy.url}/v1/chat/completions",
            data=json.dumps(
                {
                    "model": "deepseek-v4-pro",
                    "stream": True,
                    "messages": [{"role": "user", "content": "stream"}],
                }
            ).encode("utf-8"),
            method="POST",
            headers={
                "Authorization": "Bearer sk-test",
                "Content-Type": "application/json",
            },
        )
        with urlopen(request, timeout=2) as response:
            body = response.read().decode("utf-8")

        chunks = [
            json.loads(line.removeprefix("data: "))
            for line in body.splitlines()
            if line.startswith("data: {")
        ]
        self.assertEqual(
            chunks[0]["choices"][0]["delta"]["content"],
            "<details>\n<summary>Thinking</summary>\n\nNeed ",
        )
        self.assertEqual(
            chunks[2]["choices"][0]["delta"]["content"],
            "\n</details>\n\nFinal.",
        )


class NonStreamingDisplayTests(_StrictUpstreamCase):
    def test_non_streaming_response_mirrors_reasoning_into_details_block(
        self,
    ) -> None:
        """The README claims thinking tokens are displayed in Cursor; this
        must hold for non-streaming responses too, not only streaming ones."""
        status, response = _post(
            f"{self.proxy.url}/v1/chat/completions",
            {
                "model": "deepseek-v4-pro",
                "messages": [
                    {"role": "user", "content": "What's the weather tomorrow?"}
                ],
                "tools": TOOLS,
            },
        )
        self.assertEqual(status, 200, response)
        # Turn 1.1 has empty content + reasoning + tool calls.
        content = response["choices"][0]["message"]["content"]
        self.assertEqual(
            content,
            f"<details>\n<summary>Thinking</summary>\n\n{THINKING_1_1}\n</details>\n\n",
        )


# ---------------------------------------------------------------------------
# Concurrent threads (independent fake to keep the canonical strict fake
# stateless across the suite).
# ---------------------------------------------------------------------------


class _PerThreadFakeDeepSeek(BaseHTTPRequestHandler):
    """Strict fake keyed on a thread-name embedded in the user message; each
    thread expects its own reasoning text. Catches cross-thread cache leaks."""

    requests: list[dict[str, Any]] = []

    def log_message(self, fmt: str, *args: Any) -> None:
        return

    def do_POST(self) -> None:
        length = int(self.headers.get("Content-Length") or 0)
        payload = json.loads(self.rfile.read(length).decode("utf-8"))
        self.__class__.requests.append(payload)

        thread = self._thread_name(payload.get("messages") or [])
        expected_tool = f"tool reasoning {thread}"
        expected_final = f"final reasoning {thread}"
        final_content = f"answer {thread}"

        for index, message in enumerate(payload.get("messages") or []):
            if message.get("role") != "assistant":
                continue
            if (
                message.get("tool_calls")
                and message.get("reasoning_content") != expected_tool
            ):
                return self._send(400, {"error": {"missing_index": index}})
            if (
                message.get("content") == final_content
                and message.get("reasoning_content") != expected_final
            ):
                return self._send(400, {"error": {"missing_index": index}})

        messages = payload.get("messages") or []
        if len(messages) == 1:
            return self._send(
                200,
                _completion(
                    chat_id=f"tool-{thread}",
                    finish_reason="tool_calls",
                    reasoning=expected_tool,
                    tool_calls=[
                        {
                            "id": "call_reused",
                            "type": "function",
                            "function": {"name": "lookup", "arguments": "{}"},
                        }
                    ],
                ),
            )
        if len(messages) == 3:
            return self._send(
                200,
                _completion(
                    chat_id=f"final-{thread}",
                    finish_reason="stop",
                    content=final_content,
                    reasoning=expected_final,
                ),
            )
        return self._send(
            200,
            _completion(
                chat_id=f"followup-{thread}",
                finish_reason="stop",
                content=f"follow-up {thread}",
                reasoning="follow-up reasoning",
            ),
        )

    @staticmethod
    def _thread_name(messages: list[dict[str, Any]]) -> str:
        if not messages:
            return "?"
        text = str(messages[0].get("content") or "")
        if "thread A" in text:
            return "A"
        if "thread B" in text:
            return "B"
        return "?"

    def _send(self, status: int, body: dict[str, Any]) -> None:
        encoded = json.dumps(body).encode("utf-8")
        self.send_response(status)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(encoded)))
        self.end_headers()
        self.wfile.write(encoded)


class ConcurrentThreadTests(unittest.TestCase):
    def setUp(self) -> None:
        _PerThreadFakeDeepSeek.requests = []
        self.upstream = _Fixture(
            ThreadingHTTPServer(("127.0.0.1", 0), _PerThreadFakeDeepSeek)
        )
        self.store = ReasoningStore(":memory:")
        self.proxy = _start_proxy(self.upstream.url, self.store)

    def tearDown(self) -> None:
        self.proxy.close()
        self.upstream.close()
        self.store.close()

    def test_two_interleaved_threads_do_not_leak_reasoning(self) -> None:
        """Tool-call IDs are reused across both threads. The proxy must
        scope cache by conversation, not by tool_call_id, so each thread
        sees only its own reasoning."""
        tools = [
            {
                "type": "function",
                "function": {
                    "name": "lookup",
                    "parameters": {"type": "object", "properties": {}},
                },
            }
        ]

        def first(thread: str) -> dict[str, Any]:
            return {
                "model": "deepseek-v4-pro",
                "messages": [{"role": "user", "content": f"Start thread {thread}."}],
                "tools": tools,
            }

        def second(thread: str, assistant: dict[str, Any]) -> dict[str, Any]:
            return {
                "model": "deepseek-v4-pro",
                "messages": [
                    {"role": "user", "content": f"Start thread {thread}."},
                    _drop_reasoning(assistant),
                    {
                        "role": "tool",
                        "tool_call_id": "call_reused",
                        "content": f"tool result {thread}",
                    },
                ],
                "tools": tools,
            }

        status, first_a = _post(f"{self.proxy.url}/v1/chat/completions", first("A"))
        self.assertEqual(status, 200)
        status, first_b = _post(f"{self.proxy.url}/v1/chat/completions", first("B"))
        self.assertEqual(status, 200)
        status, _ = _post(
            f"{self.proxy.url}/v1/chat/completions",
            second("B", first_b["choices"][0]["message"]),
        )
        self.assertEqual(status, 200)
        status, _ = _post(
            f"{self.proxy.url}/v1/chat/completions",
            second("A", first_a["choices"][0]["message"]),
        )
        self.assertEqual(status, 200)

        # If the cache leaked, the upstream's strict reasoning check would
        # have rejected one of these turns with 400.
        upstream_b = _PerThreadFakeDeepSeek.requests[2]["messages"]
        upstream_a = _PerThreadFakeDeepSeek.requests[3]["messages"]
        self.assertEqual(upstream_b[1]["reasoning_content"], "tool reasoning B")
        self.assertEqual(upstream_a[1]["reasoning_content"], "tool reasoning A")


# ---------------------------------------------------------------------------
# Streaming-cache timing: tool reasoning must be available before [DONE].
# ---------------------------------------------------------------------------


class _SlowToolStreamHandler(BaseHTTPRequestHandler):
    """Streams reasoning + tool_calls, then waits before sending [DONE].
    Lets us verify the proxy stores reasoning at finish_reason=tool_calls,
    not at [DONE]."""

    def log_message(self, fmt: str, *args: Any) -> None:
        return

    def do_POST(self) -> None:
        length = int(self.headers.get("Content-Length") or 0)
        payload = json.loads(self.rfile.read(length).decode("utf-8"))

        if payload.get("stream"):
            self.send_response(200)
            self.send_header("Content-Type", "text/event-stream")
            self.end_headers()
            chunks = [
                {
                    "id": "stream-tool",
                    "object": "chat.completion.chunk",
                    "created": 1,
                    "model": "deepseek-v4-pro",
                    "choices": [
                        {
                            "index": 0,
                            "delta": {
                                "role": "assistant",
                                "reasoning_content": "Streamed tool reasoning.",
                                "tool_calls": [
                                    {
                                        "index": 0,
                                        "id": "call_stream_tool",
                                        "type": "function",
                                        "function": {
                                            "name": "lookup",
                                            "arguments": "{}",
                                        },
                                    }
                                ],
                            },
                            "finish_reason": None,
                        }
                    ],
                },
                {
                    "id": "stream-tool",
                    "object": "chat.completion.chunk",
                    "created": 1,
                    "model": "deepseek-v4-pro",
                    "choices": [
                        {"index": 0, "delta": {}, "finish_reason": "tool_calls"}
                    ],
                },
            ]
            for chunk in chunks:
                self.wfile.write(f"data: {json.dumps(chunk)}\n\n".encode("utf-8"))
                self.wfile.flush()
                if chunk["choices"][0]["finish_reason"] is None:
                    time.sleep(0.2)
            time.sleep(1)  # delay [DONE] so the follow-up beats it.
            self.wfile.write(b"data: [DONE]\n\n")
            self.wfile.flush()
            return

        # Non-streaming follow-up.
        messages = payload.get("messages") or []
        if (
            len(messages) >= 2
            and messages[1].get("reasoning_content") == "Streamed tool reasoning."
        ):
            self._send(
                200,
                _completion(
                    chat_id="follow",
                    finish_reason="stop",
                    content="follow-up accepted",
                    reasoning="post-tool",
                ),
            )
            return
        self._send(400, {"error": {"message": "missing streamed reasoning"}})

    def _send(self, status: int, body: dict[str, Any]) -> None:
        encoded = json.dumps(body).encode("utf-8")
        self.send_response(status)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(encoded)))
        self.end_headers()
        self.wfile.write(encoded)


class StreamingCacheTimingTests(unittest.TestCase):
    def setUp(self) -> None:
        self.upstream = _Fixture(
            ThreadingHTTPServer(("127.0.0.1", 0), _SlowToolStreamHandler)
        )
        self.store = ReasoningStore(":memory:")
        self.proxy = _start_proxy(self.upstream.url, self.store)

    def tearDown(self) -> None:
        self.proxy.close()
        self.upstream.close()
        self.store.close()

    def test_tool_reasoning_is_cached_before_done(self) -> None:
        """A follow-up POST issued before [DONE] must still find the
        streamed reasoning in cache."""
        tools = [
            {
                "type": "function",
                "function": {
                    "name": "lookup",
                    "parameters": {"type": "object", "properties": {}},
                },
            }
        ]
        request = Request(
            f"{self.proxy.url}/v1/chat/completions",
            data=json.dumps(
                {
                    "model": "deepseek-v4-pro",
                    "stream": True,
                    "messages": [{"role": "user", "content": "stream tool"}],
                    "tools": tools,
                }
            ).encode("utf-8"),
            method="POST",
            headers={
                "Authorization": "Bearer sk-test",
                "Content-Type": "application/json",
            },
        )
        with urlopen(request, timeout=3) as response:
            # Read until we see the tool_calls chunk; the upstream then
            # delays [DONE] for ~1s, giving us a window to send a follow-up.
            while True:
                line = response.readline().decode("utf-8")
                self.assertNotEqual(line, "")
                if '"finish_reason":"tool_calls"' in line:
                    break

            status, payload = _post(
                f"{self.proxy.url}/v1/chat/completions",
                {
                    "model": "deepseek-v4-pro",
                    "messages": [
                        {"role": "user", "content": "stream tool"},
                        {
                            "role": "assistant",
                            "content": "",
                            "tool_calls": [
                                {
                                    "id": "call_stream_tool",
                                    "type": "function",
                                    "function": {"name": "lookup", "arguments": "{}"},
                                }
                            ],
                        },
                        {
                            "role": "tool",
                            "tool_call_id": "call_stream_tool",
                            "content": "tool result",
                        },
                    ],
                    "tools": tools,
                },
            )
            response.read()
        self.assertEqual(status, 200, payload)
        self.assertEqual(
            payload["choices"][0]["message"]["content"].split("</details>")[-1],
            "\n\nfollow-up accepted",
        )


if __name__ == "__main__":
    unittest.main()