diff --git a/.gitignore b/.gitignore
index 8966628..d3ef3b3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,6 @@
+# AIs
+.claude/
+
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[codz]
diff --git a/README.md b/README.md
index 5750d35..6fd581e 100644
--- a/README.md
+++ b/README.md
@@ -134,7 +134,7 @@ Select `deepseek-v4-pro` in Cursor and use chat or agent mode as usual.
 
 ## How It Works
 
-- **Core fix:** DeepSeek's [thinking mode](https://api-docs.deepseek.com/guides/thinking_mode#tool-calls) requires `reasoning_content` from assistant tool-call messages to be passed back in subsequent requests, but Cursor omits this field, causing a 400 error. The proxy (`Cursor → ngrok → proxy → DeepSeek API`) stores `reasoning_content` from every DeepSeek response in a local SQLite cache, keyed by message signature, tool-call ID, and tool-call function signature, and patches outgoing requests with missing `reasoning_content` before they reach DeepSeek. On a cold cache (proxy restart, model switch), it logs and drops unrecoverable history, continues from the latest user request, and prefixes the next Cursor response with a notice.
+- **Core fix:** DeepSeek [thinking-mode tool calls](https://api-docs.deepseek.com/guides/thinking_mode#tool-calls) require the complete **multi-round** `reasoning_content` chain to be sent back in later requests. Cursor omits that field, causing a 400 error. The proxy (`Cursor -> ngrok -> proxy -> DeepSeek API`) stores DeepSeek's original `reasoning_content` and patches missing blocks back into outgoing tool-call history.
 - **Multi-conversation isolation:** To avoid collisions across concurrent conversations, the proxy scopes cache keys by a SHA-256 hash of the canonical conversation prefix (roles, content, and tool calls, excluding `reasoning_content`) plus the upstream model, configuration, and an API-key hash. Different threads get different scopes, so reused tool-call IDs do not collide. Byte-identical cloned histories produce identical scopes.
 - **Context caching compatibility:** The proxy preserves compatibility by never injecting synthetic thread IDs, timestamps, or cache-control messages. It restores `reasoning_content` as the exact original string, so repeated prefixes remain intact for [DeepSeek context cache](https://api-docs.deepseek.com/guides/kv_cache). Cache hit rates are logged in the terminal output.
 - **Additional compatibility fixes:** Beyond reasoning repair, the proxy converts legacy `functions`/`function_call` fields to `tools`/`tool_choice`, preserves required and named tool-choice semantics, normalizes `reasoning_effort` aliases, strips mirrored thinking display blocks from assistant content, flattens multi-part content arrays to plain text, and mirrors `reasoning_content` into Cursor-visible Markdown details blocks.
diff --git a/src/deepseek_cursor_proxy/config.py b/src/deepseek_cursor_proxy/config.py
index 427887b..8c10e22 100644
--- a/src/deepseek_cursor_proxy/config.py
+++ b/src/deepseek_cursor_proxy/config.py
@@ -172,8 +172,6 @@ def settings_from_config(
 
 def normalize_thinking(value: Any) -> str:
     thinking = as_str(value, DEFAULT_THINKING).strip().lower()
-    if thinking in {"passthrough", "pass-through", "pass_through"}:
-        return "pass-through"
     if thinking in {"enabled", "disabled"}:
         return thinking
     return DEFAULT_THINKING
diff --git a/src/deepseek_cursor_proxy/server.py b/src/deepseek_cursor_proxy/server.py
index 7356e4f..6479d31 100644
--- a/src/deepseek_cursor_proxy/server.py
+++ b/src/deepseek_cursor_proxy/server.py
@@ -540,6 +540,8 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler):
                 scope=record_response_scope,
                 prior_messages=record_response_messages,
                 recording_contexts=record_response_contexts,
+                display_reasoning=self.config.display_reasoning,
+                collapsible_reasoning=self.config.collapsible_reasoning,
             )
         except (json.JSONDecodeError, UnicodeDecodeError) as exc:
             LOG.warning("failed to rewrite upstream JSON response: %s", exc)
@@ -812,7 +814,7 @@ def build_arg_parser() -> argparse.ArgumentParser:
     )
     parser.add_argument(
         "--thinking",
-        choices=["enabled", "disabled", "pass-through"],
+        choices=["enabled", "disabled"],
         help="DeepSeek thinking mode, default from config or enabled",
     )
     parser.add_argument(
diff --git a/src/deepseek_cursor_proxy/streaming.py b/src/deepseek_cursor_proxy/streaming.py
index 9157d1c..8d6ee95 100644
--- a/src/deepseek_cursor_proxy/streaming.py
+++ b/src/deepseek_cursor_proxy/streaming.py
@@ -292,3 +292,34 @@ class CursorReasoningDisplayAdapter:
         }
         if metadata:
             self._last_chunk_metadata.update(metadata)
+
+
+def fold_reasoning_into_content(
+    response_payload: dict[str, Any],
+    collapsible: bool,
+) -> None:
+    """Mirror `reasoning_content` into the visible `content` field for
+    non-streaming responses, matching the streaming `<details>` layout."""
+    block_start = (
+        COLLAPSIBLE_THINKING_BLOCK_START if collapsible else THINKING_BLOCK_START
+    )
+    block_end = COLLAPSIBLE_THINKING_BLOCK_END if collapsible else THINKING_BLOCK_END
+    choices = response_payload.get("choices")
+    if not isinstance(choices, list):
+        return
+    for choice in choices:
+        if not isinstance(choice, dict):
+            continue
+        message = choice.get("message")
+        if not isinstance(message, dict):
+            continue
+        reasoning = message.get("reasoning_content")
+        if not isinstance(reasoning, str) or not reasoning:
+            continue
+        content = message.get("content")
+        message["content"] = (
+            block_start
+            + reasoning
+            + block_end
+            + (content if isinstance(content, str) else "")
+        )
diff --git a/src/deepseek_cursor_proxy/transform.py b/src/deepseek_cursor_proxy/transform.py
index 1ea53ee..53f22ee 100644
--- a/src/deepseek_cursor_proxy/transform.py
+++ b/src/deepseek_cursor_proxy/transform.py
@@ -3,6 +3,7 @@ from __future__ import annotations
 from dataclasses import dataclass, field
 import hashlib
 import json
+import logging
 import re
 from typing import Any
 
@@ -15,6 +16,10 @@ from .reasoning_store import (
     tool_call_signature,
     turn_context_signature,
 )
+from .streaming import fold_reasoning_into_content
+
+
+LOG = logging.getLogger("deepseek_cursor_proxy")
 
 
 SUPPORTED_REQUEST_FIELDS = {
@@ -35,6 +40,13 @@ SUPPORTED_REQUEST_FIELDS = {
     "frequency_penalty",
     "logprobs",
     "top_logprobs",
+    # Standard OpenAI Chat Completions fields that DeepSeek either honors or
+    # safely ignores. Cursor and most OpenAI SDKs send these unconditionally,
+    # so forwarding keeps clients happy and avoids log spam.
+    "user",
+    "seed",
+    "n",
+    "logit_bias",
 }
 
 MESSAGE_FIELDS = {
@@ -83,10 +95,6 @@ CURSOR_THINKING_BLOCK_RE = re.compile(
 )
 
 RECOVERY_NOTICE_TEXT = "[deepseek-cursor-proxy] Refreshed reasoning_content history."
-LEGACY_RECOVERY_NOTICE_TEXT = (
-    "Note: recovered this DeepSeek chat because older tool-call reasoning "
-    "was unavailable; continuing with recent context only."
-)
 RECOVERY_NOTICE_CONTENT = f"{RECOVERY_NOTICE_TEXT}\n\n"
 RECOVERY_SYSTEM_CONTENT = (
     "deepseek-cursor-proxy recovered this request because older DeepSeek "
@@ -460,10 +468,33 @@ def has_recovery_notice(message: dict[str, Any]) -> bool:
     return (
         message.get("role") == "assistant"
         and isinstance(content, str)
-        and content.startswith((RECOVERY_NOTICE_TEXT, LEGACY_RECOVERY_NOTICE_TEXT))
+        and content.startswith(RECOVERY_NOTICE_TEXT)
     )
 
 
+def strip_recovery_notice_for_upstream(
+    messages: list[dict[str, Any]],
+) -> list[dict[str, Any]]:
+    """Cursor echoes the proxy's recovery notice back to us in later turns.
+    The notice serves as a boundary marker for the proxy, but DeepSeek must
+    not see proxy-generated prose. Return a copy with assistant prefixes
+    stripped; leave the input untouched so cache scopes/recording contexts
+    keep matching the with-prefix history that Cursor will send next time."""
+    stripped: list[dict[str, Any]] = []
+    for message in messages:
+        if message.get("role") != "assistant":
+            stripped.append(message)
+            continue
+        content = message.get("content")
+        if not isinstance(content, str) or not content.startswith(RECOVERY_NOTICE_TEXT):
+            stripped.append(message)
+            continue
+        cleaned = dict(message)
+        cleaned["content"] = content[len(RECOVERY_NOTICE_TEXT) :].lstrip("\r\n")
+        stripped.append(cleaned)
+    return stripped
+
+
 def leading_system_messages(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
     leading_messages: list[dict[str, Any]] = []
     for message in messages:
@@ -628,6 +659,11 @@ def assistant_needs_reasoning_for_tool_context(
 def upstream_model_for(original_model: str, config: ProxyConfig) -> str:
     if original_model.startswith("deepseek-"):
         return original_model
+    LOG.warning(
+        "rewriting non-DeepSeek model %r to configured fallback %r",
+        original_model,
+        config.upstream_model,
+    )
     return config.upstream_model
 
 
@@ -688,6 +724,16 @@ def prepare_upstream_request(
     prepared = {
         key: value for key, value in payload.items() if key in SUPPORTED_REQUEST_FIELDS
     }
+    dropped_fields = sorted(
+        key
+        for key in payload.keys()
+        if key not in SUPPORTED_REQUEST_FIELDS
+        and key not in {"max_completion_tokens", "functions", "function_call"}
+    )
+    if dropped_fields:
+        LOG.warning(
+            "dropping unsupported request field(s): %s", ", ".join(dropped_fields)
+        )
     if "max_tokens" not in prepared and "max_completion_tokens" in payload:
         prepared["max_tokens"] = payload["max_completion_tokens"]
 
@@ -719,14 +765,9 @@ def prepare_upstream_request(
         if tool_choice is not None:
             prepared["tool_choice"] = tool_choice
 
-    if config.thinking != "pass-through":
-        prepared["thinking"] = {"type": config.thinking}
-
-    thinking = prepared.get("thinking")
-    thinking_enabled = isinstance(thinking, dict) and thinking.get("type") == "enabled"
-    thinking_disabled = (
-        isinstance(thinking, dict) and thinking.get("type") == "disabled"
-    )
+    prepared["thinking"] = {"type": config.thinking}
+    thinking_enabled = config.thinking == "enabled"
+    thinking_disabled = config.thinking == "disabled"
     if thinking_enabled:
         prepared["reasoning_effort"] = normalize_reasoning_effort(
             prepared.get("reasoning_effort") or config.reasoning_effort
@@ -797,12 +838,12 @@ def prepare_upstream_request(
             keep_reasoning=not thinking_disabled,
         )
         reasoning_diagnostics.extend(latest_diagnostics)
-    prepared["messages"] = messages
     active_record_response_scope = conversation_scope(messages, cache_namespace)
     record_response_contexts = response_recording_contexts(
         (record_response_scope, record_response_messages),
         (active_record_response_scope, messages),
     )
+    prepared["messages"] = strip_recovery_notice_for_upstream(messages)
 
     return PreparedRequest(
         payload=prepared,
@@ -874,6 +915,8 @@ def rewrite_response_body(
     scope: str | None = None,
     prior_messages: list[dict[str, Any]] | None = None,
     recording_contexts: list[tuple[str, list[dict[str, Any]]]] | None = None,
+    display_reasoning: bool = False,
+    collapsible_reasoning: bool = True,
 ) -> bytes:
     response_payload = json.loads(body.decode("utf-8"))
     if isinstance(response_payload, dict):
@@ -888,6 +931,8 @@ def rewrite_response_body(
             prior_messages=prior_messages,
             recording_contexts=recording_contexts,
         )
+        if display_reasoning:
+            fold_reasoning_into_content(response_payload, collapsible_reasoning)
         if "model" in response_payload:
             response_payload["model"] = original_model
     return json.dumps(
diff --git a/tests/test_config.py b/tests/test_config.py
index 68651e8..118ee67 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -121,7 +121,7 @@ class ConfigTests(unittest.TestCase):
                     [
                         "base_url: https://example.com/v1/",
                         "model: deepseek-v4-flash",
-                        "thinking: pass_through",
+                        "thinking: disabled",
                         "reasoning_effort: max",
                         "port: 9100",
                         "host: 0.0.0.0",
@@ -145,7 +145,7 @@ class ConfigTests(unittest.TestCase):
 
         self.assertEqual(config.upstream_base_url, "https://example.com/v1")
         self.assertEqual(config.upstream_model, "deepseek-v4-flash")
-        self.assertEqual(config.thinking, "pass-through")
+        self.assertEqual(config.thinking, "disabled")
         self.assertEqual(config.reasoning_effort, "max")
         self.assertEqual(config.host, "0.0.0.0")
         self.assertEqual(config.port, 9100)
diff --git a/tests/test_live_deepseek_cursor_proxy.py b/tests/test_live.py
similarity index 100%
rename from tests/test_live_deepseek_cursor_proxy.py
rename to tests/test_live.py
diff --git a/tests/test_protocol.py b/tests/test_protocol.py
new file mode 100644
index 0000000..6f565ae
--- /dev/null
+++ b/tests/test_protocol.py
@@ -0,0 +1,1327 @@
+"""End-to-end protocol tests against an in-process fake DeepSeek.
+
+Each test boots the proxy in-process against a small fake upstream and walks
+a real HTTP request scenario. The strict variant rejects with HTTP 400 — the
+same error real DeepSeek emits — whenever an assistant message that
+participated in a tool-calling turn lacks `reasoning_content`. So if the
+proxy is protocol-compliant, every turn succeeds; if not, the upstream
+short-circuits and the test fails fast.
+
+This file is the ground truth for "does the proxy speak DeepSeek correctly?"
+"""
+
+from __future__ import annotations
+
+from copy import deepcopy
+from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
+import json
+import threading
+import time
+import unittest
+from typing import Any
+from urllib.error import HTTPError
+from urllib.request import Request, urlopen
+
+from deepseek_cursor_proxy.config import ProxyConfig
+from deepseek_cursor_proxy.reasoning_store import ReasoningStore
+from deepseek_cursor_proxy.server import DeepSeekProxyHandler, DeepSeekProxyServer
+
+
+# Canonical fake-DeepSeek reasoning/answer text reused across tests.
+THINKING_1_1 = "Thinking 1.1 - need to look up the date."
+THINKING_1_2 = "Thinking 1.2 - I have the date, now I need the weather."
+THINKING_1_3 = "Thinking 1.3 - tool results suffice for the answer."
+THINKING_2_1 = "Thinking 2.1 - a brand new user turn."
+ANSWER_1 = "Answer 1: Tomorrow is sunny on 2026-04-24."
+ANSWER_2 = "Answer 2: Acknowledged follow-up."
+CALL_ID_1 = "call_get_date"
+CALL_ID_2 = "call_get_weather"
+
+TOOLS = [
+    {
+        "type": "function",
+        "function": {
+            "name": "get_date",
+            "parameters": {"type": "object", "properties": {}},
+        },
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "get_weather",
+            "parameters": {
+                "type": "object",
+                "properties": {"date": {"type": "string"}},
+                "required": ["date"],
+            },
+        },
+    },
+]
+
+
+# ---------------------------------------------------------------------------
+# Fake upstreams
+# ---------------------------------------------------------------------------
+
+
+def _completion(
+    *,
+    chat_id: str,
+    finish_reason: str,
+    content: str = "",
+    reasoning: str | None = None,
+    tool_calls: list[dict[str, Any]] | None = None,
+) -> dict[str, Any]:
+    message: dict[str, Any] = {"role": "assistant", "content": content}
+    if reasoning is not None:
+        message["reasoning_content"] = reasoning
+    if tool_calls is not None:
+        message["tool_calls"] = tool_calls
+    return {
+        "id": chat_id,
+        "object": "chat.completion",
+        "created": 1,
+        "model": "deepseek-v4-pro",
+        "choices": [{"index": 0, "finish_reason": finish_reason, "message": message}],
+    }
+
+
+class StrictFakeDeepSeek(BaseHTTPRequestHandler):
+    """DeepSeek protocol contract: tool-turn assistants must carry
+    `reasoning_content` (string). Returns canned canonical responses keyed
+    on the request shape; rejects with 400 otherwise."""
+
+    requests: list[dict[str, Any]] = []
+    auth_headers: list[str] = []
+
+    def log_message(self, fmt: str, *args: Any) -> None:
+        return
+
+    def do_POST(self) -> None:
+        length = int(self.headers.get("Content-Length") or 0)
+        payload = json.loads(self.rfile.read(length).decode("utf-8"))
+        self.__class__.requests.append(payload)
+        self.__class__.auth_headers.append(self.headers.get("Authorization", ""))
+
+        messages = payload.get("messages") or []
+        for index, message in enumerate(messages):
+            if not isinstance(message, dict) or message.get("role") != "assistant":
+                continue
+            if _is_tool_turn_assistant(messages, index) and not isinstance(
+                message.get("reasoning_content"), str
+            ):
+                return self._send(
+                    400,
+                    {
+                        "error": {
+                            "message": (
+                                "The reasoning_content in the thinking mode "
+                                "must be passed back to the API."
+                            ),
+                            "type": "invalid_request_error",
+                            "code": "invalid_request_error",
+                            "missing_index": index,
+                        }
+                    },
+                )
+
+        last_user = _last_index(messages, "user")
+        last_tool = _last_index(messages, "tool")
+        last_assistant = _last_index(messages, "assistant")
+        if last_user == 0 and last_assistant == -1 and last_tool == -1:
+            return self._send(
+                200,
+                _completion(
+                    chat_id="chatcmpl-1-1",
+                    finish_reason="tool_calls",
+                    reasoning=THINKING_1_1,
+                    tool_calls=[
+                        {
+                            "id": CALL_ID_1,
+                            "type": "function",
+                            "function": {"name": "get_date", "arguments": "{}"},
+                        }
+                    ],
+                ),
+            )
+        if last_user > 0 and last_user > last_tool:
+            return self._send(
+                200,
+                _completion(
+                    chat_id="chatcmpl-2-1",
+                    finish_reason="stop",
+                    content=ANSWER_2,
+                    reasoning=THINKING_2_1,
+                ),
+            )
+        if (
+            last_tool != -1
+            and messages[last_tool].get("tool_call_id") == CALL_ID_1
+            and last_assistant < last_tool
+        ):
+            return self._send(
+                200,
+                _completion(
+                    chat_id="chatcmpl-1-2",
+                    finish_reason="tool_calls",
+                    reasoning=THINKING_1_2,
+                    tool_calls=[
+                        {
+                            "id": CALL_ID_2,
+                            "type": "function",
+                            "function": {
+                                "name": "get_weather",
+                                "arguments": '{"date":"2026-04-24"}',
+                            },
+                        }
+                    ],
+                ),
+            )
+        if last_tool != -1 and messages[last_tool].get("tool_call_id") == CALL_ID_2:
+            return self._send(
+                200,
+                _completion(
+                    chat_id="chatcmpl-1-3",
+                    finish_reason="stop",
+                    content=ANSWER_1,
+                    reasoning=THINKING_1_3,
+                ),
+            )
+        return self._send(
+            400,
+            {
+                "error": {
+                    "message": (
+                        "test fake: unexpected shape: "
+                        f"roles={[m.get('role') for m in messages]}"
+                    )
+                }
+            },
+        )
+
+    def _send(self, status: int, body: dict[str, Any]) -> None:
+        encoded = json.dumps(body).encode("utf-8")
+        self.send_response(status)
+        self.send_header("Content-Type", "application/json")
+        self.send_header("Content-Length", str(len(encoded)))
+        self.end_headers()
+        self.wfile.write(encoded)
+
+
+def _is_tool_turn_assistant(messages: list[dict[str, Any]], index: int) -> bool:
+    message = messages[index]
+    if message.get("tool_calls"):
+        return True
+    for prior in reversed(messages[:index]):
+        role = prior.get("role")
+        if role == "tool":
+            return True
+        if role in {"user", "system"}:
+            return False
+    return False
+
+
+def _last_index(messages: list[dict[str, Any]], role: str) -> int:
+    for i in range(len(messages) - 1, -1, -1):
+        if messages[i].get("role") == role:
+            return i
+    return -1
+
+
+# ---------------------------------------------------------------------------
+# HTTP fixtures
+# ---------------------------------------------------------------------------
+
+
+class _Fixture:
+    def __init__(self, server: ThreadingHTTPServer) -> None:
+        self.server = server
+        self.thread = threading.Thread(target=server.serve_forever, daemon=True)
+        self.thread.start()
+
+    @property
+    def url(self) -> str:
+        host, port = self.server.server_address
+        return f"http://{host}:{port}"
+
+    def close(self) -> None:
+        self.server.shutdown()
+        self.server.server_close()
+        self.thread.join(timeout=5)
+
+
+def _start_strict_upstream() -> _Fixture:
+    StrictFakeDeepSeek.requests = []
+    StrictFakeDeepSeek.auth_headers = []
+    return _Fixture(ThreadingHTTPServer(("127.0.0.1", 0), StrictFakeDeepSeek))
+
+
+def _start_proxy(
+    upstream_url: str,
+    store: ReasoningStore,
+    **config_overrides: Any,
+) -> _Fixture:
+    proxy = DeepSeekProxyServer(("127.0.0.1", 0), DeepSeekProxyHandler)
+    proxy.config = ProxyConfig(
+        upstream_base_url=upstream_url,
+        upstream_model="deepseek-v4-pro",
+        ngrok=False,
+        verbose=False,
+        cors=False,
+        **config_overrides,
+    )
+    proxy.reasoning_store = store
+    return _Fixture(proxy)
+
+
+def _post(
+    url: str,
+    payload: dict[str, Any],
+    authorization: str = "Bearer sk-test",
+) -> tuple[int, dict[str, Any]]:
+    request = Request(
+        url,
+        data=json.dumps(payload).encode("utf-8"),
+        method="POST",
+        headers={"Authorization": authorization, "Content-Type": "application/json"},
+    )
+    try:
+        with urlopen(request, timeout=10) as response:
+            return response.status, json.loads(response.read().decode("utf-8"))
+    except HTTPError as exc:
+        return exc.code, json.loads(exc.read().decode("utf-8"))
+
+
+def _drop_reasoning(message: dict[str, Any]) -> dict[str, Any]:
+    cleaned = deepcopy(message)
+    cleaned.pop("reasoning_content", None)
+    return cleaned
+
+
+# ---------------------------------------------------------------------------
+# Test suites
+# ---------------------------------------------------------------------------
+
+
+class _StrictUpstreamCase(unittest.TestCase):
+    """Common setup: strict fake DeepSeek + proxy."""
+
+    config_overrides: dict[str, Any] = {}
+
+    def setUp(self) -> None:
+        self.upstream = _start_strict_upstream()
+        self.store = ReasoningStore(":memory:")
+        self.proxy = _start_proxy(
+            self.upstream.url, self.store, **self.config_overrides
+        )
+
+    def tearDown(self) -> None:
+        self.proxy.close()
+        self.upstream.close()
+        self.store.close()
+
+
+class CanonicalLoopTests(_StrictUpstreamCase):
+    def test_canonical_four_turn_tool_call_loop(self) -> None:
+        """Cursor strips reasoning_content from history; the proxy must
+        patch every prior assistant message that participated in the tool
+        chain so the strict upstream accepts each turn."""
+        # Turn 1.1: bare user message.
+        status, response_1_1 = _post(
+            f"{self.proxy.url}/v1/chat/completions",
+            {
+                "model": "deepseek-v4-pro",
+                "messages": [
+                    {"role": "user", "content": "What's the weather tomorrow?"}
+                ],
+                "tools": TOOLS,
+            },
+        )
+        self.assertEqual(status, 200, response_1_1)
+        first = response_1_1["choices"][0]["message"]
+        self.assertEqual(first["reasoning_content"], THINKING_1_1)
+        self.assertEqual(first["tool_calls"][0]["id"], CALL_ID_1)
+
+        # Turn 1.2: append tool result; Cursor drops reasoning.
+        status, response_1_2 = _post(
+            f"{self.proxy.url}/v1/chat/completions",
+            {
+                "model": "deepseek-v4-pro",
+                "messages": [
+                    {"role": "user", "content": "What's the weather tomorrow?"},
+                    _drop_reasoning(first),
+                    {
+                        "role": "tool",
+                        "tool_call_id": CALL_ID_1,
+                        "content": "2026-04-24",
+                    },
+                ],
+                "tools": TOOLS,
+            },
+        )
+        self.assertEqual(status, 200, response_1_2)
+        second = response_1_2["choices"][0]["message"]
+        self.assertEqual(second["tool_calls"][0]["id"], CALL_ID_2)
+        upstream_1_2 = StrictFakeDeepSeek.requests[1]["messages"]
+        self.assertEqual(upstream_1_2[1]["reasoning_content"], THINKING_1_1)
+
+        # Turn 1.3: both prior assistants need patching.
+        status, response_1_3 = _post(
+            f"{self.proxy.url}/v1/chat/completions",
+            {
+                "model": "deepseek-v4-pro",
+                "messages": [
+                    {"role": "user", "content": "What's the weather tomorrow?"},
+                    _drop_reasoning(first),
+                    {
+                        "role": "tool",
+                        "tool_call_id": CALL_ID_1,
+                        "content": "2026-04-24",
+                    },
+                    _drop_reasoning(second),
+                    {"role": "tool", "tool_call_id": CALL_ID_2, "content": "sunny"},
+                ],
+                "tools": TOOLS,
+            },
+        )
+        self.assertEqual(status, 200, response_1_3)
+        third = response_1_3["choices"][0]["message"]
+        self.assertIn(ANSWER_1, third["content"])
+        upstream_1_3 = StrictFakeDeepSeek.requests[2]["messages"]
+        self.assertEqual(upstream_1_3[1]["reasoning_content"], THINKING_1_1)
+        self.assertEqual(upstream_1_3[3]["reasoning_content"], THINKING_1_2)
+
+        # Turn 2.1: brand new user turn; the prior final assistant also
+        # needs patching since DeepSeek treats it as part of the tool turn.
+        status, response_2_1 = _post(
+            f"{self.proxy.url}/v1/chat/completions",
+            {
+                "model": "deepseek-v4-pro",
+                "messages": [
+                    {"role": "user", "content": "What's the weather tomorrow?"},
+                    _drop_reasoning(first),
+                    {
+                        "role": "tool",
+                        "tool_call_id": CALL_ID_1,
+                        "content": "2026-04-24",
+                    },
+                    _drop_reasoning(second),
+                    {"role": "tool", "tool_call_id": CALL_ID_2, "content": "sunny"},
+                    _drop_reasoning(third),
+                    {"role": "user", "content": "Thanks. What about Saturday?"},
+                ],
+                "tools": TOOLS,
+            },
+        )
+        self.assertEqual(status, 200, response_2_1)
+        self.assertIn(ANSWER_2, response_2_1["choices"][0]["message"]["content"])
+        upstream_2_1 = StrictFakeDeepSeek.requests[3]["messages"]
+        self.assertEqual(upstream_2_1[1]["reasoning_content"], THINKING_1_1)
+        self.assertEqual(upstream_2_1[3]["reasoning_content"], THINKING_1_2)
+        self.assertEqual(upstream_2_1[5]["reasoning_content"], THINKING_1_3)
+
+    def test_authorization_namespace_isolation(self) -> None:
+        """A second user with the same conversation prefix must NOT see
+        cached reasoning from the first user."""
+        # Prime cache as user A.
+        _post(
+            f"{self.proxy.url}/v1/chat/completions",
+            {
+                "model": "deepseek-v4-pro",
+                "messages": [
+                    {"role": "user", "content": "What's the weather tomorrow?"}
+                ],
+                "tools": TOOLS,
+            },
+            authorization="Bearer sk-USER-A",
+        )
+
+        # User B replays a tool history with the exact same shape.
+        status, _ = _post(
+            f"{self.proxy.url}/v1/chat/completions",
+            {
+                "model": "deepseek-v4-pro",
+                "messages": [
+                    {"role": "user", "content": "What's the weather tomorrow?"},
+                    {
+                        "role": "assistant",
+                        "content": "",
+                        "tool_calls": [
+                            {
+                                "id": CALL_ID_1,
+                                "type": "function",
+                                "function": {"name": "get_date", "arguments": "{}"},
+                            }
+                        ],
+                    },
+                    {
+                        "role": "tool",
+                        "tool_call_id": CALL_ID_1,
+                        "content": "2026-04-24",
+                    },
+                ],
+                "tools": TOOLS,
+            },
+            authorization="Bearer sk-USER-B",
+        )
+        self.assertEqual(status, 200)
+        sent = StrictFakeDeepSeek.requests[-1]
+        leaked = any(
+            m.get("role") == "assistant" and m.get("reasoning_content") == THINKING_1_1
+            for m in sent["messages"]
+        )
+        self.assertFalse(leaked)
+
+
+class StrictRejectModeTests(_StrictUpstreamCase):
+    config_overrides = {"missing_reasoning_strategy": "reject"}
+
+    def test_returns_409_without_calling_upstream(self) -> None:
+        status, payload = _post(
+            f"{self.proxy.url}/v1/chat/completions",
+            {
+                "model": "deepseek-v4-pro",
+                "messages": [
+                    {"role": "user", "content": "go"},
+                    {
+                        "role": "assistant",
+                        "content": "",
+                        "tool_calls": [
+                            {
+                                "id": CALL_ID_1,
+                                "type": "function",
+                                "function": {"name": "get_date", "arguments": "{}"},
+                            }
+                        ],
+                    },
+                    {
+                        "role": "tool",
+                        "tool_call_id": CALL_ID_1,
+                        "content": "2026-04-24",
+                    },
+                ],
+            },
+        )
+        self.assertEqual(status, 409, payload)
+        self.assertEqual(payload["error"]["missing_reasoning_messages"], 1)
+        self.assertEqual(StrictFakeDeepSeek.requests, [])
+
+
+class ThinkingDisabledTests(_StrictUpstreamCase):
+    config_overrides = {"thinking": "disabled"}
+
+    def test_disabled_does_not_inject_reasoning(self) -> None:
+        _post(
+            f"{self.proxy.url}/v1/chat/completions",
+            {
+                "model": "deepseek-v4-pro",
+                "messages": [
+                    {"role": "user", "content": "ping"},
+                    {
+                        "role": "assistant",
+                        "content": "",
+                        "reasoning_content": "Should be discarded by proxy.",
+                        "tool_calls": [
+                            {
+                                "id": CALL_ID_1,
+                                "type": "function",
+                                "function": {"name": "get_date", "arguments": "{}"},
+                            }
+                        ],
+                    },
+                    {
+                        "role": "tool",
+                        "tool_call_id": CALL_ID_1,
+                        "content": "2026-04-24",
+                    },
+                ],
+            },
+        )
+        sent = StrictFakeDeepSeek.requests[-1]
+        self.assertEqual(sent["thinking"], {"type": "disabled"})
+        self.assertNotIn("reasoning_content", sent["messages"][1])
+
+
+class RecoveryTests(_StrictUpstreamCase):
+    def test_cold_cache_recovers_to_latest_user_with_notice(self) -> None:
+        """Stale tool history with no cached reasoning: proxy keeps only
+        the latest user message + recovery system message and prefixes a
+        user-facing notice into the response."""
+        status, response = _post(
+            f"{self.proxy.url}/v1/chat/completions",
+            {
+                "model": "deepseek-v4-pro",
+                "messages": [
+                    {"role": "system", "content": "Be brief."},
+                    {"role": "user", "content": "old work"},
+                    {
+                        "role": "assistant",
+                        "content": "",
+                        "tool_calls": [
+                            {
+                                "id": CALL_ID_1,
+                                "type": "function",
+                                "function": {"name": "get_date", "arguments": "{}"},
+                            }
+                        ],
+                    },
+                    {
+                        "role": "tool",
+                        "tool_call_id": CALL_ID_1,
+                        "content": "2026-04-24",
+                    },
+                    {"role": "user", "content": "Thanks. What about Saturday?"},
+                ],
+            },
+        )
+        self.assertEqual(status, 200, response)
+        sent = StrictFakeDeepSeek.requests[-1]
+        self.assertEqual(
+            [m["role"] for m in sent["messages"]], ["system", "system", "user"]
+        )
+        self.assertEqual(
+            sent["messages"][-1]["content"], "Thanks. What about Saturday?"
+        )
+        self.assertIn(
+            "[deepseek-cursor-proxy] Refreshed reasoning",
+            response["choices"][0]["message"]["content"],
+        )
+
+    def test_recovery_notice_is_stripped_before_upstream_replay(self) -> None:
+        """When Cursor echoes a previous response (which carried the proxy's
+        recovery notice) back as assistant content, the notice serves as a
+        boundary marker for the proxy but must not be replayed upstream as
+        if DeepSeek had written it."""
+        # Trigger initial recovery so the response carries the notice.
+        status, first = _post(
+            f"{self.proxy.url}/v1/chat/completions",
+            {
+                "model": "deepseek-v4-pro",
+                "messages": [
+                    {"role": "user", "content": "old work"},
+                    {
+                        "role": "assistant",
+                        "content": "",
+                        "tool_calls": [
+                            {
+                                "id": CALL_ID_1,
+                                "type": "function",
+                                "function": {"name": "get_date", "arguments": "{}"},
+                            }
+                        ],
+                    },
+                    {
+                        "role": "tool",
+                        "tool_call_id": CALL_ID_1,
+                        "content": "2026-04-24",
+                    },
+                    {"role": "user", "content": "Thanks. What about Saturday?"},
+                ],
+            },
+        )
+        self.assertEqual(status, 200)
+
+        # Cursor faithfully echoes the response (including the notice prefix).
+        echoed = _drop_reasoning(first["choices"][0]["message"])
+        status, _ = _post(
+            f"{self.proxy.url}/v1/chat/completions",
+            {
+                "model": "deepseek-v4-pro",
+                "messages": [
+                    {"role": "user", "content": "old work"},
+                    {
+                        "role": "assistant",
+                        "content": "",
+                        "tool_calls": [
+                            {
+                                "id": CALL_ID_1,
+                                "type": "function",
+                                "function": {"name": "get_date", "arguments": "{}"},
+                            }
+                        ],
+                    },
+                    {
+                        "role": "tool",
+                        "tool_call_id": CALL_ID_1,
+                        "content": "2026-04-24",
+                    },
+                    {"role": "user", "content": "Thanks. What about Saturday?"},
+                    echoed,
+                    {"role": "user", "content": "And Sunday?"},
+                ],
+            },
+        )
+        self.assertEqual(status, 200)
+        sent = StrictFakeDeepSeek.requests[-1]
+        for message in sent["messages"]:
+            if message.get("role") != "assistant":
+                continue
+            self.assertNotIn("deepseek-cursor-proxy", message.get("content", ""))
+
+
+# ---------------------------------------------------------------------------
+# Streaming behaviour
+# ---------------------------------------------------------------------------
+
+
+def _sse_chunks(*chunks: dict[str, Any]) -> bytes:
+    out = b""
+    for chunk in chunks:
+        out += f"data: {json.dumps(chunk)}\n\n".encode("utf-8")
+    out += b"data: [DONE]\n\n"
+    return out
+
+
+class _StreamingThenJsonHandler(BaseHTTPRequestHandler):
+    """First request streams a tool call (with reasoning); subsequent
+    non-streaming requests echo a final answer if the assistant message
+    carries the previously-streamed reasoning_content."""
+
+    requests: list[dict[str, Any]] = []
+
+    def log_message(self, fmt: str, *args: Any) -> None:
+        return
+
+    def do_POST(self) -> None:
+        length = int(self.headers.get("Content-Length") or 0)
+        payload = json.loads(self.rfile.read(length).decode("utf-8"))
+        self.__class__.requests.append(payload)
+
+        if payload.get("stream"):
+            self.send_response(200)
+            self.send_header("Content-Type", "text/event-stream")
+            self.end_headers()
+            self.wfile.write(
+                _sse_chunks(
+                    {
+                        "id": "stream-1",
+                        "object": "chat.completion.chunk",
+                        "created": 1,
+                        "model": "deepseek-v4-pro",
+                        "choices": [
+                            {
+                                "index": 0,
+                                "delta": {
+                                    "role": "assistant",
+                                    "reasoning_content": THINKING_1_1,
+                                    "tool_calls": [
+                                        {
+                                            "index": 0,
+                                            "id": CALL_ID_1,
+                                            "type": "function",
+                                            "function": {
+                                                "name": "get_date",
+                                                "arguments": "{}",
+                                            },
+                                        }
+                                    ],
+                                },
+                                "finish_reason": None,
+                            }
+                        ],
+                    },
+                    {
+                        "id": "stream-1",
+                        "object": "chat.completion.chunk",
+                        "created": 1,
+                        "model": "deepseek-v4-pro",
+                        "choices": [
+                            {"index": 0, "delta": {}, "finish_reason": "tool_calls"}
+                        ],
+                    },
+                )
+            )
+            self.wfile.flush()
+            return
+
+        # Non-streaming follow-up: enforce that proxy patched the prior
+        # streamed reasoning_content into history.
+        assistant = payload["messages"][1]
+        if assistant.get("reasoning_content") != THINKING_1_1:
+            self._send(400, {"error": {"message": "missing streamed reasoning"}})
+            return
+        self._send(
+            200,
+            _completion(
+                chat_id="follow-up",
+                finish_reason="stop",
+                content="follow-up accepted",
+                reasoning="post-tool reasoning",
+            ),
+        )
+
+    def _send(self, status: int, body: dict[str, Any]) -> None:
+        encoded = json.dumps(body).encode("utf-8")
+        self.send_response(status)
+        self.send_header("Content-Type", "application/json")
+        self.send_header("Content-Length", str(len(encoded)))
+        self.end_headers()
+        self.wfile.write(encoded)
+
+
+class StreamingThenNonStreamingTests(unittest.TestCase):
+    def setUp(self) -> None:
+        _StreamingThenJsonHandler.requests = []
+        self.upstream = _Fixture(
+            ThreadingHTTPServer(("127.0.0.1", 0), _StreamingThenJsonHandler)
+        )
+        self.store = ReasoningStore(":memory:")
+        self.proxy = _start_proxy(self.upstream.url, self.store)
+
+    def tearDown(self) -> None:
+        self.proxy.close()
+        self.upstream.close()
+        self.store.close()
+
+    def test_streamed_reasoning_is_replayed_in_next_non_streaming_request(
+        self,
+    ) -> None:
+        """Cursor often streams Turn 1.1 then issues Turn 1.2 as a non-stream
+        POST. The proxy must repair reasoning_content from the streaming
+        cache before forwarding the follow-up."""
+        # Stream a tool call.
+        request = Request(
+            f"{self.proxy.url}/v1/chat/completions",
+            data=json.dumps(
+                {
+                    "model": "deepseek-v4-pro",
+                    "stream": True,
+                    "messages": [{"role": "user", "content": "go"}],
+                    "tools": [TOOLS[0]],
+                }
+            ).encode("utf-8"),
+            method="POST",
+            headers={
+                "Authorization": "Bearer sk-test",
+                "Content-Type": "application/json",
+            },
+        )
+        with urlopen(request, timeout=5) as response:
+            self.assertIn("data: [DONE]", response.read().decode("utf-8"))
+
+        # Non-streaming follow-up (Cursor strips reasoning_content).
+        status, payload = _post(
+            f"{self.proxy.url}/v1/chat/completions",
+            {
+                "model": "deepseek-v4-pro",
+                "messages": [
+                    {"role": "user", "content": "go"},
+                    {
+                        "role": "assistant",
+                        "content": "",
+                        "tool_calls": [
+                            {
+                                "id": CALL_ID_1,
+                                "type": "function",
+                                "function": {
+                                    "name": "get_date",
+                                    "arguments": "{}",
+                                },
+                            }
+                        ],
+                    },
+                    {
+                        "role": "tool",
+                        "tool_call_id": CALL_ID_1,
+                        "content": "2026-04-24",
+                    },
+                ],
+                "tools": [TOOLS[0]],
+            },
+        )
+        self.assertEqual(status, 200, payload)
+        sent = _StreamingThenJsonHandler.requests[-1]
+        self.assertEqual(sent["messages"][1]["reasoning_content"], THINKING_1_1)
+
+
+class _ReasoningStreamHandler(BaseHTTPRequestHandler):
+    """Streams reasoning_content tokens followed by a content answer."""
+
+    def log_message(self, fmt: str, *args: Any) -> None:
+        return
+
+    def do_POST(self) -> None:
+        length = int(self.headers.get("Content-Length") or 0)
+        self.rfile.read(length)
+        self.send_response(200)
+        self.send_header("Content-Type", "text/event-stream")
+        self.end_headers()
+        self.wfile.write(
+            _sse_chunks(
+                {
+                    "id": "stream-r",
+                    "object": "chat.completion.chunk",
+                    "created": 1,
+                    "model": "deepseek-v4-pro",
+                    "choices": [
+                        {
+                            "index": 0,
+                            "delta": {
+                                "role": "assistant",
+                                "reasoning_content": "Need ",
+                            },
+                            "finish_reason": None,
+                        }
+                    ],
+                },
+                {
+                    "id": "stream-r",
+                    "object": "chat.completion.chunk",
+                    "created": 1,
+                    "model": "deepseek-v4-pro",
+                    "choices": [
+                        {
+                            "index": 0,
+                            "delta": {"reasoning_content": "context."},
+                            "finish_reason": None,
+                        }
+                    ],
+                },
+                {
+                    "id": "stream-r",
+                    "object": "chat.completion.chunk",
+                    "created": 1,
+                    "model": "deepseek-v4-pro",
+                    "choices": [
+                        {
+                            "index": 0,
+                            "delta": {"content": "Final."},
+                            "finish_reason": None,
+                        }
+                    ],
+                },
+                {
+                    "id": "stream-r",
+                    "object": "chat.completion.chunk",
+                    "created": 1,
+                    "model": "deepseek-v4-pro",
+                    "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
+                },
+            )
+        )
+        self.wfile.flush()
+
+
+class StreamingDisplayTests(unittest.TestCase):
+    def setUp(self) -> None:
+        self.upstream = _Fixture(
+            ThreadingHTTPServer(("127.0.0.1", 0), _ReasoningStreamHandler)
+        )
+        self.store = ReasoningStore(":memory:")
+        self.proxy = _start_proxy(self.upstream.url, self.store)
+
+    def tearDown(self) -> None:
+        self.proxy.close()
+        self.upstream.close()
+        self.store.close()
+
+    def test_streaming_response_mirrors_reasoning_into_details_block(self) -> None:
+        request = Request(
+            f"{self.proxy.url}/v1/chat/completions",
+            data=json.dumps(
+                {
+                    "model": "deepseek-v4-pro",
+                    "stream": True,
+                    "messages": [{"role": "user", "content": "stream"}],
+                }
+            ).encode("utf-8"),
+            method="POST",
+            headers={
+                "Authorization": "Bearer sk-test",
+                "Content-Type": "application/json",
+            },
+        )
+        with urlopen(request, timeout=2) as response:
+            body = response.read().decode("utf-8")
+
+        chunks = [
+            json.loads(line.removeprefix("data: "))
+            for line in body.splitlines()
+            if line.startswith("data: {")
+        ]
+        self.assertEqual(
+            chunks[0]["choices"][0]["delta"]["content"],
+            "<details>\n<summary>Thinking</summary>\n\nNeed ",
+        )
+        self.assertEqual(
+            chunks[2]["choices"][0]["delta"]["content"],
+            "\n</details>\n\nFinal.",
+        )
+
+
+class NonStreamingDisplayTests(_StrictUpstreamCase):
+    def test_non_streaming_response_mirrors_reasoning_into_details_block(
+        self,
+    ) -> None:
+        """The README claims thinking tokens are displayed in Cursor; this
+        must hold for non-streaming responses too, not only streaming ones."""
+        status, response = _post(
+            f"{self.proxy.url}/v1/chat/completions",
+            {
+                "model": "deepseek-v4-pro",
+                "messages": [
+                    {"role": "user", "content": "What's the weather tomorrow?"}
+                ],
+                "tools": TOOLS,
+            },
+        )
+        self.assertEqual(status, 200, response)
+        # Turn 1.1 has empty content + reasoning + tool calls.
+        content = response["choices"][0]["message"]["content"]
+        self.assertEqual(
+            content,
+            f"<details>\n<summary>Thinking</summary>\n\n{THINKING_1_1}\n</details>\n\n",
+        )
+
+
+# ---------------------------------------------------------------------------
+# Concurrent threads (independent fake to keep the canonical strict fake
+# stateless across the suite).
+# ---------------------------------------------------------------------------
+
+
+class _PerThreadFakeDeepSeek(BaseHTTPRequestHandler):
+    """Strict fake keyed on a thread-name embedded in the user message; each
+    thread expects its own reasoning text. Catches cross-thread cache leaks."""
+
+    requests: list[dict[str, Any]] = []
+
+    def log_message(self, fmt: str, *args: Any) -> None:
+        return
+
+    def do_POST(self) -> None:
+        length = int(self.headers.get("Content-Length") or 0)
+        payload = json.loads(self.rfile.read(length).decode("utf-8"))
+        self.__class__.requests.append(payload)
+
+        thread = self._thread_name(payload.get("messages") or [])
+        expected_tool = f"tool reasoning {thread}"
+        expected_final = f"final reasoning {thread}"
+        final_content = f"answer {thread}"
+
+        for index, message in enumerate(payload.get("messages") or []):
+            if message.get("role") != "assistant":
+                continue
+            if (
+                message.get("tool_calls")
+                and message.get("reasoning_content") != expected_tool
+            ):
+                return self._send(400, {"error": {"missing_index": index}})
+            if (
+                message.get("content") == final_content
+                and message.get("reasoning_content") != expected_final
+            ):
+                return self._send(400, {"error": {"missing_index": index}})
+
+        messages = payload.get("messages") or []
+        if len(messages) == 1:
+            return self._send(
+                200,
+                _completion(
+                    chat_id=f"tool-{thread}",
+                    finish_reason="tool_calls",
+                    reasoning=expected_tool,
+                    tool_calls=[
+                        {
+                            "id": "call_reused",
+                            "type": "function",
+                            "function": {"name": "lookup", "arguments": "{}"},
+                        }
+                    ],
+                ),
+            )
+        if len(messages) == 3:
+            return self._send(
+                200,
+                _completion(
+                    chat_id=f"final-{thread}",
+                    finish_reason="stop",
+                    content=final_content,
+                    reasoning=expected_final,
+                ),
+            )
+        return self._send(
+            200,
+            _completion(
+                chat_id=f"followup-{thread}",
+                finish_reason="stop",
+                content=f"follow-up {thread}",
+                reasoning="follow-up reasoning",
+            ),
+        )
+
+    @staticmethod
+    def _thread_name(messages: list[dict[str, Any]]) -> str:
+        if not messages:
+            return "?"
+        text = str(messages[0].get("content") or "")
+        if "thread A" in text:
+            return "A"
+        if "thread B" in text:
+            return "B"
+        return "?"
+
+    def _send(self, status: int, body: dict[str, Any]) -> None:
+        encoded = json.dumps(body).encode("utf-8")
+        self.send_response(status)
+        self.send_header("Content-Type", "application/json")
+        self.send_header("Content-Length", str(len(encoded)))
+        self.end_headers()
+        self.wfile.write(encoded)
+
+
+class ConcurrentThreadTests(unittest.TestCase):
+    def setUp(self) -> None:
+        _PerThreadFakeDeepSeek.requests = []
+        self.upstream = _Fixture(
+            ThreadingHTTPServer(("127.0.0.1", 0), _PerThreadFakeDeepSeek)
+        )
+        self.store = ReasoningStore(":memory:")
+        self.proxy = _start_proxy(self.upstream.url, self.store)
+
+    def tearDown(self) -> None:
+        self.proxy.close()
+        self.upstream.close()
+        self.store.close()
+
+    def test_two_interleaved_threads_do_not_leak_reasoning(self) -> None:
+        """Tool-call IDs are reused across both threads. The proxy must
+        scope cache by conversation, not by tool_call_id, so each thread
+        sees only its own reasoning."""
+        tools = [
+            {
+                "type": "function",
+                "function": {
+                    "name": "lookup",
+                    "parameters": {"type": "object", "properties": {}},
+                },
+            }
+        ]
+
+        def first(thread: str) -> dict[str, Any]:
+            return {
+                "model": "deepseek-v4-pro",
+                "messages": [{"role": "user", "content": f"Start thread {thread}."}],
+                "tools": tools,
+            }
+
+        def second(thread: str, assistant: dict[str, Any]) -> dict[str, Any]:
+            return {
+                "model": "deepseek-v4-pro",
+                "messages": [
+                    {"role": "user", "content": f"Start thread {thread}."},
+                    _drop_reasoning(assistant),
+                    {
+                        "role": "tool",
+                        "tool_call_id": "call_reused",
+                        "content": f"tool result {thread}",
+                    },
+                ],
+                "tools": tools,
+            }
+
+        status, first_a = _post(f"{self.proxy.url}/v1/chat/completions", first("A"))
+        self.assertEqual(status, 200)
+        status, first_b = _post(f"{self.proxy.url}/v1/chat/completions", first("B"))
+        self.assertEqual(status, 200)
+        status, _ = _post(
+            f"{self.proxy.url}/v1/chat/completions",
+            second("B", first_b["choices"][0]["message"]),
+        )
+        self.assertEqual(status, 200)
+        status, _ = _post(
+            f"{self.proxy.url}/v1/chat/completions",
+            second("A", first_a["choices"][0]["message"]),
+        )
+        self.assertEqual(status, 200)
+
+        # If the cache leaked, the upstream's strict reasoning check would
+        # have rejected one of these turns with 400.
+        upstream_b = _PerThreadFakeDeepSeek.requests[2]["messages"]
+        upstream_a = _PerThreadFakeDeepSeek.requests[3]["messages"]
+        self.assertEqual(upstream_b[1]["reasoning_content"], "tool reasoning B")
+        self.assertEqual(upstream_a[1]["reasoning_content"], "tool reasoning A")
+
+
+# ---------------------------------------------------------------------------
+# Streaming-cache timing: tool reasoning must be available before [DONE].
+# ---------------------------------------------------------------------------
+
+
+class _SlowToolStreamHandler(BaseHTTPRequestHandler):
+    """Streams reasoning + tool_calls, then waits before sending [DONE].
+    Lets us verify the proxy stores reasoning at finish_reason=tool_calls,
+    not at [DONE]."""
+
+    def log_message(self, fmt: str, *args: Any) -> None:
+        return
+
+    def do_POST(self) -> None:
+        length = int(self.headers.get("Content-Length") or 0)
+        payload = json.loads(self.rfile.read(length).decode("utf-8"))
+
+        if payload.get("stream"):
+            self.send_response(200)
+            self.send_header("Content-Type", "text/event-stream")
+            self.end_headers()
+            chunks = [
+                {
+                    "id": "stream-tool",
+                    "object": "chat.completion.chunk",
+                    "created": 1,
+                    "model": "deepseek-v4-pro",
+                    "choices": [
+                        {
+                            "index": 0,
+                            "delta": {
+                                "role": "assistant",
+                                "reasoning_content": "Streamed tool reasoning.",
+                                "tool_calls": [
+                                    {
+                                        "index": 0,
+                                        "id": "call_stream_tool",
+                                        "type": "function",
+                                        "function": {
+                                            "name": "lookup",
+                                            "arguments": "{}",
+                                        },
+                                    }
+                                ],
+                            },
+                            "finish_reason": None,
+                        }
+                    ],
+                },
+                {
+                    "id": "stream-tool",
+                    "object": "chat.completion.chunk",
+                    "created": 1,
+                    "model": "deepseek-v4-pro",
+                    "choices": [
+                        {"index": 0, "delta": {}, "finish_reason": "tool_calls"}
+                    ],
+                },
+            ]
+            for chunk in chunks:
+                self.wfile.write(f"data: {json.dumps(chunk)}\n\n".encode("utf-8"))
+                self.wfile.flush()
+                if chunk["choices"][0]["finish_reason"] is None:
+                    time.sleep(0.2)
+            time.sleep(1)  # delay [DONE] so the follow-up beats it.
+            self.wfile.write(b"data: [DONE]\n\n")
+            self.wfile.flush()
+            return
+
+        # Non-streaming follow-up.
+        messages = payload.get("messages") or []
+        if (
+            len(messages) >= 2
+            and messages[1].get("reasoning_content") == "Streamed tool reasoning."
+        ):
+            self._send(
+                200,
+                _completion(
+                    chat_id="follow",
+                    finish_reason="stop",
+                    content="follow-up accepted",
+                    reasoning="post-tool",
+                ),
+            )
+            return
+        self._send(400, {"error": {"message": "missing streamed reasoning"}})
+
+    def _send(self, status: int, body: dict[str, Any]) -> None:
+        encoded = json.dumps(body).encode("utf-8")
+        self.send_response(status)
+        self.send_header("Content-Type", "application/json")
+        self.send_header("Content-Length", str(len(encoded)))
+        self.end_headers()
+        self.wfile.write(encoded)
+
+
+class StreamingCacheTimingTests(unittest.TestCase):
+    def setUp(self) -> None:
+        self.upstream = _Fixture(
+            ThreadingHTTPServer(("127.0.0.1", 0), _SlowToolStreamHandler)
+        )
+        self.store = ReasoningStore(":memory:")
+        self.proxy = _start_proxy(self.upstream.url, self.store)
+
+    def tearDown(self) -> None:
+        self.proxy.close()
+        self.upstream.close()
+        self.store.close()
+
+    def test_tool_reasoning_is_cached_before_done(self) -> None:
+        """A follow-up POST issued before [DONE] must still find the
+        streamed reasoning in cache."""
+        tools = [
+            {
+                "type": "function",
+                "function": {
+                    "name": "lookup",
+                    "parameters": {"type": "object", "properties": {}},
+                },
+            }
+        ]
+        request = Request(
+            f"{self.proxy.url}/v1/chat/completions",
+            data=json.dumps(
+                {
+                    "model": "deepseek-v4-pro",
+                    "stream": True,
+                    "messages": [{"role": "user", "content": "stream tool"}],
+                    "tools": tools,
+                }
+            ).encode("utf-8"),
+            method="POST",
+            headers={
+                "Authorization": "Bearer sk-test",
+                "Content-Type": "application/json",
+            },
+        )
+        with urlopen(request, timeout=3) as response:
+            # Read until we see the tool_calls chunk; the upstream then
+            # delays [DONE] for ~1s, giving us a window to send a follow-up.
+            while True:
+                line = response.readline().decode("utf-8")
+                self.assertNotEqual(line, "")
+                if '"finish_reason":"tool_calls"' in line:
+                    break
+
+            status, payload = _post(
+                f"{self.proxy.url}/v1/chat/completions",
+                {
+                    "model": "deepseek-v4-pro",
+                    "messages": [
+                        {"role": "user", "content": "stream tool"},
+                        {
+                            "role": "assistant",
+                            "content": "",
+                            "tool_calls": [
+                                {
+                                    "id": "call_stream_tool",
+                                    "type": "function",
+                                    "function": {"name": "lookup", "arguments": "{}"},
+                                }
+                            ],
+                        },
+                        {
+                            "role": "tool",
+                            "tool_call_id": "call_stream_tool",
+                            "content": "tool result",
+                        },
+                    ],
+                    "tools": tools,
+                },
+            )
+            response.read()
+        self.assertEqual(status, 200, payload)
+        self.assertEqual(
+            payload["choices"][0]["message"]["content"].split("</details>")[-1],
+            "\n\nfollow-up accepted",
+        )
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/test_proxy_end_to_end.py b/tests/test_proxy_end_to_end.py
deleted file mode 100644
index 18d5cf8..0000000
--- a/tests/test_proxy_end_to_end.py
+++ /dev/null
@@ -1,1414 +0,0 @@
-from __future__ import annotations
-
-from dataclasses import replace
-from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
-import json
-from pathlib import Path
-from tempfile import TemporaryDirectory
-import threading
-import time
-import unittest
-from urllib.error import HTTPError
-from urllib.request import Request, urlopen
-
-from deepseek_cursor_proxy.config import ProxyConfig
-from deepseek_cursor_proxy.reasoning_store import (
-    ReasoningStore,
-    conversation_scope,
-    message_signature,
-)
-from deepseek_cursor_proxy.server import DeepSeekProxyHandler, DeepSeekProxyServer
-from deepseek_cursor_proxy.trace import TraceWriter
-from deepseek_cursor_proxy.transform import (
-    RECOVERY_NOTICE_CONTENT,
-    reasoning_cache_namespace,
-)
-
-
-TOOL_REASONING = "I need the current date before answering."
-FINAL_REASONING = "The tool result gives the date, so I can answer."
-FINAL_CONTENT = "Final answer after using the tool."
-
-
-def post_json(
-    url: str, payload: dict, api_key: str = "sk-cursor-test"
-) -> tuple[int, dict]:
-    body = json.dumps(payload).encode("utf-8")
-    request = Request(
-        url,
-        data=body,
-        method="POST",
-        headers={
-            "Authorization": f"Bearer {api_key}",
-            "Content-Type": "application/json",
-        },
-    )
-    try:
-        response = urlopen(request, timeout=5)
-        with response:
-            return response.status, json.loads(response.read().decode("utf-8"))
-    except HTTPError as exc:
-        return exc.code, json.loads(exc.read().decode("utf-8"))
-
-
-class FakeDeepSeekHandler(BaseHTTPRequestHandler):
-    requests: list[dict] = []
-    auth_headers: list[str] = []
-
-    def log_message(self, fmt: str, *args: object) -> None:
-        return
-
-    def do_POST(self) -> None:
-        length = int(self.headers.get("Content-Length") or 0)
-        payload = json.loads(self.rfile.read(length).decode("utf-8"))
-        self.__class__.requests.append(payload)
-        self.__class__.auth_headers.append(self.headers.get("Authorization", ""))
-
-        for index, message in enumerate(payload.get("messages", [])):
-            if not isinstance(message, dict) or message.get("role") != "assistant":
-                continue
-            requires_reasoning = (
-                bool(message.get("tool_calls"))
-                or message.get("content") == FINAL_CONTENT
-            )
-            if requires_reasoning and not message.get("reasoning_content"):
-                self._send_json(
-                    400,
-                    {
-                        "error": {
-                            "message": "The reasoning_content in the thinking mode must be passed back to the API.",
-                            "type": "invalid_request_error",
-                            "param": None,
-                            "code": "invalid_request_error",
-                            "missing_index": index,
-                        }
-                    },
-                )
-                return
-
-        call_number = len(self.__class__.requests)
-        if call_number == 1:
-            self._send_json(200, tool_call_response())
-        elif call_number == 2:
-            self._send_json(200, final_response())
-        else:
-            self._send_json(200, plain_response("Follow-up accepted."))
-
-    def _send_json(self, status: int, payload: dict) -> None:
-        body = json.dumps(payload).encode("utf-8")
-        self.send_response(status)
-        self.send_header("Content-Type", "application/json")
-        self.send_header("Content-Length", str(len(body)))
-        self.end_headers()
-        self.wfile.write(body)
-
-
-class InterleavedFakeDeepSeekHandler(BaseHTTPRequestHandler):
-    requests: list[dict] = []
-
-    def log_message(self, fmt: str, *args: object) -> None:
-        return
-
-    def do_POST(self) -> None:
-        length = int(self.headers.get("Content-Length") or 0)
-        payload = json.loads(self.rfile.read(length).decode("utf-8"))
-        self.__class__.requests.append(payload)
-
-        messages = payload.get("messages", [])
-        thread_name = thread_name_from_messages(messages)
-        if thread_name not in {"A", "B"}:
-            self._send_json(400, {"error": {"message": "unknown test thread"}})
-            return
-
-        expected_tool_reasoning = f"tool reasoning for thread {thread_name}"
-        expected_final_reasoning = f"final reasoning for thread {thread_name}"
-        final_content = f"final answer for thread {thread_name}"
-
-        for index, message in enumerate(messages):
-            if not isinstance(message, dict) or message.get("role") != "assistant":
-                continue
-            if (
-                message.get("tool_calls")
-                and message.get("reasoning_content") != expected_tool_reasoning
-            ):
-                self._send_missing_reasoning(index)
-                return
-            if (
-                message.get("content") == final_content
-                and message.get("reasoning_content") != expected_final_reasoning
-            ):
-                self._send_missing_reasoning(index)
-                return
-
-        if len(messages) == 1:
-            self._send_json(200, interleaved_tool_call_response(thread_name))
-        elif len(messages) == 3:
-            self._send_json(200, interleaved_final_response(thread_name))
-        else:
-            self._send_json(
-                200, plain_response(f"follow-up accepted for thread {thread_name}")
-            )
-
-    def _send_missing_reasoning(self, index: int) -> None:
-        self._send_json(
-            400,
-            {
-                "error": {
-                    "message": "The reasoning_content in the thinking mode must be passed back to the API.",
-                    "type": "invalid_request_error",
-                    "param": None,
-                    "code": "invalid_request_error",
-                    "missing_index": index,
-                }
-            },
-        )
-
-    def _send_json(self, status: int, payload: dict) -> None:
-        body = json.dumps(payload).encode("utf-8")
-        self.send_response(status)
-        self.send_header("Content-Type", "application/json")
-        self.send_header("Content-Length", str(len(body)))
-        self.end_headers()
-        self.wfile.write(body)
-
-
-class SlowAfterDoneStreamingDeepSeekHandler(BaseHTTPRequestHandler):
-    def log_message(self, fmt: str, *args: object) -> None:
-        return
-
-    def do_POST(self) -> None:
-        self.send_response(200)
-        self.send_header("Content-Type", "text/event-stream")
-        self.end_headers()
-        chunk = {
-            "id": "chatcmpl-stream",
-            "object": "chat.completion.chunk",
-            "model": "deepseek-v4-pro",
-            "choices": [
-                {
-                    "index": 0,
-                    "delta": {"role": "assistant", "content": "streamed"},
-                    "finish_reason": None,
-                }
-            ],
-        }
-        self.wfile.write(f"data: {json.dumps(chunk)}\n\n".encode("utf-8"))
-        self.wfile.write(b"data: [DONE]\n\n")
-        self.wfile.flush()
-        time.sleep(2)
-
-
-class ReasoningStreamingDeepSeekHandler(BaseHTTPRequestHandler):
-    def log_message(self, fmt: str, *args: object) -> None:
-        return
-
-    def do_POST(self) -> None:
-        self.send_response(200)
-        self.send_header("Content-Type", "text/event-stream")
-        self.end_headers()
-        chunks = [
-            {
-                "id": "chatcmpl-stream",
-                "object": "chat.completion.chunk",
-                "created": 1,
-                "model": "deepseek-v4-pro",
-                "choices": [
-                    {
-                        "index": 0,
-                        "delta": {"role": "assistant", "reasoning_content": "Need "},
-                        "finish_reason": None,
-                    }
-                ],
-            },
-            {
-                "id": "chatcmpl-stream",
-                "object": "chat.completion.chunk",
-                "created": 1,
-                "model": "deepseek-v4-pro",
-                "choices": [
-                    {
-                        "index": 0,
-                        "delta": {"reasoning_content": "context."},
-                        "finish_reason": None,
-                    }
-                ],
-            },
-            {
-                "id": "chatcmpl-stream",
-                "object": "chat.completion.chunk",
-                "created": 1,
-                "model": "deepseek-v4-pro",
-                "choices": [
-                    {
-                        "index": 0,
-                        "delta": {"content": FINAL_CONTENT},
-                        "finish_reason": None,
-                    }
-                ],
-            },
-            {
-                "id": "chatcmpl-stream",
-                "object": "chat.completion.chunk",
-                "created": 1,
-                "model": "deepseek-v4-pro",
-                "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
-                "usage": {
-                    "prompt_tokens": 10,
-                    "completion_tokens": 5,
-                    "total_tokens": 15,
-                    "completion_tokens_details": {"reasoning_tokens": 3},
-                },
-            },
-        ]
-        for chunk in chunks:
-            self.wfile.write(f"data: {json.dumps(chunk)}\n\n".encode("utf-8"))
-        self.wfile.write(b"data: [DONE]\n\n")
-        self.wfile.flush()
-
-
-class ToolCallStreamingBeforeDoneDeepSeekHandler(BaseHTTPRequestHandler):
-    requests: list[dict] = []
-
-    def log_message(self, fmt: str, *args: object) -> None:
-        return
-
-    def do_POST(self) -> None:
-        length = int(self.headers.get("Content-Length") or 0)
-        payload = json.loads(self.rfile.read(length).decode("utf-8"))
-        self.__class__.requests.append(payload)
-
-        if payload.get("stream"):
-            self.send_response(200)
-            self.send_header("Content-Type", "text/event-stream")
-            self.end_headers()
-            chunks = [
-                {
-                    "id": "chatcmpl-stream-tool",
-                    "object": "chat.completion.chunk",
-                    "created": 1,
-                    "model": "deepseek-v4-pro",
-                    "choices": [
-                        {
-                            "index": 0,
-                            "delta": {
-                                "role": "assistant",
-                                "reasoning_content": "Streamed tool reasoning.",
-                                "tool_calls": [
-                                    {
-                                        "index": 0,
-                                        "id": "call_stream_tool",
-                                        "type": "function",
-                                        "function": {
-                                            "name": "lookup",
-                                            "arguments": "{}",
-                                        },
-                                    }
-                                ],
-                            },
-                            "finish_reason": None,
-                        }
-                    ],
-                },
-                {
-                    "id": "chatcmpl-stream-tool",
-                    "object": "chat.completion.chunk",
-                    "created": 1,
-                    "model": "deepseek-v4-pro",
-                    "choices": [
-                        {"index": 0, "delta": {}, "finish_reason": "tool_calls"}
-                    ],
-                },
-            ]
-            for chunk in chunks:
-                self.wfile.write(f"data: {json.dumps(chunk)}\n\n".encode("utf-8"))
-                self.wfile.flush()
-                if chunk["choices"][0]["finish_reason"] is None:
-                    time.sleep(0.2)
-            time.sleep(1)
-            self.wfile.write(b"data: [DONE]\n\n")
-            self.wfile.flush()
-            return
-
-        messages = payload.get("messages", [])
-        if (
-            len(messages) >= 2
-            and messages[1].get("reasoning_content") == "Streamed tool reasoning."
-        ):
-            self._send_json(200, plain_response("stream follow-up accepted"))
-            return
-        self._send_json(400, {"error": {"message": "missing streamed reasoning"}})
-
-    def _send_json(self, status: int, payload: dict) -> None:
-        body = json.dumps(payload).encode("utf-8")
-        self.send_response(status)
-        self.send_header("Content-Type", "application/json")
-        self.send_header("Content-Length", str(len(body)))
-        self.end_headers()
-        self.wfile.write(body)
-
-
-def tool_call_response() -> dict:
-    return {
-        "id": "chatcmpl-tool",
-        "object": "chat.completion",
-        "created": 1,
-        "model": "deepseek-v4-pro",
-        "choices": [
-            {
-                "index": 0,
-                "finish_reason": "tool_calls",
-                "message": {
-                    "role": "assistant",
-                    "content": "",
-                    "reasoning_content": TOOL_REASONING,
-                    "tool_calls": [
-                        {
-                            "id": "call_date",
-                            "type": "function",
-                            "function": {"name": "get_date", "arguments": "{}"},
-                        }
-                    ],
-                },
-            }
-        ],
-        "usage": {
-            "prompt_tokens": 20,
-            "completion_tokens": 5,
-            "total_tokens": 25,
-            "prompt_cache_hit_tokens": 12,
-            "prompt_cache_miss_tokens": 8,
-            "completion_tokens_details": {"reasoning_tokens": 3},
-        },
-    }
-
-
-def interleaved_tool_call_response(thread_name: str) -> dict:
-    return {
-        "id": f"chatcmpl-tool-{thread_name}",
-        "object": "chat.completion",
-        "created": 1,
-        "model": "deepseek-v4-pro",
-        "choices": [
-            {
-                "index": 0,
-                "finish_reason": "tool_calls",
-                "message": {
-                    "role": "assistant",
-                    "content": "",
-                    "reasoning_content": f"tool reasoning for thread {thread_name}",
-                    "tool_calls": [
-                        {
-                            "id": "call_reused",
-                            "type": "function",
-                            "function": {"name": "lookup", "arguments": "{}"},
-                        }
-                    ],
-                },
-            }
-        ],
-    }
-
-
-def interleaved_final_response(thread_name: str) -> dict:
-    return {
-        "id": f"chatcmpl-final-{thread_name}",
-        "object": "chat.completion",
-        "created": 2,
-        "model": "deepseek-v4-pro",
-        "choices": [
-            {
-                "index": 0,
-                "finish_reason": "stop",
-                "message": {
-                    "role": "assistant",
-                    "content": f"final answer for thread {thread_name}",
-                    "reasoning_content": f"final reasoning for thread {thread_name}",
-                },
-            }
-        ],
-    }
-
-
-def final_response() -> dict:
-    return {
-        "id": "chatcmpl-final",
-        "object": "chat.completion",
-        "created": 2,
-        "model": "deepseek-v4-pro",
-        "choices": [
-            {
-                "index": 0,
-                "finish_reason": "stop",
-                "message": {
-                    "role": "assistant",
-                    "content": FINAL_CONTENT,
-                    "reasoning_content": FINAL_REASONING,
-                },
-            }
-        ],
-    }
-
-
-def plain_response(content: str) -> dict:
-    return {
-        "id": "chatcmpl-plain",
-        "object": "chat.completion",
-        "created": 3,
-        "model": "deepseek-v4-pro",
-        "choices": [
-            {
-                "index": 0,
-                "finish_reason": "stop",
-                "message": {"role": "assistant", "content": content},
-            }
-        ],
-    }
-
-
-class ServerFixture:
-    def __init__(self, server: ThreadingHTTPServer) -> None:
-        self.server = server
-        self.thread = threading.Thread(target=server.serve_forever, daemon=True)
-
-    @property
-    def url(self) -> str:
-        host, port = self.server.server_address
-        return f"http://{host}:{port}"
-
-    def start(self) -> "ServerFixture":
-        self.thread.start()
-        return self
-
-    def close(self) -> None:
-        self.server.shutdown()
-        self.server.server_close()
-        self.thread.join(timeout=5)
-
-
-def read_single_trace(session_dir: Path) -> dict:
-    deadline = time.monotonic() + 2
-    trace_files = sorted(session_dir.glob("request-*.json"))
-    while not trace_files and time.monotonic() < deadline:
-        time.sleep(0.01)
-        trace_files = sorted(session_dir.glob("request-*.json"))
-    if len(trace_files) != 1:
-        raise AssertionError(f"expected one trace file, found {trace_files}")
-    return json.loads(trace_files[0].read_text(encoding="utf-8"))
-
-
-class ProxyEndToEndTests(unittest.TestCase):
-    def setUp(self) -> None:
-        FakeDeepSeekHandler.requests = []
-        FakeDeepSeekHandler.auth_headers = []
-        self.upstream = ServerFixture(
-            ThreadingHTTPServer(("127.0.0.1", 0), FakeDeepSeekHandler)
-        ).start()
-        self.store = ReasoningStore(":memory:")
-        proxy = DeepSeekProxyServer(("127.0.0.1", 0), DeepSeekProxyHandler)
-        proxy.config = ProxyConfig(
-            upstream_base_url=self.upstream.url,
-            upstream_model="deepseek-v4-pro",
-        )
-        proxy.reasoning_store = self.store
-        self.proxy = ServerFixture(proxy).start()
-
-    def tearDown(self) -> None:
-        self.proxy.close()
-        self.upstream.close()
-        self.store.close()
-
-    def test_missing_reasoning_reproduces_upstream_error_without_proxy_repair(
-        self,
-    ) -> None:
-        status, payload = post_json(
-            f"{self.upstream.url}/chat/completions",
-            second_cursor_request(include_reasoning=False),
-        )
-
-        self.assertEqual(status, 400)
-        self.assertIn("reasoning_content", payload["error"]["message"])
-
-    def test_proxy_repairs_cursor_style_multi_round_tool_call_history(self) -> None:
-        status, first = post_json(
-            f"{self.proxy.url}/v1/chat/completions",
-            first_cursor_request(),
-        )
-        self.assertEqual(status, 200)
-        tool_call_message = first["choices"][0]["message"]
-        self.assertEqual(tool_call_message["reasoning_content"], TOOL_REASONING)
-
-        status, second = post_json(
-            f"{self.proxy.url}/v1/chat/completions",
-            second_cursor_request(include_reasoning=False),
-        )
-        self.assertEqual(status, 200)
-        self.assertEqual(second["choices"][0]["message"]["content"], FINAL_CONTENT)
-
-        status, third = post_json(
-            f"{self.proxy.url}/v1/chat/completions",
-            third_cursor_request_missing_all_reasoning(),
-        )
-        self.assertEqual(status, 200)
-        self.assertEqual(
-            third["choices"][0]["message"]["content"], "Follow-up accepted."
-        )
-
-        second_upstream_messages = FakeDeepSeekHandler.requests[1]["messages"]
-        self.assertEqual(
-            second_upstream_messages[1]["reasoning_content"], TOOL_REASONING
-        )
-
-        third_upstream_messages = FakeDeepSeekHandler.requests[2]["messages"]
-        self.assertEqual(
-            third_upstream_messages[1]["reasoning_content"], TOOL_REASONING
-        )
-        self.assertEqual(
-            third_upstream_messages[3]["reasoning_content"], FINAL_REASONING
-        )
-
-    def test_proxy_forwards_cursor_bearer_token_to_deepseek(self) -> None:
-        status, _ = post_json(
-            f"{self.proxy.url}/v1/chat/completions",
-            first_cursor_request(),
-            api_key="sk-from-cursor",
-        )
-
-        self.assertEqual(status, 200)
-        self.assertEqual(FakeDeepSeekHandler.auth_headers[0], "Bearer sk-from-cursor")
-
-    def test_normal_mode_logs_safe_request_progress_without_bodies(self) -> None:
-        with self.assertLogs("deepseek_cursor_proxy", level="INFO") as captured:
-            status, _ = post_json(
-                f"{self.proxy.url}/v1/chat/completions",
-                first_cursor_request(),
-                api_key="sk-from-cursor",
-            )
-
-        output = "\n".join(captured.output)
-        stage_records = [
-            record
-            for record in captured.output
-            if any(
-                marker in record
-                for marker in ("┌ cursor", "├ context", "├ send", "└ stats")
-            )
-        ]
-        self.assertEqual(status, 200)
-        self.assertEqual(len(stage_records), 4)
-        self.assertTrue(all("\n" not in record for record in stage_records))
-        self.assertIn(
-            "┌ cursor  model=deepseek-v4-pro messages=1 tools=1",
-            output,
-        )
-        self.assertIn(
-            "├ context filled=0 missing=0 recovered=0 dropped=0 status=ok",
-            output,
-        )
-        self.assertIn(
-            "├ send    user_msgs=1 messages=1 tools=1 reasoning_content=0",
-            output,
-        )
-        self.assertIn(
-            "└ stats   prompt=20 output=5 reasoning=3 cache_hit=60.0%",
-            output,
-        )
-        self.assertNotIn("What is tomorrow's date?", output)
-        self.assertNotIn("sk-from-cursor", output)
-
-    def test_verbose_mode_logs_metadata_and_bodies_without_api_key(self) -> None:
-        self.proxy.server.config = replace(self.proxy.server.config, verbose=True)
-
-        with self.assertLogs("deepseek_cursor_proxy", level="INFO") as captured:
-            status, _ = post_json(
-                f"{self.proxy.url}/v1/chat/completions",
-                first_cursor_request(),
-                api_key="sk-from-cursor",
-            )
-
-        output = "\n".join(captured.output)
-        self.assertEqual(status, 200)
-        self.assertIn("incoming POST /v1/chat/completions", output)
-        self.assertIn("upstream request metadata", output)
-        self.assertIn("cursor request body", output)
-        self.assertIn("upstream request body", output)
-        self.assertIn("What is tomorrow's date?", output)
-        self.assertNotIn("sk-from-cursor", output)
-
-    def test_trace_captures_full_non_streaming_replay_without_api_key(self) -> None:
-        with TemporaryDirectory() as temp_dir:
-            writer = TraceWriter(temp_dir)
-            self.proxy.server.trace_writer = writer
-
-            status, payload = post_json(
-                f"{self.proxy.url}/v1/chat/completions",
-                first_cursor_request(),
-                api_key="sk-from-cursor",
-            )
-
-            trace = read_single_trace(writer.session_dir)
-            serialized = json.dumps(trace)
-
-        self.assertEqual(status, 200)
-        self.assertEqual(
-            payload["choices"][0]["message"]["tool_calls"][0]["id"], "call_date"
-        )
-        self.assertEqual(trace["completion"]["status"], "completed")
-        self.assertEqual(
-            trace["request"]["body"]["messages"][0]["content"],
-            "What is tomorrow's date?",
-        )
-        self.assertEqual(
-            trace["transform"]["upstream_request_body"]["model"],
-            "deepseek-v4-pro",
-        )
-        self.assertEqual(
-            trace["upstream"]["response"]["body"]["json"]["choices"][0]["message"][
-                "reasoning_content"
-            ],
-            TOOL_REASONING,
-        )
-        self.assertEqual(
-            trace["cursor_response"]["body"]["json"]["choices"][0]["message"][
-                "reasoning_content"
-            ],
-            TOOL_REASONING,
-        )
-        self.assertNotIn("sk-from-cursor", serialized)
-
-    def test_proxy_rejects_missing_cursor_bearer_token(self) -> None:
-        request = Request(
-            f"{self.proxy.url}/v1/chat/completions",
-            data=json.dumps(first_cursor_request()).encode("utf-8"),
-            method="POST",
-            headers={"Content-Type": "application/json"},
-        )
-
-        with self.assertRaises(HTTPError) as caught:
-            urlopen(request, timeout=5)
-
-        self.assertEqual(caught.exception.code, 401)
-        self.assertEqual(FakeDeepSeekHandler.requests, [])
-
-    def test_proxy_rejects_oversized_request_body(self) -> None:
-        self.proxy.server.config = replace(
-            self.proxy.server.config, max_request_body_bytes=10
-        )
-
-        status, payload = post_json(
-            f"{self.proxy.url}/v1/chat/completions",
-            first_cursor_request(),
-        )
-
-        self.assertEqual(status, 413)
-        self.assertIn("too large", payload["error"]["message"])
-        self.assertEqual(FakeDeepSeekHandler.requests, [])
-
-    def test_proxy_rejects_uncached_cursor_tool_history_in_strict_mode(
-        self,
-    ) -> None:
-        self.proxy.server.config = replace(
-            self.proxy.server.config,
-            missing_reasoning_strategy="reject",
-        )
-
-        status, payload = post_json(
-            f"{self.proxy.url}/v1/chat/completions",
-            second_cursor_request(include_reasoning=False),
-        )
-
-        self.assertEqual(status, 409)
-        self.assertEqual(payload["error"]["missing_reasoning_messages"], 1)
-        self.assertIn("1 assistant message", payload["error"]["message"])
-        self.assertIn("strict missing-reasoning mode", payload["error"]["message"])
-        self.assertIn(
-            "--missing-reasoning-strategy recover", payload["error"]["message"]
-        )
-        self.assertEqual(FakeDeepSeekHandler.requests, [])
-
-    def test_proxy_recovers_uncached_cursor_tool_history(self) -> None:
-        with self.assertLogs("deepseek_cursor_proxy", level="INFO") as captured:
-            status, payload = post_json(
-                f"{self.proxy.url}/v1/chat/completions",
-                third_cursor_request_missing_all_reasoning(),
-            )
-
-        self.assertEqual(status, 200)
-        self.assertTrue(
-            payload["choices"][0]["message"]["content"].startswith(
-                RECOVERY_NOTICE_CONTENT
-            )
-        )
-        self.assertEqual(
-            [
-                message["role"]
-                for message in FakeDeepSeekHandler.requests[0]["messages"]
-            ],
-            ["system", "user"],
-        )
-        self.assertIn(
-            "recovered this request",
-            FakeDeepSeekHandler.requests[0]["messages"][0]["content"],
-        )
-        self.assertEqual(
-            FakeDeepSeekHandler.requests[0]["messages"][1],
-            {"role": "user", "content": "Thanks, now continue."},
-        )
-        self.assertIn(
-            "status=recovered",
-            "\n".join(captured.output),
-        )
-        self.assertFalse(
-            any(record.startswith("WARNING:") for record in captured.output)
-        )
-
-    def test_trace_captures_recovery_diagnostics(self) -> None:
-        with TemporaryDirectory() as temp_dir:
-            writer = TraceWriter(temp_dir)
-            self.proxy.server.trace_writer = writer
-
-            status, _ = post_json(
-                f"{self.proxy.url}/v1/chat/completions",
-                third_cursor_request_missing_all_reasoning(),
-            )
-
-            trace = read_single_trace(writer.session_dir)
-
-        self.assertEqual(status, 200)
-        self.assertEqual(trace["transform"]["recovered_reasoning_messages"], 2)
-        self.assertEqual(
-            trace["transform"]["recovery_steps"][0]["strategy"],
-            "latest_user",
-        )
-        missing_diagnostics = [
-            item
-            for item in trace["transform"]["reasoning_diagnostics"]
-            if item["missing"]
-        ]
-        self.assertGreaterEqual(len(missing_diagnostics), 2)
-        self.assertIn("lookup_keys", missing_diagnostics[0])
-
-    def test_proxy_keeps_deepseek_context_after_recovery_boundary(self) -> None:
-        status, first = post_json(
-            f"{self.proxy.url}/v1/chat/completions",
-            third_cursor_request_missing_all_reasoning(),
-        )
-        self.assertEqual(status, 200)
-
-        recovered_assistant = dict(first["choices"][0]["message"])
-        recovered_assistant.pop("reasoning_content", None)
-        payload = third_cursor_request_missing_all_reasoning()
-        payload["messages"].append(recovered_assistant)
-        payload["messages"].append(
-            {
-                "role": "tool",
-                "tool_call_id": "call_date",
-                "content": "2026-04-24",
-            }
-        )
-
-        status, second = post_json(f"{self.proxy.url}/v1/chat/completions", payload)
-
-        self.assertEqual(status, 200)
-        self.assertFalse(
-            second["choices"][0]["message"]["content"].startswith(
-                RECOVERY_NOTICE_CONTENT
-            )
-        )
-        second_upstream_messages = FakeDeepSeekHandler.requests[1]["messages"]
-        self.assertEqual(
-            [message["role"] for message in second_upstream_messages],
-            ["system", "user", "assistant", "tool"],
-        )
-        self.assertEqual(
-            second_upstream_messages[2]["reasoning_content"], TOOL_REASONING
-        )
-
-
-class InterleavedConversationTests(unittest.TestCase):
-    def setUp(self) -> None:
-        InterleavedFakeDeepSeekHandler.requests = []
-        self.upstream = ServerFixture(
-            ThreadingHTTPServer(("127.0.0.1", 0), InterleavedFakeDeepSeekHandler)
-        ).start()
-        self.store = ReasoningStore(":memory:")
-        proxy = DeepSeekProxyServer(("127.0.0.1", 0), DeepSeekProxyHandler)
-        proxy.config = ProxyConfig(
-            upstream_base_url=self.upstream.url,
-            upstream_model="deepseek-v4-pro",
-        )
-        proxy.reasoning_store = self.store
-        self.proxy = ServerFixture(proxy).start()
-
-    def tearDown(self) -> None:
-        self.proxy.close()
-        self.upstream.close()
-        self.store.close()
-
-    def test_one_proxy_repairs_two_interleaved_cursor_threads(self) -> None:
-        status, first_a = post_json(
-            f"{self.proxy.url}/v1/chat/completions", interleaved_first_request("A")
-        )
-        self.assertEqual(status, 200)
-
-        status, first_b = post_json(
-            f"{self.proxy.url}/v1/chat/completions", interleaved_first_request("B")
-        )
-        self.assertEqual(status, 200)
-
-        status, final_b = post_json(
-            f"{self.proxy.url}/v1/chat/completions",
-            interleaved_second_request("B", first_b["choices"][0]["message"]),
-        )
-        self.assertEqual(status, 200)
-
-        status, final_a = post_json(
-            f"{self.proxy.url}/v1/chat/completions",
-            interleaved_second_request("A", first_a["choices"][0]["message"]),
-        )
-        self.assertEqual(status, 200)
-
-        status, followup_a = post_json(
-            f"{self.proxy.url}/v1/chat/completions",
-            interleaved_third_request(
-                "A", first_a["choices"][0]["message"], final_a["choices"][0]["message"]
-            ),
-        )
-        self.assertEqual(status, 200)
-        self.assertEqual(
-            followup_a["choices"][0]["message"]["content"],
-            "follow-up accepted for thread A",
-        )
-
-        status, followup_b = post_json(
-            f"{self.proxy.url}/v1/chat/completions",
-            interleaved_third_request(
-                "B", first_b["choices"][0]["message"], final_b["choices"][0]["message"]
-            ),
-        )
-        self.assertEqual(status, 200)
-        self.assertEqual(
-            followup_b["choices"][0]["message"]["content"],
-            "follow-up accepted for thread B",
-        )
-
-        final_b_upstream_messages = InterleavedFakeDeepSeekHandler.requests[2][
-            "messages"
-        ]
-        final_a_upstream_messages = InterleavedFakeDeepSeekHandler.requests[3][
-            "messages"
-        ]
-        followup_a_upstream_messages = InterleavedFakeDeepSeekHandler.requests[4][
-            "messages"
-        ]
-        followup_b_upstream_messages = InterleavedFakeDeepSeekHandler.requests[5][
-            "messages"
-        ]
-
-        self.assertEqual(
-            final_b_upstream_messages[1]["reasoning_content"],
-            "tool reasoning for thread B",
-        )
-        self.assertEqual(
-            final_a_upstream_messages[1]["reasoning_content"],
-            "tool reasoning for thread A",
-        )
-        self.assertEqual(
-            followup_a_upstream_messages[1]["reasoning_content"],
-            "tool reasoning for thread A",
-        )
-        self.assertEqual(
-            followup_a_upstream_messages[3]["reasoning_content"],
-            "final reasoning for thread A",
-        )
-        self.assertEqual(
-            followup_b_upstream_messages[1]["reasoning_content"],
-            "tool reasoning for thread B",
-        )
-        self.assertEqual(
-            followup_b_upstream_messages[3]["reasoning_content"],
-            "final reasoning for thread B",
-        )
-
-
-class StreamingProxyTests(unittest.TestCase):
-    def setUp(self) -> None:
-        self.upstream = ServerFixture(
-            ThreadingHTTPServer(("127.0.0.1", 0), SlowAfterDoneStreamingDeepSeekHandler)
-        ).start()
-        self.store = ReasoningStore(":memory:")
-        proxy = DeepSeekProxyServer(("127.0.0.1", 0), DeepSeekProxyHandler)
-        proxy.config = ProxyConfig(
-            upstream_base_url=self.upstream.url,
-            upstream_model="deepseek-v4-pro",
-        )
-        proxy.reasoning_store = self.store
-        self.proxy = ServerFixture(proxy).start()
-
-    def tearDown(self) -> None:
-        self.proxy.close()
-        self.upstream.close()
-        self.store.close()
-
-    def test_streaming_proxy_closes_after_done_even_if_upstream_stays_open(
-        self,
-    ) -> None:
-        request = Request(
-            f"{self.proxy.url}/v1/chat/completions",
-            data=json.dumps(
-                {
-                    "model": "deepseek-v4-pro",
-                    "stream": True,
-                    "messages": [{"role": "user", "content": "stream"}],
-                }
-            ).encode("utf-8"),
-            method="POST",
-            headers={
-                "Authorization": "Bearer sk-cursor-test",
-                "Content-Type": "application/json",
-            },
-        )
-
-        started = time.monotonic()
-        with urlopen(request, timeout=1) as response:
-            body = response.read().decode("utf-8")
-        elapsed = time.monotonic() - started
-
-        self.assertLess(elapsed, 1)
-        self.assertIn("data: [DONE]", body)
-
-
-class ReasoningStreamingProxyTests(unittest.TestCase):
-    def setUp(self) -> None:
-        self.upstream = ServerFixture(
-            ThreadingHTTPServer(("127.0.0.1", 0), ReasoningStreamingDeepSeekHandler)
-        ).start()
-        self.store = ReasoningStore(":memory:")
-        proxy = DeepSeekProxyServer(("127.0.0.1", 0), DeepSeekProxyHandler)
-        proxy.config = ProxyConfig(
-            upstream_base_url=self.upstream.url,
-            upstream_model="deepseek-v4-pro",
-        )
-        proxy.reasoning_store = self.store
-        self.proxy = ServerFixture(proxy).start()
-
-    def tearDown(self) -> None:
-        self.proxy.close()
-        self.upstream.close()
-        self.store.close()
-
-    def test_streaming_proxy_mirrors_reasoning_for_cursor_display(
-        self,
-    ) -> None:
-        request_messages = [{"role": "user", "content": "stream reasoning"}]
-        request = Request(
-            f"{self.proxy.url}/v1/chat/completions",
-            data=json.dumps(
-                {
-                    "model": "deepseek-v4-pro",
-                    "stream": True,
-                    "messages": request_messages,
-                }
-            ).encode("utf-8"),
-            method="POST",
-            headers={
-                "Authorization": "Bearer sk-cursor-test",
-                "Content-Type": "application/json",
-            },
-        )
-
-        with urlopen(request, timeout=2) as response:
-            body = response.read().decode("utf-8")
-
-        chunks = [
-            json.loads(line.removeprefix("data: "))
-            for line in body.splitlines()
-            if line.startswith("data: {")
-        ]
-        self.assertEqual(
-            chunks[0]["choices"][0]["delta"]["content"],
-            "<details>\n<summary>Thinking</summary>\n\nNeed ",
-        )
-        self.assertEqual(chunks[0]["choices"][0]["delta"]["reasoning_content"], "Need ")
-        self.assertEqual(chunks[1]["choices"][0]["delta"]["content"], "context.")
-        self.assertEqual(
-            chunks[2]["choices"][0]["delta"]["content"],
-            "\n</details>\n\n" + FINAL_CONTENT,
-        )
-
-        stored_message = {
-            "role": "assistant",
-            "content": FINAL_CONTENT,
-            "reasoning_content": "Need context.",
-        }
-        cache_namespace = reasoning_cache_namespace(
-            self.proxy.server.config,
-            "deepseek-v4-pro",
-            {"type": "enabled"},
-            "high",
-            "Bearer sk-cursor-test",
-        )
-        self.assertEqual(
-            self.store.get(
-                "scope:"
-                + conversation_scope(request_messages, cache_namespace)
-                + ":signature:"
-                + message_signature(stored_message)
-            ),
-            "Need context.",
-        )
-
-    def test_trace_captures_streaming_replay_chunks(self) -> None:
-        with TemporaryDirectory() as temp_dir:
-            writer = TraceWriter(temp_dir)
-            self.proxy.server.trace_writer = writer
-            request = Request(
-                f"{self.proxy.url}/v1/chat/completions",
-                data=json.dumps(
-                    {
-                        "model": "deepseek-v4-pro",
-                        "stream": True,
-                        "messages": [{"role": "user", "content": "stream reasoning"}],
-                    }
-                ).encode("utf-8"),
-                method="POST",
-                headers={
-                    "Authorization": "Bearer sk-cursor-test",
-                    "Content-Type": "application/json",
-                },
-            )
-
-            with urlopen(request, timeout=2) as response:
-                response.read()
-
-            trace = read_single_trace(writer.session_dir)
-
-        self.assertEqual(trace["completion"]["status"], "completed")
-        self.assertIn(
-            "reasoning_content",
-            trace["upstream"]["stream"]["chunks"][0]["line"],
-        )
-        self.assertIn(
-            "<details>",
-            trace["cursor_response"]["stream"]["chunks"][0]["line"],
-        )
-        self.assertEqual(
-            trace["upstream"]["usage"]["completion_tokens_details"]["reasoning_tokens"],
-            3,
-        )
-
-    def test_streaming_recovery_notice_is_visible_in_cursor_content(self) -> None:
-        payload = third_cursor_request_missing_all_reasoning()
-        payload["stream"] = True
-        request = Request(
-            f"{self.proxy.url}/v1/chat/completions",
-            data=json.dumps(payload).encode("utf-8"),
-            method="POST",
-            headers={
-                "Authorization": "Bearer sk-cursor-test",
-                "Content-Type": "application/json",
-            },
-        )
-
-        with urlopen(request, timeout=2) as response:
-            body = response.read().decode("utf-8")
-
-        chunks = [
-            json.loads(line.removeprefix("data: "))
-            for line in body.splitlines()
-            if line.startswith("data: {")
-        ]
-        self.assertEqual(
-            chunks[2]["choices"][0]["delta"]["content"],
-            "\n</details>\n\n" + RECOVERY_NOTICE_CONTENT + FINAL_CONTENT,
-        )
-
-
-class StreamingToolRaceProxyTests(unittest.TestCase):
-    def setUp(self) -> None:
-        ToolCallStreamingBeforeDoneDeepSeekHandler.requests = []
-        self.upstream = ServerFixture(
-            ThreadingHTTPServer(
-                ("127.0.0.1", 0), ToolCallStreamingBeforeDoneDeepSeekHandler
-            )
-        ).start()
-        self.store = ReasoningStore(":memory:")
-        proxy = DeepSeekProxyServer(("127.0.0.1", 0), DeepSeekProxyHandler)
-        proxy.config = ProxyConfig(
-            upstream_base_url=self.upstream.url,
-            upstream_model="deepseek-v4-pro",
-        )
-        proxy.reasoning_store = self.store
-        self.proxy = ServerFixture(proxy).start()
-
-    def tearDown(self) -> None:
-        self.proxy.close()
-        self.upstream.close()
-        self.store.close()
-
-    def test_streaming_tool_reasoning_is_available_before_done(self) -> None:
-        request_messages = [{"role": "user", "content": "stream tool"}]
-        request = Request(
-            f"{self.proxy.url}/v1/chat/completions",
-            data=json.dumps(
-                {
-                    "model": "deepseek-v4-pro",
-                    "stream": True,
-                    "messages": request_messages,
-                    "tools": [
-                        {
-                            "type": "function",
-                            "function": {
-                                "name": "lookup",
-                                "parameters": {"type": "object", "properties": {}},
-                            },
-                        }
-                    ],
-                }
-            ).encode("utf-8"),
-            method="POST",
-            headers={
-                "Authorization": "Bearer sk-cursor-test",
-                "Content-Type": "application/json",
-            },
-        )
-
-        with urlopen(request, timeout=3) as response:
-            while True:
-                line = response.readline().decode("utf-8")
-                self.assertNotEqual(line, "")
-                if '"finish_reason":"tool_calls"' in line:
-                    break
-
-            status, payload = post_json(
-                f"{self.proxy.url}/v1/chat/completions",
-                {
-                    "model": "deepseek-v4-pro",
-                    "messages": [
-                        *request_messages,
-                        {
-                            "role": "assistant",
-                            "content": "",
-                            "tool_calls": [
-                                {
-                                    "id": "call_stream_tool",
-                                    "type": "function",
-                                    "function": {
-                                        "name": "lookup",
-                                        "arguments": "{}",
-                                    },
-                                }
-                            ],
-                        },
-                        {
-                            "role": "tool",
-                            "tool_call_id": "call_stream_tool",
-                            "content": "tool result",
-                        },
-                    ],
-                    "tools": [
-                        {
-                            "type": "function",
-                            "function": {
-                                "name": "lookup",
-                                "parameters": {"type": "object", "properties": {}},
-                            },
-                        }
-                    ],
-                },
-            )
-            response.read()
-
-        self.assertEqual(status, 200, payload)
-        self.assertEqual(
-            payload["choices"][0]["message"]["content"], "stream follow-up accepted"
-        )
-
-    def test_streaming_tool_reasoning_is_available_before_finish_reason(self) -> None:
-        request_messages = [{"role": "user", "content": "stream tool"}]
-        request = Request(
-            f"{self.proxy.url}/v1/chat/completions",
-            data=json.dumps(
-                {
-                    "model": "deepseek-v4-pro",
-                    "stream": True,
-                    "messages": request_messages,
-                    "tools": [
-                        {
-                            "type": "function",
-                            "function": {
-                                "name": "lookup",
-                                "parameters": {"type": "object", "properties": {}},
-                            },
-                        }
-                    ],
-                }
-            ).encode("utf-8"),
-            method="POST",
-            headers={
-                "Authorization": "Bearer sk-cursor-test",
-                "Content-Type": "application/json",
-            },
-        )
-
-        with urlopen(request, timeout=3) as response:
-            while True:
-                line = response.readline().decode("utf-8")
-                self.assertNotEqual(line, "")
-                if '"tool_calls"' in line:
-                    break
-
-            status, payload = post_json(
-                f"{self.proxy.url}/v1/chat/completions",
-                {
-                    "model": "deepseek-v4-pro",
-                    "messages": [
-                        *request_messages,
-                        {
-                            "role": "assistant",
-                            "content": "",
-                            "tool_calls": [
-                                {
-                                    "id": "call_stream_tool",
-                                    "type": "function",
-                                    "function": {
-                                        "name": "lookup",
-                                        "arguments": "{}",
-                                    },
-                                }
-                            ],
-                        },
-                        {
-                            "role": "tool",
-                            "tool_call_id": "call_stream_tool",
-                            "content": "tool result",
-                        },
-                    ],
-                    "tools": [
-                        {
-                            "type": "function",
-                            "function": {
-                                "name": "lookup",
-                                "parameters": {"type": "object", "properties": {}},
-                            },
-                        }
-                    ],
-                },
-            )
-            response.read()
-
-        self.assertEqual(status, 200, payload)
-        self.assertEqual(
-            payload["choices"][0]["message"]["content"], "stream follow-up accepted"
-        )
-
-
-def first_cursor_request() -> dict:
-    return {
-        "model": "deepseek-v4-pro",
-        "messages": [{"role": "user", "content": "What is tomorrow's date?"}],
-        "tools": [
-            {
-                "type": "function",
-                "function": {
-                    "name": "get_date",
-                    "description": "Get the current date",
-                    "parameters": {"type": "object", "properties": {}},
-                },
-            }
-        ],
-    }
-
-
-def second_cursor_request(include_reasoning: bool) -> dict:
-    assistant_message = {
-        "role": "assistant",
-        "content": "",
-        "tool_calls": [
-            {
-                "id": "call_date",
-                "type": "function",
-                "function": {"name": "get_date", "arguments": "{}"},
-            }
-        ],
-    }
-    if include_reasoning:
-        assistant_message["reasoning_content"] = TOOL_REASONING
-    return {
-        "model": "deepseek-v4-pro",
-        "messages": [
-            {"role": "user", "content": "What is tomorrow's date?"},
-            assistant_message,
-            {"role": "tool", "tool_call_id": "call_date", "content": "2026-04-24"},
-        ],
-        "tools": first_cursor_request()["tools"],
-    }
-
-
-def third_cursor_request_missing_all_reasoning() -> dict:
-    payload = second_cursor_request(include_reasoning=False)
-    payload["messages"].append({"role": "assistant", "content": FINAL_CONTENT})
-    payload["messages"].append({"role": "user", "content": "Thanks, now continue."})
-    return payload
-
-
-def thread_name_from_messages(messages: list[dict]) -> str | None:
-    if not messages:
-        return None
-    content = str(messages[0].get("content") or "")
-    if "thread A" in content:
-        return "A"
-    if "thread B" in content:
-        return "B"
-    return None
-
-
-def interleaved_first_request(thread_name: str) -> dict:
-    return {
-        "model": "deepseek-v4-pro",
-        "messages": [{"role": "user", "content": f"Start thread {thread_name}."}],
-        "tools": [
-            {
-                "type": "function",
-                "function": {
-                    "name": "lookup",
-                    "description": "Return a value.",
-                    "parameters": {"type": "object", "properties": {}},
-                },
-            }
-        ],
-    }
-
-
-def interleaved_second_request(thread_name: str, assistant_message: dict) -> dict:
-    cursor_assistant = dict(assistant_message)
-    cursor_assistant.pop("reasoning_content", None)
-    return {
-        "model": "deepseek-v4-pro",
-        "messages": [
-            {"role": "user", "content": f"Start thread {thread_name}."},
-            cursor_assistant,
-            {
-                "role": "tool",
-                "tool_call_id": "call_reused",
-                "content": f"tool result for thread {thread_name}",
-            },
-        ],
-        "tools": interleaved_first_request(thread_name)["tools"],
-    }
-
-
-def interleaved_third_request(
-    thread_name: str, tool_assistant_message: dict, final_message: dict
-) -> dict:
-    payload = interleaved_second_request(thread_name, tool_assistant_message)
-    cursor_final = dict(final_message)
-    cursor_final.pop("reasoning_content", None)
-    payload["messages"].append(cursor_final)
-    payload["messages"].append(
-        {"role": "user", "content": f"Follow up in thread {thread_name}."}
-    )
-    return payload
-
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/tests/test_server.py b/tests/test_server.py
index 69dd7da..7788229 100644
--- a/tests/test_server.py
+++ b/tests/test_server.py
@@ -1,24 +1,45 @@
+"""Server boundary, CLI, and operational tests.
+
+Pure helper tests (gzip, summarize) and stub-handler tests (client
+disconnect) live near the top. The bottom of the file boots a real proxy +
+tiny upstream to exercise things that need the HTTP layer: bearer token
+forwarding, oversized body, missing-bearer rejection, logging modes, and
+streaming connection close.
+"""
+
 from __future__ import annotations
 
+from dataclasses import replace
+from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
 from io import BytesIO
 import gzip
 import json
 from pathlib import Path
+import threading
+import time
 from types import SimpleNamespace
 import unittest
 import zlib
+from urllib.error import HTTPError
+from urllib.request import Request, urlopen
 
 from deepseek_cursor_proxy.config import ProxyConfig
 from deepseek_cursor_proxy.reasoning_store import ReasoningStore
 from deepseek_cursor_proxy.server import (
     DeepSeekProxyHandler,
+    DeepSeekProxyServer,
     build_arg_parser,
     read_response_body,
     summarize_chat_payload,
 )
 
 
-class FakeResponse:
+# ---------------------------------------------------------------------------
+# Stubs for fast in-process tests of internal handler methods
+# ---------------------------------------------------------------------------
+
+
+class _FakeResponse:
     def __init__(self, body: bytes, encoding: str = "", status: int = 200) -> None:
         self._body = BytesIO(body)
         self.headers = {"Content-Encoding": encoding} if encoding else {}
@@ -28,7 +49,7 @@ class FakeResponse:
         return self._body.read()
 
 
-class FakeStreamingResponse:
+class _FakeStreamingResponse:
     status = 200
     headers = {"Content-Type": "text/event-stream"}
 
@@ -43,7 +64,7 @@ class FakeStreamingResponse:
         return self._lines.pop(0)
 
 
-class FailingStreamingResponse:
+class _FailingStreamingResponse:
     status = 200
     headers = {"Content-Type": "text/event-stream"}
 
@@ -51,7 +72,7 @@ class FailingStreamingResponse:
         raise OSError("record layer failure")
 
 
-class BrokenPipeWfile:
+class _BrokenPipeWfile:
     def write(self, body: bytes) -> None:
         raise BrokenPipeError("test disconnect")
 
@@ -59,10 +80,10 @@ class BrokenPipeWfile:
         raise BrokenPipeError("test disconnect")
 
 
-def make_proxy_handler(wfile: object) -> DeepSeekProxyHandler:
+def _make_handler_stub(wfile: object, **config: object) -> DeepSeekProxyHandler:
     handler = object.__new__(DeepSeekProxyHandler)
     handler.server = SimpleNamespace(
-        config=ProxyConfig(),
+        config=ProxyConfig(**config),
         reasoning_store=ReasoningStore(":memory:"),
     )
     handler.wfile = wfile
@@ -73,8 +94,13 @@ def make_proxy_handler(wfile: object) -> DeepSeekProxyHandler:
     return handler
 
 
-class ServerTests(unittest.TestCase):
-    def test_cli_boolean_overrides_have_on_and_off_forms(self) -> None:
+# ---------------------------------------------------------------------------
+# CLI / pure helpers
+# ---------------------------------------------------------------------------
+
+
+class CliAndHelperTests(unittest.TestCase):
+    def test_cli_boolean_flags_have_on_and_off_forms(self) -> None:
         args = build_arg_parser().parse_args(
             [
                 "--no-ngrok",
@@ -86,7 +112,6 @@ class ServerTests(unittest.TestCase):
                 "/tmp/dcp-traces",
             ]
         )
-
         self.assertFalse(args.ngrok)
         self.assertFalse(args.verbose)
         self.assertFalse(args.display_reasoning)
@@ -94,19 +119,17 @@ class ServerTests(unittest.TestCase):
         self.assertTrue(args.cors)
         self.assertEqual(args.trace_dir, Path("/tmp/dcp-traces"))
 
-    def test_read_response_body_handles_gzip(self) -> None:
-        body = gzip.compress(b'{"ok":true}')
-
-        self.assertEqual(read_response_body(FakeResponse(body, "gzip")), b'{"ok":true}')
-
-    def test_read_response_body_handles_deflate(self) -> None:
-        body = zlib.compress(b'{"ok":true}')
-
+    def test_read_response_body_decodes_gzip_and_deflate(self) -> None:
         self.assertEqual(
-            read_response_body(FakeResponse(body, "deflate")), b'{"ok":true}'
+            read_response_body(_FakeResponse(gzip.compress(b'{"ok":1}'), "gzip")),
+            b'{"ok":1}',
+        )
+        self.assertEqual(
+            read_response_body(_FakeResponse(zlib.compress(b'{"ok":1}'), "deflate")),
+            b'{"ok":1}',
         )
 
-    def test_summarize_chat_payload_does_not_include_message_content(self) -> None:
+    def test_summarize_chat_payload_omits_message_content(self) -> None:
         summary = summarize_chat_payload(
             {
                 "model": "deepseek-v4-pro",
@@ -116,18 +139,22 @@ class ServerTests(unittest.TestCase):
                 "tool_choice": "auto",
             }
         )
-
         self.assertIn("model='deepseek-v4-pro'", summary)
-        self.assertIn("stream=True", summary)
         self.assertIn("messages=1", summary)
-        self.assertIn("tools=1", summary)
         self.assertNotIn("secret prompt", summary)
 
+
+# ---------------------------------------------------------------------------
+# Client-disconnect / upstream-failure stubs (no real HTTP needed)
+# ---------------------------------------------------------------------------
+
+
+class HandlerStubTests(unittest.TestCase):
     def test_regular_response_handles_client_disconnect(self) -> None:
-        handler = make_proxy_handler(BrokenPipeWfile())
+        handler = _make_handler_stub(_BrokenPipeWfile())
         body = json.dumps(
             {
-                "id": "chatcmpl-test",
+                "id": "x",
                 "object": "chat.completion",
                 "model": "deepseek-v4-pro",
                 "choices": [
@@ -139,116 +166,324 @@ class ServerTests(unittest.TestCase):
                 ],
             }
         ).encode("utf-8")
-
         try:
             with self.assertLogs("deepseek_cursor_proxy", level="WARNING") as captured:
-                sent = handler._proxy_regular_response(
-                    FakeResponse(body),
+                result = handler._proxy_regular_response(
+                    _FakeResponse(body),
                     "deepseek-v4-pro",
                     [{"role": "user", "content": "hi"}],
-                    "cache-namespace",
+                    "ns",
                 )
         finally:
             handler.server.reasoning_store.close()
-
-        self.assertFalse(sent.sent)
+        self.assertFalse(result.sent)
         self.assertIn("sending upstream response body", "\n".join(captured.output))
 
     def test_streaming_response_stops_on_client_disconnect(self) -> None:
-        handler = make_proxy_handler(BrokenPipeWfile())
+        handler = _make_handler_stub(_BrokenPipeWfile())
         chunk = {
-            "id": "chatcmpl-stream",
+            "id": "stream",
             "model": "deepseek-v4-pro",
-            "choices": [
-                {
-                    "index": 0,
-                    "delta": {"role": "assistant", "content": "hello"},
-                }
-            ],
+            "choices": [{"index": 0, "delta": {"role": "assistant", "content": "hi"}}],
         }
-        response = FakeStreamingResponse(
+        response = _FakeStreamingResponse(
             [
                 f"data: {json.dumps(chunk)}\n\n".encode("utf-8"),
                 b"data: [DONE]\n\n",
             ]
         )
-
         try:
             with self.assertLogs("deepseek_cursor_proxy", level="WARNING") as captured:
-                sent = handler._proxy_streaming_response(
+                result = handler._proxy_streaming_response(
                     response,
                     "deepseek-v4-pro",
                     [{"role": "user", "content": "hi"}],
-                    "cache-namespace",
+                    "ns",
                 )
         finally:
             handler.server.reasoning_store.close()
-
-        self.assertFalse(sent.sent)
+        self.assertFalse(result.sent)
         self.assertEqual(response.readline_calls, 1)
         self.assertIn("sending streaming response chunk", "\n".join(captured.output))
 
     def test_streaming_response_handles_upstream_read_failure(self) -> None:
-        handler = make_proxy_handler(BytesIO())
-
+        handler = _make_handler_stub(BytesIO())
         try:
             with self.assertLogs("deepseek_cursor_proxy", level="WARNING") as captured:
-                sent = handler._proxy_streaming_response(
-                    FailingStreamingResponse(),
+                result = handler._proxy_streaming_response(
+                    _FailingStreamingResponse(),
                     "deepseek-v4-pro",
                     [{"role": "user", "content": "hi"}],
-                    "cache-namespace",
+                    "ns",
                 )
         finally:
             handler.server.reasoning_store.close()
-
-        self.assertFalse(sent.sent)
+        self.assertFalse(result.sent)
         self.assertIn(
-            "upstream streaming response read failed",
-            "\n".join(captured.output),
+            "upstream streaming response read failed", "\n".join(captured.output)
         )
 
-    def test_collapsible_reasoning_has_no_effect_when_display_is_disabled(
-        self,
-    ) -> None:
+    def test_collapsible_reasoning_no_effect_when_display_disabled(self) -> None:
         wfile = BytesIO()
-        handler = make_proxy_handler(wfile)
-        handler.server.config = ProxyConfig(
-            display_reasoning=False,
-            collapsible_reasoning=True,
+        handler = _make_handler_stub(
+            wfile, display_reasoning=False, collapsible_reasoning=True
         )
         chunk = {
-            "id": "chatcmpl-stream",
+            "id": "stream",
             "model": "deepseek-v4-pro",
-            "choices": [
-                {
-                    "index": 0,
-                    "delta": {"reasoning_content": "Need context."},
-                }
-            ],
+            "choices": [{"index": 0, "delta": {"reasoning_content": "Need context."}}],
         }
-        response = FakeStreamingResponse(
+        response = _FakeStreamingResponse(
             [
                 f"data: {json.dumps(chunk)}\n\n".encode("utf-8"),
                 b"data: [DONE]\n\n",
             ]
         )
-
         try:
-            sent = handler._proxy_streaming_response(
+            handler._proxy_streaming_response(
                 response,
                 "deepseek-v4-pro",
                 [{"role": "user", "content": "hi"}],
-                "cache-namespace",
+                "ns",
             )
         finally:
             handler.server.reasoning_store.close()
-
         body = wfile.getvalue().decode("utf-8")
-        self.assertTrue(sent.sent)
         self.assertIn("reasoning_content", body)
         self.assertNotIn("<details>", body)
-        self.assertNotIn("<think>", body)
+
+
+# ---------------------------------------------------------------------------
+# HTTP-level boundary tests: real proxy + tiny upstream
+# ---------------------------------------------------------------------------
+
+
+class _PlainFakeUpstream(BaseHTTPRequestHandler):
+    """Returns a fixed plain response and records every request."""
+
+    requests: list[dict[str, object]] = []
+    auth_headers: list[str] = []
+    delay_after_done: float = 0.0
+    response: dict[str, object] = {}
+
+    def log_message(self, fmt: str, *args: object) -> None:
+        return
+
+    def do_POST(self) -> None:
+        length = int(self.headers.get("Content-Length") or 0)
+        payload = json.loads(self.rfile.read(length).decode("utf-8"))
+        self.__class__.requests.append(payload)
+        self.__class__.auth_headers.append(self.headers.get("Authorization", ""))
+
+        if payload.get("stream"):
+            self.send_response(200)
+            self.send_header("Content-Type", "text/event-stream")
+            self.end_headers()
+            self.wfile.write(
+                b'data: {"choices":[{"index":0,"delta":{"content":"x"}}]}\n\n'
+            )
+            self.wfile.write(b"data: [DONE]\n\n")
+            self.wfile.flush()
+            if self.__class__.delay_after_done:
+                time.sleep(self.__class__.delay_after_done)
+            return
+
+        body = json.dumps(self.__class__.response).encode("utf-8")
+        self.send_response(200)
+        self.send_header("Content-Type", "application/json")
+        self.send_header("Content-Length", str(len(body)))
+        self.end_headers()
+        self.wfile.write(body)
+
+
+_BASE_RESPONSE: dict[str, object] = {
+    "id": "x",
+    "object": "chat.completion",
+    "created": 1,
+    "model": "deepseek-v4-pro",
+    "choices": [
+        {
+            "index": 0,
+            "finish_reason": "stop",
+            "message": {"role": "assistant", "content": "ok"},
+        }
+    ],
+    "usage": {
+        "prompt_tokens": 20,
+        "completion_tokens": 5,
+        "total_tokens": 25,
+        "prompt_cache_hit_tokens": 12,
+        "prompt_cache_miss_tokens": 8,
+        "completion_tokens_details": {"reasoning_tokens": 3},
+    },
+}
+
+
+class _Fixture:
+    def __init__(self, server: ThreadingHTTPServer) -> None:
+        self.server = server
+        self.thread = threading.Thread(target=server.serve_forever, daemon=True)
+        self.thread.start()
+
+    @property
+    def url(self) -> str:
+        host, port = self.server.server_address
+        return f"http://{host}:{port}"
+
+    def close(self) -> None:
+        self.server.shutdown()
+        self.server.server_close()
+        self.thread.join(timeout=5)
+
+
+def _post(url: str, payload: dict, api_key: str = "sk-test") -> tuple[int, dict]:
+    request = Request(
+        url,
+        data=json.dumps(payload).encode("utf-8"),
+        method="POST",
+        headers={
+            "Authorization": f"Bearer {api_key}",
+            "Content-Type": "application/json",
+        },
+    )
+    try:
+        with urlopen(request, timeout=5) as response:
+            return response.status, json.loads(response.read().decode("utf-8"))
+    except HTTPError as exc:
+        return exc.code, json.loads(exc.read().decode("utf-8"))
+
+
+class HttpBoundaryTests(unittest.TestCase):
+    """Real-HTTP tests that don't fit the protocol suite: things the proxy
+    must do at the HTTP boundary regardless of what DeepSeek answers."""
+
+    def setUp(self) -> None:
+        _PlainFakeUpstream.requests = []
+        _PlainFakeUpstream.auth_headers = []
+        _PlainFakeUpstream.delay_after_done = 0.0
+        _PlainFakeUpstream.response = dict(_BASE_RESPONSE)
+        self.upstream = _Fixture(
+            ThreadingHTTPServer(("127.0.0.1", 0), _PlainFakeUpstream)
+        )
+        self.store = ReasoningStore(":memory:")
+        proxy = DeepSeekProxyServer(("127.0.0.1", 0), DeepSeekProxyHandler)
+        proxy.config = ProxyConfig(
+            upstream_base_url=self.upstream.url,
+            upstream_model="deepseek-v4-pro",
+            ngrok=False,
+        )
+        proxy.reasoning_store = self.store
+        self.proxy = _Fixture(proxy)
+
+    def tearDown(self) -> None:
+        self.proxy.close()
+        self.upstream.close()
+        self.store.close()
+
+    def _request(self) -> dict:
+        return {
+            "model": "deepseek-v4-pro",
+            "messages": [{"role": "user", "content": "hi"}],
+        }
+
+    def test_rejects_missing_bearer_token(self) -> None:
+        request = Request(
+            f"{self.proxy.url}/v1/chat/completions",
+            data=json.dumps(self._request()).encode("utf-8"),
+            method="POST",
+            headers={"Content-Type": "application/json"},
+        )
+        with self.assertRaises(HTTPError) as caught:
+            urlopen(request, timeout=5)
+        self.assertEqual(caught.exception.code, 401)
+        self.assertEqual(_PlainFakeUpstream.requests, [])
+
+    def test_rejects_oversized_request_body(self) -> None:
+        self.proxy.server.config = replace(
+            self.proxy.server.config, max_request_body_bytes=10
+        )
+        status, payload = _post(
+            f"{self.proxy.url}/v1/chat/completions", self._request()
+        )
+        self.assertEqual(status, 413)
+        self.assertIn("too large", payload["error"]["message"])
+        self.assertEqual(_PlainFakeUpstream.requests, [])
+
+    def test_forwards_bearer_token_to_upstream(self) -> None:
+        status, _ = _post(
+            f"{self.proxy.url}/v1/chat/completions",
+            self._request(),
+            api_key="sk-from-cursor",
+        )
+        self.assertEqual(status, 200)
+        self.assertEqual(_PlainFakeUpstream.auth_headers[0], "Bearer sk-from-cursor")
+
+    def test_streaming_response_closes_after_done_when_upstream_lingers(
+        self,
+    ) -> None:
+        """Cursor relies on the proxy ending the SSE stream at [DONE], even
+        if the upstream socket stays open."""
+        _PlainFakeUpstream.delay_after_done = 2.0
+        request = Request(
+            f"{self.proxy.url}/v1/chat/completions",
+            data=json.dumps(
+                {
+                    "model": "deepseek-v4-pro",
+                    "stream": True,
+                    "messages": [{"role": "user", "content": "stream"}],
+                }
+            ).encode("utf-8"),
+            method="POST",
+            headers={
+                "Authorization": "Bearer sk-test",
+                "Content-Type": "application/json",
+            },
+        )
+        started = time.monotonic()
+        with urlopen(request, timeout=1) as response:
+            body = response.read().decode("utf-8")
+        self.assertLess(time.monotonic() - started, 1.0)
+        self.assertIn("data: [DONE]", body)
+
+    def test_normal_logging_summarizes_without_bodies_or_keys(self) -> None:
+        with self.assertLogs("deepseek_cursor_proxy", level="INFO") as captured:
+            status, _ = _post(
+                f"{self.proxy.url}/v1/chat/completions",
+                self._request(),
+                api_key="sk-from-cursor",
+            )
+            # `└ stats` is emitted on the handler thread *after* the response
+            # body hits the socket, so the client may return before it lands.
+            deadline = time.monotonic() + 2
+            while time.monotonic() < deadline and not any(
+                "└ stats" in record for record in captured.output
+            ):
+                time.sleep(0.01)
+        output = "\n".join(captured.output)
+        self.assertEqual(status, 200)
+        # Single-line stage records keep the log readable.
+        for marker in ("┌ cursor", "├ context", "├ send", "└ stats"):
+            self.assertIn(marker, output)
+        self.assertNotIn("hi", output.split("┌ cursor")[1].split("\n")[0])
+        self.assertNotIn("sk-from-cursor", output)
+
+    def test_verbose_logging_includes_bodies_but_redacts_api_key(self) -> None:
+        self.proxy.server.config = replace(self.proxy.server.config, verbose=True)
+        with self.assertLogs("deepseek_cursor_proxy", level="INFO") as captured:
+            _post(
+                f"{self.proxy.url}/v1/chat/completions",
+                self._request(),
+                api_key="sk-from-cursor",
+            )
+        output = "\n".join(captured.output)
+        self.assertIn("cursor request body", output)
+        self.assertIn("upstream request body", output)
+        self.assertNotIn("sk-from-cursor", output)
+
+    def test_healthz_returns_ok(self) -> None:
+        with urlopen(f"{self.proxy.url}/healthz", timeout=2) as response:
+            self.assertEqual(response.status, 200)
+            self.assertEqual(json.loads(response.read())["ok"], True)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_streaming.py b/tests/test_streaming.py
index d2019b1..9f594f7 100644
--- a/tests/test_streaming.py
+++ b/tests/test_streaming.py
@@ -6,6 +6,7 @@ from deepseek_cursor_proxy.reasoning_store import ReasoningStore, conversation_s
 from deepseek_cursor_proxy.streaming import (
     CursorReasoningDisplayAdapter,
     StreamAccumulator,
+    fold_reasoning_into_content,
 )
 
 
@@ -430,5 +431,44 @@ class CursorReasoningDisplayAdapterTests(unittest.TestCase):
         self.assertIsNone(adapter.flush_chunk("deepseek-v4-pro"))
 
 
+class FoldReasoningTests(unittest.TestCase):
+    def test_fold_reasoning_into_non_streaming_content(self) -> None:
+        """Non-streaming responses mirror reasoning_content into a visible
+        <details> block, matching the streaming layout."""
+        payload = {
+            "choices": [
+                {
+                    "index": 0,
+                    "message": {
+                        "role": "assistant",
+                        "content": "answer",
+                        "reasoning_content": "thinking",
+                    },
+                }
+            ]
+        }
+        fold_reasoning_into_content(payload, collapsible=True)
+        self.assertEqual(
+            payload["choices"][0]["message"]["content"],
+            "<details>\n<summary>Thinking</summary>\n\nthinking\n</details>\n\nanswer",
+        )
+
+    def test_fold_reasoning_skips_empty_reasoning(self) -> None:
+        payload = {
+            "choices": [
+                {
+                    "index": 0,
+                    "message": {
+                        "role": "assistant",
+                        "content": "answer",
+                        "reasoning_content": "",
+                    },
+                }
+            ]
+        }
+        fold_reasoning_into_content(payload, collapsible=True)
+        self.assertEqual(payload["choices"][0]["message"]["content"], "answer")
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/tests/test_trace.py b/tests/test_trace.py
index d2be77b..261c35d 100644
--- a/tests/test_trace.py
+++ b/tests/test_trace.py
@@ -1,14 +1,25 @@
+"""Trace writer tests, both as a unit (writes/redacts files) and integrated
+through the proxy (captures real request flow on disk)."""
+
 from __future__ import annotations
 
+from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
 import json
+from pathlib import Path
 import stat
+import threading
 from tempfile import TemporaryDirectory
+import time
 import unittest
+from urllib.request import Request, urlopen
 
+from deepseek_cursor_proxy.config import ProxyConfig
+from deepseek_cursor_proxy.reasoning_store import ReasoningStore
+from deepseek_cursor_proxy.server import DeepSeekProxyHandler, DeepSeekProxyServer
 from deepseek_cursor_proxy.trace import TraceWriter
 
 
-class TraceWriterTests(unittest.TestCase):
+class TraceWriterUnitTests(unittest.TestCase):
     def test_writes_manifest_and_numbered_request_files(self) -> None:
         with TemporaryDirectory() as temp_dir:
             writer = TraceWriter(temp_dir)
@@ -47,17 +58,244 @@ class TraceWriterTests(unittest.TestCase):
                 headers={"Authorization": "Bearer sk-secret"},
             )
             trace.finish("completed", http_status=200)
-
-            payload = json.loads(trace.path.read_text(encoding="utf-8"))
-            serialized = json.dumps(payload)
-
+            serialized = trace.path.read_text(encoding="utf-8")
             self.assertNotIn("sk-secret", serialized)
+            payload = json.loads(serialized)
             self.assertEqual(
-                payload["request"]["headers"]["Authorization"]["present"],
-                True,
+                payload["request"]["headers"]["Authorization"]["present"], True
             )
             self.assertIn("sha256", payload["request"]["headers"]["Authorization"])
 
 
+# ---------------------------------------------------------------------------
+# Integration: trace writer attached to a running proxy.
+# ---------------------------------------------------------------------------
+
+
+class _CannedUpstream(BaseHTTPRequestHandler):
+    """Returns a tool-call response for the first POST and a streamed
+    reasoning response for the second."""
+
+    requests: list[dict[str, object]] = []
+
+    def log_message(self, fmt: str, *args: object) -> None:
+        return
+
+    def do_POST(self) -> None:
+        length = int(self.headers.get("Content-Length") or 0)
+        payload = json.loads(self.rfile.read(length).decode("utf-8"))
+        self.__class__.requests.append(payload)
+
+        if payload.get("stream"):
+            self.send_response(200)
+            self.send_header("Content-Type", "text/event-stream")
+            self.end_headers()
+            self.wfile.write(
+                b'data: {"id":"s","object":"chat.completion.chunk","choices":'
+                b'[{"index":0,"delta":{"role":"assistant","reasoning_content":"think"},'
+                b'"finish_reason":null}]}\n\n'
+            )
+            self.wfile.write(
+                b'data: {"id":"s","object":"chat.completion.chunk","choices":'
+                b'[{"index":0,"delta":{"content":"answer"},"finish_reason":null}],'
+                b'"usage":{"completion_tokens_details":{"reasoning_tokens":1}}}\n\n'
+            )
+            self.wfile.write(
+                b'data: {"id":"s","object":"chat.completion.chunk",'
+                b'"choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}\n\n'
+            )
+            self.wfile.write(b"data: [DONE]\n\n")
+            self.wfile.flush()
+            return
+
+        body = json.dumps(
+            {
+                "id": "tool",
+                "object": "chat.completion",
+                "model": "deepseek-v4-pro",
+                "choices": [
+                    {
+                        "index": 0,
+                        "finish_reason": "tool_calls",
+                        "message": {
+                            "role": "assistant",
+                            "content": "",
+                            "reasoning_content": "I need the date.",
+                            "tool_calls": [
+                                {
+                                    "id": "call_date",
+                                    "type": "function",
+                                    "function": {
+                                        "name": "get_date",
+                                        "arguments": "{}",
+                                    },
+                                }
+                            ],
+                        },
+                    }
+                ],
+            }
+        ).encode("utf-8")
+        self.send_response(200)
+        self.send_header("Content-Type", "application/json")
+        self.send_header("Content-Length", str(len(body)))
+        self.end_headers()
+        self.wfile.write(body)
+
+
+class _Fixture:
+    def __init__(self, server: ThreadingHTTPServer) -> None:
+        self.server = server
+        self.thread = threading.Thread(target=server.serve_forever, daemon=True)
+        self.thread.start()
+
+    @property
+    def url(self) -> str:
+        host, port = self.server.server_address
+        return f"http://{host}:{port}"
+
+    def close(self) -> None:
+        self.server.shutdown()
+        self.server.server_close()
+        self.thread.join(timeout=5)
+
+
+def _read_single_trace(session_dir: Path) -> dict:
+    deadline = time.monotonic() + 2
+    files = sorted(session_dir.glob("request-*.json"))
+    while not files and time.monotonic() < deadline:
+        time.sleep(0.01)
+        files = sorted(session_dir.glob("request-*.json"))
+    if len(files) != 1:
+        raise AssertionError(f"expected one trace, found {files}")
+    return json.loads(files[0].read_text(encoding="utf-8"))
+
+
+class TraceIntegrationTests(unittest.TestCase):
+    def setUp(self) -> None:
+        _CannedUpstream.requests = []
+        self.upstream = _Fixture(ThreadingHTTPServer(("127.0.0.1", 0), _CannedUpstream))
+        self.store = ReasoningStore(":memory:")
+        self.temp_dir = TemporaryDirectory()
+        self.writer = TraceWriter(self.temp_dir.name)
+        proxy = DeepSeekProxyServer(("127.0.0.1", 0), DeepSeekProxyHandler)
+        proxy.config = ProxyConfig(
+            upstream_base_url=self.upstream.url,
+            upstream_model="deepseek-v4-pro",
+            ngrok=False,
+        )
+        proxy.reasoning_store = self.store
+        proxy.trace_writer = self.writer
+        self.proxy = _Fixture(proxy)
+
+    def tearDown(self) -> None:
+        self.proxy.close()
+        self.upstream.close()
+        self.store.close()
+        self.temp_dir.cleanup()
+
+    def _post(self, payload: dict) -> dict:
+        request = Request(
+            f"{self.proxy.url}/v1/chat/completions",
+            data=json.dumps(payload).encode("utf-8"),
+            method="POST",
+            headers={
+                "Authorization": "Bearer sk-from-cursor",
+                "Content-Type": "application/json",
+            },
+        )
+        with urlopen(request, timeout=5) as response:
+            return json.loads(response.read())
+
+    def test_captures_non_streaming_replay_without_api_key(self) -> None:
+        self._post(
+            {
+                "model": "deepseek-v4-pro",
+                "messages": [{"role": "user", "content": "What is tomorrow's date?"}],
+            }
+        )
+        trace = _read_single_trace(self.writer.session_dir)
+        serialized = json.dumps(trace)
+        self.assertEqual(trace["completion"]["status"], "completed")
+        self.assertEqual(
+            trace["request"]["body"]["messages"][0]["content"],
+            "What is tomorrow's date?",
+        )
+        self.assertEqual(
+            trace["upstream"]["response"]["body"]["json"]["choices"][0]["message"][
+                "reasoning_content"
+            ],
+            "I need the date.",
+        )
+        self.assertNotIn("sk-from-cursor", serialized)
+
+    def test_captures_streaming_replay_chunks(self) -> None:
+        request = Request(
+            f"{self.proxy.url}/v1/chat/completions",
+            data=json.dumps(
+                {
+                    "model": "deepseek-v4-pro",
+                    "stream": True,
+                    "messages": [{"role": "user", "content": "stream"}],
+                }
+            ).encode("utf-8"),
+            method="POST",
+            headers={
+                "Authorization": "Bearer sk-test",
+                "Content-Type": "application/json",
+            },
+        )
+        with urlopen(request, timeout=2) as response:
+            response.read()
+        trace = _read_single_trace(self.writer.session_dir)
+        self.assertEqual(trace["completion"]["status"], "completed")
+        self.assertIn(
+            "reasoning_content",
+            trace["upstream"]["stream"]["chunks"][0]["line"],
+        )
+        self.assertIn(
+            "<details>", trace["cursor_response"]["stream"]["chunks"][0]["line"]
+        )
+
+    def test_captures_recovery_diagnostics(self) -> None:
+        """A request that triggers cold-cache recovery records the recovery
+        steps + diagnostic counters in the trace."""
+        self._post(
+            {
+                "model": "deepseek-v4-pro",
+                "messages": [
+                    {"role": "user", "content": "old"},
+                    {
+                        "role": "assistant",
+                        "content": "",
+                        "tool_calls": [
+                            {
+                                "id": "call_x",
+                                "type": "function",
+                                "function": {"name": "f", "arguments": "{}"},
+                            }
+                        ],
+                    },
+                    {"role": "tool", "tool_call_id": "call_x", "content": "result"},
+                    {"role": "user", "content": "new"},
+                ],
+            }
+        )
+        trace = _read_single_trace(self.writer.session_dir)
+        self.assertEqual(
+            trace["transform"]["recovery_steps"][0]["strategy"], "latest_user"
+        )
+        self.assertGreaterEqual(
+            len(
+                [
+                    item
+                    for item in trace["transform"]["reasoning_diagnostics"]
+                    if item["missing"]
+                ]
+            ),
+            1,
+        )
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/tests/test_transform.py b/tests/test_transform.py
index 6e8aac2..649a409 100644
--- a/tests/test_transform.py
+++ b/tests/test_transform.py
@@ -1,3 +1,11 @@
+"""Pure-function unit tests for transform.py.
+
+Anything that requires a fake DeepSeek upstream lives in test_protocol.py.
+This file only exercises helpers that take dicts/strings and return
+dicts/strings — content extraction, request normalization, response rewrite,
+recovery-notice stripping, and warning behaviour for dropped fields.
+"""
+
 from __future__ import annotations
 
 import json
@@ -11,1429 +19,239 @@ from deepseek_cursor_proxy.reasoning_store import (
 )
 from deepseek_cursor_proxy.transform import (
     RECOVERY_NOTICE_CONTENT,
+    RECOVERY_NOTICE_TEXT,
     extract_text_content,
+    normalize_reasoning_effort,
     prepare_upstream_request,
-    reasoning_cache_namespace,
     rewrite_response_body,
     strip_cursor_thinking_blocks,
+    strip_recovery_notice_for_upstream,
 )
 
 
-DEFAULT_CONFIG = ProxyConfig()
-DEFAULT_CACHE_NAMESPACE = reasoning_cache_namespace(
-    DEFAULT_CONFIG,
-    "deepseek-v4-pro",
-    {"type": "enabled"},
-    "high",
-)
+class ContentHelpersTests(unittest.TestCase):
+    def test_extract_text_content_flattens_multipart_array(self) -> None:
+        content = [
+            {"type": "text", "text": "hello"},
+            {"type": "image_url", "image_url": {"url": "data:..."}},
+            {"type": "input_text", "text": "world"},
+        ]
+        self.assertEqual(
+            extract_text_content(content),
+            "hello\n[image_url omitted by DeepSeek text proxy]\nworld",
+        )
+
+    def test_extract_text_content_passes_through_string_and_none(self) -> None:
+        self.assertEqual(extract_text_content("plain"), "plain")
+        self.assertIsNone(extract_text_content(None))
+
+    def test_strip_cursor_thinking_blocks_removes_details_and_think(self) -> None:
+        self.assertEqual(
+            strip_cursor_thinking_blocks(
+                "<details>\n<summary>Thinking</summary>\n\nplan\n</details>\n\nanswer"
+            ),
+            "answer",
+        )
+        self.assertEqual(
+            strip_cursor_thinking_blocks("<think>\nplan\n</think>\n\nanswer"),
+            "answer",
+        )
+
+    def test_strip_cursor_thinking_blocks_preserves_unrelated_details(self) -> None:
+        kept = "<details><summary>Diff</summary>\nrelevant\n</details>"
+        self.assertEqual(strip_cursor_thinking_blocks(kept), kept)
+
+    def test_normalize_reasoning_effort_aliases(self) -> None:
+        self.assertEqual(normalize_reasoning_effort("low"), "high")
+        self.assertEqual(normalize_reasoning_effort("medium"), "high")
+        self.assertEqual(normalize_reasoning_effort("high"), "high")
+        self.assertEqual(normalize_reasoning_effort("max"), "max")
+        self.assertEqual(normalize_reasoning_effort("xhigh"), "max")
+        self.assertEqual(normalize_reasoning_effort("nonsense"), "high")
 
 
-def cache_scope(messages: list[dict]) -> str:
-    return conversation_scope(messages, DEFAULT_CACHE_NAMESPACE)
-
-
-class TransformTests(unittest.TestCase):
+class RequestPreparationTests(unittest.TestCase):
     def setUp(self) -> None:
         self.store = ReasoningStore(":memory:")
 
     def tearDown(self) -> None:
         self.store.close()
 
-    def test_extracts_text_from_cursor_style_content_parts(self) -> None:
-        content = [
-            {"type": "text", "text": "hello"},
-            {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}},
-            {"type": "input_text", "text": "world"},
-        ]
-
-        self.assertEqual(
-            extract_text_content(content),
-            "hello\n[image_url omitted by DeepSeek text proxy]\nworld",
-        )
-
-    def test_strips_cursor_display_thinking_blocks_from_assistant_content(
-        self,
-    ) -> None:
-        self.assertEqual(
-            strip_cursor_thinking_blocks(
-                "<think>\nNeed context.\n</think>\n\nFinal answer."
-            ),
-            "Final answer.",
-        )
-        self.assertEqual(
-            strip_cursor_thinking_blocks(
-                "<details>\n"
-                "<summary>Thinking</summary>\n\n"
-                "Need context.\n"
-                "</details>\n\n"
-                "Final answer."
-            ),
-            "Final answer.",
-        )
-
-    def test_preserves_regular_markdown_details_in_assistant_content(self) -> None:
-        content = (
-            "<details>\n"
-            "<summary>Example</summary>\n\n"
-            "Visible details.\n"
-            "</details>\n\n"
-            "Final answer."
-        )
-
-        self.assertEqual(strip_cursor_thinking_blocks(content), content)
-
-    def test_prepares_assistant_content_without_mirrored_thinking_blocks(
-        self,
-    ) -> None:
-        payload = {
-            "model": "deepseek-v4-pro",
-            "messages": [
-                {"role": "user", "content": "hello"},
-                {
-                    "role": "assistant",
-                    "content": (
-                        "<details>\n"
-                        "<summary>Thinking</summary>\n\n"
-                        "Hidden.\n"
-                        "</details>\n\n"
-                        "Visible answer."
-                    ),
-                },
-                {"role": "user", "content": "continue"},
-            ],
-        }
-
-        prepared = prepare_upstream_request(
-            payload,
-            ProxyConfig(missing_reasoning_strategy="reject"),
-            self.store,
-        )
-
-        self.assertEqual(prepared.payload["messages"][1]["content"], "Visible answer.")
-
-    def test_prepares_thinking_request_and_converts_legacy_functions(self) -> None:
-        payload = {
-            "model": "deepseek-v4-flash",
-            "messages": [{"role": "user", "content": "hi"}],
-            "functions": [{"name": "lookup", "parameters": {"type": "object"}}],
-            "function_call": {"name": "lookup"},
-            "max_completion_tokens": 123,
-            "parallel_tool_calls": True,
-        }
-        config = ProxyConfig()
-
-        prepared = prepare_upstream_request(payload, config, self.store)
-
-        self.assertEqual(prepared.original_model, "deepseek-v4-flash")
-        self.assertEqual(prepared.upstream_model, "deepseek-v4-flash")
-        self.assertEqual(prepared.payload["model"], "deepseek-v4-flash")
-        self.assertEqual(prepared.payload["thinking"], {"type": "enabled"})
-        self.assertEqual(prepared.payload["reasoning_effort"], "high")
-        self.assertEqual(prepared.payload["max_tokens"], 123)
-        self.assertEqual(prepared.payload["tools"][0]["type"], "function")
-        self.assertEqual(
-            prepared.payload["tool_choice"],
-            {"type": "function", "function": {"name": "lookup"}},
-        )
-        self.assertNotIn("parallel_tool_calls", prepared.payload)
-
-    def test_uses_config_model_only_when_request_model_is_missing(self) -> None:
-        prepared = prepare_upstream_request(
-            {"messages": [{"role": "user", "content": "hi"}]},
-            ProxyConfig(upstream_model="deepseek-v4-flash"),
-            self.store,
-        )
-
-        self.assertEqual(prepared.original_model, "deepseek-v4-flash")
-        self.assertEqual(prepared.upstream_model, "deepseek-v4-flash")
-        self.assertEqual(prepared.payload["model"], "deepseek-v4-flash")
-
-    def test_streaming_requests_include_usage_for_runtime_stats(self) -> None:
+    def test_legacy_functions_field_is_converted_to_tools(self) -> None:
         prepared = prepare_upstream_request(
             {
                 "model": "deepseek-v4-pro",
-                "stream": True,
-                "stream_options": {"include_usage": False},
                 "messages": [{"role": "user", "content": "hi"}],
+                "functions": [{"name": "lookup", "parameters": {"type": "object"}}],
+                "function_call": "auto",
             },
             ProxyConfig(),
             self.store,
         )
+        self.assertEqual(prepared.payload["tools"][0]["function"]["name"], "lookup")
+        self.assertEqual(prepared.payload["tool_choice"], "auto")
+        self.assertNotIn("functions", prepared.payload)
+        self.assertNotIn("function_call", prepared.payload)
 
-        self.assertEqual(prepared.payload["stream_options"]["include_usage"], True)
-
-    def test_preserves_required_tool_choice(self) -> None:
-        payload = {
-            "model": "deepseek-v4-pro",
-            "messages": [{"role": "user", "content": "call a tool"}],
-            "tools": [{"type": "function", "function": {"name": "lookup"}}],
-            "tool_choice": "required",
-        }
-
+    def test_named_function_call_becomes_named_tool_choice(self) -> None:
         prepared = prepare_upstream_request(
-            payload,
-            ProxyConfig(missing_reasoning_strategy="reject"),
+            {
+                "model": "deepseek-v4-pro",
+                "messages": [{"role": "user", "content": "hi"}],
+                "function_call": {"name": "lookup"},
+            },
+            ProxyConfig(),
             self.store,
         )
-
-        self.assertEqual(prepared.payload["tool_choice"], "required")
-
-    def test_preserves_named_tool_choice(self) -> None:
-        payload = {
-            "model": "deepseek-v4-pro",
-            "messages": [{"role": "user", "content": "call lookup"}],
-            "tools": [{"type": "function", "function": {"name": "lookup"}}],
-            "tool_choice": {
-                "type": "function",
-                "function": {"name": "lookup"},
-            },
-        }
-
-        prepared = prepare_upstream_request(payload, ProxyConfig(), self.store)
-
         self.assertEqual(
             prepared.payload["tool_choice"],
             {"type": "function", "function": {"name": "lookup"}},
         )
 
-    def test_restores_reasoning_content_for_cached_tool_call(self) -> None:
-        prior_messages = [{"role": "user", "content": "read README"}]
-        assistant_message = {
-            "role": "assistant",
-            "content": "",
-            "reasoning_content": "Need the file contents before answering.",
-            "tool_calls": [
-                {
-                    "id": "call_123",
-                    "type": "function",
-                    "function": {
-                        "name": "read_file",
-                        "arguments": '{"path":"README.md"}',
-                    },
-                }
-            ],
-        }
-        self.store.store_assistant_message(
-            assistant_message, cache_scope(prior_messages)
+    def test_max_completion_tokens_is_aliased_to_max_tokens(self) -> None:
+        prepared = prepare_upstream_request(
+            {
+                "model": "deepseek-v4-pro",
+                "messages": [{"role": "user", "content": "hi"}],
+                "max_completion_tokens": 256,
+            },
+            ProxyConfig(),
+            self.store,
         )
+        self.assertEqual(prepared.payload["max_tokens"], 256)
 
-        payload = {
-            "model": "deepseek-v4-pro",
-            "messages": [
-                {"role": "user", "content": "read README"},
+    def test_standard_openai_fields_are_forwarded_without_warning(self) -> None:
+        # Cursor and the OpenAI SDK send these on every request; forward them
+        # so DeepSeek can use what it understands and ignore the rest.
+        with self.assertNoLogs("deepseek_cursor_proxy", level="WARNING"):
+            prepared = prepare_upstream_request(
                 {
-                    "role": "assistant",
-                    "content": "",
-                    "tool_calls": [
-                        {
-                            "id": "call_123",
-                            "type": "function",
-                            "function": {
-                                "name": "read_file",
-                                "arguments": '{"path":"README.md"}',
-                            },
-                        }
-                    ],
+                    "model": "deepseek-v4-pro",
+                    "messages": [{"role": "user", "content": "hi"}],
+                    "user": "user-abc",
+                    "seed": 42,
+                    "n": 1,
+                    "logit_bias": {"50256": -100},
                 },
-                {"role": "tool", "tool_call_id": "call_123", "content": "file text"},
-                {"role": "user", "content": "continue"},
-            ],
-        }
-
-        prepared = prepare_upstream_request(
-            payload,
-            ProxyConfig(missing_reasoning_strategy="reject"),
-            self.store,
-        )
-
-        self.assertEqual(prepared.patched_reasoning_messages, 1)
-        self.assertEqual(
-            prepared.payload["messages"][1]["reasoning_content"],
-            "Need the file contents before answering.",
-        )
-
-    def test_accepts_empty_reasoning_content_when_present_for_tool_call(
-        self,
-    ) -> None:
-        payload = {
-            "model": "deepseek-v4-pro",
-            "messages": [
-                {"role": "user", "content": "read README"},
-                {
-                    "role": "assistant",
-                    "content": "",
-                    "reasoning_content": "",
-                    "tool_calls": [
-                        {
-                            "id": "call_empty",
-                            "type": "function",
-                            "function": {
-                                "name": "read_file",
-                                "arguments": '{"path":"README.md"}',
-                            },
-                        }
-                    ],
-                },
-                {"role": "tool", "tool_call_id": "call_empty", "content": "file text"},
-            ],
-        }
-
-        prepared = prepare_upstream_request(payload, ProxyConfig(), self.store)
-
-        self.assertEqual(prepared.patched_reasoning_messages, 0)
-        self.assertEqual(prepared.missing_reasoning_messages, 0)
-        self.assertIn("reasoning_content", prepared.payload["messages"][1])
-        self.assertEqual(prepared.payload["messages"][1]["reasoning_content"], "")
-
-    def test_restores_empty_reasoning_content_from_cache(self) -> None:
-        prior_messages = [{"role": "user", "content": "read README"}]
-        tool_call = {
-            "id": "call_empty",
-            "type": "function",
-            "function": {
-                "name": "read_file",
-                "arguments": '{"path":"README.md"}',
-            },
-        }
-        self.store.store_assistant_message(
-            {
-                "role": "assistant",
-                "content": "",
-                "reasoning_content": "",
-                "tool_calls": [tool_call],
-            },
-            cache_scope(prior_messages),
-        )
-
-        prepared = prepare_upstream_request(
-            {
-                "model": "deepseek-v4-pro",
-                "messages": [
-                    *prior_messages,
-                    {"role": "assistant", "content": "", "tool_calls": [tool_call]},
-                    {
-                        "role": "tool",
-                        "tool_call_id": "call_empty",
-                        "content": "file text",
-                    },
-                ],
-            },
-            ProxyConfig(),
-            self.store,
-        )
-
-        self.assertEqual(prepared.patched_reasoning_messages, 1)
-        self.assertEqual(prepared.missing_reasoning_messages, 0)
-        self.assertIn("reasoning_content", prepared.payload["messages"][1])
-        self.assertEqual(prepared.payload["messages"][1]["reasoning_content"], "")
-
-    def test_restores_reasoning_content_for_cached_final_tool_turn_message(
-        self,
-    ) -> None:
-        prior_messages = [
-            {"role": "user", "content": "read README"},
-            {
-                "role": "assistant",
-                "content": "",
-                "reasoning_content": "Need the file contents before answering.",
-                "tool_calls": [
-                    {
-                        "id": "call_123",
-                        "type": "function",
-                        "function": {
-                            "name": "read_file",
-                            "arguments": '{"path":"README.md"}',
-                        },
-                    }
-                ],
-            },
-            {"role": "tool", "tool_call_id": "call_123", "content": "file text"},
-        ]
-        assistant_message = {
-            "role": "assistant",
-            "content": "Final answer after using the tool.",
-            "reasoning_content": "The tool result is enough to answer.",
-        }
-        self.store.store_assistant_message(
-            assistant_message, cache_scope(prior_messages)
-        )
-
-        payload = {
-            "model": "deepseek-v4-pro",
-            "messages": [
-                {"role": "user", "content": "read README"},
-                {
-                    "role": "assistant",
-                    "content": "",
-                    "tool_calls": [
-                        {
-                            "id": "call_123",
-                            "type": "function",
-                            "function": {
-                                "name": "read_file",
-                                "arguments": '{"path":"README.md"}',
-                            },
-                        }
-                    ],
-                },
-                {"role": "tool", "tool_call_id": "call_123", "content": "file text"},
-                {"role": "assistant", "content": "Final answer after using the tool."},
-                {"role": "user", "content": "another question"},
-            ],
-        }
-
-        prepared = prepare_upstream_request(
-            payload,
-            ProxyConfig(missing_reasoning_strategy="reject"),
-            self.store,
-        )
-
-        self.assertEqual(prepared.patched_reasoning_messages, 1)
-        self.assertEqual(
-            prepared.payload["messages"][3]["reasoning_content"],
-            "The tool result is enough to answer.",
-        )
-
-    def test_reasoning_cache_is_scoped_by_conversation_prefix(self) -> None:
-        tool_call = {
-            "id": "call_reused",
-            "type": "function",
-            "function": {"name": "lookup", "arguments": "{}"},
-        }
-        assistant_a = {
-            "role": "assistant",
-            "content": "",
-            "reasoning_content": "Reasoning for thread A.",
-            "tool_calls": [tool_call],
-        }
-        assistant_b = {
-            "role": "assistant",
-            "content": "",
-            "reasoning_content": "Reasoning for thread B.",
-            "tool_calls": [tool_call],
-        }
-        prior_a = [{"role": "user", "content": "thread A"}]
-        prior_b = [{"role": "user", "content": "thread B"}]
-
-        self.store.store_assistant_message(assistant_a, cache_scope(prior_a))
-        self.store.store_assistant_message(assistant_b, cache_scope(prior_b))
-
-        payload_a = {
-            "model": "deepseek-v4-pro",
-            "messages": [
-                *prior_a,
-                {"role": "assistant", "content": "", "tool_calls": [tool_call]},
-            ],
-        }
-        payload_b = {
-            "model": "deepseek-v4-pro",
-            "messages": [
-                *prior_b,
-                {"role": "assistant", "content": "", "tool_calls": [tool_call]},
-            ],
-        }
-
-        prepared_a = prepare_upstream_request(payload_a, ProxyConfig(), self.store)
-        prepared_b = prepare_upstream_request(payload_b, ProxyConfig(), self.store)
-
-        self.assertEqual(
-            prepared_a.payload["messages"][1]["reasoning_content"],
-            "Reasoning for thread A.",
-        )
-        self.assertEqual(
-            prepared_b.payload["messages"][1]["reasoning_content"],
-            "Reasoning for thread B.",
-        )
-
-    def test_exact_message_signature_wins_over_tool_call_id_fallback(self) -> None:
-        prior = [{"role": "user", "content": "same conversation prefix"}]
-        scope = cache_scope(prior)
-        first_tool_call = {
-            "id": "call_reused",
-            "type": "function",
-            "function": {"name": "lookup", "arguments": '{"value":"first"}'},
-        }
-        second_tool_call = {
-            "id": "call_reused",
-            "type": "function",
-            "function": {"name": "lookup", "arguments": '{"value":"second"}'},
-        }
-        self.store.store_assistant_message(
-            {
-                "role": "assistant",
-                "content": "",
-                "reasoning_content": "first reasoning",
-                "tool_calls": [first_tool_call],
-            },
-            scope,
-        )
-        self.store.store_assistant_message(
-            {
-                "role": "assistant",
-                "content": "",
-                "reasoning_content": "second reasoning",
-                "tool_calls": [second_tool_call],
-            },
-            scope,
-        )
-
-        prepared = prepare_upstream_request(
-            {
-                "model": "deepseek-v4-pro",
-                "messages": [
-                    *prior,
-                    {
-                        "role": "assistant",
-                        "content": "",
-                        "tool_calls": [first_tool_call],
-                    },
-                ],
-            },
-            ProxyConfig(),
-            self.store,
-        )
-
-        self.assertEqual(
-            prepared.payload["messages"][1]["reasoning_content"], "first reasoning"
-        )
-
-    def test_strict_hit_backfills_portable_cache_for_mode_switch(self) -> None:
-        agent_prior = [
-            {"role": "system", "content": "Agent mode."},
-            {"role": "user", "content": "set up the task"},
-            {"role": "user", "content": "read README"},
-        ]
-        plan_prior = [
-            {"role": "system", "content": "Plan mode."},
-            {"role": "user", "content": "set up the task"},
-            {"role": "user", "content": "read README"},
-        ]
-        tool_call = {
-            "id": "call_mode_switch",
-            "type": "function",
-            "function": {"name": "read_file", "arguments": '{"path":"README.md"}'},
-        }
-        assistant_message = {
-            "role": "assistant",
-            "content": "",
-            "reasoning_content": "Need README before answering.",
-            "tool_calls": [tool_call],
-        }
-        self.store.store_assistant_message(
-            assistant_message,
-            cache_scope(agent_prior),
-        )
-
-        strict_prepared = prepare_upstream_request(
-            {
-                "model": "deepseek-v4-pro",
-                "messages": [
-                    *agent_prior,
-                    {"role": "assistant", "content": "", "tool_calls": [tool_call]},
-                ],
-            },
-            ProxyConfig(),
-            self.store,
-        )
-        portable_prepared = prepare_upstream_request(
-            {
-                "model": "deepseek-v4-pro",
-                "messages": [
-                    *plan_prior,
-                    {"role": "assistant", "content": "", "tool_calls": [tool_call]},
-                ],
-            },
-            ProxyConfig(),
-            self.store,
-        )
-
-        self.assertEqual(strict_prepared.patched_reasoning_messages, 1)
-        self.assertEqual(portable_prepared.patched_reasoning_messages, 1)
-        self.assertEqual(portable_prepared.missing_reasoning_messages, 0)
-        self.assertEqual(
-            portable_prepared.payload["messages"][3]["reasoning_content"],
-            "Need README before answering.",
-        )
-        self.assertTrue(
-            str(portable_prepared.reasoning_diagnostics[-1]["hit_kind"]).startswith(
-                "portable_"
+                ProxyConfig(),
+                self.store,
             )
-        )
+        self.assertEqual(prepared.payload["user"], "user-abc")
+        self.assertEqual(prepared.payload["seed"], 42)
+        self.assertEqual(prepared.payload["n"], 1)
+        self.assertEqual(prepared.payload["logit_bias"], {"50256": -100})
 
-    def test_portable_turn_cache_restores_final_assistant_after_tool_result(
-        self,
-    ) -> None:
-        agent_user = {"role": "user", "content": "look up project state"}
-        plan_user = dict(agent_user)
-        tool_call = {
-            "id": "call_project_state",
-            "type": "function",
-            "function": {"name": "lookup", "arguments": '{"query":"state"}'},
-        }
-        tool_result = {
-            "role": "tool",
-            "tool_call_id": "call_project_state",
-            "content": '{"state":"ready"}',
-        }
-        tool_assistant = {
-            "role": "assistant",
-            "content": "",
-            "reasoning_content": "Need the project state.",
-            "tool_calls": [tool_call],
-        }
-        final_assistant = {
-            "role": "assistant",
-            "content": "The project is ready.",
-            "reasoning_content": "The tool result is enough to answer.",
-        }
-        agent_initial_prior = [
-            {"role": "system", "content": "Agent mode."},
-            agent_user,
-        ]
-        agent_final_prior = [*agent_initial_prior, tool_assistant, tool_result]
-        self.store.store_assistant_message(
-            tool_assistant,
-            cache_scope(agent_initial_prior),
-            DEFAULT_CACHE_NAMESPACE,
-            agent_initial_prior,
-        )
-        self.store.store_assistant_message(
-            final_assistant,
-            cache_scope(agent_final_prior),
-            DEFAULT_CACHE_NAMESPACE,
-            agent_final_prior,
-        )
-
-        prepared = prepare_upstream_request(
-            {
-                "model": "deepseek-v4-pro",
-                "messages": [
-                    {"role": "system", "content": "Plan mode."},
-                    plan_user,
-                    {"role": "assistant", "content": "", "tool_calls": [tool_call]},
-                    tool_result,
-                    {"role": "assistant", "content": "The project is ready."},
-                    {"role": "user", "content": "continue"},
-                ],
-            },
-            ProxyConfig(missing_reasoning_strategy="reject"),
-            self.store,
-        )
-
-        self.assertEqual(prepared.missing_reasoning_messages, 0)
-        self.assertEqual(prepared.patched_reasoning_messages, 2)
-        self.assertEqual(
-            prepared.payload["messages"][4]["reasoning_content"],
-            "The tool result is enough to answer.",
-        )
-
-    def test_portable_turn_cache_isolated_for_reused_tool_call_id(self) -> None:
-        tool_call = {
-            "id": "call_reused",
-            "type": "function",
-            "function": {"name": "lookup", "arguments": "{}"},
-        }
-        assistant_a = {
-            "role": "assistant",
-            "content": "",
-            "reasoning_content": "Reasoning for thread A.",
-            "tool_calls": [tool_call],
-        }
-        assistant_b = {
-            "role": "assistant",
-            "content": "",
-            "reasoning_content": "Reasoning for thread B.",
-            "tool_calls": [tool_call],
-        }
-        prior_a = [
-            {"role": "system", "content": "Agent mode."},
-            {"role": "user", "content": "thread A"},
-        ]
-        prior_b = [
-            {"role": "system", "content": "Agent mode."},
-            {"role": "user", "content": "thread B"},
-        ]
-        self.store.store_assistant_message(
-            assistant_a,
-            cache_scope(prior_a),
-            DEFAULT_CACHE_NAMESPACE,
-            prior_a,
-        )
-        self.store.store_assistant_message(
-            assistant_b,
-            cache_scope(prior_b),
-            DEFAULT_CACHE_NAMESPACE,
-            prior_b,
-        )
-
-        prepared = prepare_upstream_request(
-            {
-                "model": "deepseek-v4-pro",
-                "messages": [
-                    {"role": "system", "content": "Plan mode."},
-                    {"role": "user", "content": "thread A"},
-                    {"role": "assistant", "content": "", "tool_calls": [tool_call]},
-                ],
-            },
-            ProxyConfig(),
-            self.store,
-        )
-
-        self.assertEqual(
-            prepared.payload["messages"][2]["reasoning_content"],
-            "Reasoning for thread A.",
-        )
-
-    def test_restores_reasoning_when_cursor_drops_tool_call_id_but_keeps_function_call(
-        self,
-    ) -> None:
-        prior = [{"role": "user", "content": "inspect repo"}]
-        assistant_message = {
-            "role": "assistant",
-            "content": "",
-            "reasoning_content": "Need to call the file tool.",
-            "tool_calls": [
+    def test_unknown_request_fields_are_dropped_with_warning(self) -> None:
+        with self.assertLogs("deepseek_cursor_proxy", level="WARNING") as captured:
+            prepared = prepare_upstream_request(
                 {
-                    "id": "call_original",
-                    "type": "function",
-                    "function": {
-                        "name": "read_file",
-                        "arguments": '{"path":"README.md"}',
-                    },
-                }
-            ],
-        }
-        self.store.store_assistant_message(assistant_message, cache_scope(prior))
-
-        payload = {
-            "model": "deepseek-v4-pro",
-            "messages": [
-                *prior,
-                {
-                    "role": "assistant",
-                    "content": None,
-                    "tool_calls": [
-                        {
-                            "type": "function",
-                            "function": {
-                                "name": "read_file",
-                                "arguments": '{"path":"README.md"}',
-                            },
-                        }
-                    ],
+                    "model": "deepseek-v4-pro",
+                    "messages": [{"role": "user", "content": "hi"}],
+                    "parallel_tool_calls": True,
+                    "service_tier": "fast",
                 },
-            ],
-        }
+                ProxyConfig(),
+                self.store,
+            )
+        self.assertNotIn("parallel_tool_calls", prepared.payload)
+        self.assertNotIn("service_tier", prepared.payload)
+        log = "\n".join(captured.output)
+        self.assertIn("parallel_tool_calls", log)
+        self.assertIn("service_tier", log)
 
-        prepared = prepare_upstream_request(payload, ProxyConfig(), self.store)
-
-        self.assertEqual(prepared.patched_reasoning_messages, 1)
-        self.assertEqual(prepared.payload["messages"][1]["content"], "")
-        self.assertEqual(
-            prepared.payload["messages"][1]["reasoning_content"],
-            "Need to call the file tool.",
-        )
-
-    def test_restores_reasoning_when_cursor_history_contains_mirrored_think_block(
-        self,
-    ) -> None:
-        prior = [{"role": "user", "content": "inspect repo"}]
-        tool_call = {
-            "id": "call_original",
-            "type": "function",
-            "function": {
-                "name": "read_file",
-                "arguments": '{"path":"README.md"}',
-            },
-        }
-        self.store.store_assistant_message(
-            {
-                "role": "assistant",
-                "content": "",
-                "reasoning_content": "Need to call the file tool.",
-                "tool_calls": [tool_call],
-            },
-            cache_scope(prior),
-        )
+    def test_non_deepseek_model_is_rewritten_with_warning(self) -> None:
+        with self.assertLogs("deepseek_cursor_proxy", level="WARNING") as captured:
+            prepared = prepare_upstream_request(
+                {
+                    "model": "gpt-4",
+                    "messages": [{"role": "user", "content": "hi"}],
+                },
+                ProxyConfig(upstream_model="deepseek-v4-pro"),
+                self.store,
+            )
+        self.assertEqual(prepared.payload["model"], "deepseek-v4-pro")
+        self.assertIn("non-DeepSeek", "\n".join(captured.output))
 
+    def test_thinking_disabled_strips_reasoning_from_assistant_history(self) -> None:
         prepared = prepare_upstream_request(
             {
                 "model": "deepseek-v4-pro",
                 "messages": [
-                    *prior,
+                    {"role": "user", "content": "hi"},
                     {
                         "role": "assistant",
-                        "content": (
-                            "<details>\n"
-                            "<summary>Thinking</summary>\n\n"
-                            "Need to call the file tool.\n"
-                            "</details>\n\n"
-                        ),
-                        "tool_calls": [tool_call],
+                        "content": "answer",
+                        "reasoning_content": "should be discarded",
                     },
                 ],
             },
-            ProxyConfig(),
+            ProxyConfig(thinking="disabled"),
             self.store,
         )
-
-        self.assertEqual(prepared.patched_reasoning_messages, 1)
-        self.assertEqual(prepared.payload["messages"][1]["content"], "")
-        self.assertEqual(
-            prepared.payload["messages"][1]["reasoning_content"],
-            "Need to call the file tool.",
-        )
-
-    def test_reports_missing_reasoning_for_uncached_assistant_tool_call(self) -> None:
-        payload = {
-            "model": "deepseek-v4-pro",
-            "messages": [
-                {"role": "user", "content": "read README"},
-                {
-                    "role": "assistant",
-                    "content": "",
-                    "tool_calls": [
-                        {
-                            "id": "call_uncached",
-                            "type": "function",
-                            "function": {
-                                "name": "read_file",
-                                "arguments": '{"path":"README.md"}',
-                            },
-                        }
-                    ],
-                },
-                {
-                    "role": "tool",
-                    "tool_call_id": "call_uncached",
-                    "content": "file text",
-                },
-            ],
-        }
-
-        prepared = prepare_upstream_request(
-            payload,
-            ProxyConfig(missing_reasoning_strategy="reject"),
-            self.store,
-        )
-
-        self.assertEqual(prepared.patched_reasoning_messages, 0)
-        self.assertEqual(prepared.missing_reasoning_messages, 1)
+        self.assertEqual(prepared.payload["thinking"], {"type": "disabled"})
         self.assertNotIn("reasoning_content", prepared.payload["messages"][1])
 
-    def test_can_recover_uncached_tool_history_from_latest_user(self) -> None:
-        payload = {
-            "model": "deepseek-v4-pro",
-            "messages": [
-                {"role": "system", "content": "Follow project rules."},
-                {"role": "user", "content": "read README"},
-                {
-                    "role": "assistant",
-                    "content": "",
-                    "tool_calls": [
-                        {
-                            "id": "call_uncached",
-                            "type": "function",
-                            "function": {
-                                "name": "read_file",
-                                "arguments": '{"path":"README.md"}',
-                            },
-                        }
-                    ],
-                },
-                {
-                    "role": "tool",
-                    "tool_call_id": "call_uncached",
-                    "content": "file text",
-                },
-                {"role": "user", "content": "continue with the summary"},
-            ],
-        }
-
-        prepared = prepare_upstream_request(
-            payload,
-            ProxyConfig(missing_reasoning_strategy="recover"),
-            self.store,
-        )
-
-        self.assertEqual(prepared.missing_reasoning_messages, 0)
-        self.assertEqual(prepared.recovered_reasoning_messages, 1)
-        self.assertEqual(prepared.recovery_dropped_messages, 3)
-        self.assertEqual(prepared.recovery_notice, RECOVERY_NOTICE_CONTENT)
-        self.assertEqual(
-            [message["role"] for message in prepared.payload["messages"]],
-            ["system", "system", "user"],
-        )
-        self.assertIn(
-            "recovered this request", prepared.payload["messages"][1]["content"]
-        )
-        self.assertEqual(
-            prepared.payload["messages"][2],
-            {"role": "user", "content": "continue with the summary"},
-        )
-
-    def test_recovery_boundary_preserves_later_deepseek_tool_context(self) -> None:
-        old_tool_call = {
-            "id": "call_old",
-            "type": "function",
-            "function": {
-                "name": "read_file",
-                "arguments": '{"path":"README.md"}',
-            },
-        }
-        new_tool_call = {
-            "id": "call_new",
-            "type": "function",
-            "function": {
-                "name": "lookup",
-                "arguments": '{"query":"new"}',
-            },
-        }
-        first_recovered = prepare_upstream_request(
-            {
-                "model": "deepseek-v4-pro",
-                "messages": [
-                    {"role": "user", "content": "old model turn"},
-                    {
-                        "role": "assistant",
-                        "content": "",
-                        "tool_calls": [old_tool_call],
-                    },
-                    {
-                        "role": "tool",
-                        "tool_call_id": "call_old",
-                        "content": "old result",
-                    },
-                    {"role": "user", "content": "continue with DeepSeek"},
-                ],
-            },
-            ProxyConfig(missing_reasoning_strategy="recover"),
-            self.store,
-        )
-        recovered_tool_message = {
-            "role": "assistant",
-            "content": RECOVERY_NOTICE_CONTENT,
-            "reasoning_content": "Need the new lookup.",
-            "tool_calls": [new_tool_call],
-        }
-        self.store.store_assistant_message(
-            recovered_tool_message,
-            conversation_scope(
-                first_recovered.payload["messages"],
-                first_recovered.cache_namespace,
-            ),
-        )
-
+    def test_plain_chat_history_does_not_require_reasoning(self) -> None:
         prepared = prepare_upstream_request(
             {
                 "model": "deepseek-v4-pro",
                 "messages": [
-                    {"role": "user", "content": "old model turn"},
-                    {
-                        "role": "assistant",
-                        "content": "",
-                        "tool_calls": [old_tool_call],
-                    },
-                    {
-                        "role": "tool",
-                        "tool_call_id": "call_old",
-                        "content": "old result",
-                    },
-                    {"role": "user", "content": "continue with DeepSeek"},
-                    {
-                        "role": "assistant",
-                        "content": RECOVERY_NOTICE_CONTENT,
-                        "tool_calls": [new_tool_call],
-                    },
-                    {
-                        "role": "tool",
-                        "tool_call_id": "call_new",
-                        "content": "new result",
-                    },
+                    {"role": "user", "content": "hi"},
+                    {"role": "assistant", "content": "hello"},
+                    {"role": "user", "content": "again"},
                 ],
             },
-            ProxyConfig(missing_reasoning_strategy="recover"),
+            ProxyConfig(),
             self.store,
         )
-
         self.assertEqual(prepared.missing_reasoning_messages, 0)
-        self.assertEqual(prepared.recovered_reasoning_messages, 0)
-        self.assertEqual(prepared.recovery_dropped_messages, 0)
-        self.assertTrue(prepared.continued_recovery_boundary)
-        self.assertGreater(prepared.retired_prefix_messages, 0)
-        self.assertIsNone(prepared.recovery_notice)
-        self.assertEqual(
-            [message["role"] for message in prepared.payload["messages"]],
-            ["system", "user", "assistant", "tool"],
-        )
-        self.assertEqual(
-            prepared.payload["messages"][2]["reasoning_content"],
-            "Need the new lookup.",
-        )
-        self.assertEqual(
-            prepared.payload["messages"][3],
-            {
-                "role": "tool",
-                "tool_call_id": "call_new",
-                "content": "new result",
-            },
-        )
 
-    def test_recovered_response_is_recorded_under_pre_recovery_scope(self) -> None:
-        old_tool_call = {
-            "id": "call_old",
-            "type": "function",
-            "function": {
-                "name": "read_file",
-                "arguments": '{"path":"README.md"}',
-            },
-        }
-        new_tool_call = {
-            "id": "call_new",
-            "type": "function",
-            "function": {"name": "lookup", "arguments": '{"query":"new"}'},
-        }
-        first_payload = {
-            "model": "deepseek-v4-pro",
-            "messages": [
-                {"role": "user", "content": "old model turn"},
-                {"role": "assistant", "content": "", "tool_calls": [old_tool_call]},
-                {"role": "tool", "tool_call_id": "call_old", "content": "old result"},
-                {"role": "user", "content": "continue with DeepSeek"},
-            ],
-        }
-        first_recovered = prepare_upstream_request(
-            first_payload,
-            ProxyConfig(missing_reasoning_strategy="recover"),
-            self.store,
-        )
-        self.assertEqual(first_recovered.recovered_reasoning_messages, 1)
 
-        response_body = json.dumps(
+class RecoveryNoticeStrippingTests(unittest.TestCase):
+    def test_strips_only_the_recovery_notice_prefix(self) -> None:
+        messages = [
+            {"role": "user", "content": "hi"},
             {
-                "id": "chatcmpl-test",
-                "object": "chat.completion",
-                "model": "deepseek-v4-pro",
-                "choices": [
-                    {
-                        "index": 0,
-                        "finish_reason": "tool_calls",
-                        "message": {
-                            "role": "assistant",
-                            "content": "",
-                            "reasoning_content": "Need the new lookup.",
-                            "tool_calls": [new_tool_call],
-                        },
-                    }
-                ],
+                "role": "assistant",
+                "content": RECOVERY_NOTICE_CONTENT + "real answer",
+            },
+            {"role": "assistant", "content": "ordinary"},
+        ]
+        result = strip_recovery_notice_for_upstream(messages)
+        self.assertEqual(result[1]["content"], "real answer")
+        self.assertEqual(result[2]["content"], "ordinary")
+
+    def test_returns_a_copy_so_caller_keeps_with_prefix_messages(self) -> None:
+        original = [
+            {
+                "role": "assistant",
+                "content": RECOVERY_NOTICE_CONTENT + "answer",
             }
-        ).encode()
-        rewritten = rewrite_response_body(
-            response_body,
-            "deepseek-v4-pro",
-            self.store,
-            first_recovered.payload["messages"],
-            first_recovered.cache_namespace,
-            content_prefix=first_recovered.recovery_notice,
-            recording_contexts=first_recovered.record_response_contexts,
-        )
-        recovered_assistant = json.loads(rewritten)["choices"][0]["message"]
-        self.assertEqual(len(first_recovered.record_response_contexts), 2)
-        for scope, _messages in first_recovered.record_response_contexts:
-            self.assertEqual(
-                self.store.get(
-                    f"scope:{scope}:signature:{message_signature(recovered_assistant)}"
-                ),
-                "Need the new lookup.",
-            )
-        recovered_assistant.pop("reasoning_content", None)
+        ]
+        stripped = strip_recovery_notice_for_upstream(original)
+        # The cache scope is computed on the with-prefix history, so the
+        # caller's list must NOT be mutated in place.
+        self.assertEqual(original[0]["content"], RECOVERY_NOTICE_CONTENT + "answer")
+        self.assertEqual(stripped[0]["content"], "answer")
+        self.assertIsNot(stripped[0], original[0])
 
-        second_payload = {
-            "model": "deepseek-v4-pro",
-            "messages": [
-                *first_payload["messages"],
-                recovered_assistant,
-                {"role": "tool", "tool_call_id": "call_new", "content": "new result"},
-            ],
-        }
+    def test_text_constant_matches_content_prefix(self) -> None:
+        # Sanity check that the user-visible text used as a boundary marker
+        # is consistent with the wire-format prefix.
+        self.assertTrue(RECOVERY_NOTICE_CONTENT.startswith(RECOVERY_NOTICE_TEXT))
 
-        second_prepared = prepare_upstream_request(
-            second_payload,
-            ProxyConfig(missing_reasoning_strategy="recover"),
-            self.store,
-        )
 
-        self.assertEqual(second_prepared.missing_reasoning_messages, 0)
-        self.assertEqual(second_prepared.recovered_reasoning_messages, 0)
-        self.assertEqual(second_prepared.recovery_dropped_messages, 0)
-        self.assertTrue(second_prepared.continued_recovery_boundary)
-        self.assertGreater(second_prepared.retired_prefix_messages, 0)
-        self.assertEqual(
-            second_prepared.payload["messages"][2]["reasoning_content"],
-            "Need the new lookup.",
-        )
+class ResponseRewriteTests(unittest.TestCase):
+    def setUp(self) -> None:
+        self.store = ReasoningStore(":memory:")
 
-    def test_recovery_boundary_accepts_legacy_notice_text(self) -> None:
-        legacy_recovery_notice = (
-            "Note: recovered this DeepSeek chat because older tool-call reasoning "
-            "was unavailable; continuing with recent context only.\n\n"
-        )
-        old_tool_call = {
-            "id": "call_old",
-            "type": "function",
-            "function": {
-                "name": "read_file",
-                "arguments": '{"path":"README.md"}',
-            },
-        }
-        new_tool_call = {
-            "id": "call_new",
-            "type": "function",
-            "function": {
-                "name": "lookup",
-                "arguments": '{"query":"new"}',
-            },
-        }
-        first_recovered = prepare_upstream_request(
-            {
-                "model": "deepseek-v4-pro",
-                "messages": [
-                    {"role": "user", "content": "old model turn"},
-                    {
-                        "role": "assistant",
-                        "content": "",
-                        "tool_calls": [old_tool_call],
-                    },
-                    {
-                        "role": "tool",
-                        "tool_call_id": "call_old",
-                        "content": "old result",
-                    },
-                    {"role": "user", "content": "continue with DeepSeek"},
-                ],
-            },
-            ProxyConfig(missing_reasoning_strategy="recover"),
-            self.store,
-        )
-        self.store.store_assistant_message(
-            {
-                "role": "assistant",
-                "content": legacy_recovery_notice,
-                "reasoning_content": "Need the new lookup.",
-                "tool_calls": [new_tool_call],
-            },
-            conversation_scope(
-                first_recovered.payload["messages"],
-                first_recovered.cache_namespace,
-            ),
-        )
+    def tearDown(self) -> None:
+        self.store.close()
 
-        prepared = prepare_upstream_request(
-            {
-                "model": "deepseek-v4-pro",
-                "messages": [
-                    {"role": "user", "content": "old model turn"},
-                    {
-                        "role": "assistant",
-                        "content": "",
-                        "tool_calls": [old_tool_call],
-                    },
-                    {
-                        "role": "tool",
-                        "tool_call_id": "call_old",
-                        "content": "old result",
-                    },
-                    {"role": "user", "content": "continue with DeepSeek"},
-                    {
-                        "role": "assistant",
-                        "content": legacy_recovery_notice,
-                        "tool_calls": [new_tool_call],
-                    },
-                    {
-                        "role": "tool",
-                        "tool_call_id": "call_new",
-                        "content": "new result",
-                    },
-                ],
-            },
-            ProxyConfig(missing_reasoning_strategy="recover"),
-            self.store,
-        )
-
-        self.assertEqual(prepared.missing_reasoning_messages, 0)
-        self.assertEqual(prepared.recovered_reasoning_messages, 0)
-        self.assertEqual(prepared.recovery_dropped_messages, 0)
-        self.assertTrue(prepared.continued_recovery_boundary)
-        self.assertGreater(prepared.retired_prefix_messages, 0)
-        self.assertIsNone(prepared.recovery_notice)
-        self.assertEqual(
-            prepared.payload["messages"][2]["reasoning_content"],
-            "Need the new lookup.",
-        )
-
-    def test_reports_missing_reasoning_for_uncached_assistant_after_tool_result(
-        self,
-    ) -> None:
-        payload = {
-            "model": "deepseek-v4-pro",
-            "messages": [
-                {"role": "user", "content": "read README"},
-                {
-                    "role": "assistant",
-                    "content": "",
-                    "reasoning_content": "Need file text.",
-                    "tool_calls": [
-                        {
-                            "id": "call_uncached",
-                            "type": "function",
-                            "function": {
-                                "name": "read_file",
-                                "arguments": '{"path":"README.md"}',
-                            },
-                        }
-                    ],
-                },
-                {
-                    "role": "tool",
-                    "tool_call_id": "call_uncached",
-                    "content": "file text",
-                },
-                {"role": "assistant", "content": "Summary of file."},
-                {"role": "user", "content": "continue"},
-            ],
-        }
-
-        prepared = prepare_upstream_request(
-            payload,
-            ProxyConfig(missing_reasoning_strategy="reject"),
-            self.store,
-        )
-
-        self.assertEqual(prepared.missing_reasoning_messages, 1)
-        self.assertNotIn("reasoning_content", prepared.payload["messages"][3])
-
-    def test_does_not_report_missing_reasoning_for_plain_chat_history(self) -> None:
-        payload = {
-            "model": "deepseek-v4-pro",
-            "messages": [
-                {"role": "user", "content": "hello"},
-                {"role": "assistant", "content": "hi"},
-                {"role": "user", "content": "continue"},
-            ],
-        }
-
-        prepared = prepare_upstream_request(payload, ProxyConfig(), self.store)
-
-        self.assertEqual(prepared.missing_reasoning_messages, 0)
-        self.assertNotIn("reasoning_content", prepared.payload["messages"][1])
-
-    def test_does_not_repair_reasoning_when_thinking_is_disabled(self) -> None:
-        payload = {
-            "model": "deepseek-v4-pro",
-            "messages": [
-                {"role": "user", "content": "read README"},
-                {
-                    "role": "assistant",
-                    "content": "",
-                    "reasoning_content": "Should be removed in non-thinking mode.",
-                    "tool_calls": [
-                        {
-                            "id": "call_uncached",
-                            "type": "function",
-                            "function": {
-                                "name": "read_file",
-                                "arguments": '{"path":"README.md"}',
-                            },
-                        }
-                    ],
-                },
-                {
-                    "role": "tool",
-                    "tool_call_id": "call_uncached",
-                    "content": "file text",
-                },
-            ],
-        }
-
-        prepared = prepare_upstream_request(
-            payload, ProxyConfig(thinking="disabled"), self.store
-        )
-
-        self.assertEqual(prepared.missing_reasoning_messages, 0)
-        self.assertNotIn("reasoning_content", prepared.payload["messages"][1])
-
-    def test_reasoning_cache_is_namespaced_by_authorization(self) -> None:
-        config = ProxyConfig(missing_reasoning_strategy="reject")
-        prior = [{"role": "user", "content": "read README"}]
-        namespace_a = reasoning_cache_namespace(
-            config,
-            config.upstream_model,
-            {"type": "enabled"},
-            "high",
-            "Bearer key-a",
-        )
-        tool_call = {
-            "id": "call_123",
-            "type": "function",
-            "function": {
-                "name": "read_file",
-                "arguments": '{"path":"README.md"}',
-            },
-        }
-        self.store.store_assistant_message(
-            {
-                "role": "assistant",
-                "content": "",
-                "reasoning_content": "Reasoning for key A.",
-                "tool_calls": [tool_call],
-            },
-            conversation_scope(prior, namespace_a),
-        )
-
-        prepared = prepare_upstream_request(
-            {
-                "model": "deepseek-v4-pro",
-                "messages": [
-                    *prior,
-                    {"role": "assistant", "content": "", "tool_calls": [tool_call]},
-                ],
-            },
-            config,
-            self.store,
-            authorization="Bearer key-b",
-        )
-
-        self.assertEqual(prepared.missing_reasoning_messages, 1)
-        self.assertNotIn("reasoning_content", prepared.payload["messages"][1])
-
-    def test_deepseek_pro_and_flash_share_reasoning_namespace(self) -> None:
-        config = ProxyConfig()
-        namespace_pro = reasoning_cache_namespace(
-            config,
-            "deepseek-v4-pro",
-            {"type": "enabled"},
-            "high",
-            "Bearer key-a",
-        )
-        namespace_flash = reasoning_cache_namespace(
-            config,
-            "deepseek-v4-flash",
-            {"type": "enabled"},
-            "high",
-            "Bearer key-a",
-        )
-        self.assertEqual(namespace_pro, namespace_flash)
-
-        prior = [{"role": "user", "content": "read README"}]
-        tool_call = {
-            "id": "call_shared",
-            "type": "function",
-            "function": {
-                "name": "read_file",
-                "arguments": '{"path":"README.md"}',
-            },
-        }
-        self.store.store_assistant_message(
-            {
-                "role": "assistant",
-                "content": "",
-                "reasoning_content": "Shared DeepSeek reasoning.",
-                "tool_calls": [tool_call],
-            },
-            conversation_scope(prior, namespace_pro),
-            namespace_pro,
-            prior,
-        )
-
-        prepared = prepare_upstream_request(
-            {
-                "model": "deepseek-v4-flash",
-                "messages": [
-                    *prior,
-                    {"role": "assistant", "content": "", "tool_calls": [tool_call]},
-                ],
-            },
-            config,
-            self.store,
-            authorization="Bearer key-a",
-        )
-
-        self.assertEqual(prepared.missing_reasoning_messages, 0)
-        self.assertEqual(
-            prepared.payload["messages"][1]["reasoning_content"],
-            "Shared DeepSeek reasoning.",
-        )
-
-    def test_converted_function_message_uses_tool_schema(self) -> None:
-        payload = {
-            "model": "deepseek-v4-pro",
-            "messages": [
-                {
-                    "role": "function",
-                    "name": "lookup",
-                    "tool_call_id": "call_1",
-                    "content": {"ok": True},
-                }
-            ],
-        }
-
-        prepared = prepare_upstream_request(payload, ProxyConfig(), self.store)
-
-        self.assertEqual(
-            prepared.payload["messages"][0],
-            {"role": "tool", "tool_call_id": "call_1", "content": '{"ok": true}'},
-        )
-
-    def test_rewrite_response_records_reasoning_and_restores_model_name(self) -> None:
+    def test_records_reasoning_and_restores_original_model_name(self) -> None:
         body = json.dumps(
             {
-                "id": "chatcmpl-test",
-                "object": "chat.completion",
-                "model": "deepseek-v4-pro",
-                "choices": [
-                    {
-                        "index": 0,
-                        "finish_reason": "tool_calls",
-                        "message": {
-                            "role": "assistant",
-                            "content": "",
-                            "reasoning_content": "I need to inspect the repo.",
-                            "tool_calls": [
-                                {
-                                    "id": "call_abc",
-                                    "type": "function",
-                                    "function": {
-                                        "name": "list_files",
-                                        "arguments": "{}",
-                                    },
-                                }
-                            ],
-                        },
-                    }
-                ],
-            }
-        ).encode()
-
-        request_messages = [{"role": "user", "content": "inspect repo"}]
-        rewritten = rewrite_response_body(
-            body, "deepseek-v4-flash", self.store, request_messages
-        )
-        payload = json.loads(rewritten)
-
-        self.assertEqual(payload["model"], "deepseek-v4-flash")
-        self.assertEqual(
-            self.store.get(
-                f"scope:{conversation_scope(request_messages)}:tool_call:call_abc"
-            ),
-            "I need to inspect the repo.",
-        )
-
-    def test_rewrite_response_can_prefix_recovery_notice_before_storing(
-        self,
-    ) -> None:
-        body = json.dumps(
-            {
-                "id": "chatcmpl-test",
+                "id": "chatcmpl",
                 "object": "chat.completion",
                 "model": "deepseek-v4-pro",
                 "choices": [
@@ -1442,48 +260,56 @@ class TransformTests(unittest.TestCase):
                         "finish_reason": "stop",
                         "message": {
                             "role": "assistant",
-                            "content": "Summary.",
-                            "reasoning_content": "Tool result is enough.",
+                            "content": "Final.",
+                            "reasoning_content": "Done thinking.",
                         },
                     }
                 ],
             }
         ).encode()
+        request_messages = [{"role": "user", "content": "hi"}]
+        rewritten = rewrite_response_body(
+            body, "deepseek-v4-pro", self.store, request_messages
+        )
+        payload = json.loads(rewritten)
+        self.assertEqual(payload["model"], "deepseek-v4-pro")
+        stored = self.store.get(
+            f"scope:{conversation_scope(request_messages)}:signature:"
+            f"{message_signature(payload['choices'][0]['message'])}"
+        )
+        self.assertEqual(stored, "Done thinking.")
 
-        request_messages = [
-            {"role": "user", "content": "read README"},
-            {"role": "tool", "tool_call_id": "call_abc", "content": "file text"},
-        ]
+    def test_recovery_notice_is_prefixed_into_response_content(self) -> None:
+        body = json.dumps(
+            {
+                "id": "chatcmpl",
+                "object": "chat.completion",
+                "model": "deepseek-v4-pro",
+                "choices": [
+                    {
+                        "index": 0,
+                        "finish_reason": "stop",
+                        "message": {"role": "assistant", "content": "Final."},
+                    }
+                ],
+            }
+        ).encode()
         rewritten = rewrite_response_body(
             body,
             "deepseek-v4-pro",
             self.store,
-            request_messages,
+            [{"role": "user", "content": "hi"}],
             content_prefix=RECOVERY_NOTICE_CONTENT,
         )
-        payload = json.loads(rewritten)
-        stored_message = {
-            "role": "assistant",
-            "content": RECOVERY_NOTICE_CONTENT + "Summary.",
-            "reasoning_content": "Tool result is enough.",
-        }
-
-        self.assertEqual(
-            payload["choices"][0]["message"]["content"],
-            RECOVERY_NOTICE_CONTENT + "Summary.",
-        )
-        self.assertEqual(
-            self.store.get(
-                f"scope:{conversation_scope(request_messages)}:signature:"
-                f"{message_signature(stored_message)}"
-            ),
-            "Tool result is enough.",
+        self.assertIn(
+            RECOVERY_NOTICE_CONTENT,
+            json.loads(rewritten)["choices"][0]["message"]["content"],
         )
 
-    def test_rewrite_response_preserves_prompt_cache_usage_fields(self) -> None:
+    def test_preserves_prompt_cache_usage_fields(self) -> None:
         body = json.dumps(
             {
-                "id": "chatcmpl-test",
+                "id": "chatcmpl",
                 "object": "chat.completion",
                 "model": "deepseek-v4-pro",
                 "choices": [
@@ -1502,12 +328,10 @@ class TransformTests(unittest.TestCase):
                 },
             }
         ).encode()
-
         rewritten = rewrite_response_body(body, "deepseek-v4-flash", self.store, [])
-        payload = json.loads(rewritten)
-
-        self.assertEqual(payload["usage"]["prompt_cache_hit_tokens"], 6)
-        self.assertEqual(payload["usage"]["prompt_cache_miss_tokens"], 4)
+        usage = json.loads(rewritten)["usage"]
+        self.assertEqual(usage["prompt_cache_hit_tokens"], 6)
+        self.assertEqual(usage["prompt_cache_miss_tokens"], 4)
 
 
 if __name__ == "__main__":