refactor(proxy): audit thinking-mode protocol and refactor test suite (#33)

2026-05-01 19:48:08 +08:00 · 2026-05-01 19:48:08 +08:00 · be0310751c
parent b65f0dd8a2
commit be0310751c
14 changed files with 2223 additions and 2894 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,3 +1,6 @@
 # AIs
 .claude/
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[codz]
--- a/README.md
+++ b/README.md
@ -134,7 +134,7 @@ Select `deepseek-v4-pro` in Cursor and use chat or agent mode as usual.
 ## How It Works
- **Core fix:** DeepSeek's [thinking mode](https://api-docs.deepseek.com/guides/thinking_mode#tool-calls) requires `reasoning_content` from assistant tool-call messages to be passed back in subsequent requests, but Cursor omits this field, causing a 400 error. The proxy (`Cursor → ngrok → proxy → DeepSeek API`) stores `reasoning_content` from every DeepSeek response in a local SQLite cache, keyed by message signature, tool-call ID, and tool-call function signature, and patches outgoing requests with missing `reasoning_content` before they reach DeepSeek. On a cold cache (proxy restart, model switch), it logs and drops unrecoverable history, continues from the latest user request, and prefixes the next Cursor response with a notice.
+- **Core fix:** DeepSeek [thinking-mode tool calls](https://api-docs.deepseek.com/guides/thinking_mode#tool-calls) require the complete **multi-round** `reasoning_content` chain to be sent back in later requests. Cursor omits that field, causing a 400 error. The proxy (`Cursor -> ngrok -> proxy -> DeepSeek API`) stores DeepSeek's original `reasoning_content` and patches missing blocks back into outgoing tool-call history.
 - **Multi-conversation isolation:** To avoid collisions across concurrent conversations, the proxy scopes cache keys by a SHA-256 hash of the canonical conversation prefix (roles, content, and tool calls, excluding `reasoning_content`) plus the upstream model, configuration, and an API-key hash. Different threads get different scopes, so reused tool-call IDs do not collide. Byte-identical cloned histories produce identical scopes.
 - **Context caching compatibility:** The proxy preserves compatibility by never injecting synthetic thread IDs, timestamps, or cache-control messages. It restores `reasoning_content` as the exact original string, so repeated prefixes remain intact for [DeepSeek context cache](https://api-docs.deepseek.com/guides/kv_cache). Cache hit rates are logged in the terminal output.
 - **Additional compatibility fixes:** Beyond reasoning repair, the proxy converts legacy `functions`/`function_call` fields to `tools`/`tool_choice`, preserves required and named tool-choice semantics, normalizes `reasoning_effort` aliases, strips mirrored thinking display blocks from assistant content, flattens multi-part content arrays to plain text, and mirrors `reasoning_content` into Cursor-visible Markdown details blocks.
--- a/src/deepseek_cursor_proxy/config.py
+++ b/src/deepseek_cursor_proxy/config.py
@ -172,8 +172,6 @@ def settings_from_config(
 def normalize_thinking(value: Any) -> str:
    """Map a configured thinking value onto its canonical spelling.

    The value is passed through ``as_str`` (with ``DEFAULT_THINKING`` as its
    second argument), then stripped and lower-cased. ``"enabled"`` and
    ``"disabled"`` are returned as-is, every accepted pass-through spelling
    collapses to ``"pass-through"``, and anything else yields
    ``DEFAULT_THINKING``.
    """
    mode = as_str(value, DEFAULT_THINKING).strip().lower()
    if mode in ("enabled", "disabled"):
        return mode
    if mode in ("passthrough", "pass-through", "pass_through"):
        return "pass-through"
    return DEFAULT_THINKING
--- a/src/deepseek_cursor_proxy/server.py
+++ b/src/deepseek_cursor_proxy/server.py
@ -540,6 +540,8 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler):
                scope=record_response_scope,
                prior_messages=record_response_messages,
                recording_contexts=record_response_contexts,
                display_reasoning=self.config.display_reasoning,
                collapsible_reasoning=self.config.collapsible_reasoning,
            )
        except (json.JSONDecodeError, UnicodeDecodeError) as exc:
            LOG.warning("failed to rewrite upstream JSON response: %s", exc)
@ -812,7 +814,7 @@ def build_arg_parser() -> argparse.ArgumentParser:
    )
    parser.add_argument(
        "--thinking",
-        choices=["enabled", "disabled", "pass-through"],
+        choices=["enabled", "disabled"],
        help="DeepSeek thinking mode, default from config or enabled",
    )
    parser.add_argument(
--- a/src/deepseek_cursor_proxy/streaming.py
+++ b/src/deepseek_cursor_proxy/streaming.py
@ -292,3 +292,34 @@ class CursorReasoningDisplayAdapter:
        }
        if metadata:
            self._last_chunk_metadata.update(metadata)
 def fold_reasoning_into_content(
    response_payload: dict[str, Any],
    collapsible: bool,
 ) -> None:
    """Prepend each choice's reasoning to its visible content, in place.

    For non-streaming responses this mirrors `reasoning_content` into the
    `content` field, matching the streaming `<details>` layout. The
    collapsible or plain block markers are chosen by ``collapsible``.
    Choices or messages that are not dicts, and messages whose
    `reasoning_content` is missing, empty, or not a string, are left
    untouched. Non-string `content` is treated as an empty string.
    """
    if collapsible:
        opening = COLLAPSIBLE_THINKING_BLOCK_START
        closing = COLLAPSIBLE_THINKING_BLOCK_END
    else:
        opening = THINKING_BLOCK_START
        closing = THINKING_BLOCK_END
    choices = response_payload.get("choices")
    if not isinstance(choices, list):
        return
    for choice in choices:
        message = choice.get("message") if isinstance(choice, dict) else None
        if not isinstance(message, dict):
            continue
        reasoning = message.get("reasoning_content")
        if not (isinstance(reasoning, str) and reasoning):
            continue
        visible = message.get("content")
        if not isinstance(visible, str):
            visible = ""
        message["content"] = "".join((opening, reasoning, closing, visible))
--- a/src/deepseek_cursor_proxy/transform.py
+++ b/src/deepseek_cursor_proxy/transform.py
@ -3,6 +3,7 @@ from __future__ import annotations
 from dataclasses import dataclass, field
 import hashlib
 import json
 import logging
 import re
 from typing import Any
@ -15,6 +16,10 @@ from .reasoning_store import (
    tool_call_signature,
    turn_context_signature,
 )
 from .streaming import fold_reasoning_into_content
 LOG = logging.getLogger("deepseek_cursor_proxy")
 SUPPORTED_REQUEST_FIELDS = {
@ -35,6 +40,13 @@ SUPPORTED_REQUEST_FIELDS = {
    "frequency_penalty",
    "logprobs",
    "top_logprobs",
    # Standard OpenAI Chat Completions fields that DeepSeek either honors or
    # safely ignores. Cursor and most OpenAI SDKs send these unconditionally,
    # so forwarding keeps clients happy and avoids log spam.
    "user",
    "seed",
    "n",
    "logit_bias",
 }
 MESSAGE_FIELDS = {
@ -83,10 +95,6 @@ CURSOR_THINKING_BLOCK_RE = re.compile(
 )
 RECOVERY_NOTICE_TEXT = "[deepseek-cursor-proxy] Refreshed reasoning_content history."
 LEGACY_RECOVERY_NOTICE_TEXT = (
    "Note: recovered this DeepSeek chat because older tool-call reasoning "
    "was unavailable; continuing with recent context only."
 )
 RECOVERY_NOTICE_CONTENT = f"{RECOVERY_NOTICE_TEXT}\n\n"
 RECOVERY_SYSTEM_CONTENT = (
    "deepseek-cursor-proxy recovered this request because older DeepSeek "
@ -460,10 +468,33 @@ def has_recovery_notice(message: dict[str, Any]) -> bool:
    return (
        message.get("role") == "assistant"
        and isinstance(content, str)
-        and content.startswith((RECOVERY_NOTICE_TEXT, LEGACY_RECOVERY_NOTICE_TEXT))
+        and content.startswith(RECOVERY_NOTICE_TEXT)
    )
 def strip_recovery_notice_for_upstream(
    messages: list[dict[str, Any]],
 ) -> list[dict[str, Any]]:
    """Return the history with the proxy's recovery notice removed.

    Cursor echoes the recovery notice back in later turns. The proxy uses it
    as a boundary marker, but DeepSeek must not see proxy-generated prose.
    Assistant messages whose string content starts with the notice are
    replaced by a shallow copy with the notice and any leading CR/LF removed;
    every other message is passed through as the same object. The input list
    and its messages are never mutated, so cache scopes and recording
    contexts keep matching the with-prefix history Cursor sends next time.
    """
    notice_length = len(RECOVERY_NOTICE_TEXT)
    upstream_messages: list[dict[str, Any]] = []
    for message in messages:
        content = (
            message.get("content") if message.get("role") == "assistant" else None
        )
        if isinstance(content, str) and content.startswith(RECOVERY_NOTICE_TEXT):
            message = {
                **message,
                "content": content[notice_length:].lstrip("\r\n"),
            }
        upstream_messages.append(message)
    return upstream_messages
 def leading_system_messages(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
    leading_messages: list[dict[str, Any]] = []
    for message in messages:
@ -628,6 +659,11 @@ def assistant_needs_reasoning_for_tool_context(
 def upstream_model_for(original_model: str, config: ProxyConfig) -> str:
    """Choose the model name to send upstream.

    Names starting with ``"deepseek-"`` are forwarded unchanged. Any other
    name is replaced by ``config.upstream_model``, and the rewrite is logged
    at WARNING level so the substitution is visible to the operator.
    """
    if not original_model.startswith("deepseek-"):
        fallback_model = config.upstream_model
        LOG.warning(
            "rewriting non-DeepSeek model %r to configured fallback %r",
            original_model,
            fallback_model,
        )
        return fallback_model
    return original_model
@ -688,6 +724,16 @@ def prepare_upstream_request(
    prepared = {
        key: value for key, value in payload.items() if key in SUPPORTED_REQUEST_FIELDS
    }
    dropped_fields = sorted(
        key
        for key in payload.keys()
        if key not in SUPPORTED_REQUEST_FIELDS
        and key not in {"max_completion_tokens", "functions", "function_call"}
    )
    if dropped_fields:
        LOG.warning(
            "dropping unsupported request field(s): %s", ", ".join(dropped_fields)
        )
    if "max_tokens" not in prepared and "max_completion_tokens" in payload:
        prepared["max_tokens"] = payload["max_completion_tokens"]
@ -719,14 +765,9 @@ def prepare_upstream_request(
        if tool_choice is not None:
            prepared["tool_choice"] = tool_choice
    if config.thinking != "pass-through":
    prepared["thinking"] = {"type": config.thinking}
-
+    thinking_enabled = config.thinking == "enabled"
-    thinking = prepared.get("thinking")
+    thinking_disabled = config.thinking == "disabled"
    thinking_enabled = isinstance(thinking, dict) and thinking.get("type") == "enabled"
    thinking_disabled = (
        isinstance(thinking, dict) and thinking.get("type") == "disabled"
    )
    if thinking_enabled:
        prepared["reasoning_effort"] = normalize_reasoning_effort(
            prepared.get("reasoning_effort") or config.reasoning_effort
@ -797,12 +838,12 @@ def prepare_upstream_request(
            keep_reasoning=not thinking_disabled,
        )
        reasoning_diagnostics.extend(latest_diagnostics)
    prepared["messages"] = messages
    active_record_response_scope = conversation_scope(messages, cache_namespace)
    record_response_contexts = response_recording_contexts(
        (record_response_scope, record_response_messages),
        (active_record_response_scope, messages),
    )
    prepared["messages"] = strip_recovery_notice_for_upstream(messages)
    return PreparedRequest(
        payload=prepared,
@ -874,6 +915,8 @@ def rewrite_response_body(
    scope: str | None = None,
    prior_messages: list[dict[str, Any]] | None = None,
    recording_contexts: list[tuple[str, list[dict[str, Any]]]] | None = None,
    display_reasoning: bool = False,
    collapsible_reasoning: bool = True,
 ) -> bytes:
    response_payload = json.loads(body.decode("utf-8"))
    if isinstance(response_payload, dict):
@ -888,6 +931,8 @@ def rewrite_response_body(
            prior_messages=prior_messages,
            recording_contexts=recording_contexts,
        )
        if display_reasoning:
            fold_reasoning_into_content(response_payload, collapsible_reasoning)
        if "model" in response_payload:
            response_payload["model"] = original_model
    return json.dumps(
--- a/tests/test_config.py
+++ b/tests/test_config.py
@ -121,7 +121,7 @@ class ConfigTests(unittest.TestCase):
                    [
                        "base_url: https://example.com/v1/",
                        "model: deepseek-v4-flash",
-                        "thinking: pass_through",
+                        "thinking: disabled",
                        "reasoning_effort: max",
                        "port: 9100",
                        "host: 0.0.0.0",
@ -145,7 +145,7 @@ class ConfigTests(unittest.TestCase):
        self.assertEqual(config.upstream_base_url, "https://example.com/v1")
        self.assertEqual(config.upstream_model, "deepseek-v4-flash")
-        self.assertEqual(config.thinking, "pass-through")
+        self.assertEqual(config.thinking, "disabled")
        self.assertEqual(config.reasoning_effort, "max")
        self.assertEqual(config.host, "0.0.0.0")
        self.assertEqual(config.port, 9100)
--- a/tests/test_live_deepseek_cursor_proxy.py
+++ b/tests/test_live_deepseek_cursor_proxy.py
--- a/tests/test_protocol.py
+++ b/tests/test_protocol.py
--- a/tests/test_proxy_end_to_end.py
+++ b/tests/test_proxy_end_to_end.py
--- a/tests/test_server.py
+++ b/tests/test_server.py
@ -1,24 +1,45 @@
 """Server boundary, CLI, and operational tests.
 Pure helper tests (gzip, summarize) and stub-handler tests (client
 disconnect) live near the top. The bottom of the file boots a real proxy +
 tiny upstream to exercise things that need the HTTP layer: bearer token
 forwarding, oversized body, missing-bearer rejection, logging modes, and
 streaming connection close.
 """
 from __future__ import annotations
 from dataclasses import replace
 from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
 from io import BytesIO
 import gzip
 import json
 from pathlib import Path
 import threading
 import time
 from types import SimpleNamespace
 import unittest
 import zlib
 from urllib.error import HTTPError
 from urllib.request import Request, urlopen
 from deepseek_cursor_proxy.config import ProxyConfig
 from deepseek_cursor_proxy.reasoning_store import ReasoningStore
 from deepseek_cursor_proxy.server import (
    DeepSeekProxyHandler,
    DeepSeekProxyServer,
    build_arg_parser,
    read_response_body,
    summarize_chat_payload,
 )
-class FakeResponse:
+# ---------------------------------------------------------------------------
 # Stubs for fast in-process tests of internal handler methods
 # ---------------------------------------------------------------------------
 class _FakeResponse:
    def __init__(self, body: bytes, encoding: str = "", status: int = 200) -> None:
        self._body = BytesIO(body)
        self.headers = {"Content-Encoding": encoding} if encoding else {}
@ -28,7 +49,7 @@ class FakeResponse:
        return self._body.read()
-class FakeStreamingResponse:
+class _FakeStreamingResponse:
    status = 200
    headers = {"Content-Type": "text/event-stream"}
@ -43,7 +64,7 @@ class FakeStreamingResponse:
        return self._lines.pop(0)
-class FailingStreamingResponse:
+class _FailingStreamingResponse:
    status = 200
    headers = {"Content-Type": "text/event-stream"}
@ -51,7 +72,7 @@ class FailingStreamingResponse:
        raise OSError("record layer failure")
-class BrokenPipeWfile:
+class _BrokenPipeWfile:
    def write(self, body: bytes) -> None:
        raise BrokenPipeError("test disconnect")
@ -59,10 +80,10 @@ class BrokenPipeWfile:
        raise BrokenPipeError("test disconnect")
-def make_proxy_handler(wfile: object) -> DeepSeekProxyHandler:
+def _make_handler_stub(wfile: object, **config: object) -> DeepSeekProxyHandler:
    handler = object.__new__(DeepSeekProxyHandler)
    handler.server = SimpleNamespace(
-        config=ProxyConfig(),
+        config=ProxyConfig(**config),
        reasoning_store=ReasoningStore(":memory:"),
    )
    handler.wfile = wfile
@ -73,8 +94,13 @@ def make_proxy_handler(wfile: object) -> DeepSeekProxyHandler:
    return handler
-class ServerTests(unittest.TestCase):
+# ---------------------------------------------------------------------------
-    def test_cli_boolean_overrides_have_on_and_off_forms(self) -> None:
+# CLI / pure helpers
 # ---------------------------------------------------------------------------
 class CliAndHelperTests(unittest.TestCase):
    def test_cli_boolean_flags_have_on_and_off_forms(self) -> None:
        args = build_arg_parser().parse_args(
            [
                "--no-ngrok",
@ -86,7 +112,6 @@ class ServerTests(unittest.TestCase):
                "/tmp/dcp-traces",
            ]
        )
        self.assertFalse(args.ngrok)
        self.assertFalse(args.verbose)
        self.assertFalse(args.display_reasoning)
@ -94,19 +119,17 @@ class ServerTests(unittest.TestCase):
        self.assertTrue(args.cors)
        self.assertEqual(args.trace_dir, Path("/tmp/dcp-traces"))
-    def test_read_response_body_handles_gzip(self) -> None:
+    def test_read_response_body_decodes_gzip_and_deflate(self) -> None:
        body = gzip.compress(b'{"ok":true}')
        self.assertEqual(read_response_body(FakeResponse(body, "gzip")), b'{"ok":true}')
    def test_read_response_body_handles_deflate(self) -> None:
        body = zlib.compress(b'{"ok":true}')
        self.assertEqual(
-            read_response_body(FakeResponse(body, "deflate")), b'{"ok":true}'
+            read_response_body(_FakeResponse(gzip.compress(b'{"ok":1}'), "gzip")),
            b'{"ok":1}',
        )
        self.assertEqual(
            read_response_body(_FakeResponse(zlib.compress(b'{"ok":1}'), "deflate")),
            b'{"ok":1}',
        )
-    def test_summarize_chat_payload_does_not_include_message_content(self) -> None:
+    def test_summarize_chat_payload_omits_message_content(self) -> None:
        summary = summarize_chat_payload(
            {
                "model": "deepseek-v4-pro",
@ -116,18 +139,22 @@ class ServerTests(unittest.TestCase):
                "tool_choice": "auto",
            }
        )
        self.assertIn("model='deepseek-v4-pro'", summary)
        self.assertIn("stream=True", summary)
        self.assertIn("messages=1", summary)
        self.assertIn("tools=1", summary)
        self.assertNotIn("secret prompt", summary)
 # ---------------------------------------------------------------------------
 # Client-disconnect / upstream-failure stubs (no real HTTP needed)
 # ---------------------------------------------------------------------------
 class HandlerStubTests(unittest.TestCase):
    def test_regular_response_handles_client_disconnect(self) -> None:
-        handler = make_proxy_handler(BrokenPipeWfile())
+        handler = _make_handler_stub(_BrokenPipeWfile())
        body = json.dumps(
            {
-                "id": "chatcmpl-test",
+                "id": "x",
                "object": "chat.completion",
                "model": "deepseek-v4-pro",
                "choices": [
@ -139,116 +166,324 @@ class ServerTests(unittest.TestCase):
                ],
            }
        ).encode("utf-8")
        try:
            with self.assertLogs("deepseek_cursor_proxy", level="WARNING") as captured:
-                sent = handler._proxy_regular_response(
+                result = handler._proxy_regular_response(
-                    FakeResponse(body),
+                    _FakeResponse(body),
                    "deepseek-v4-pro",
                    [{"role": "user", "content": "hi"}],
-                    "cache-namespace",
+                    "ns",
                )
        finally:
            handler.server.reasoning_store.close()
-
+        self.assertFalse(result.sent)
        self.assertFalse(sent.sent)
        self.assertIn("sending upstream response body", "\n".join(captured.output))
    def test_streaming_response_stops_on_client_disconnect(self) -> None:
-        handler = make_proxy_handler(BrokenPipeWfile())
+        handler = _make_handler_stub(_BrokenPipeWfile())
        chunk = {
-            "id": "chatcmpl-stream",
+            "id": "stream",
            "model": "deepseek-v4-pro",
-            "choices": [
+            "choices": [{"index": 0, "delta": {"role": "assistant", "content": "hi"}}],
                {
                    "index": 0,
                    "delta": {"role": "assistant", "content": "hello"},
        }
-            ],
+        response = _FakeStreamingResponse(
        }
        response = FakeStreamingResponse(
            [
                f"data: {json.dumps(chunk)}\n\n".encode("utf-8"),
                b"data: [DONE]\n\n",
            ]
        )
        try:
            with self.assertLogs("deepseek_cursor_proxy", level="WARNING") as captured:
-                sent = handler._proxy_streaming_response(
+                result = handler._proxy_streaming_response(
                    response,
                    "deepseek-v4-pro",
                    [{"role": "user", "content": "hi"}],
-                    "cache-namespace",
+                    "ns",
                )
        finally:
            handler.server.reasoning_store.close()
-
+        self.assertFalse(result.sent)
        self.assertFalse(sent.sent)
        self.assertEqual(response.readline_calls, 1)
        self.assertIn("sending streaming response chunk", "\n".join(captured.output))
    def test_streaming_response_handles_upstream_read_failure(self) -> None:
-        handler = make_proxy_handler(BytesIO())
+        handler = _make_handler_stub(BytesIO())
        try:
            with self.assertLogs("deepseek_cursor_proxy", level="WARNING") as captured:
-                sent = handler._proxy_streaming_response(
+                result = handler._proxy_streaming_response(
-                    FailingStreamingResponse(),
+                    _FailingStreamingResponse(),
                    "deepseek-v4-pro",
                    [{"role": "user", "content": "hi"}],
-                    "cache-namespace",
+                    "ns",
                )
        finally:
            handler.server.reasoning_store.close()
-
+        self.assertFalse(result.sent)
        self.assertFalse(sent.sent)
        self.assertIn(
-            "upstream streaming response read failed",
+            "upstream streaming response read failed", "\n".join(captured.output)
            "\n".join(captured.output),
        )
-    def test_collapsible_reasoning_has_no_effect_when_display_is_disabled(
+    def test_collapsible_reasoning_no_effect_when_display_disabled(self) -> None:
        self,
    ) -> None:
        wfile = BytesIO()
-        handler = make_proxy_handler(wfile)
+        handler = _make_handler_stub(
-        handler.server.config = ProxyConfig(
+            wfile, display_reasoning=False, collapsible_reasoning=True
            display_reasoning=False,
            collapsible_reasoning=True,
        )
        chunk = {
-            "id": "chatcmpl-stream",
+            "id": "stream",
            "model": "deepseek-v4-pro",
-            "choices": [
+            "choices": [{"index": 0, "delta": {"reasoning_content": "Need context."}}],
                {
                    "index": 0,
                    "delta": {"reasoning_content": "Need context."},
        }
-            ],
+        response = _FakeStreamingResponse(
        }
        response = FakeStreamingResponse(
            [
                f"data: {json.dumps(chunk)}\n\n".encode("utf-8"),
                b"data: [DONE]\n\n",
            ]
        )
        try:
-            sent = handler._proxy_streaming_response(
+            handler._proxy_streaming_response(
                response,
                "deepseek-v4-pro",
                [{"role": "user", "content": "hi"}],
-                "cache-namespace",
+                "ns",
            )
        finally:
            handler.server.reasoning_store.close()
        body = wfile.getvalue().decode("utf-8")
        self.assertTrue(sent.sent)
        self.assertIn("reasoning_content", body)
        self.assertNotIn("<details>", body)
-        self.assertNotIn("<think>", body)
+
 # ---------------------------------------------------------------------------
 # HTTP-level boundary tests: real proxy + tiny upstream
 # ---------------------------------------------------------------------------
 class _PlainFakeUpstream(BaseHTTPRequestHandler):
    """Fake upstream that records every POST and answers with canned data.

    State is kept on the class so tests can inspect it after a request:
    ``requests`` holds each decoded JSON body, ``auth_headers`` the matching
    ``Authorization`` header values, ``response`` the JSON object returned
    for non-streaming requests, and ``delay_after_done`` how long the handler
    lingers after writing ``[DONE]`` on a streaming request.
    """
    requests: list[dict[str, object]] = []
    auth_headers: list[str] = []
    delay_after_done: float = 0.0
    response: dict[str, object] = {}
    def log_message(self, fmt: str, *args: object) -> None:
        """Discard the per-request log line."""
        return None
    def do_POST(self) -> None:
        """Record the request, then reply as an SSE stream or a JSON body."""
        body_length = int(self.headers.get("Content-Length") or 0)
        payload = json.loads(self.rfile.read(body_length).decode("utf-8"))
        handler_class = self.__class__
        handler_class.requests.append(payload)
        handler_class.auth_headers.append(self.headers.get("Authorization", ""))
        if payload.get("stream"):
            self._reply_with_stream()
        else:
            self._reply_with_json()
    def _reply_with_stream(self) -> None:
        """Write one content chunk and ``[DONE]``, then optionally linger."""
        self.send_response(200)
        self.send_header("Content-Type", "text/event-stream")
        self.end_headers()
        self.wfile.write(
            b'data: {"choices":[{"index":0,"delta":{"content":"x"}}]}\n\n'
        )
        self.wfile.write(b"data: [DONE]\n\n")
        self.wfile.flush()
        linger_seconds = self.__class__.delay_after_done
        if linger_seconds:
            time.sleep(linger_seconds)
    def _reply_with_json(self) -> None:
        """Send the class-level ``response`` as a JSON document."""
        encoded = json.dumps(self.__class__.response).encode("utf-8")
        self.send_response(200)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(encoded)))
        self.end_headers()
        self.wfile.write(encoded)
 # Canned non-streaming chat completion. HttpBoundaryTests.setUp installs a
 # shallow copy of it as `_PlainFakeUpstream.response` before every test.
 _BASE_RESPONSE: dict[str, object] = {
    "id": "x",
    "object": "chat.completion",
    "created": 1,
    "model": "deepseek-v4-pro",
    "choices": [
        {
            "index": 0,
            "finish_reason": "stop",
            "message": {"role": "assistant", "content": "ok"},
        }
    ],
    "usage": {
        "prompt_tokens": 20,
        "completion_tokens": 5,
        "total_tokens": 25,
        "prompt_cache_hit_tokens": 12,
        "prompt_cache_miss_tokens": 8,
        "completion_tokens_details": {"reasoning_tokens": 3},
    },
 }
 class _Fixture:
    """Serve an HTTP server from a daemon thread until ``close`` is called."""
    def __init__(self, server: ThreadingHTTPServer) -> None:
        """Keep ``server`` and start ``serve_forever`` on a daemon thread."""
        self.server = server
        worker = threading.Thread(target=server.serve_forever, daemon=True)
        self.thread = worker
        worker.start()
    @property
    def url(self) -> str:
        """Base ``http://host:port`` URL built from the server's address."""
        host, port = self.server.server_address
        return "http://{}:{}".format(host, port)
    def close(self) -> None:
        """Stop serving, close the listening socket, and join the thread."""
        server = self.server
        server.shutdown()
        server.server_close()
        self.thread.join(timeout=5)
 def _post(url: str, payload: dict, api_key: str = "sk-test") -> tuple[int, dict]:
    """POST ``payload`` as JSON with a bearer token and decode the JSON reply.

    Returns ``(status, body)``. An ``HTTPError`` raised by ``urlopen`` is
    turned into the same tuple using the error's status code and JSON body,
    so callers can assert on error responses without their own try/except.
    """
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    encoded = json.dumps(payload).encode("utf-8")
    request = Request(url, data=encoded, method="POST", headers=headers)
    try:
        with urlopen(request, timeout=5) as response:
            status, raw = response.status, response.read()
    except HTTPError as exc:
        status, raw = exc.code, exc.read()
    return status, json.loads(raw.decode("utf-8"))
 class HttpBoundaryTests(unittest.TestCase):
    """Real-HTTP tests that don't fit the protocol suite: things the proxy
    must do at the HTTP boundary regardless of what DeepSeek answers.

    Every test gets a fresh fake upstream, an in-memory reasoning store, and
    a proxy server configured to forward to that upstream. Both servers bind
    to port 0 on 127.0.0.1.
    """
    def setUp(self) -> None:
        """Build the upstream/store/proxy trio and register their teardown."""
        # The fake upstream keeps its state on the class; reset it per test.
        _PlainFakeUpstream.requests = []
        _PlainFakeUpstream.auth_headers = []
        _PlainFakeUpstream.delay_after_done = 0.0
        _PlainFakeUpstream.response = dict(_BASE_RESPONSE)
        # Register each release as soon as its resource exists. Unlike
        # tearDown, cleanups still run when a later setUp step raises, so a
        # half-built fixture cannot leak a server thread or socket. Cleanups
        # run last-in-first-out, which closes proxy -> upstream -> store.
        self.store = ReasoningStore(":memory:")
        self.addCleanup(self.store.close)
        self.upstream = _Fixture(
            ThreadingHTTPServer(("127.0.0.1", 0), _PlainFakeUpstream)
        )
        self.addCleanup(self.upstream.close)
        proxy = DeepSeekProxyServer(("127.0.0.1", 0), DeepSeekProxyHandler)
        proxy.config = ProxyConfig(
            upstream_base_url=self.upstream.url,
            upstream_model="deepseek-v4-pro",
            ngrok=False,
        )
        proxy.reasoning_store = self.store
        self.proxy = _Fixture(proxy)
        self.addCleanup(self.proxy.close)
    def _request(self) -> dict:
        """Return a minimal non-streaming chat request."""
        return {
            "model": "deepseek-v4-pro",
            "messages": [{"role": "user", "content": "hi"}],
        }
    def test_rejects_missing_bearer_token(self) -> None:
        """A request without Authorization gets 401 and never goes upstream."""
        request = Request(
            f"{self.proxy.url}/v1/chat/completions",
            data=json.dumps(self._request()).encode("utf-8"),
            method="POST",
            headers={"Content-Type": "application/json"},
        )
        with self.assertRaises(HTTPError) as caught:
            urlopen(request, timeout=5)
        self.assertEqual(caught.exception.code, 401)
        self.assertEqual(_PlainFakeUpstream.requests, [])
    def test_rejects_oversized_request_body(self) -> None:
        """A body over ``max_request_body_bytes`` gets 413 and stays local."""
        self.proxy.server.config = replace(
            self.proxy.server.config, max_request_body_bytes=10
        )
        status, payload = _post(
            f"{self.proxy.url}/v1/chat/completions", self._request()
        )
        self.assertEqual(status, 413)
        self.assertIn("too large", payload["error"]["message"])
        self.assertEqual(_PlainFakeUpstream.requests, [])
    def test_forwards_bearer_token_to_upstream(self) -> None:
        """The client's bearer token reaches the upstream unchanged."""
        status, _ = _post(
            f"{self.proxy.url}/v1/chat/completions",
            self._request(),
            api_key="sk-from-cursor",
        )
        self.assertEqual(status, 200)
        self.assertEqual(_PlainFakeUpstream.auth_headers[0], "Bearer sk-from-cursor")
    def test_streaming_response_closes_after_done_when_upstream_lingers(
        self,
    ) -> None:
        """Cursor relies on the proxy ending the SSE stream at [DONE], even
        if the upstream socket stays open."""
        # The upstream lingers longer than both the client timeout and the
        # asserted bound, so the test only passes if the proxy ends the stream.
        _PlainFakeUpstream.delay_after_done = 2.0
        request = Request(
            f"{self.proxy.url}/v1/chat/completions",
            data=json.dumps(
                {
                    "model": "deepseek-v4-pro",
                    "stream": True,
                    "messages": [{"role": "user", "content": "stream"}],
                }
            ).encode("utf-8"),
            method="POST",
            headers={
                "Authorization": "Bearer sk-test",
                "Content-Type": "application/json",
            },
        )
        started = time.monotonic()
        with urlopen(request, timeout=1) as response:
            body = response.read().decode("utf-8")
        self.assertLess(time.monotonic() - started, 1.0)
        self.assertIn("data: [DONE]", body)
    def test_normal_logging_summarizes_without_bodies_or_keys(self) -> None:
        """Default logging emits the stage markers but no content or key."""
        with self.assertLogs("deepseek_cursor_proxy", level="INFO") as captured:
            status, _ = _post(
                f"{self.proxy.url}/v1/chat/completions",
                self._request(),
                api_key="sk-from-cursor",
            )
            # `└ stats` is emitted on the handler thread *after* the response
            # body hits the socket, so the client may return before it lands.
            deadline = time.monotonic() + 2
            while time.monotonic() < deadline and not any(
                "└ stats" in record for record in captured.output
            ):
                time.sleep(0.01)
        output = "\n".join(captured.output)
        self.assertEqual(status, 200)
        # Single-line stage records keep the log readable.
        for marker in ("┌ cursor", "├ context", "├ send", "└ stats"):
            self.assertIn(marker, output)
        self.assertNotIn("hi", output.split("┌ cursor")[1].split("\n")[0])
        self.assertNotIn("sk-from-cursor", output)
    def test_verbose_logging_includes_bodies_but_redacts_api_key(self) -> None:
        """Verbose logging adds request bodies and still hides the API key."""
        self.proxy.server.config = replace(self.proxy.server.config, verbose=True)
        with self.assertLogs("deepseek_cursor_proxy", level="INFO") as captured:
            _post(
                f"{self.proxy.url}/v1/chat/completions",
                self._request(),
                api_key="sk-from-cursor",
            )
        output = "\n".join(captured.output)
        self.assertIn("cursor request body", output)
        self.assertIn("upstream request body", output)
        self.assertNotIn("sk-from-cursor", output)
    def test_healthz_returns_ok(self) -> None:
        """GET /healthz answers 200 with a JSON body whose ``ok`` is true."""
        with urlopen(f"{self.proxy.url}/healthz", timeout=2) as response:
            self.assertEqual(response.status, 200)
            self.assertEqual(json.loads(response.read())["ok"], True)
 if __name__ == "__main__":
--- a/tests/test_streaming.py
+++ b/tests/test_streaming.py
@ -6,6 +6,7 @@ from deepseek_cursor_proxy.reasoning_store import ReasoningStore, conversation_s
 from deepseek_cursor_proxy.streaming import (
    CursorReasoningDisplayAdapter,
    StreamAccumulator,
    fold_reasoning_into_content,
 )
@ -430,5 +431,44 @@ class CursorReasoningDisplayAdapterTests(unittest.TestCase):
        self.assertIsNone(adapter.flush_chunk("deepseek-v4-pro"))
 class FoldReasoningTests(unittest.TestCase):
    """Checks ``fold_reasoning_into_content`` on non-streaming payloads."""
    @staticmethod
    def _assistant_payload(reasoning: str) -> dict:
        """Build a one-choice completion whose assistant content is "answer"."""
        message = {
            "role": "assistant",
            "content": "answer",
            "reasoning_content": reasoning,
        }
        return {"choices": [{"index": 0, "message": message}]}
    def test_fold_reasoning_into_non_streaming_content(self) -> None:
        """Non-empty reasoning_content is mirrored into a visible <details>
        block ahead of the answer, matching the streaming layout."""
        payload = self._assistant_payload("thinking")
        fold_reasoning_into_content(payload, collapsible=True)
        folded = payload["choices"][0]["message"]["content"]
        self.assertEqual(
            folded,
            "<details>\n<summary>Thinking</summary>\n\nthinking\n</details>\n\nanswer",
        )
    def test_fold_reasoning_skips_empty_reasoning(self) -> None:
        """An empty reasoning_content leaves the visible content unchanged."""
        payload = self._assistant_payload("")
        fold_reasoning_into_content(payload, collapsible=True)
        folded = payload["choices"][0]["message"]["content"]
        self.assertEqual(folded, "answer")
 # Run this module's tests when the file is executed directly as a script.
 if __name__ == "__main__":
    unittest.main()
--- a/tests/test_trace.py
+++ b/tests/test_trace.py
@ -1,14 +1,25 @@
 """Trace writer tests, both as a unit (writes/redacts files) and integrated
 through the proxy (captures real request flow on disk)."""
 from __future__ import annotations
 from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
 import json
 from pathlib import Path
 import stat
 import threading
 from tempfile import TemporaryDirectory
 import time
 import unittest
 from urllib.request import Request, urlopen
 from deepseek_cursor_proxy.config import ProxyConfig
 from deepseek_cursor_proxy.reasoning_store import ReasoningStore
 from deepseek_cursor_proxy.server import DeepSeekProxyHandler, DeepSeekProxyServer
 from deepseek_cursor_proxy.trace import TraceWriter
-class TraceWriterTests(unittest.TestCase):
+class TraceWriterUnitTests(unittest.TestCase):
    def test_writes_manifest_and_numbered_request_files(self) -> None:
        with TemporaryDirectory() as temp_dir:
            writer = TraceWriter(temp_dir)
@ -47,17 +58,244 @@ class TraceWriterTests(unittest.TestCase):
                headers={"Authorization": "Bearer sk-secret"},
            )
            trace.finish("completed", http_status=200)
-
+            serialized = trace.path.read_text(encoding="utf-8")
            payload = json.loads(trace.path.read_text(encoding="utf-8"))
            serialized = json.dumps(payload)
            self.assertNotIn("sk-secret", serialized)
            payload = json.loads(serialized)
            self.assertEqual(
-                payload["request"]["headers"]["Authorization"]["present"],
+                payload["request"]["headers"]["Authorization"]["present"], True
                True,
            )
            self.assertIn("sha256", payload["request"]["headers"]["Authorization"])
 # ---------------------------------------------------------------------------
 # Integration: trace writer attached to a running proxy.
 # ---------------------------------------------------------------------------
 class _CannedUpstream(BaseHTTPRequestHandler):
    """Returns a tool-call response for the first POST and a streamed
    reasoning response for the second."""
    requests: list[dict[str, object]] = []
    def log_message(self, fmt: str, *args: object) -> None:
        return
    def do_POST(self) -> None:
        length = int(self.headers.get("Content-Length") or 0)
        payload = json.loads(self.rfile.read(length).decode("utf-8"))
        self.__class__.requests.append(payload)
        if payload.get("stream"):
            self.send_response(200)
            self.send_header("Content-Type", "text/event-stream")
            self.end_headers()
            self.wfile.write(
                b'data: {"id":"s","object":"chat.completion.chunk","choices":'
                b'[{"index":0,"delta":{"role":"assistant","reasoning_content":"think"},'
                b'"finish_reason":null}]}\n\n'
            )
            self.wfile.write(
                b'data: {"id":"s","object":"chat.completion.chunk","choices":'
                b'[{"index":0,"delta":{"content":"answer"},"finish_reason":null}],'
                b'"usage":{"completion_tokens_details":{"reasoning_tokens":1}}}\n\n'
            )
            self.wfile.write(
                b'data: {"id":"s","object":"chat.completion.chunk",'
                b'"choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}\n\n'
            )
            self.wfile.write(b"data: [DONE]\n\n")
            self.wfile.flush()
            return
        body = json.dumps(
            {
                "id": "tool",
                "object": "chat.completion",
                "model": "deepseek-v4-pro",
                "choices": [
                    {
                        "index": 0,
                        "finish_reason": "tool_calls",
                        "message": {
                            "role": "assistant",
                            "content": "",
                            "reasoning_content": "I need the date.",
                            "tool_calls": [
                                {
                                    "id": "call_date",
                                    "type": "function",
                                    "function": {
                                        "name": "get_date",
                                        "arguments": "{}",
                                    },
                                }
                            ],
                        },
                    }
                ],
            }
        ).encode("utf-8")
        self.send_response(200)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)
 class _Fixture:
    def __init__(self, server: ThreadingHTTPServer) -> None:
        self.server = server
        self.thread = threading.Thread(target=server.serve_forever, daemon=True)
        self.thread.start()
    @property
    def url(self) -> str:
        host, port = self.server.server_address
        return f"http://{host}:{port}"
    def close(self) -> None:
        self.server.shutdown()
        self.server.server_close()
        self.thread.join(timeout=5)
 def _read_single_trace(session_dir: Path) -> dict:
    deadline = time.monotonic() + 2
    files = sorted(session_dir.glob("request-*.json"))
    while not files and time.monotonic() < deadline:
        time.sleep(0.01)
        files = sorted(session_dir.glob("request-*.json"))
    if len(files) != 1:
        raise AssertionError(f"expected one trace, found {files}")
    return json.loads(files[0].read_text(encoding="utf-8"))
 class TraceIntegrationTests(unittest.TestCase):
    """Trace capture through a running proxy that talks to ``_CannedUpstream``."""
    def setUp(self) -> None:
        """Start the canned upstream and a proxy wired to a fresh trace writer.

        Every resource is registered with ``addCleanup`` as soon as it exists,
        so it is released even when a later step of ``setUp`` raises (a
        ``tearDown`` method would not run in that case).
        """
        _CannedUpstream.requests = []
        # Cleanups run last-in-first-out. Registering temp dir, store,
        # upstream, proxy in that order makes shutdown run as:
        # proxy -> upstream -> store -> temp dir.
        self.temp_dir = TemporaryDirectory()
        self.addCleanup(self.temp_dir.cleanup)
        self.store = ReasoningStore(":memory:")
        self.addCleanup(self.store.close)
        self.upstream = _Fixture(ThreadingHTTPServer(("127.0.0.1", 0), _CannedUpstream))
        self.addCleanup(self.upstream.close)
        self.writer = TraceWriter(self.temp_dir.name)
        proxy = DeepSeekProxyServer(("127.0.0.1", 0), DeepSeekProxyHandler)
        proxy.config = ProxyConfig(
            upstream_base_url=self.upstream.url,
            upstream_model="deepseek-v4-pro",
            ngrok=False,
        )
        proxy.reasoning_store = self.store
        proxy.trace_writer = self.writer
        self.proxy = _Fixture(proxy)
        self.addCleanup(self.proxy.close)
    def _chat_request(
        self, payload: dict, *, api_key: str = "sk-from-cursor"
    ) -> Request:
        """Build a JSON POST to the proxy's chat-completions endpoint."""
        return Request(
            f"{self.proxy.url}/v1/chat/completions",
            data=json.dumps(payload).encode("utf-8"),
            method="POST",
            headers={
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/json",
            },
        )
    def _post(self, payload: dict) -> dict:
        """POST ``payload`` to the proxy and return the decoded JSON response."""
        with urlopen(self._chat_request(payload), timeout=5) as response:
            return json.loads(response.read())
    def test_captures_non_streaming_replay_without_api_key(self) -> None:
        """The trace holds the request body and upstream reasoning, not the key."""
        self._post(
            {
                "model": "deepseek-v4-pro",
                "messages": [{"role": "user", "content": "What is tomorrow's date?"}],
            }
        )
        trace = _read_single_trace(self.writer.session_dir)
        serialized = json.dumps(trace)
        self.assertEqual(trace["completion"]["status"], "completed")
        self.assertEqual(
            trace["request"]["body"]["messages"][0]["content"],
            "What is tomorrow's date?",
        )
        self.assertEqual(
            trace["upstream"]["response"]["body"]["json"]["choices"][0]["message"][
                "reasoning_content"
            ],
            "I need the date.",
        )
        self.assertNotIn("sk-from-cursor", serialized)
    def test_captures_streaming_replay_chunks(self) -> None:
        """Streamed chunks are recorded for both the upstream and Cursor sides."""
        request = self._chat_request(
            {
                "model": "deepseek-v4-pro",
                "stream": True,
                "messages": [{"role": "user", "content": "stream"}],
            },
            api_key="sk-test",
        )
        # The reply is an event stream, not JSON, so drain it without parsing.
        with urlopen(request, timeout=2) as response:
            response.read()
        trace = _read_single_trace(self.writer.session_dir)
        self.assertEqual(trace["completion"]["status"], "completed")
        self.assertIn(
            "reasoning_content",
            trace["upstream"]["stream"]["chunks"][0]["line"],
        )
        self.assertIn(
            "<details>", trace["cursor_response"]["stream"]["chunks"][0]["line"]
        )
    def test_captures_recovery_diagnostics(self) -> None:
        """A request that triggers cold-cache recovery records the recovery
        steps + diagnostic counters in the trace."""
        self._post(
            {
                "model": "deepseek-v4-pro",
                "messages": [
                    {"role": "user", "content": "old"},
                    {
                        "role": "assistant",
                        "content": "",
                        "tool_calls": [
                            {
                                "id": "call_x",
                                "type": "function",
                                "function": {"name": "f", "arguments": "{}"},
                            }
                        ],
                    },
                    {"role": "tool", "tool_call_id": "call_x", "content": "result"},
                    {"role": "user", "content": "new"},
                ],
            }
        )
        trace = _read_single_trace(self.writer.session_dir)
        self.assertEqual(
            trace["transform"]["recovery_steps"][0]["strategy"], "latest_user"
        )
        # At least one diagnostic entry must be flagged as missing.
        missing = [
            item
            for item in trace["transform"]["reasoning_diagnostics"]
            if item["missing"]
        ]
        self.assertGreaterEqual(len(missing), 1)
 # Run this module's tests when the file is executed directly as a script.
 if __name__ == "__main__":
    unittest.main()
--- a/tests/test_transform.py
+++ b/tests/test_transform.py