feat(proxy): mirror reasoning as think tags for cursor (#2)

2026-04-24 17:11:21 +08:00 · 2026-04-24 17:11:21 +08:00 · 1717331057
parent 27f332616a
commit 1717331057
10 changed files with 481 additions and 15 deletions
--- a/.env.example
+++ b/.env.example
@ -10,6 +10,7 @@ DEEPSEEK_MODEL=deepseek-v4-pro
 DEEPSEEK_BASE_URL=https://api.deepseek.com
 DEEPSEEK_THINKING=enabled
 DEEPSEEK_REASONING_EFFORT=high
 CURSOR_DISPLAY_REASONING=true
 PROXY_HOST=127.0.0.1
 PROXY_PORT=9000
--- a/README.md
+++ b/README.md
@ -2,11 +2,19 @@
 A simple proxy that caches and restores DeepSeek `reasoning_content` across tool-call turns in Cursor, making thinking models like `deepseek-v4-pro` and `deepseek-v4-flash` work correctly.
 ## What It Does
 - Caches DeepSeek `reasoning_content` from regular and streamed responses, then restores it on later tool-call turns when Cursor omits it.
 - Mirrors streamed `reasoning_content` into Cursor-visible `<think>...</think>` text so thinking tokens are shown in Cursor BYOK/proxy chats. Cursor currently renders this as normal chat text, not as a native collapsible Thinking block.
 - Provides other compatibility fixes for running Cursor with the DeepSeek official API.
 ## Why This Exists
 DeepSeek thinking mode returns `reasoning_content` separately from final `content`. After an assistant turn with tool calls, DeepSeek requires that same `reasoning_content` to be sent back in later requests. Cursor can omit it in custom OpenAI-compatible flows, causing `The reasoning_content in the thinking mode must be passed back to the API.` This proxy caches reasoning by conversation prefix, message signature, and tool-call IDs, then restores it before forwarding to DeepSeek.
-Thi repo fixes the following error:
+For streamed responses, the proxy also mirrors DeepSeek `reasoning_content` into Cursor-visible `<think>...</think>` content while leaving the original `reasoning_content` field intact. This lets Cursor display the thinking text in OpenAI-compatible BYOK/proxy flows, and the proxy strips those display-only tags from later assistant history before replaying it to DeepSeek.
 This repo fixes the following error:
 ![Error 400 - reasoning_content must be passed back](assets/error_400.png)
@ -41,6 +49,7 @@ Edit `~/.deepseek-cursor-proxy/.env`:
 ```bash
 DEEPSEEK_API_KEY=sk-your-deepseek-key
 PROXY_API_KEY=cursor-local-token
 CURSOR_DISPLAY_REASONING=true
 ```
 Keep `PROXY_API_KEY` set when using ngrok because the proxy will be reachable from the public internet.
@ -91,6 +100,12 @@ Run without ngrok for local curl testing:
 PROXY_NGROK=false deepseek-cursor-proxy --port 9000 --verbose
 ```
 Disable the Cursor display mirror if you only want raw OpenAI-compatible response fields:
 ```bash
 CURSOR_DISPLAY_REASONING=false deepseek-cursor-proxy --verbose
 ```
 Log full request bodies only when needed:
 ```bash
--- a/src/deepseek_cursor_proxy/config.py
+++ b/src/deepseek_cursor_proxy/config.py
@ -129,6 +129,7 @@ class ProxyConfig:
    reasoning_effort: str = "high"
    request_timeout: float = 300.0
    reasoning_content_path: Path = field(default_factory=default_reasoning_content_path)
    cursor_display_reasoning: bool = True
    verbose: bool = False
    log_bodies: bool = False
    ngrok: bool = False
@ -167,6 +168,7 @@ class ProxyConfig:
                ("REASONING_CONTENT_PATH",),
                default_reasoning_content_path(),
            ),
            cursor_display_reasoning=env_bool(values, "CURSOR_DISPLAY_REASONING", True),
            verbose=env_bool(values, "PROXY_VERBOSE", False),
            log_bodies=env_bool(values, "PROXY_LOG_BODIES", False),
            ngrok=env_bool(values, "PROXY_NGROK", False),
--- a/src/deepseek_cursor_proxy/server.py
+++ b/src/deepseek_cursor_proxy/server.py
@ -21,7 +21,7 @@ from .config import (
    default_reasoning_content_path,
 )
 from .reasoning_store import ReasoningStore, conversation_scope
-from .streaming import StreamAccumulator
+from .streaming import CursorReasoningDisplayAdapter, StreamAccumulator
 from .tunnel import NgrokTunnel, local_tunnel_target
 from .transform import prepare_upstream_request, rewrite_response_body
@ -319,16 +319,20 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler):
        self.close_connection = True
        accumulator = StreamAccumulator()
        display_adapter = (
            CursorReasoningDisplayAdapter()
            if self.config.cursor_display_reasoning
            else None
        )
        scope = conversation_scope(request_messages)
        finalized = False
        while True:
            line = response.readline()
            if not line:
                break
-            rewritten = self._rewrite_sse_line(line, original_model, accumulator, scope)
+            rewritten, finalized = self._rewrite_sse_line(
-            if rewritten is None:
+                line, original_model, accumulator, scope, display_adapter
-                finalized = True
+            )
                rewritten = b"data: [DONE]\n\n"
            self.wfile.write(rewritten)
            self.wfile.flush()
            if finalized:
@ -347,10 +351,11 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler):
        original_model: str,
        accumulator: StreamAccumulator,
        scope: str,
-    ) -> bytes | None:
+        display_adapter: CursorReasoningDisplayAdapter | None,
    ) -> tuple[bytes, bool]:
        stripped = line.strip()
        if not stripped.startswith(b"data:"):
-            return line
+            return line, False
        data = stripped[len(b"data:") :].strip()
        if data == b"[DONE]":
@ -359,15 +364,22 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler):
            stored = accumulator.store_reasoning(self.reasoning_store, scope)
            if stored:
                LOG.info("stored %s streaming reasoning cache key(s)", stored)
-            return None
+            if display_adapter is None:
                return b"data: [DONE]\n\n", True
            closing_chunk = display_adapter.flush_chunk(original_model)
            if closing_chunk is None:
                return b"data: [DONE]\n\n", True
            return sse_data(closing_chunk) + b"data: [DONE]\n\n", True
        try:
            chunk = json.loads(data.decode("utf-8"))
        except (json.JSONDecodeError, UnicodeDecodeError):
-            return line
+            return line, False
        if isinstance(chunk, dict):
            accumulator.ingest_chunk(chunk)
            if display_adapter is not None:
                display_adapter.rewrite_chunk(chunk)
            if "model" in chunk:
                chunk["model"] = original_model
            ending = b"\r\n" if line.endswith(b"\r\n") else b"\n"
@ -377,8 +389,8 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler):
                    "utf-8"
                )
                + ending
-            )
+            ), False
-        return line
+        return line, False
 def build_arg_parser() -> argparse.ArgumentParser:
@ -424,6 +436,11 @@ def build_arg_parser() -> argparse.ArgumentParser:
        action="store_true",
        help="Log normalized upstream request bodies",
    )
    parser.add_argument(
        "--no-cursor-display-reasoning",
        action="store_true",
        help="Do not mirror reasoning_content into Cursor-visible <think> content",
    )
    return parser
@ -448,6 +465,14 @@ def log_bytes(label: str, body: bytes) -> None:
    log_json(label, payload)
 def sse_data(payload: dict[str, Any]) -> bytes:
    """Serialize ``payload`` as a single server-sent-event ``data:`` frame.

    The payload is dumped as compact JSON (no whitespace after separators,
    non-ASCII characters kept verbatim), encoded as UTF-8, prefixed with
    ``data: `` and terminated by a blank line.
    """
    encoded_json = json.dumps(
        payload, ensure_ascii=False, separators=(",", ":")
    ).encode("utf-8")
    return b"".join((b"data: ", encoded_json, b"\n\n"))
 def summarize_chat_payload(payload: dict[str, Any]) -> str:
    messages = payload.get("messages")
    tools = payload.get("tools")
@ -498,6 +523,8 @@ def main(argv: list[str] | None = None) -> int:
        updates["verbose"] = True
    if args.log_bodies:
        updates["log_bodies"] = True
    if args.no_cursor_display_reasoning:
        updates["cursor_display_reasoning"] = False
    if updates:
        config = replace(config, **updates)
@ -519,9 +546,10 @@ def main(argv: list[str] | None = None) -> int:
        config.upstream_model,
    )
    LOG.info(
-        "thinking=%s reasoning_effort=%s reasoning_content_path=%s",
+        "thinking=%s reasoning_effort=%s cursor_display_reasoning=%s reasoning_content_path=%s",
        config.thinking,
        config.reasoning_effort,
        config.cursor_display_reasoning,
        config.reasoning_content_path,
    )
    if config.verbose:
--- a/src/deepseek_cursor_proxy/streaming.py
+++ b/src/deepseek_cursor_proxy/streaming.py
@ -1,11 +1,16 @@
 from __future__ import annotations
 from dataclasses import dataclass, field
 import time
 from typing import Any
 from .reasoning_store import ReasoningStore
 # Text markers placed around reasoning that is mirrored into streamed
 # ``content``: the opener precedes the first mirrored reasoning text and the
 # closer follows the last one.
 THINKING_BLOCK_START = "<think>\n"
 THINKING_BLOCK_END = "\n</think>\n\n"
@dataclass
 class StreamingChoice:
    role: str = "assistant"
@ -109,3 +114,80 @@ class StreamAccumulator:
                function["arguments"] = (function.get("arguments") or "") + str(
                    function_delta["arguments"]
                )
 class CursorReasoningDisplayAdapter:
    """Mirror reasoning_content into content for Cursor's visible thinking UI path."""
    def __init__(self) -> None:
        self._open_choices: set[int] = set()
        self._last_chunk_metadata: dict[str, Any] = {}
    def rewrite_chunk(self, chunk: dict[str, Any]) -> None:
        self._remember_chunk_metadata(chunk)
        choices = chunk.get("choices")
        if not isinstance(choices, list):
            return
        for raw_choice in choices:
            if not isinstance(raw_choice, dict):
                continue
            index = int(raw_choice.get("index") or 0)
            delta = raw_choice.get("delta")
            if not isinstance(delta, dict):
                delta = {}
                raw_choice["delta"] = delta
            mirrored_parts: list[str] = []
            reasoning_content = delta.get("reasoning_content")
            if isinstance(reasoning_content, str) and reasoning_content:
                if index not in self._open_choices:
                    mirrored_parts.append(THINKING_BLOCK_START)
                    self._open_choices.add(index)
                mirrored_parts.append(reasoning_content)
            existing_content = delta.get("content")
            should_close = index in self._open_choices and (
                bool(existing_content)
                or bool(delta.get("tool_calls"))
                or raw_choice.get("finish_reason") is not None
            )
            if should_close:
                mirrored_parts.append(THINKING_BLOCK_END)
                self._open_choices.discard(index)
            if not mirrored_parts:
                continue
            if isinstance(existing_content, str):
                mirrored_parts.append(existing_content)
            delta["content"] = "".join(mirrored_parts)
    def flush_chunk(self, model: str) -> dict[str, Any] | None:
        if not self._open_choices:
            return None
        choices = [
            {
                "index": index,
                "delta": {"content": THINKING_BLOCK_END},
                "finish_reason": None,
            }
            for index in sorted(self._open_choices)
        ]
        self._open_choices.clear()
        chunk: dict[str, Any] = {
            "id": self._last_chunk_metadata.get("id", "chatcmpl-reasoning-close"),
            "object": self._last_chunk_metadata.get("object", "chat.completion.chunk"),
            "created": self._last_chunk_metadata.get("created", int(time.time())),
            "model": model,
            "choices": choices,
        }
        return chunk
    def _remember_chunk_metadata(self, chunk: dict[str, Any]) -> None:
        metadata = {
            key: chunk[key] for key in ("id", "object", "created") if key in chunk
        }
        if metadata:
            self._last_chunk_metadata.update(metadata)
--- a/src/deepseek_cursor_proxy/transform.py
+++ b/src/deepseek_cursor_proxy/transform.py
@ -2,6 +2,7 @@ from __future__ import annotations
 from dataclasses import dataclass
 import json
 import re
 from typing import Any
 from .config import ProxyConfig
@ -60,6 +61,11 @@ EFFORT_ALIASES = {
    "xhigh": "max",
 }
 # Matches a <think> / <thinking> block (case-insensitive) through its closing
 # tag, or through the end of the text when no closing tag follows, together
 # with any whitespace right after it.
 CURSOR_THINKING_BLOCK_RE = re.compile(
    r"<(?:think|thinking)>[\s\S]*?(?:</(?:think|thinking)>|$)\s*",
    re.IGNORECASE,
 )
@dataclass(frozen=True)
 class PreparedRequest:
@ -102,6 +108,10 @@ def extract_text_content(content: Any) -> str | None:
    return str(content)
 def strip_cursor_thinking_blocks(content: str) -> str:
    """Return ``content`` without think-tag blocks or leading line breaks.

    Every match of ``CURSOR_THINKING_BLOCK_RE`` is removed, then any leading
    carriage-return / newline characters are trimmed from the result.
    """
    without_blocks = CURSOR_THINKING_BLOCK_RE.sub("", content)
    return without_blocks.lstrip("\r\n")
 def normalize_tool_call(tool_call: Any) -> dict[str, Any]:
    if not isinstance(tool_call, dict):
        tool_call = {}
@ -190,6 +200,8 @@ def normalize_message(
        normalized["content"] = extract_text_content(normalized["content"]) or ""
    elif normalized["role"] in {"assistant", "tool", "system", "user"}:
        normalized["content"] = ""
    if normalized["role"] == "assistant" and isinstance(normalized.get("content"), str):
        normalized["content"] = strip_cursor_thinking_blocks(normalized["content"])
    if normalized.get("tool_calls"):
        normalized["tool_calls"] = [
--- a/tests/test_config.py
+++ b/tests/test_config.py
@ -109,6 +109,17 @@ class ConfigTests(unittest.TestCase):
        self.assertTrue(config.log_bodies)
        self.assertTrue(config.ngrok)
    def test_cursor_reasoning_display_can_be_disabled_from_env(self) -> None:
        """``CURSOR_DISPLAY_REASONING=false`` in the env disables the setting."""
        environment = {
            "DEEPSEEK_API_KEY": "key",
            "CURSOR_DISPLAY_REASONING": "false",
        }
        missing_env_file = Path("/does/not/exist")
        config = ProxyConfig.from_env(
            env=environment,
            env_file_path=missing_env_file,
        )
        self.assertFalse(config.cursor_display_reasoning)
    def test_config_path_can_be_overridden_from_environment(self) -> None:
        with TemporaryDirectory() as temp_dir:
            first_env_path = Path(temp_dir) / "first.env"
--- a/tests/test_proxy_end_to_end.py
+++ b/tests/test_proxy_end_to_end.py
@ -9,7 +9,11 @@ from urllib.error import HTTPError
 from urllib.request import Request, urlopen
 from deepseek_cursor_proxy.config import ProxyConfig
-from deepseek_cursor_proxy.reasoning_store import ReasoningStore
+from deepseek_cursor_proxy.reasoning_store import (
    ReasoningStore,
    conversation_scope,
    message_signature,
 )
 from deepseek_cursor_proxy.server import DeepSeekProxyHandler, DeepSeekProxyServer
@ -184,6 +188,68 @@ class SlowAfterDoneStreamingDeepSeekHandler(BaseHTTPRequestHandler):
        time.sleep(2)
 class ReasoningStreamingDeepSeekHandler(BaseHTTPRequestHandler):
    """Fake upstream that streams two reasoning deltas, an answer, then a stop."""

    def log_message(self, fmt: str, *args: object) -> None:
        # Deliberate no-op override: nothing is logged per request.
        return None

    def do_POST(self) -> None:
        """Reply to any POST with a fixed SSE stream followed by ``[DONE]``."""
        self.send_response(200)
        self.send_header("Content-Type", "text/event-stream")
        self.end_headers()
        # (delta, finish_reason) for each streamed chunk, in send order.
        steps = (
            ({"role": "assistant", "reasoning_content": "Need "}, None),
            ({"reasoning_content": "context."}, None),
            ({"content": FINAL_CONTENT}, None),
            ({}, "stop"),
        )
        for delta, finish_reason in steps:
            event = {
                "id": "chatcmpl-stream",
                "object": "chat.completion.chunk",
                "created": 1,
                "model": "deepseek-v4-pro",
                "choices": [
                    {
                        "index": 0,
                        "delta": delta,
                        "finish_reason": finish_reason,
                    }
                ],
            }
            self.wfile.write(f"data: {json.dumps(event)}\n\n".encode("utf-8"))
        self.wfile.write(b"data: [DONE]\n\n")
        self.wfile.flush()
 def tool_call_response() -> dict:
    return {
        "id": "chatcmpl-tool",
@ -555,6 +621,79 @@ class StreamingProxyTests(unittest.TestCase):
        self.assertIn("data: [DONE]", body)
 class ReasoningStreamingProxyTests(unittest.TestCase):
    """End-to-end test of the proxy in front of a reasoning-streaming upstream."""

    def setUp(self) -> None:
        upstream_server = ThreadingHTTPServer(
            ("127.0.0.1", 0), ReasoningStreamingDeepSeekHandler
        )
        self.upstream = ServerFixture(upstream_server).start()
        self.store = ReasoningStore(":memory:")
        proxy_server = DeepSeekProxyServer(("127.0.0.1", 0), DeepSeekProxyHandler)
        proxy_server.config = ProxyConfig(
            upstream_api_key="upstream-key",
            proxy_api_key="cursor-local-token",
            upstream_base_url=self.upstream.url,
            upstream_model="deepseek-v4-pro",
        )
        proxy_server.reasoning_store = self.store
        self.proxy = ServerFixture(proxy_server).start()

    def tearDown(self) -> None:
        self.proxy.close()
        self.upstream.close()
        self.store.close()

    def test_streaming_proxy_mirrors_reasoning_for_cursor_display(
        self,
    ) -> None:
        """Streamed deltas carry think-tagged content and reasoning is stored."""
        request_messages = [{"role": "user", "content": "stream reasoning"}]
        payload = {
            "model": "deepseek-v4-pro",
            "stream": True,
            "messages": request_messages,
        }
        request = Request(
            f"{self.proxy.url}/v1/chat/completions",
            data=json.dumps(payload).encode("utf-8"),
            method="POST",
            headers={
                "Authorization": "Bearer cursor-local-token",
                "Content-Type": "application/json",
            },
        )
        with urlopen(request, timeout=2) as response:
            body = response.read().decode("utf-8")

        # Keep only the JSON-carrying SSE lines (this skips "data: [DONE]").
        events = []
        for line in body.splitlines():
            if line.startswith("data: {"):
                events.append(json.loads(line.removeprefix("data: ")))

        opening_delta = events[0]["choices"][0]["delta"]
        self.assertEqual(opening_delta["content"], "<think>\nNeed ")
        self.assertEqual(opening_delta["reasoning_content"], "Need ")
        self.assertEqual(events[1]["choices"][0]["delta"]["content"], "context.")
        self.assertEqual(
            events[2]["choices"][0]["delta"]["content"],
            "\n</think>\n\n" + FINAL_CONTENT,
        )

        expected_message = {
            "role": "assistant",
            "content": FINAL_CONTENT,
            "reasoning_content": "Need context.",
        }
        cache_key = (
            "scope:"
            + conversation_scope(request_messages)
            + ":signature:"
            + message_signature(expected_message)
        )
        self.assertEqual(self.store.get(cache_key), "Need context.")
 def first_cursor_request() -> dict:
    return {
        "model": "deepseek-v4-pro",
--- a/tests/test_streaming.py
+++ b/tests/test_streaming.py
@ -3,7 +3,10 @@ from __future__ import annotations
 import unittest
 from deepseek_cursor_proxy.reasoning_store import ReasoningStore, conversation_scope
-from deepseek_cursor_proxy.streaming import StreamAccumulator
+from deepseek_cursor_proxy.streaming import (
    CursorReasoningDisplayAdapter,
    StreamAccumulator,
 )
 class StreamAccumulatorTests(unittest.TestCase):
@ -103,5 +106,101 @@ class StreamAccumulatorTests(unittest.TestCase):
        )
 class CursorReasoningDisplayAdapterTests(unittest.TestCase):
    """Unit tests for CursorReasoningDisplayAdapter's chunk rewriting."""

    def test_mirrors_reasoning_content_into_think_tagged_content(self) -> None:
        """Reasoning opens a think block; the first answer delta closes it."""
        # The adapter mutates deltas in place, so keep direct references.
        thinking_delta = {"reasoning_content": "Need context."}
        reply_delta = {"content": "Final answer."}
        adapter = CursorReasoningDisplayAdapter()

        adapter.rewrite_chunk(
            {
                "id": "chatcmpl-stream",
                "object": "chat.completion.chunk",
                "created": 1,
                "model": "deepseek-v4-pro",
                "choices": [
                    {
                        "index": 0,
                        "delta": thinking_delta,
                        "finish_reason": None,
                    }
                ],
            }
        )
        adapter.rewrite_chunk(
            {
                "choices": [
                    {
                        "index": 0,
                        "delta": reply_delta,
                        "finish_reason": None,
                    }
                ],
            }
        )

        self.assertEqual(thinking_delta["reasoning_content"], "Need context.")
        self.assertEqual(thinking_delta["content"], "<think>\nNeed context.")
        self.assertEqual(reply_delta["content"], "\n</think>\n\nFinal answer.")

    def test_closes_thinking_block_before_tool_calls(self) -> None:
        """A tool-call delta after reasoning receives the closing tag as content."""
        adapter = CursorReasoningDisplayAdapter()
        adapter.rewrite_chunk(
            {
                "choices": [
                    {
                        "index": 0,
                        "delta": {"reasoning_content": "Need a tool."},
                    }
                ]
            }
        )
        lookup_call = {
            "index": 0,
            "id": "call_1",
            "type": "function",
            "function": {"name": "lookup", "arguments": "{}"},
        }
        tool_delta = {"tool_calls": [lookup_call]}

        adapter.rewrite_chunk({"choices": [{"index": 0, "delta": tool_delta}]})

        self.assertEqual(tool_delta["content"], "\n</think>\n\n")

    def test_flush_chunk_closes_unfinished_thinking_block_at_done(self) -> None:
        """flush_chunk emits one closing chunk, then nothing on a second call."""
        adapter = CursorReasoningDisplayAdapter()
        unfinished_chunk = {
            "id": "chatcmpl-stream",
            "object": "chat.completion.chunk",
            "created": 1,
            "choices": [
                {
                    "index": 0,
                    "delta": {"reasoning_content": "Still thinking."},
                }
            ],
        }
        adapter.rewrite_chunk(unfinished_chunk)

        closing_chunk = adapter.flush_chunk("deepseek-v4-pro")

        self.assertIsNotNone(closing_chunk)
        assert closing_chunk is not None
        self.assertEqual(closing_chunk["model"], "deepseek-v4-pro")
        closing_delta = closing_chunk["choices"][0]["delta"]
        self.assertEqual(closing_delta["content"], "\n</think>\n\n")
        self.assertIsNone(adapter.flush_chunk("deepseek-v4-pro"))
 if __name__ == "__main__":
    unittest.main()
--- a/tests/test_transform.py
+++ b/tests/test_transform.py
@ -9,6 +9,7 @@ from deepseek_cursor_proxy.transform import (
    extract_text_content,
    prepare_upstream_request,
    rewrite_response_body,
    strip_cursor_thinking_blocks,
 )
@ -31,6 +32,37 @@ class TransformTests(unittest.TestCase):
            "hello\n[image_url omitted by DeepSeek text proxy]\nworld",
        )
    def test_strips_cursor_display_thinking_blocks_from_assistant_content(
        self,
    ) -> None:
        """A leading think block and its trailing whitespace are removed."""
        mirrored_text = "<think>\nNeed context.\n</think>\n\nFinal answer."
        stripped_text = strip_cursor_thinking_blocks(mirrored_text)
        self.assertEqual(stripped_text, "Final answer.")
    def test_prepares_assistant_content_without_mirrored_thinking_blocks(
        self,
    ) -> None:
        """Prepared assistant history no longer contains the mirrored think block."""
        conversation = [
            {"role": "user", "content": "hello"},
            {
                "role": "assistant",
                "content": "<think>\nHidden.\n</think>\n\nVisible answer.",
            },
            {"role": "user", "content": "continue"},
        ]
        prepared = prepare_upstream_request(
            {"model": "deepseek-v4-pro", "messages": conversation},
            ProxyConfig(upstream_api_key="key"),
            self.store,
        )
        assistant_message = prepared.payload["messages"][1]
        self.assertEqual(assistant_message["content"], "Visible answer.")
    def test_prepares_thinking_request_and_converts_legacy_functions(self) -> None:
        payload = {
            "model": "deepseek-v4-flash",
@ -349,6 +381,51 @@ class TransformTests(unittest.TestCase):
            "Need to call the file tool.",
        )
    def test_restores_reasoning_when_cursor_history_contains_mirrored_think_block(
        self,
    ) -> None:
        """Stored reasoning is restored when history content is only a think block."""
        reasoning_text = "Need to call the file tool."
        prior = [{"role": "user", "content": "inspect repo"}]
        tool_call = {
            "id": "call_original",
            "type": "function",
            "function": {
                "name": "read_file",
                "arguments": '{"path":"README.md"}',
            },
        }
        # Seed the store with the assistant turn as it was originally produced.
        original_turn = {
            "role": "assistant",
            "content": "",
            "reasoning_content": reasoning_text,
            "tool_calls": [tool_call],
        }
        self.store.store_assistant_message(original_turn, conversation_scope(prior))

        # The replayed history has the mirrored think block as content and no
        # reasoning_content field.
        replayed_turn = {
            "role": "assistant",
            "content": "<think>\nNeed to call the file tool.\n</think>\n\n",
            "tool_calls": [tool_call],
        }
        prepared = prepare_upstream_request(
            {
                "model": "deepseek-v4-pro",
                "messages": [*prior, replayed_turn],
            },
            ProxyConfig(upstream_api_key="key"),
            self.store,
        )

        self.assertEqual(prepared.patched_reasoning_messages, 1)
        restored_turn = prepared.payload["messages"][1]
        self.assertEqual(restored_turn["content"], "")
        self.assertEqual(restored_turn["reasoning_content"], reasoning_text)
    def test_adds_fallback_reasoning_for_uncached_assistant_tool_call(self) -> None:
        payload = {
            "model": "deepseek-v4-pro",