From 17173310575b9ee0f3b094f76a266e15aed7f5c2 Mon Sep 17 00:00:00 2001
From: Yixing Lao <yixing.lao@gmail.com>
Date: Fri, 24 Apr 2026 17:11:21 +0800
Subject: [PATCH] feat(proxy): mirror reasoning as think tags for cursor (#2)

---
 .env.example                           |   1 +
 README.md                              |  17 ++-
 src/deepseek_cursor_proxy/config.py    |   2 +
 src/deepseek_cursor_proxy/server.py    |  52 ++++++---
 src/deepseek_cursor_proxy/streaming.py |  82 ++++++++++++++
 src/deepseek_cursor_proxy/transform.py |  12 +++
 tests/test_config.py                   |  11 ++
 tests/test_proxy_end_to_end.py         | 141 ++++++++++++++++++++++++-
 tests/test_streaming.py                | 101 +++++++++++++++++-
 tests/test_transform.py                |  77 ++++++++++++++
 10 files changed, 481 insertions(+), 15 deletions(-)
diff --git a/.env.example b/.env.example
index 0f5d4b8..738b99b 100644
--- a/.env.example
+++ b/.env.example
@@ -10,6 +10,7 @@ DEEPSEEK_MODEL=deepseek-v4-pro
 DEEPSEEK_BASE_URL=https://api.deepseek.com
 DEEPSEEK_THINKING=enabled
 DEEPSEEK_REASONING_EFFORT=high
+CURSOR_DISPLAY_REASONING=true
 
 PROXY_HOST=127.0.0.1
 PROXY_PORT=9000
diff --git a/README.md b/README.md
index 48b6057..dbd1ce2 100644
--- a/README.md
+++ b/README.md
@@ -2,11 +2,19 @@
 
 A simple proxy that caches and restores DeepSeek `reasoning_content` across tool-call turns in Cursor, making thinking models like `deepseek-v4-pro` and `deepseek-v4-flash` work correctly.
 
+## What It Does
+
+- Caches DeepSeek `reasoning_content` from regular and streamed responses, then restores it on later tool-call turns when Cursor omits it.
+- Mirrors streamed `reasoning_content` into Cursor-visible `<think>...</think>` text so thinking tokens are shown in Cursor BYOK/proxy chats. Cursor currently renders this as normal chat text, not as a native collapsible Thinking block.
+- Provides other compatibility fixes for running Cursor with the DeepSeek official API.
+
 ## Why This Exists
 
 DeepSeek thinking mode returns `reasoning_content` separately from final `content`. After an assistant turn with tool calls, DeepSeek requires that same `reasoning_content` to be sent back in later requests. Cursor can omit it in custom OpenAI-compatible flows, causing `The reasoning_content in the thinking mode must be passed back to the API.` This proxy caches reasoning by conversation prefix, message signature, and tool-call IDs, then restores it before forwarding to DeepSeek.
 
-Thi repo fixes the following error:
+For streamed responses, the proxy also mirrors DeepSeek `reasoning_content` into Cursor-visible `<think>...</think>` content while leaving the original `reasoning_content` field intact. This lets Cursor display the thinking text in OpenAI-compatible BYOK/proxy flows, and the proxy strips those display-only tags from later assistant history before replaying it to DeepSeek.
+
+This repo fixes the following error:
 
 ![Error 400 - reasoning_content must be passed back](assets/error_400.png)
 
@@ -41,6 +49,7 @@ Edit `~/.deepseek-cursor-proxy/.env`:
 ```bash
 DEEPSEEK_API_KEY=sk-your-deepseek-key
 PROXY_API_KEY=cursor-local-token
+CURSOR_DISPLAY_REASONING=true
 ```
 
 Keep `PROXY_API_KEY` set when using ngrok because the proxy will be reachable from the public internet.
@@ -91,6 +100,12 @@ Run without ngrok for local curl testing:
 PROXY_NGROK=false deepseek-cursor-proxy --port 9000 --verbose
 ```
 
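+Disable the Cursor display mirror, either with the environment variable shown below or with the `--no-cursor-display-reasoning` flag, if you only want raw OpenAI-compatible response fields: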
+
+```bash
+CURSOR_DISPLAY_REASONING=false deepseek-cursor-proxy --verbose
+```
+
 Log full request bodies only when needed:
 
 ```bash
diff --git a/src/deepseek_cursor_proxy/config.py b/src/deepseek_cursor_proxy/config.py
index d9c7759..ecc4aa5 100644
--- a/src/deepseek_cursor_proxy/config.py
+++ b/src/deepseek_cursor_proxy/config.py
@@ -129,6 +129,7 @@ class ProxyConfig:
     reasoning_effort: str = "high"
     request_timeout: float = 300.0
     reasoning_content_path: Path = field(default_factory=default_reasoning_content_path)
+    cursor_display_reasoning: bool = True
     verbose: bool = False
     log_bodies: bool = False
     ngrok: bool = False
@@ -167,6 +168,7 @@ class ProxyConfig:
                 ("REASONING_CONTENT_PATH",),
                 default_reasoning_content_path(),
             ),
+            cursor_display_reasoning=env_bool(values, "CURSOR_DISPLAY_REASONING", True),
             verbose=env_bool(values, "PROXY_VERBOSE", False),
             log_bodies=env_bool(values, "PROXY_LOG_BODIES", False),
             ngrok=env_bool(values, "PROXY_NGROK", False),
diff --git a/src/deepseek_cursor_proxy/server.py b/src/deepseek_cursor_proxy/server.py
index e0784df..4c15d92 100644
--- a/src/deepseek_cursor_proxy/server.py
+++ b/src/deepseek_cursor_proxy/server.py
@@ -21,7 +21,7 @@ from .config import (
     default_reasoning_content_path,
 )
 from .reasoning_store import ReasoningStore, conversation_scope
-from .streaming import StreamAccumulator
+from .streaming import CursorReasoningDisplayAdapter, StreamAccumulator
 from .tunnel import NgrokTunnel, local_tunnel_target
 from .transform import prepare_upstream_request, rewrite_response_body
 
@@ -319,16 +319,20 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler):
         self.close_connection = True
 
         accumulator = StreamAccumulator()
+        display_adapter = (
+            CursorReasoningDisplayAdapter()
+            if self.config.cursor_display_reasoning
+            else None
+        )
         scope = conversation_scope(request_messages)
         finalized = False
         while True:
             line = response.readline()
             if not line:
                 break
-            rewritten = self._rewrite_sse_line(line, original_model, accumulator, scope)
-            if rewritten is None:
-                finalized = True
-                rewritten = b"data: [DONE]\n\n"
+            rewritten, finalized = self._rewrite_sse_line(
+                line, original_model, accumulator, scope, display_adapter
+            )
             self.wfile.write(rewritten)
             self.wfile.flush()
             if finalized:
@@ -347,10 +351,11 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler):
         original_model: str,
         accumulator: StreamAccumulator,
         scope: str,
-    ) -> bytes | None:
+        display_adapter: CursorReasoningDisplayAdapter | None,
+    ) -> tuple[bytes, bool]:
         stripped = line.strip()
         if not stripped.startswith(b"data:"):
-            return line
+            return line, False
 
         data = stripped[len(b"data:") :].strip()
         if data == b"[DONE]":
@@ -359,15 +364,22 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler):
             stored = accumulator.store_reasoning(self.reasoning_store, scope)
             if stored:
                 LOG.info("stored %s streaming reasoning cache key(s)", stored)
-            return None
+            if display_adapter is None:
+                return b"data: [DONE]\n\n", True
+            closing_chunk = display_adapter.flush_chunk(original_model)
+            if closing_chunk is None:
+                return b"data: [DONE]\n\n", True
+            return sse_data(closing_chunk) + b"data: [DONE]\n\n", True
 
         try:
             chunk = json.loads(data.decode("utf-8"))
         except (json.JSONDecodeError, UnicodeDecodeError):
-            return line
+            return line, False
 
         if isinstance(chunk, dict):
             accumulator.ingest_chunk(chunk)
+            if display_adapter is not None:
+                display_adapter.rewrite_chunk(chunk)
             if "model" in chunk:
                 chunk["model"] = original_model
             ending = b"\r\n" if line.endswith(b"\r\n") else b"\n"
@@ -377,8 +389,8 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler):
                     "utf-8"
                 )
                 + ending
-            )
-        return line
+            ), False
+        return line, False
 
 
 def build_arg_parser() -> argparse.ArgumentParser:
@@ -424,6 +436,11 @@ def build_arg_parser() -> argparse.ArgumentParser:
         action="store_true",
         help="Log normalized upstream request bodies",
     )
+    parser.add_argument(
+        "--no-cursor-display-reasoning",
+        action="store_true",
+        help="Do not mirror reasoning_content into Cursor-visible <think> content",
+    )
     return parser
 
 
@@ -448,6 +465,14 @@ def log_bytes(label: str, body: bytes) -> None:
     log_json(label, payload)
 
 
+def sse_data(payload: dict[str, Any]) -> bytes:
+    """Serialize ``payload`` as one compact-JSON SSE ``data:`` event."""
+    # ensure_ascii=False keeps non-ASCII text as UTF-8 rather than \u escapes.
+    text = json.dumps(payload, ensure_ascii=False, separators=(",", ":"))
+    encoded = text.encode("utf-8")
+    return b"data: " + encoded + b"\n\n"
+
+
 def summarize_chat_payload(payload: dict[str, Any]) -> str:
     messages = payload.get("messages")
     tools = payload.get("tools")
@@ -498,6 +523,8 @@ def main(argv: list[str] | None = None) -> int:
         updates["verbose"] = True
     if args.log_bodies:
         updates["log_bodies"] = True
+    if args.no_cursor_display_reasoning:
+        updates["cursor_display_reasoning"] = False
     if updates:
         config = replace(config, **updates)
 
@@ -519,9 +546,10 @@ def main(argv: list[str] | None = None) -> int:
         config.upstream_model,
     )
     LOG.info(
-        "thinking=%s reasoning_effort=%s reasoning_content_path=%s",
+        "thinking=%s reasoning_effort=%s cursor_display_reasoning=%s reasoning_content_path=%s",
         config.thinking,
         config.reasoning_effort,
+        config.cursor_display_reasoning,
         config.reasoning_content_path,
     )
     if config.verbose:
diff --git a/src/deepseek_cursor_proxy/streaming.py b/src/deepseek_cursor_proxy/streaming.py
index 06b16d2..9221401 100644
--- a/src/deepseek_cursor_proxy/streaming.py
+++ b/src/deepseek_cursor_proxy/streaming.py
@@ -1,11 +1,16 @@
 from __future__ import annotations
 
 from dataclasses import dataclass, field
+import time
 from typing import Any
 
 from .reasoning_store import ReasoningStore
 
 
+THINKING_BLOCK_START = "<think>\n"
+THINKING_BLOCK_END = "\n</think>\n\n"
+
+
 @dataclass
 class StreamingChoice:
     role: str = "assistant"
@@ -109,3 +114,110 @@ class StreamAccumulator:
                 function["arguments"] = (function.get("arguments") or "") + str(
                     function_delta["arguments"]
                 )
+
+
+class CursorReasoningDisplayAdapter:
+    """Mirror streamed ``reasoning_content`` into Cursor-visible ``content``.
+
+    Each choice's reasoning is wrapped in ``THINKING_BLOCK_START`` /
+    ``THINKING_BLOCK_END`` and written into ``delta["content"]``; the original
+    ``reasoning_content`` field is left untouched. Open/closed state is kept
+    on the instance, keyed by choice index.
+    """
+
+    def __init__(self) -> None:
+        # Choice indexes whose thinking block is currently open.
+        self._open_choices: set[int] = set()
+        # Most recent id/object/created values, reused by flush_chunk().
+        self._last_chunk_metadata: dict[str, Any] = {}
+
+    def rewrite_chunk(self, chunk: dict[str, Any]) -> None:
+        """Rewrite ``chunk`` in place so reasoning also appears as content.
+
+        A block opens on a choice's first reasoning delta and closes as soon as
+        that choice carries answer text, tool calls, or a ``finish_reason``.
+        """
+        self._remember_chunk_metadata(chunk)
+        choices = chunk.get("choices")
+        if not isinstance(choices, list):
+            return
+
+        for raw_choice in choices:
+            if not isinstance(raw_choice, dict):
+                continue
+            index = self._choice_index(raw_choice)
+            delta = raw_choice.get("delta")
+            if not isinstance(delta, dict):
+                delta = {}
+                raw_choice["delta"] = delta
+
+            existing_content = delta.get("content")
+            if existing_content is not None and not isinstance(existing_content, str):
+                # Structured content cannot be merged with mirrored text; leave
+                # the delta untouched rather than overwrite it.
+                continue
+
+            mirrored_parts: list[str] = []
+            reasoning_content = delta.get("reasoning_content")
+            if isinstance(reasoning_content, str) and reasoning_content:
+                if index not in self._open_choices:
+                    mirrored_parts.append(THINKING_BLOCK_START)
+                    self._open_choices.add(index)
+                mirrored_parts.append(reasoning_content)
+
+            should_close = index in self._open_choices and (
+                bool(existing_content)
+                or bool(delta.get("tool_calls"))
+                or raw_choice.get("finish_reason") is not None
+            )
+            if should_close:
+                mirrored_parts.append(THINKING_BLOCK_END)
+                self._open_choices.discard(index)
+
+            if not mirrored_parts:
+                continue
+            if existing_content:
+                mirrored_parts.append(existing_content)
+            delta["content"] = "".join(mirrored_parts)
+
+    def flush_chunk(self, model: str) -> dict[str, Any] | None:
+        """Return a chunk closing every still-open block, or None if none are open.
+
+        The chunk reuses the last seen ``id``/``object``/``created`` values and
+        reports ``model``; open state is cleared, so a repeat call returns None.
+        """
+        if not self._open_choices:
+            return None
+
+        choices = [
+            {
+                "index": index,
+                "delta": {"content": THINKING_BLOCK_END},
+                "finish_reason": None,
+            }
+            for index in sorted(self._open_choices)
+        ]
+        self._open_choices.clear()
+
+        return {
+            "id": self._last_chunk_metadata.get("id", "chatcmpl-reasoning-close"),
+            "object": self._last_chunk_metadata.get("object", "chat.completion.chunk"),
+            "created": self._last_chunk_metadata.get("created", int(time.time())),
+            "model": model,
+            "choices": choices,
+        }
+
+    @staticmethod
+    def _choice_index(raw_choice: dict[str, Any]) -> int:
+        """Return the choice index, falling back to 0 when missing or malformed."""
+        try:
+            return int(raw_choice.get("index") or 0)
+        except (TypeError, ValueError, OverflowError):
+            # A bad index must not abort the stream mid-response.
+            return 0
+
+    def _remember_chunk_metadata(self, chunk: dict[str, Any]) -> None:
+        """Record ``id``/``object``/``created`` from ``chunk`` for flush_chunk()."""
+        self._last_chunk_metadata.update(
+            (key, chunk[key]) for key in ("id", "object", "created") if key in chunk
+        )
diff --git a/src/deepseek_cursor_proxy/transform.py b/src/deepseek_cursor_proxy/transform.py
index 4d30acb..51945c6 100644
--- a/src/deepseek_cursor_proxy/transform.py
+++ b/src/deepseek_cursor_proxy/transform.py
@@ -2,6 +2,7 @@ from __future__ import annotations
 
 from dataclasses import dataclass
 import json
+import re
 from typing import Any
 
 from .config import ProxyConfig
@@ -60,6 +61,11 @@ EFFORT_ALIASES = {
     "xhigh": "max",
 }
 
+CURSOR_THINKING_BLOCK_RE = re.compile(
+    r"<(?:think|thinking)>[\s\S]*?(?:</(?:think|thinking)>|$)\s*",
+    re.IGNORECASE,
+)  # "|$" lets an unclosed block match through the end; "\s*" eats what follows
+
 
 @dataclass(frozen=True)
 class PreparedRequest:
@@ -102,6 +108,10 @@ def extract_text_content(content: Any) -> str | None:
     return str(content)
 
 
+def strip_cursor_thinking_blocks(content: str) -> str:  # drop <think>/<thinking> blocks
+    return CURSOR_THINKING_BLOCK_RE.sub("", content).lstrip("\r\n")
+
+
 def normalize_tool_call(tool_call: Any) -> dict[str, Any]:
     if not isinstance(tool_call, dict):
         tool_call = {}
@@ -190,6 +200,8 @@ def normalize_message(
         normalized["content"] = extract_text_content(normalized["content"]) or ""
     elif normalized["role"] in {"assistant", "tool", "system", "user"}:
         normalized["content"] = ""
+    if normalized["role"] == "assistant" and isinstance(normalized.get("content"), str):
+        normalized["content"] = strip_cursor_thinking_blocks(normalized["content"])
 
     if normalized.get("tool_calls"):
         normalized["tool_calls"] = [
diff --git a/tests/test_config.py b/tests/test_config.py
index f03998a..07679ac 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -109,6 +109,17 @@ class ConfigTests(unittest.TestCase):
         self.assertTrue(config.log_bodies)
         self.assertTrue(config.ngrok)
 
+    def test_cursor_reasoning_display_can_be_disabled_from_env(self) -> None:
+        config = ProxyConfig.from_env(
+            env={
+                "DEEPSEEK_API_KEY": "key",
+                "CURSOR_DISPLAY_REASONING": "false",  # the field defaults to True
+            },
+            env_file_path=Path("/does/not/exist"),  # presumably avoids a real .env
+        )
+
+        self.assertFalse(config.cursor_display_reasoning)
+
     def test_config_path_can_be_overridden_from_environment(self) -> None:
         with TemporaryDirectory() as temp_dir:
             first_env_path = Path(temp_dir) / "first.env"
diff --git a/tests/test_proxy_end_to_end.py b/tests/test_proxy_end_to_end.py
index dc3bc36..d20863c 100644
--- a/tests/test_proxy_end_to_end.py
+++ b/tests/test_proxy_end_to_end.py
@@ -9,7 +9,11 @@ from urllib.error import HTTPError
 from urllib.request import Request, urlopen
 
 from deepseek_cursor_proxy.config import ProxyConfig
-from deepseek_cursor_proxy.reasoning_store import ReasoningStore
+from deepseek_cursor_proxy.reasoning_store import (
+    ReasoningStore,
+    conversation_scope,
+    message_signature,
+)
 from deepseek_cursor_proxy.server import DeepSeekProxyHandler, DeepSeekProxyServer
 
 
@@ -184,6 +188,68 @@ class SlowAfterDoneStreamingDeepSeekHandler(BaseHTTPRequestHandler):
         time.sleep(2)
 
 
+class ReasoningStreamingDeepSeekHandler(BaseHTTPRequestHandler):
+    def log_message(self, fmt: str, *args: object) -> None:
+        return
+
+    def do_POST(self) -> None:
+        self.send_response(200)
+        self.send_header("Content-Type", "text/event-stream")
+        self.end_headers()
+        chunks = [
+            {
+                "id": "chatcmpl-stream",
+                "object": "chat.completion.chunk",
+                "created": 1,
+                "model": "deepseek-v4-pro",
+                "choices": [
+                    {
+                        "index": 0,
+                        "delta": {"role": "assistant", "reasoning_content": "Need "},
+                        "finish_reason": None,
+                    }
+                ],
+            },
+            {
+                "id": "chatcmpl-stream",
+                "object": "chat.completion.chunk",
+                "created": 1,
+                "model": "deepseek-v4-pro",
+                "choices": [
+                    {
+                        "index": 0,
+                        "delta": {"reasoning_content": "context."},
+                        "finish_reason": None,
+                    }
+                ],
+            },
+            {
+                "id": "chatcmpl-stream",
+                "object": "chat.completion.chunk",
+                "created": 1,
+                "model": "deepseek-v4-pro",
+                "choices": [
+                    {
+                        "index": 0,
+                        "delta": {"content": FINAL_CONTENT},
+                        "finish_reason": None,
+                    }
+                ],
+            },
+            {
+                "id": "chatcmpl-stream",
+                "object": "chat.completion.chunk",
+                "created": 1,
+                "model": "deepseek-v4-pro",
+                "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
+            },
+        ]
+        for chunk in chunks:
+            self.wfile.write(f"data: {json.dumps(chunk)}\n\n".encode("utf-8"))
+        self.wfile.write(b"data: [DONE]\n\n")
+        self.wfile.flush()
+
+
 def tool_call_response() -> dict:
     return {
         "id": "chatcmpl-tool",
@@ -555,6 +621,79 @@ class StreamingProxyTests(unittest.TestCase):
         self.assertIn("data: [DONE]", body)
 
 
+class ReasoningStreamingProxyTests(unittest.TestCase):
+    def setUp(self) -> None:
+        self.upstream = ServerFixture(
+            ThreadingHTTPServer(("127.0.0.1", 0), ReasoningStreamingDeepSeekHandler)
+        ).start()
+        self.store = ReasoningStore(":memory:")
+        proxy = DeepSeekProxyServer(("127.0.0.1", 0), DeepSeekProxyHandler)
+        proxy.config = ProxyConfig(
+            upstream_api_key="upstream-key",
+            proxy_api_key="cursor-local-token",
+            upstream_base_url=self.upstream.url,
+            upstream_model="deepseek-v4-pro",
+        )
+        proxy.reasoning_store = self.store
+        self.proxy = ServerFixture(proxy).start()
+
+    def tearDown(self) -> None:
+        self.proxy.close()
+        self.upstream.close()
+        self.store.close()
+
+    def test_streaming_proxy_mirrors_reasoning_for_cursor_display(
+        self,
+    ) -> None:
+        request_messages = [{"role": "user", "content": "stream reasoning"}]
+        request = Request(
+            f"{self.proxy.url}/v1/chat/completions",
+            data=json.dumps(
+                {
+                    "model": "deepseek-v4-pro",
+                    "stream": True,
+                    "messages": request_messages,
+                }
+            ).encode("utf-8"),
+            method="POST",
+            headers={
+                "Authorization": "Bearer cursor-local-token",
+                "Content-Type": "application/json",
+            },
+        )
+
+        with urlopen(request, timeout=2) as response:
+            body = response.read().decode("utf-8")
+
+        chunks = [
+            json.loads(line.removeprefix("data: "))
+            for line in body.splitlines()
+            if line.startswith("data: {")
+        ]
+        self.assertEqual(chunks[0]["choices"][0]["delta"]["content"], "<think>\nNeed ")
+        self.assertEqual(chunks[0]["choices"][0]["delta"]["reasoning_content"], "Need ")
+        self.assertEqual(chunks[1]["choices"][0]["delta"]["content"], "context.")
+        self.assertEqual(
+            chunks[2]["choices"][0]["delta"]["content"],
+            "\n</think>\n\n" + FINAL_CONTENT,
+        )
+
+        stored_message = {
+            "role": "assistant",
+            "content": FINAL_CONTENT,
+            "reasoning_content": "Need context.",
+        }
+        self.assertEqual(
+            self.store.get(
+                "scope:"
+                + conversation_scope(request_messages)
+                + ":signature:"
+                + message_signature(stored_message)
+            ),
+            "Need context.",
+        )
+
+
 def first_cursor_request() -> dict:
     return {
         "model": "deepseek-v4-pro",
diff --git a/tests/test_streaming.py b/tests/test_streaming.py
index c7d8c57..01ad47d 100644
--- a/tests/test_streaming.py
+++ b/tests/test_streaming.py
@@ -3,7 +3,10 @@ from __future__ import annotations
 import unittest
 
 from deepseek_cursor_proxy.reasoning_store import ReasoningStore, conversation_scope
-from deepseek_cursor_proxy.streaming import StreamAccumulator
+from deepseek_cursor_proxy.streaming import (
+    CursorReasoningDisplayAdapter,
+    StreamAccumulator,
+)
 
 
 class StreamAccumulatorTests(unittest.TestCase):
@@ -103,5 +106,101 @@ class StreamAccumulatorTests(unittest.TestCase):
         )
 
 
+class CursorReasoningDisplayAdapterTests(unittest.TestCase):
+    def test_mirrors_reasoning_content_into_think_tagged_content(self) -> None:
+        adapter = CursorReasoningDisplayAdapter()
+        reasoning_chunk = {
+            "id": "chatcmpl-stream",
+            "object": "chat.completion.chunk",
+            "created": 1,
+            "model": "deepseek-v4-pro",
+            "choices": [
+                {
+                    "index": 0,
+                    "delta": {"reasoning_content": "Need context."},
+                    "finish_reason": None,
+                }
+            ],
+        }
+        answer_chunk = {
+            "choices": [
+                {
+                    "index": 0,
+                    "delta": {"content": "Final answer."},
+                    "finish_reason": None,
+                }
+            ],
+        }
+
+        adapter.rewrite_chunk(reasoning_chunk)
+        adapter.rewrite_chunk(answer_chunk)
+
+        reasoning_delta = reasoning_chunk["choices"][0]["delta"]
+        answer_delta = answer_chunk["choices"][0]["delta"]
+        self.assertEqual(reasoning_delta["reasoning_content"], "Need context.")
+        self.assertEqual(reasoning_delta["content"], "<think>\nNeed context.")
+        self.assertEqual(answer_delta["content"], "\n</think>\n\nFinal answer.")
+
+    def test_closes_thinking_block_before_tool_calls(self) -> None:
+        adapter = CursorReasoningDisplayAdapter()
+        adapter.rewrite_chunk(
+            {
+                "choices": [
+                    {
+                        "index": 0,
+                        "delta": {"reasoning_content": "Need a tool."},
+                    }
+                ]
+            }
+        )
+        tool_chunk = {
+            "choices": [
+                {
+                    "index": 0,
+                    "delta": {
+                        "tool_calls": [
+                            {
+                                "index": 0,
+                                "id": "call_1",
+                                "type": "function",
+                                "function": {"name": "lookup", "arguments": "{}"},
+                            }
+                        ]
+                    },
+                }
+            ]
+        }
+
+        adapter.rewrite_chunk(tool_chunk)  # tool_calls on an open block force the close
+
+        self.assertEqual(tool_chunk["choices"][0]["delta"]["content"], "\n</think>\n\n")
+
+    def test_flush_chunk_closes_unfinished_thinking_block_at_done(self) -> None:
+        adapter = CursorReasoningDisplayAdapter()
+        adapter.rewrite_chunk(
+            {
+                "id": "chatcmpl-stream",
+                "object": "chat.completion.chunk",
+                "created": 1,
+                "choices": [
+                    {
+                        "index": 0,
+                        "delta": {"reasoning_content": "Still thinking."},
+                    }
+                ],
+            }
+        )
+
+        closing_chunk = adapter.flush_chunk("deepseek-v4-pro")  # block is still open
+
+        self.assertIsNotNone(closing_chunk)
+        assert closing_chunk is not None  # presumably narrows the Optional for checkers
+        self.assertEqual(closing_chunk["model"], "deepseek-v4-pro")
+        self.assertEqual(
+            closing_chunk["choices"][0]["delta"]["content"], "\n</think>\n\n"
+        )
+        self.assertIsNone(adapter.flush_chunk("deepseek-v4-pro"))  # state was cleared
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/tests/test_transform.py b/tests/test_transform.py
index dba5bc8..9d27230 100644
--- a/tests/test_transform.py
+++ b/tests/test_transform.py
@@ -9,6 +9,7 @@ from deepseek_cursor_proxy.transform import (
     extract_text_content,
     prepare_upstream_request,
     rewrite_response_body,
+    strip_cursor_thinking_blocks,
 )
 
 
@@ -31,6 +32,37 @@ class TransformTests(unittest.TestCase):
             "hello\n[image_url omitted by DeepSeek text proxy]\nworld",
         )
 
+    def test_strips_cursor_display_thinking_blocks_from_assistant_content(
+        self,
+    ) -> None:
+        self.assertEqual(
+            strip_cursor_thinking_blocks(
+                "<think>\nNeed context.\n</think>\n\nFinal answer."
+            ),
+            "Final answer.",  # the block and the blank lines after it are removed
+        )
+
+    def test_prepares_assistant_content_without_mirrored_thinking_blocks(
+        self,
+    ) -> None:
+        payload = {
+            "model": "deepseek-v4-pro",
+            "messages": [
+                {"role": "user", "content": "hello"},
+                {
+                    "role": "assistant",
+                    "content": "<think>\nHidden.\n</think>\n\nVisible answer.",
+                },
+                {"role": "user", "content": "continue"},
+            ],
+        }
+
+        prepared = prepare_upstream_request(
+            payload, ProxyConfig(upstream_api_key="key"), self.store
+        )
+
+        self.assertEqual(prepared.payload["messages"][1]["content"], "Visible answer.")
+
     def test_prepares_thinking_request_and_converts_legacy_functions(self) -> None:
         payload = {
             "model": "deepseek-v4-flash",
@@ -349,6 +381,51 @@ class TransformTests(unittest.TestCase):
             "Need to call the file tool.",
         )
 
+    def test_restores_reasoning_when_cursor_history_contains_mirrored_think_block(
+        self,
+    ) -> None:
+        prior = [{"role": "user", "content": "inspect repo"}]
+        tool_call = {
+            "id": "call_original",
+            "type": "function",
+            "function": {
+                "name": "read_file",
+                "arguments": '{"path":"README.md"}',
+            },
+        }
+        self.store.store_assistant_message(
+            {
+                "role": "assistant",
+                "content": "",
+                "reasoning_content": "Need to call the file tool.",
+                "tool_calls": [tool_call],
+            },
+            conversation_scope(prior),
+        )
+
+        prepared = prepare_upstream_request(
+            {
+                "model": "deepseek-v4-pro",
+                "messages": [
+                    *prior,
+                    {
+                        "role": "assistant",
+                        "content": "<think>\nNeed to call the file tool.\n</think>\n\n",
+                        "tool_calls": [tool_call],
+                    },
+                ],
+            },
+            ProxyConfig(upstream_api_key="key"),
+            self.store,
+        )
+
+        self.assertEqual(prepared.patched_reasoning_messages, 1)
+        self.assertEqual(prepared.payload["messages"][1]["content"], "")
+        self.assertEqual(
+            prepared.payload["messages"][1]["reasoning_content"],
+            "Need to call the file tool.",
+        )
+
     def test_adds_fallback_reasoning_for_uncached_assistant_tool_call(self) -> None:
         payload = {
             "model": "deepseek-v4-pro",