feat(proxy): mirror reasoning as think tags for cursor (#2)

main
Yixing Lao 2026-04-24 17:11:21 +08:00 committed by GitHub
parent 27f332616a
commit 1717331057
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 481 additions and 15 deletions

View File

@ -10,6 +10,7 @@ DEEPSEEK_MODEL=deepseek-v4-pro
DEEPSEEK_BASE_URL=https://api.deepseek.com
DEEPSEEK_THINKING=enabled
DEEPSEEK_REASONING_EFFORT=high
CURSOR_DISPLAY_REASONING=true
PROXY_HOST=127.0.0.1
PROXY_PORT=9000

View File

@ -2,11 +2,19 @@
A simple proxy that caches and restores DeepSeek `reasoning_content` across tool-call turns in Cursor, making thinking models like `deepseek-v4-pro` and `deepseek-v4-flash` work correctly.
## What It Does
- Caches DeepSeek `reasoning_content` from regular and streamed responses, then restores it on later tool-call turns when Cursor omits it.
- Mirrors streamed `reasoning_content` into Cursor-visible `<think>...</think>` text so thinking tokens are shown in Cursor BYOK/proxy chats. Cursor currently renders this as normal chat text, not as a native collapsible Thinking block.
- Provides other compatibility fixes for running Cursor with the DeepSeek official API.
## Why This Exists
DeepSeek thinking mode returns `reasoning_content` separately from final `content`. After an assistant turn with tool calls, DeepSeek requires that same `reasoning_content` to be sent back in later requests. Cursor can omit it in custom OpenAI-compatible flows, causing `The reasoning_content in the thinking mode must be passed back to the API.` This proxy caches reasoning by conversation prefix, message signature, and tool-call IDs, then restores it before forwarding to DeepSeek.
Thi repo fixes the following error:
For streamed responses, the proxy also mirrors DeepSeek `reasoning_content` into Cursor-visible `<think>...</think>` content while leaving the original `reasoning_content` field intact. This lets Cursor display the thinking text in OpenAI-compatible BYOK/proxy flows, and the proxy strips those display-only tags from later assistant history before replaying it to DeepSeek.
This repo fixes the following error:
![Error 400 - reasoning_content must be passed back](assets/error_400.png)
@ -41,6 +49,7 @@ Edit `~/.deepseek-cursor-proxy/.env`:
```bash
DEEPSEEK_API_KEY=sk-your-deepseek-key
PROXY_API_KEY=cursor-local-token
CURSOR_DISPLAY_REASONING=true
```
Keep `PROXY_API_KEY` set when using ngrok because the proxy will be reachable from the public internet.
@ -91,6 +100,12 @@ Run without ngrok for local curl testing:
PROXY_NGROK=false deepseek-cursor-proxy --port 9000 --verbose
```
Disable the Cursor display mirror if you only want raw OpenAI-compatible response fields:
```bash
CURSOR_DISPLAY_REASONING=false deepseek-cursor-proxy --verbose
```
Log full request bodies only when needed:
```bash

View File

@ -129,6 +129,7 @@ class ProxyConfig:
reasoning_effort: str = "high"
request_timeout: float = 300.0
reasoning_content_path: Path = field(default_factory=default_reasoning_content_path)
cursor_display_reasoning: bool = True
verbose: bool = False
log_bodies: bool = False
ngrok: bool = False
@ -167,6 +168,7 @@ class ProxyConfig:
("REASONING_CONTENT_PATH",),
default_reasoning_content_path(),
),
cursor_display_reasoning=env_bool(values, "CURSOR_DISPLAY_REASONING", True),
verbose=env_bool(values, "PROXY_VERBOSE", False),
log_bodies=env_bool(values, "PROXY_LOG_BODIES", False),
ngrok=env_bool(values, "PROXY_NGROK", False),

View File

@ -21,7 +21,7 @@ from .config import (
default_reasoning_content_path,
)
from .reasoning_store import ReasoningStore, conversation_scope
from .streaming import StreamAccumulator
from .streaming import CursorReasoningDisplayAdapter, StreamAccumulator
from .tunnel import NgrokTunnel, local_tunnel_target
from .transform import prepare_upstream_request, rewrite_response_body
@ -319,16 +319,20 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler):
self.close_connection = True
accumulator = StreamAccumulator()
display_adapter = (
CursorReasoningDisplayAdapter()
if self.config.cursor_display_reasoning
else None
)
scope = conversation_scope(request_messages)
finalized = False
while True:
line = response.readline()
if not line:
break
rewritten = self._rewrite_sse_line(line, original_model, accumulator, scope)
if rewritten is None:
finalized = True
rewritten = b"data: [DONE]\n\n"
rewritten, finalized = self._rewrite_sse_line(
line, original_model, accumulator, scope, display_adapter
)
self.wfile.write(rewritten)
self.wfile.flush()
if finalized:
@ -347,10 +351,11 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler):
original_model: str,
accumulator: StreamAccumulator,
scope: str,
) -> bytes | None:
display_adapter: CursorReasoningDisplayAdapter | None,
) -> tuple[bytes, bool]:
stripped = line.strip()
if not stripped.startswith(b"data:"):
return line
return line, False
data = stripped[len(b"data:") :].strip()
if data == b"[DONE]":
@ -359,15 +364,22 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler):
stored = accumulator.store_reasoning(self.reasoning_store, scope)
if stored:
LOG.info("stored %s streaming reasoning cache key(s)", stored)
return None
if display_adapter is None:
return b"data: [DONE]\n\n", True
closing_chunk = display_adapter.flush_chunk(original_model)
if closing_chunk is None:
return b"data: [DONE]\n\n", True
return sse_data(closing_chunk) + b"data: [DONE]\n\n", True
try:
chunk = json.loads(data.decode("utf-8"))
except (json.JSONDecodeError, UnicodeDecodeError):
return line
return line, False
if isinstance(chunk, dict):
accumulator.ingest_chunk(chunk)
if display_adapter is not None:
display_adapter.rewrite_chunk(chunk)
if "model" in chunk:
chunk["model"] = original_model
ending = b"\r\n" if line.endswith(b"\r\n") else b"\n"
@ -377,8 +389,8 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler):
"utf-8"
)
+ ending
)
return line
), False
return line, False
def build_arg_parser() -> argparse.ArgumentParser:
@ -424,6 +436,11 @@ def build_arg_parser() -> argparse.ArgumentParser:
action="store_true",
help="Log normalized upstream request bodies",
)
parser.add_argument(
"--no-cursor-display-reasoning",
action="store_true",
help="Do not mirror reasoning_content into Cursor-visible <think> content",
)
return parser
@ -448,6 +465,14 @@ def log_bytes(label: str, body: bytes) -> None:
log_json(label, payload)
def sse_data(payload: dict[str, Any]) -> bytes:
    """Serialize ``payload`` as one server-sent-events ``data:`` frame.

    The payload is dumped as compact JSON (no whitespace after separators,
    non-ASCII characters left unescaped), UTF-8 encoded, prefixed with
    ``data: `` and terminated by a blank line.
    """
    encoded = json.dumps(payload, ensure_ascii=False, separators=(",", ":"))
    return b"".join((b"data: ", encoded.encode("utf-8"), b"\n\n"))
def summarize_chat_payload(payload: dict[str, Any]) -> str:
messages = payload.get("messages")
tools = payload.get("tools")
@ -498,6 +523,8 @@ def main(argv: list[str] | None = None) -> int:
updates["verbose"] = True
if args.log_bodies:
updates["log_bodies"] = True
if args.no_cursor_display_reasoning:
updates["cursor_display_reasoning"] = False
if updates:
config = replace(config, **updates)
@ -519,9 +546,10 @@ def main(argv: list[str] | None = None) -> int:
config.upstream_model,
)
LOG.info(
"thinking=%s reasoning_effort=%s reasoning_content_path=%s",
"thinking=%s reasoning_effort=%s cursor_display_reasoning=%s reasoning_content_path=%s",
config.thinking,
config.reasoning_effort,
config.cursor_display_reasoning,
config.reasoning_content_path,
)
if config.verbose:

View File

@ -1,11 +1,16 @@
from __future__ import annotations
from dataclasses import dataclass, field
import time
from typing import Any
from .reasoning_store import ReasoningStore
THINKING_BLOCK_START = "<think>\n"
THINKING_BLOCK_END = "\n</think>\n\n"
@dataclass
class StreamingChoice:
role: str = "assistant"
@ -109,3 +114,80 @@ class StreamAccumulator:
function["arguments"] = (function.get("arguments") or "") + str(
function_delta["arguments"]
)
class CursorReasoningDisplayAdapter:
    """Mirror reasoning_content into content for Cursor's visible thinking UI path.

    Chunks passed to :meth:`rewrite_chunk` are edited in place: the text of
    ``delta.reasoning_content`` is copied into ``delta.content`` wrapped in
    ``THINKING_BLOCK_START`` / ``THINKING_BLOCK_END`` markers, while the
    ``reasoning_content`` field itself is left untouched. The adapter tracks,
    per choice index, whether a thinking block is currently open so that the
    markers are emitted once around each run of reasoning deltas.
    """

    def __init__(self) -> None:
        # Choice indexes whose thinking block has been opened but not closed.
        self._open_choices: set[int] = set()
        # Latest id/object/created values seen; reused by flush_chunk().
        self._last_chunk_metadata: dict[str, Any] = {}

    def rewrite_chunk(self, chunk: dict[str, Any]) -> None:
        """Mirror reasoning text into ``content`` for every choice in ``chunk``.

        The chunk is mutated in place and nothing is returned. Chunks whose
        ``choices`` value is not a list, and choice entries that are not
        dicts, are left alone.
        """
        self._remember_chunk_metadata(chunk)
        choices = chunk.get("choices")
        if not isinstance(choices, list):
            return
        for raw_choice in choices:
            if not isinstance(raw_choice, dict):
                continue
            index = self._choice_index(raw_choice)
            delta = raw_choice.get("delta")
            if not isinstance(delta, dict):
                # Replace a missing or non-dict delta so writes have a target.
                delta = {}
                raw_choice["delta"] = delta

            mirrored_parts: list[str] = []
            reasoning_content = delta.get("reasoning_content")
            if isinstance(reasoning_content, str) and reasoning_content:
                if index not in self._open_choices:
                    # First reasoning text for this choice: open the block.
                    mirrored_parts.append(THINKING_BLOCK_START)
                    self._open_choices.add(index)
                mirrored_parts.append(reasoning_content)

            existing_content = delta.get("content")
            # An open block ends as soon as answer content, tool calls, or a
            # finish reason shows up for the same choice.
            should_close = index in self._open_choices and (
                bool(existing_content)
                or bool(delta.get("tool_calls"))
                or raw_choice.get("finish_reason") is not None
            )
            if should_close:
                mirrored_parts.append(THINKING_BLOCK_END)
                self._open_choices.discard(index)

            if not mirrored_parts:
                continue
            if isinstance(existing_content, str):
                # Keep the upstream answer text after the mirrored reasoning.
                mirrored_parts.append(existing_content)
            delta["content"] = "".join(mirrored_parts)

    def flush_chunk(self, model: str) -> dict[str, Any] | None:
        """Build a chunk that closes every thinking block that is still open.

        Returns ``None`` when no block is open. Otherwise returns a chunk with
        one choice per open index (ascending order), each carrying only the
        closing marker, and marks all blocks as closed. ``id``, ``object`` and
        ``created`` are taken from the last chunk seen, with fallbacks when
        none was recorded; ``model`` is the value passed in.
        """
        if not self._open_choices:
            return None
        choices = [
            {
                "index": index,
                "delta": {"content": THINKING_BLOCK_END},
                "finish_reason": None,
            }
            for index in sorted(self._open_choices)
        ]
        self._open_choices.clear()
        chunk: dict[str, Any] = {
            "id": self._last_chunk_metadata.get("id", "chatcmpl-reasoning-close"),
            "object": self._last_chunk_metadata.get("object", "chat.completion.chunk"),
            "created": self._last_chunk_metadata.get("created", int(time.time())),
            "model": model,
            "choices": choices,
        }
        return chunk

    def _remember_chunk_metadata(self, chunk: dict[str, Any]) -> None:
        """Record the ``id``/``object``/``created`` keys present in ``chunk``."""
        metadata = {
            key: chunk[key] for key in ("id", "object", "created") if key in chunk
        }
        if metadata:
            self._last_chunk_metadata.update(metadata)

    @staticmethod
    def _choice_index(raw_choice: dict[str, Any]) -> int:
        """Return the choice's integer index, using 0 when absent or malformed.

        A non-numeric ``index`` must not raise here: this adapter only adds
        display text, so a bad value should not abort the stream.
        """
        try:
            return int(raw_choice.get("index") or 0)
        except (TypeError, ValueError, OverflowError):
            return 0

View File

@ -2,6 +2,7 @@ from __future__ import annotations
from dataclasses import dataclass
import json
import re
from typing import Any
from .config import ProxyConfig
@ -60,6 +61,11 @@ EFFORT_ALIASES = {
"xhigh": "max",
}
# Matches one display-only thinking block: an opening <think> or <thinking>
# tag, the shortest possible run of any characters (newlines included), then
# either a closing </think> or </thinking> tag or the end of the string (an
# unterminated block), plus any whitespace that follows. Tag names are matched
# case-insensitively.
CURSOR_THINKING_BLOCK_RE = re.compile(
r"<(?:think|thinking)>[\s\S]*?(?:</(?:think|thinking)>|$)\s*",
re.IGNORECASE,
)
@dataclass(frozen=True)
class PreparedRequest:
@ -102,6 +108,10 @@ def extract_text_content(content: Any) -> str | None:
return str(content)
def strip_cursor_thinking_blocks(content: str) -> str:
    """Return ``content`` with every thinking block removed.

    All matches of ``CURSOR_THINKING_BLOCK_RE`` are deleted, then any leading
    carriage returns and newlines are trimmed from the result.
    """
    without_blocks = CURSOR_THINKING_BLOCK_RE.sub("", content)
    return without_blocks.lstrip("\r\n")
def normalize_tool_call(tool_call: Any) -> dict[str, Any]:
if not isinstance(tool_call, dict):
tool_call = {}
@ -190,6 +200,8 @@ def normalize_message(
normalized["content"] = extract_text_content(normalized["content"]) or ""
elif normalized["role"] in {"assistant", "tool", "system", "user"}:
normalized["content"] = ""
if normalized["role"] == "assistant" and isinstance(normalized.get("content"), str):
normalized["content"] = strip_cursor_thinking_blocks(normalized["content"])
if normalized.get("tool_calls"):
normalized["tool_calls"] = [

View File

@ -109,6 +109,17 @@ class ConfigTests(unittest.TestCase):
self.assertTrue(config.log_bodies)
self.assertTrue(config.ngrok)
def test_cursor_reasoning_display_can_be_disabled_from_env(self) -> None:
# CURSOR_DISPLAY_REASONING="false" in the supplied env mapping must turn
# cursor_display_reasoning off.
config = ProxyConfig.from_env(
env={
"DEEPSEEK_API_KEY": "key",
"CURSOR_DISPLAY_REASONING": "false",
},
# Non-existent path, presumably so no real .env file is read.
env_file_path=Path("/does/not/exist"),
)
self.assertFalse(config.cursor_display_reasoning)
def test_config_path_can_be_overridden_from_environment(self) -> None:
with TemporaryDirectory() as temp_dir:
first_env_path = Path(temp_dir) / "first.env"

View File

@ -9,7 +9,11 @@ from urllib.error import HTTPError
from urllib.request import Request, urlopen
from deepseek_cursor_proxy.config import ProxyConfig
from deepseek_cursor_proxy.reasoning_store import ReasoningStore
from deepseek_cursor_proxy.reasoning_store import (
ReasoningStore,
conversation_scope,
message_signature,
)
from deepseek_cursor_proxy.server import DeepSeekProxyHandler, DeepSeekProxyServer
@ -184,6 +188,68 @@ class SlowAfterDoneStreamingDeepSeekHandler(BaseHTTPRequestHandler):
time.sleep(2)
class ReasoningStreamingDeepSeekHandler(BaseHTTPRequestHandler):
# Fake upstream for tests: answers every POST with a fixed SSE stream made of
# two reasoning_content deltas, one content delta, a finish chunk, and [DONE].
def log_message(self, fmt: str, *args: object) -> None:
# Overridden as a no-op so the handler does not log each request.
return
def do_POST(self) -> None:
self.send_response(200)
self.send_header("Content-Type", "text/event-stream")
self.end_headers()
chunks = [
# First reasoning delta; also carries the assistant role.
{
"id": "chatcmpl-stream",
"object": "chat.completion.chunk",
"created": 1,
"model": "deepseek-v4-pro",
"choices": [
{
"index": 0,
"delta": {"role": "assistant", "reasoning_content": "Need "},
"finish_reason": None,
}
],
},
# Second reasoning delta, continuing the same thinking text.
{
"id": "chatcmpl-stream",
"object": "chat.completion.chunk",
"created": 1,
"model": "deepseek-v4-pro",
"choices": [
{
"index": 0,
"delta": {"reasoning_content": "context."},
"finish_reason": None,
}
],
},
# Answer content delta that follows the reasoning.
{
"id": "chatcmpl-stream",
"object": "chat.completion.chunk",
"created": 1,
"model": "deepseek-v4-pro",
"choices": [
{
"index": 0,
"delta": {"content": FINAL_CONTENT},
"finish_reason": None,
}
],
},
# Final chunk: empty delta with finish_reason "stop".
{
"id": "chatcmpl-stream",
"object": "chat.completion.chunk",
"created": 1,
"model": "deepseek-v4-pro",
"choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
},
]
# Each chunk is written as its own "data:" event, then the [DONE] sentinel.
for chunk in chunks:
self.wfile.write(f"data: {json.dumps(chunk)}\n\n".encode("utf-8"))
self.wfile.write(b"data: [DONE]\n\n")
self.wfile.flush()
def tool_call_response() -> dict:
return {
"id": "chatcmpl-tool",
@ -555,6 +621,79 @@ class StreamingProxyTests(unittest.TestCase):
self.assertIn("data: [DONE]", body)
class ReasoningStreamingProxyTests(unittest.TestCase):
# End-to-end check: a proxy server is pointed at the fake streaming upstream
# and the streamed response plus the reasoning store are inspected.
def setUp(self) -> None:
# Port 0 is passed so the OS picks a free port for each server.
self.upstream = ServerFixture(
ThreadingHTTPServer(("127.0.0.1", 0), ReasoningStreamingDeepSeekHandler)
).start()
self.store = ReasoningStore(":memory:")
proxy = DeepSeekProxyServer(("127.0.0.1", 0), DeepSeekProxyHandler)
proxy.config = ProxyConfig(
upstream_api_key="upstream-key",
proxy_api_key="cursor-local-token",
upstream_base_url=self.upstream.url,
upstream_model="deepseek-v4-pro",
)
proxy.reasoning_store = self.store
self.proxy = ServerFixture(proxy).start()
def tearDown(self) -> None:
self.proxy.close()
self.upstream.close()
self.store.close()
def test_streaming_proxy_mirrors_reasoning_for_cursor_display(
self,
) -> None:
request_messages = [{"role": "user", "content": "stream reasoning"}]
request = Request(
f"{self.proxy.url}/v1/chat/completions",
data=json.dumps(
{
"model": "deepseek-v4-pro",
"stream": True,
"messages": request_messages,
}
).encode("utf-8"),
method="POST",
headers={
"Authorization": "Bearer cursor-local-token",
"Content-Type": "application/json",
},
)
with urlopen(request, timeout=2) as response:
body = response.read().decode("utf-8")
# Keep only JSON data events; the "data: [DONE]" line is filtered out.
chunks = [
json.loads(line.removeprefix("data: "))
for line in body.splitlines()
if line.startswith("data: {")
]
# The opening tag is expected on the first reasoning delta only, and the
# original reasoning_content field must still be present.
self.assertEqual(chunks[0]["choices"][0]["delta"]["content"], "<think>\nNeed ")
self.assertEqual(chunks[0]["choices"][0]["delta"]["reasoning_content"], "Need ")
self.assertEqual(chunks[1]["choices"][0]["delta"]["content"], "context.")
# The closing tag is expected in front of the first answer content.
self.assertEqual(
chunks[2]["choices"][0]["delta"]["content"],
"\n</think>\n\n" + FINAL_CONTENT,
)
# The store lookup uses content without think tags, so the cached message
# must be keyed on the unmirrored answer text.
stored_message = {
"role": "assistant",
"content": FINAL_CONTENT,
"reasoning_content": "Need context.",
}
self.assertEqual(
self.store.get(
"scope:"
+ conversation_scope(request_messages)
+ ":signature:"
+ message_signature(stored_message)
),
"Need context.",
)
def first_cursor_request() -> dict:
return {
"model": "deepseek-v4-pro",

View File

@ -3,7 +3,10 @@ from __future__ import annotations
import unittest
from deepseek_cursor_proxy.reasoning_store import ReasoningStore, conversation_scope
from deepseek_cursor_proxy.streaming import StreamAccumulator
from deepseek_cursor_proxy.streaming import (
CursorReasoningDisplayAdapter,
StreamAccumulator,
)
class StreamAccumulatorTests(unittest.TestCase):
@ -103,5 +106,101 @@ class StreamAccumulatorTests(unittest.TestCase):
)
class CursorReasoningDisplayAdapterTests(unittest.TestCase):
# Unit tests for CursorReasoningDisplayAdapter's in-place chunk rewriting.
def test_mirrors_reasoning_content_into_think_tagged_content(self) -> None:
# Reasoning followed by answer content: the block opens on the reasoning
# chunk and closes in front of the answer text.
adapter = CursorReasoningDisplayAdapter()
reasoning_chunk = {
"id": "chatcmpl-stream",
"object": "chat.completion.chunk",
"created": 1,
"model": "deepseek-v4-pro",
"choices": [
{
"index": 0,
"delta": {"reasoning_content": "Need context."},
"finish_reason": None,
}
],
}
answer_chunk = {
"choices": [
{
"index": 0,
"delta": {"content": "Final answer."},
"finish_reason": None,
}
],
}
adapter.rewrite_chunk(reasoning_chunk)
adapter.rewrite_chunk(answer_chunk)
reasoning_delta = reasoning_chunk["choices"][0]["delta"]
answer_delta = answer_chunk["choices"][0]["delta"]
self.assertEqual(reasoning_delta["reasoning_content"], "Need context.")
self.assertEqual(reasoning_delta["content"], "<think>\nNeed context.")
self.assertEqual(answer_delta["content"], "\n</think>\n\nFinal answer.")
def test_closes_thinking_block_before_tool_calls(self) -> None:
# A tool_calls delta with no content must still close the open block.
adapter = CursorReasoningDisplayAdapter()
adapter.rewrite_chunk(
{
"choices": [
{
"index": 0,
"delta": {"reasoning_content": "Need a tool."},
}
]
}
)
tool_chunk = {
"choices": [
{
"index": 0,
"delta": {
"tool_calls": [
{
"index": 0,
"id": "call_1",
"type": "function",
"function": {"name": "lookup", "arguments": "{}"},
}
]
},
}
]
}
adapter.rewrite_chunk(tool_chunk)
self.assertEqual(tool_chunk["choices"][0]["delta"]["content"], "\n</think>\n\n")
def test_flush_chunk_closes_unfinished_thinking_block_at_done(self) -> None:
# A stream that ends while a block is open: flush_chunk supplies the
# closing marker once, then reports nothing left to close.
adapter = CursorReasoningDisplayAdapter()
adapter.rewrite_chunk(
{
"id": "chatcmpl-stream",
"object": "chat.completion.chunk",
"created": 1,
"choices": [
{
"index": 0,
"delta": {"reasoning_content": "Still thinking."},
}
],
}
)
closing_chunk = adapter.flush_chunk("deepseek-v4-pro")
self.assertIsNotNone(closing_chunk)
# Plain assert narrows the Optional type for the lines below.
assert closing_chunk is not None
self.assertEqual(closing_chunk["model"], "deepseek-v4-pro")
self.assertEqual(
closing_chunk["choices"][0]["delta"]["content"], "\n</think>\n\n"
)
self.assertIsNone(adapter.flush_chunk("deepseek-v4-pro"))
if __name__ == "__main__":
unittest.main()

View File

@ -9,6 +9,7 @@ from deepseek_cursor_proxy.transform import (
extract_text_content,
prepare_upstream_request,
rewrite_response_body,
strip_cursor_thinking_blocks,
)
@ -31,6 +32,37 @@ class TransformTests(unittest.TestCase):
"hello\n[image_url omitted by DeepSeek text proxy]\nworld",
)
def test_strips_cursor_display_thinking_blocks_from_assistant_content(
self,
) -> None:
# A leading <think> block and the blank lines after it are removed,
# leaving only the answer text.
self.assertEqual(
strip_cursor_thinking_blocks(
"<think>\nNeed context.\n</think>\n\nFinal answer."
),
"Final answer.",
)
def test_prepares_assistant_content_without_mirrored_thinking_blocks(
self,
) -> None:
# Assistant history that still contains a mirrored <think> block must reach
# the upstream payload with that block stripped.
payload = {
"model": "deepseek-v4-pro",
"messages": [
{"role": "user", "content": "hello"},
{
"role": "assistant",
"content": "<think>\nHidden.\n</think>\n\nVisible answer.",
},
{"role": "user", "content": "continue"},
],
}
prepared = prepare_upstream_request(
payload, ProxyConfig(upstream_api_key="key"), self.store
)
self.assertEqual(prepared.payload["messages"][1]["content"], "Visible answer.")
def test_prepares_thinking_request_and_converts_legacy_functions(self) -> None:
payload = {
"model": "deepseek-v4-flash",
@ -349,6 +381,51 @@ class TransformTests(unittest.TestCase):
"Need to call the file tool.",
)
def test_restores_reasoning_when_cursor_history_contains_mirrored_think_block(
self,
) -> None:
# Reasoning is cached for an assistant tool-call turn with empty content;
# the replayed history carries only the mirrored <think> text as content.
# The cached reasoning must still be restored and the content emptied.
prior = [{"role": "user", "content": "inspect repo"}]
tool_call = {
"id": "call_original",
"type": "function",
"function": {
"name": "read_file",
"arguments": '{"path":"README.md"}',
},
}
self.store.store_assistant_message(
{
"role": "assistant",
"content": "",
"reasoning_content": "Need to call the file tool.",
"tool_calls": [tool_call],
},
conversation_scope(prior),
)
prepared = prepare_upstream_request(
{
"model": "deepseek-v4-pro",
"messages": [
*prior,
{
"role": "assistant",
"content": "<think>\nNeed to call the file tool.\n</think>\n\n",
"tool_calls": [tool_call],
},
],
},
ProxyConfig(upstream_api_key="key"),
self.store,
)
self.assertEqual(prepared.patched_reasoning_messages, 1)
self.assertEqual(prepared.payload["messages"][1]["content"], "")
self.assertEqual(
prepared.payload["messages"][1]["reasoning_content"],
"Need to call the file tool.",
)
def test_adds_fallback_reasoning_for_uncached_assistant_tool_call(self) -> None:
payload = {
"model": "deepseek-v4-pro",