feat(streaming): add collapsible reasoning display (#32)

main
Yixing Lao 2026-05-01 17:05:06 +08:00 committed by GitHub
parent 0c9ec1a079
commit b65f0dd8a2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 240 additions and 33 deletions

View File

@ -8,7 +8,7 @@ This proxy can also help **other applications and coding agents** beyond Cursor
## What It Does
- ✅ Injects `reasoning_content` into outgoing tool-call requests since Cursor does not include the field, restoring previously cached reasoning from regular and streamed DeepSeek responses. See [DeepSeek docs](https://api-docs.deepseek.com/guides/thinking_mode#tool-calls) for more details.
- ✅ Displays DeepSeek's thinking tokens in Cursor by forwarding them into Cursor-visible `<think>...</think>` blocks. In BYOK (bring your own key) mode, Cursor renders these thinking blocks as plain text instead of a native collapsible thinking view. You can disable thinking token display with `--no-display-reasoning` or setting `display_reasoning: false` in the config file.
- ✅ Displays DeepSeek's thinking tokens in Cursor by forwarding them into Cursor-visible collapsible Markdown `<details><summary>Thinking</summary>...</details>` blocks.
- ✅ Starts an ngrok tunnel so Cursor can reach the local proxy through a public HTTPS URL.
- ✅ Provides other compatibility fixes to make DeepSeek models run well in Cursor.
@ -137,7 +137,7 @@ Select `deepseek-v4-pro` in Cursor and use chat or agent mode as usual.
- **Core fix:** DeepSeek's [thinking mode](https://api-docs.deepseek.com/guides/thinking_mode#tool-calls) requires `reasoning_content` from assistant tool-call messages to be passed back in subsequent requests, but Cursor omits this field, causing a 400 error. The proxy (`Cursor → ngrok → proxy → DeepSeek API`) stores `reasoning_content` from every DeepSeek response in a local SQLite cache, keyed by message signature, tool-call ID, and tool-call function signature, and patches outgoing requests with missing `reasoning_content` before they reach DeepSeek. On a cold cache (proxy restart, model switch), it logs and drops unrecoverable history, continues from the latest user request, and prefixes the next Cursor response with a notice.
- **Multi-conversation isolation:** To avoid collisions across concurrent conversations, the proxy scopes cache keys by a SHA-256 hash of the canonical conversation prefix (roles, content, and tool calls, excluding `reasoning_content`) plus the upstream model, configuration, and an API-key hash. Different threads get different scopes, so reused tool-call IDs do not collide. Byte-identical cloned histories produce identical scopes.
- **Context caching compatibility:** The proxy preserves compatibility by never injecting synthetic thread IDs, timestamps, or cache-control messages. It restores `reasoning_content` as the exact original string, so repeated prefixes remain intact for [DeepSeek context cache](https://api-docs.deepseek.com/guides/kv_cache). Cache hit rates are logged in the terminal output.
- **Additional compatibility fixes:** Beyond reasoning repair, the proxy converts legacy `functions`/`function_call` fields to `tools`/`tool_choice`, preserves required and named tool-choice semantics, normalizes `reasoning_effort` aliases, strips mirrored `<think>` blocks from assistant content, flattens multi-part content arrays to plain text, and mirrors `reasoning_content` into Cursor-visible `<think>...</think>` blocks.
- **Additional compatibility fixes:** Beyond reasoning repair, the proxy converts legacy `functions`/`function_call` fields to `tools`/`tool_choice`, preserves required and named tool-choice semantics, normalizes `reasoning_effort` aliases, strips mirrored thinking display blocks from assistant content, flattens multi-part content arrays to plain text, and mirrors `reasoning_content` into Cursor-visible Markdown details blocks.
## Development

View File

@ -21,7 +21,8 @@ DEFAULT_UPSTREAM_BASE_URL = "https://api.deepseek.com"
DEFAULT_UPSTREAM_MODEL = "deepseek-v4-pro"
DEFAULT_THINKING = "enabled"
DEFAULT_REASONING_EFFORT = "high"
DEFAULT_CURSOR_DISPLAY_REASONING = True
DEFAULT_DISPLAY_REASONING = True
DEFAULT_COLLAPSIBLE_REASONING = True
DEFAULT_NGROK = True
DEFAULT_VERBOSE = False
DEFAULT_REQUEST_TIMEOUT = 300.0
@ -43,7 +44,8 @@ base_url: {DEFAULT_UPSTREAM_BASE_URL}
model: {DEFAULT_UPSTREAM_MODEL}
thinking: {DEFAULT_THINKING}
reasoning_effort: {DEFAULT_REASONING_EFFORT}
display_reasoning: {str(DEFAULT_CURSOR_DISPLAY_REASONING).lower()}
display_reasoning: {str(DEFAULT_DISPLAY_REASONING).lower()}
collasible_reasoning: {str(DEFAULT_COLLAPSIBLE_REASONING).lower()}
host: {DEFAULT_HOST}
port: {DEFAULT_PORT}
@ -103,6 +105,14 @@ def setting_value(settings: Mapping[str, Any], key: str) -> Any:
return settings.get(key, MISSING)
def setting_value_any(settings: Mapping[str, Any], *keys: str) -> Any:
    """Return the value of the first key in ``keys`` that is present in ``settings``.

    Keys are probed in the order given via ``setting_value``; lookup stops at
    the first result that is not the ``MISSING`` sentinel. When no key yields
    a value (or no keys are passed), ``MISSING`` is returned.
    """
    # The generator is lazy, so later keys are only looked up if needed.
    lookups = (setting_value(settings, name) for name in keys)
    return next((found for found in lookups if found is not MISSING), MISSING)
def as_str(value: Any, default: str) -> str:
if value is MISSING or value is None:
return default
@ -190,7 +200,8 @@ class ProxyConfig:
missing_reasoning_strategy: str = DEFAULT_MISSING_REASONING_STRATEGY
reasoning_cache_max_age_seconds: int = DEFAULT_REASONING_CACHE_MAX_AGE_SECONDS
reasoning_cache_max_rows: int = DEFAULT_REASONING_CACHE_MAX_ROWS
cursor_display_reasoning: bool = DEFAULT_CURSOR_DISPLAY_REASONING
display_reasoning: bool = DEFAULT_DISPLAY_REASONING
collapsible_reasoning: bool = DEFAULT_COLLAPSIBLE_REASONING
cors: bool = DEFAULT_CORS
verbose: bool = DEFAULT_VERBOSE
ngrok: bool = DEFAULT_NGROK
@ -250,9 +261,17 @@ class ProxyConfig:
setting_value(settings, "reasoning_cache_max_rows"),
DEFAULT_REASONING_CACHE_MAX_ROWS,
),
cursor_display_reasoning=as_bool(
display_reasoning=as_bool(
setting_value(settings, "display_reasoning"),
DEFAULT_CURSOR_DISPLAY_REASONING,
DEFAULT_DISPLAY_REASONING,
),
collapsible_reasoning=as_bool(
setting_value_any(
settings,
"collasible_reasoning",
"collapsible_reasoning",
),
DEFAULT_COLLAPSIBLE_REASONING,
),
cors=as_bool(
setting_value(settings, "cors"),

View File

@ -625,8 +625,8 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler):
accumulator = StreamAccumulator()
usage: dict[str, Any] | None = None
display_adapter = (
CursorReasoningDisplayAdapter()
if self.config.cursor_display_reasoning
CursorReasoningDisplayAdapter(self.config.collapsible_reasoning)
if self.config.display_reasoning
else None
)
scope = (
@ -849,7 +849,29 @@ def build_arg_parser() -> argparse.ArgumentParser:
"--display-reasoning",
action=argparse.BooleanOptionalAction,
default=None,
help="Mirror reasoning_content into Cursor-visible <think> content",
help="Mirror reasoning_content into Cursor-visible content",
)
parser.add_argument(
"--collapsible-reasoning",
action=argparse.BooleanOptionalAction,
default=None,
help="Use Markdown details for mirrored reasoning when display is enabled",
)
parser.add_argument(
"--collasible-reasoning",
"--collasible-resoning",
dest="collapsible_reasoning",
action="store_true",
default=argparse.SUPPRESS,
help=argparse.SUPPRESS,
)
parser.add_argument(
"--no-collasible-reasoning",
"--no-collasible-resoning",
dest="collapsible_reasoning",
action="store_false",
default=argparse.SUPPRESS,
help=argparse.SUPPRESS,
)
parser.add_argument(
"--cors",
@ -1181,7 +1203,9 @@ def main(argv: list[str] | None = None) -> int:
if args.trace_dir is not None:
updates["trace_dir"] = args.trace_dir
if args.display_reasoning is not None:
updates["cursor_display_reasoning"] = args.display_reasoning
updates["display_reasoning"] = args.display_reasoning
if args.collapsible_reasoning is not None:
updates["collapsible_reasoning"] = args.collapsible_reasoning
if args.cors is not None:
updates["cors"] = args.cors
if args.request_timeout is not None:
@ -1231,12 +1255,14 @@ def main(argv: list[str] | None = None) -> int:
)
LOG.info(
(
"thinking=%s reasoning_effort=%s cursor_display_reasoning=%s "
"missing_reasoning_strategy=%s reasoning_content_path=%s"
"thinking=%s reasoning_effort=%s display_reasoning=%s "
"collapsible_reasoning=%s missing_reasoning_strategy=%s "
"reasoning_content_path=%s"
),
config.thinking,
config.reasoning_effort,
config.cursor_display_reasoning,
config.display_reasoning,
config.collapsible_reasoning,
config.missing_reasoning_strategy,
config.reasoning_content_path,
)

View File

@ -9,6 +9,8 @@ from .reasoning_store import ReasoningStore
THINKING_BLOCK_START = "<think>\n"
THINKING_BLOCK_END = "\n</think>\n\n"
COLLAPSIBLE_THINKING_BLOCK_START = "<details>\n<summary>Thinking</summary>\n\n"
COLLAPSIBLE_THINKING_BLOCK_END = "\n</details>\n\n"
@dataclass
@ -212,9 +214,15 @@ class StreamAccumulator:
class CursorReasoningDisplayAdapter:
"""Mirror reasoning_content into content for Cursor's visible thinking UI path."""
def __init__(self) -> None:
def __init__(self, collapsible: bool = True) -> None:
    """Set up per-stream state and pick the wrapper used around mirrored reasoning.

    Args:
        collapsible: When true, the collapsible block delimiters are used;
            otherwise the plain thinking block delimiters are used.
    """
    if collapsible:
        opening = COLLAPSIBLE_THINKING_BLOCK_START
        closing = COLLAPSIBLE_THINKING_BLOCK_END
    else:
        opening = THINKING_BLOCK_START
        closing = THINKING_BLOCK_END

    self._open_choices: set[int] = set()
    self._last_chunk_metadata: dict[str, Any] = {}
    # Delimiters are fixed at construction time for the adapter's lifetime.
    self._block_start = opening
    self._block_end = closing
def rewrite_chunk(self, chunk: dict[str, Any]) -> None:
self._remember_chunk_metadata(chunk)
@ -235,7 +243,7 @@ class CursorReasoningDisplayAdapter:
reasoning_content = delta.get("reasoning_content")
if isinstance(reasoning_content, str) and reasoning_content:
if index not in self._open_choices:
mirrored_parts.append(THINKING_BLOCK_START)
mirrored_parts.append(self._block_start)
self._open_choices.add(index)
mirrored_parts.append(reasoning_content)
@ -246,7 +254,7 @@ class CursorReasoningDisplayAdapter:
or raw_choice.get("finish_reason") is not None
)
if should_close:
mirrored_parts.append(THINKING_BLOCK_END)
mirrored_parts.append(self._block_end)
self._open_choices.discard(index)
if not mirrored_parts:
@ -262,7 +270,7 @@ class CursorReasoningDisplayAdapter:
choices = [
{
"index": index,
"delta": {"content": THINKING_BLOCK_END},
"delta": {"content": self._block_end},
"finish_reason": None,
}
for index in sorted(self._open_choices)

View File

@ -70,8 +70,16 @@ EFFORT_ALIASES = {
}
CURSOR_THINKING_BLOCK_RE = re.compile(
r"<(?:think|thinking)>[\s\S]*?(?:</(?:think|thinking)>|$)\s*",
re.IGNORECASE,
r"""
(?:
<(?:think|thinking)\b[^>]*>[\s\S]*?(?:</(?:think|thinking)>|\Z)
|
<details\b[^>]*>\s*
<summary\b[^>]*>\s*Thinking\s*</summary>
[\s\S]*?(?:</details>|\Z)
)\s*
""",
re.IGNORECASE | re.VERBOSE,
)
RECOVERY_NOTICE_TEXT = "[deepseek-cursor-proxy] Refreshed reasoning_content history."

View File

@ -8,6 +8,7 @@ import unittest
from unittest.mock import patch
from deepseek_cursor_proxy.config import (
DEFAULT_COLLAPSIBLE_REASONING,
DEFAULT_MISSING_REASONING_STRATEGY,
DEFAULT_NGROK,
DEFAULT_PORT,
@ -39,6 +40,10 @@ class ConfigTests(unittest.TestCase):
home / ".deepseek-cursor-proxy" / "reasoning_content.sqlite3",
)
self.assertEqual(ProxyConfig().ngrok, DEFAULT_NGROK)
self.assertEqual(
ProxyConfig().collapsible_reasoning,
DEFAULT_COLLAPSIBLE_REASONING,
)
self.assertIsNone(ProxyConfig().trace_dir)
def test_missing_default_config_file_is_populated(self) -> None:
@ -67,9 +72,18 @@ class ConfigTests(unittest.TestCase):
config_text,
)
self.assertIn(f"ngrok: {str(DEFAULT_NGROK).lower()}", config_text)
self.assertIn(
"collasible_reasoning: "
f"{str(DEFAULT_COLLAPSIBLE_REASONING).lower()}",
config_text,
)
self.assertEqual(stat.S_IMODE(config_path.stat().st_mode), 0o600)
self.assertEqual(config.upstream_model, DEFAULT_UPSTREAM_MODEL)
self.assertEqual(config.ngrok, DEFAULT_NGROK)
self.assertEqual(
config.collapsible_reasoning,
DEFAULT_COLLAPSIBLE_REASONING,
)
self.assertEqual(
config.missing_reasoning_strategy, DEFAULT_MISSING_REASONING_STRATEGY
)
@ -117,6 +131,7 @@ class ConfigTests(unittest.TestCase):
"max_request_body_bytes: 1234",
"cors: true",
"display_reasoning: false",
"collasible_reasoning: false",
f"reasoning_content_path: {reasoning_content_path}",
"missing_reasoning_strategy: reject",
"reasoning_cache_max_age_seconds: 60",
@ -139,7 +154,8 @@ class ConfigTests(unittest.TestCase):
self.assertEqual(config.request_timeout, 123.5)
self.assertEqual(config.max_request_body_bytes, 1234)
self.assertTrue(config.cors)
self.assertFalse(config.cursor_display_reasoning)
self.assertFalse(config.display_reasoning)
self.assertFalse(config.collapsible_reasoning)
self.assertEqual(config.reasoning_content_path, reasoning_content_path)
self.assertEqual(config.missing_reasoning_strategy, "reject")
self.assertEqual(config.reasoning_cache_max_age_seconds, 60)
@ -155,6 +171,7 @@ class ConfigTests(unittest.TestCase):
"missing_reasoning_strategy: maybe",
"port: nope",
"verbose: maybe",
"collasible_reasoning: maybe",
]
),
encoding="utf-8",
@ -169,6 +186,10 @@ class ConfigTests(unittest.TestCase):
self.assertEqual(config.port, DEFAULT_PORT)
self.assertEqual(config.ngrok, DEFAULT_NGROK)
self.assertEqual(config.verbose, DEFAULT_VERBOSE)
self.assertEqual(
config.collapsible_reasoning,
DEFAULT_COLLAPSIBLE_REASONING,
)
def test_relative_reasoning_content_path_in_config_is_relative_to_config_file(
self,
@ -204,7 +225,16 @@ class ConfigTests(unittest.TestCase):
config = ProxyConfig.from_file(config_path=config_path)
self.assertFalse(config.cursor_display_reasoning)
self.assertFalse(config.display_reasoning)
def test_collapsible_reasoning_can_use_corrected_config_key(self) -> None:
    """The correctly spelled ``collapsible_reasoning`` config key is honored."""
    with TemporaryDirectory() as scratch_dir:
        config_file = Path(scratch_dir, "config.yaml")
        config_file.write_text("collapsible_reasoning: false\n", encoding="utf-8")

        loaded = ProxyConfig.from_file(config_path=config_file)

        self.assertFalse(loaded.collapsible_reasoning)
def test_invalid_yaml_config_raises_value_error(self) -> None:
with TemporaryDirectory() as temp_dir:

View File

@ -1024,12 +1024,15 @@ class ReasoningStreamingProxyTests(unittest.TestCase):
for line in body.splitlines()
if line.startswith("data: {")
]
self.assertEqual(chunks[0]["choices"][0]["delta"]["content"], "<think>\nNeed ")
self.assertEqual(
chunks[0]["choices"][0]["delta"]["content"],
"<details>\n<summary>Thinking</summary>\n\nNeed ",
)
self.assertEqual(chunks[0]["choices"][0]["delta"]["reasoning_content"], "Need ")
self.assertEqual(chunks[1]["choices"][0]["delta"]["content"], "context.")
self.assertEqual(
chunks[2]["choices"][0]["delta"]["content"],
"\n</think>\n\n" + FINAL_CONTENT,
"\n</details>\n\n" + FINAL_CONTENT,
)
stored_message = {
@ -1085,7 +1088,7 @@ class ReasoningStreamingProxyTests(unittest.TestCase):
trace["upstream"]["stream"]["chunks"][0]["line"],
)
self.assertIn(
"<think>",
"<details>",
trace["cursor_response"]["stream"]["chunks"][0]["line"],
)
self.assertEqual(
@ -1116,7 +1119,7 @@ class ReasoningStreamingProxyTests(unittest.TestCase):
]
self.assertEqual(
chunks[2]["choices"][0]["delta"]["content"],
"\n</think>\n\n" + RECOVERY_NOTICE_CONTENT + FINAL_CONTENT,
"\n</details>\n\n" + RECOVERY_NOTICE_CONTENT + FINAL_CONTENT,
)

View File

@ -80,6 +80,7 @@ class ServerTests(unittest.TestCase):
"--no-ngrok",
"--no-verbose",
"--no-display-reasoning",
"--no-collasible-resoning",
"--cors",
"--trace-dir",
"/tmp/dcp-traces",
@ -89,6 +90,7 @@ class ServerTests(unittest.TestCase):
self.assertFalse(args.ngrok)
self.assertFalse(args.verbose)
self.assertFalse(args.display_reasoning)
self.assertFalse(args.collapsible_reasoning)
self.assertTrue(args.cors)
self.assertEqual(args.trace_dir, Path("/tmp/dcp-traces"))
@ -206,6 +208,48 @@ class ServerTests(unittest.TestCase):
"\n".join(captured.output),
)
def test_collapsible_reasoning_has_no_effect_when_display_is_disabled(
    self,
) -> None:
    """With display disabled, no thinking wrapper of either style is emitted."""
    output = BytesIO()
    handler = make_proxy_handler(output)
    handler.server.config = ProxyConfig(
        display_reasoning=False,
        collapsible_reasoning=True,
    )

    # A single upstream chunk that carries only reasoning, then the SSE terminator.
    reasoning_choice = {
        "index": 0,
        "delta": {"reasoning_content": "Need context."},
    }
    upstream_chunk = {
        "id": "chatcmpl-stream",
        "model": "deepseek-v4-pro",
        "choices": [reasoning_choice],
    }
    sse_events = [
        f"data: {json.dumps(upstream_chunk)}\n\n".encode("utf-8"),
        b"data: [DONE]\n\n",
    ]
    response = FakeStreamingResponse(sse_events)

    try:
        result = handler._proxy_streaming_response(
            response,
            "deepseek-v4-pro",
            [{"role": "user", "content": "hi"}],
            "cache-namespace",
        )
    finally:
        # Always release the store, even if the proxy call raises.
        handler.server.reasoning_store.close()

    streamed = output.getvalue().decode("utf-8")
    self.assertTrue(result.sent)
    self.assertIn("reasoning_content", streamed)
    for wrapper_tag in ("<details>", "<think>"):
        self.assertNotIn(wrapper_tag, streamed)
if __name__ == "__main__":
unittest.main()

View File

@ -298,7 +298,7 @@ class StreamAccumulatorTests(unittest.TestCase):
class CursorReasoningDisplayAdapterTests(unittest.TestCase):
def test_mirrors_reasoning_content_into_think_tagged_content(self) -> None:
def test_mirrors_reasoning_content_into_details_content(self) -> None:
adapter = CursorReasoningDisplayAdapter()
reasoning_chunk = {
"id": "chatcmpl-stream",
@ -329,8 +329,43 @@ class CursorReasoningDisplayAdapterTests(unittest.TestCase):
reasoning_delta = reasoning_chunk["choices"][0]["delta"]
answer_delta = answer_chunk["choices"][0]["delta"]
self.assertEqual(reasoning_delta["reasoning_content"], "Need context.")
self.assertEqual(reasoning_delta["content"], "<think>\nNeed context.")
self.assertEqual(answer_delta["content"], "\n</think>\n\nFinal answer.")
self.assertEqual(
reasoning_delta["content"],
"<details>\n<summary>Thinking</summary>\n\nNeed context.",
)
self.assertEqual(answer_delta["content"], "\n</details>\n\nFinal answer.")
def test_can_mirror_reasoning_content_into_legacy_think_content(self) -> None:
    """``collapsible=False`` wraps mirrored reasoning in ``<think>`` tags."""

    def single_choice_chunk(delta):
        # Minimal streamed chunk: one unfinished choice at index 0.
        return {
            "choices": [
                {
                    "index": 0,
                    "delta": delta,
                    "finish_reason": None,
                }
            ],
        }

    thinking = single_choice_chunk({"reasoning_content": "Need context."})
    answer = single_choice_chunk({"content": "Final answer."})

    legacy_adapter = CursorReasoningDisplayAdapter(collapsible=False)
    for streamed in (thinking, answer):
        legacy_adapter.rewrite_chunk(streamed)

    thinking_delta = thinking["choices"][0]["delta"]
    answer_delta = answer["choices"][0]["delta"]
    self.assertEqual(thinking_delta["content"], "<think>\nNeed context.")
    self.assertEqual(answer_delta["content"], "\n</think>\n\nFinal answer.")
def test_closes_thinking_block_before_tool_calls(self) -> None:
adapter = CursorReasoningDisplayAdapter()
@ -364,7 +399,9 @@ class CursorReasoningDisplayAdapterTests(unittest.TestCase):
adapter.rewrite_chunk(tool_chunk)
self.assertEqual(tool_chunk["choices"][0]["delta"]["content"], "\n</think>\n\n")
self.assertEqual(
tool_chunk["choices"][0]["delta"]["content"], "\n</details>\n\n"
)
def test_flush_chunk_closes_unfinished_thinking_block_at_done(self) -> None:
adapter = CursorReasoningDisplayAdapter()
@ -388,7 +425,7 @@ class CursorReasoningDisplayAdapterTests(unittest.TestCase):
assert closing_chunk is not None
self.assertEqual(closing_chunk["model"], "deepseek-v4-pro")
self.assertEqual(
closing_chunk["choices"][0]["delta"]["content"], "\n</think>\n\n"
closing_chunk["choices"][0]["delta"]["content"], "\n</details>\n\n"
)
self.assertIsNone(adapter.flush_chunk("deepseek-v4-pro"))

View File

@ -60,6 +60,27 @@ class TransformTests(unittest.TestCase):
),
"Final answer.",
)
self.assertEqual(
strip_cursor_thinking_blocks(
"<details>\n"
"<summary>Thinking</summary>\n\n"
"Need context.\n"
"</details>\n\n"
"Final answer."
),
"Final answer.",
)
def test_preserves_regular_markdown_details_in_assistant_content(self) -> None:
    """A details block with an ordinary ("Example") summary is returned unchanged."""
    markdown = "\n".join(
        [
            "<details>",
            "<summary>Example</summary>",
            "",
            "Visible details.",
            "</details>",
            "",
            "Final answer.",
        ]
    )

    stripped = strip_cursor_thinking_blocks(markdown)

    self.assertEqual(stripped, markdown)
def test_prepares_assistant_content_without_mirrored_thinking_blocks(
self,
@ -70,7 +91,13 @@ class TransformTests(unittest.TestCase):
{"role": "user", "content": "hello"},
{
"role": "assistant",
"content": "<think>\nHidden.\n</think>\n\nVisible answer.",
"content": (
"<details>\n"
"<summary>Thinking</summary>\n\n"
"Hidden.\n"
"</details>\n\n"
"Visible answer."
),
},
{"role": "user", "content": "continue"},
],
@ -736,7 +763,12 @@ class TransformTests(unittest.TestCase):
*prior,
{
"role": "assistant",
"content": "<think>\nNeed to call the file tool.\n</think>\n\n",
"content": (
"<details>\n"
"<summary>Thinking</summary>\n\n"
"Need to call the file tool.\n"
"</details>\n\n"
),
"tool_calls": [tool_call],
},
],