diff --git a/README.md b/README.md index e41a7e5..5750d35 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ This proxy can also help **other applications and coding agents** beyond Cursor ## What It Does - ✅ Injects `reasoning_content` into outgoing tool-call requests since Cursor does not include the field, restoring previously cached reasoning from regular and streamed DeepSeek responses. See [DeepSeek docs](https://api-docs.deepseek.com/guides/thinking_mode#tool-calls) for more details. -- ✅ Displays DeepSeek's thinking tokens in Cursor by forwarding them into Cursor-visible `...` blocks. In BYOK (bring your own key) mode, Cursor renders these thinking blocks as plain text instead of a native collapsible thinking view. You can disable thinking token display with `--no-display-reasoning` or setting `display_reasoning: false` in the config file. +- ✅ Displays DeepSeek's thinking tokens in Cursor by forwarding them into Cursor-visible collapsible Markdown `
<details><summary>Thinking</summary>...</details>
` blocks. - ✅ Starts an ngrok tunnel so Cursor can reach the local proxy through a public HTTPS URL. - ✅ Provides other compatibility fixes to make DeepSeek models run well in Cursor. @@ -137,7 +137,7 @@ Select `deepseek-v4-pro` in Cursor and use chat or agent mode as usual. - **Core fix:** DeepSeek's [thinking mode](https://api-docs.deepseek.com/guides/thinking_mode#tool-calls) requires `reasoning_content` from assistant tool-call messages to be passed back in subsequent requests, but Cursor omits this field, causing a 400 error. The proxy (`Cursor → ngrok → proxy → DeepSeek API`) stores `reasoning_content` from every DeepSeek response in a local SQLite cache, keyed by message signature, tool-call ID, and tool-call function signature, and patches outgoing requests with missing `reasoning_content` before they reach DeepSeek. On a cold cache (proxy restart, model switch), it logs and drops unrecoverable history, continues from the latest user request, and prefixes the next Cursor response with a notice. - **Multi-conversation isolation:** To avoid collisions across concurrent conversations, the proxy scopes cache keys by a SHA-256 hash of the canonical conversation prefix (roles, content, and tool calls, excluding `reasoning_content`) plus the upstream model, configuration, and an API-key hash. Different threads get different scopes, so reused tool-call IDs do not collide. Byte-identical cloned histories produce identical scopes. - **Context caching compatibility:** The proxy preserves compatibility by never injecting synthetic thread IDs, timestamps, or cache-control messages. It restores `reasoning_content` as the exact original string, so repeated prefixes remain intact for [DeepSeek context cache](https://api-docs.deepseek.com/guides/kv_cache). Cache hit rates are logged in the terminal output. 
-- **Additional compatibility fixes:** Beyond reasoning repair, the proxy converts legacy `functions`/`function_call` fields to `tools`/`tool_choice`, preserves required and named tool-choice semantics, normalizes `reasoning_effort` aliases, strips mirrored `` blocks from assistant content, flattens multi-part content arrays to plain text, and mirrors `reasoning_content` into Cursor-visible `...` blocks. +- **Additional compatibility fixes:** Beyond reasoning repair, the proxy converts legacy `functions`/`function_call` fields to `tools`/`tool_choice`, preserves required and named tool-choice semantics, normalizes `reasoning_effort` aliases, strips mirrored thinking display blocks from assistant content, flattens multi-part content arrays to plain text, and mirrors `reasoning_content` into Cursor-visible Markdown details blocks. ## Development diff --git a/src/deepseek_cursor_proxy/config.py b/src/deepseek_cursor_proxy/config.py index fb4981c..427887b 100644 --- a/src/deepseek_cursor_proxy/config.py +++ b/src/deepseek_cursor_proxy/config.py @@ -21,7 +21,8 @@ DEFAULT_UPSTREAM_BASE_URL = "https://api.deepseek.com" DEFAULT_UPSTREAM_MODEL = "deepseek-v4-pro" DEFAULT_THINKING = "enabled" DEFAULT_REASONING_EFFORT = "high" -DEFAULT_CURSOR_DISPLAY_REASONING = True +DEFAULT_DISPLAY_REASONING = True +DEFAULT_COLLAPSIBLE_REASONING = True DEFAULT_NGROK = True DEFAULT_VERBOSE = False DEFAULT_REQUEST_TIMEOUT = 300.0 @@ -43,7 +44,8 @@ base_url: {DEFAULT_UPSTREAM_BASE_URL} model: {DEFAULT_UPSTREAM_MODEL} thinking: {DEFAULT_THINKING} reasoning_effort: {DEFAULT_REASONING_EFFORT} -display_reasoning: {str(DEFAULT_CURSOR_DISPLAY_REASONING).lower()} +display_reasoning: {str(DEFAULT_DISPLAY_REASONING).lower()} +collasible_reasoning: {str(DEFAULT_COLLAPSIBLE_REASONING).lower()} host: {DEFAULT_HOST} port: {DEFAULT_PORT} @@ -103,6 +105,14 @@ def setting_value(settings: Mapping[str, Any], key: str) -> Any: return settings.get(key, MISSING) +def setting_value_any(settings: Mapping[str, Any], 
*keys: str) -> Any: + for key in keys: + value = setting_value(settings, key) + if value is not MISSING: + return value + return MISSING + + def as_str(value: Any, default: str) -> str: if value is MISSING or value is None: return default @@ -190,7 +200,8 @@ class ProxyConfig: missing_reasoning_strategy: str = DEFAULT_MISSING_REASONING_STRATEGY reasoning_cache_max_age_seconds: int = DEFAULT_REASONING_CACHE_MAX_AGE_SECONDS reasoning_cache_max_rows: int = DEFAULT_REASONING_CACHE_MAX_ROWS - cursor_display_reasoning: bool = DEFAULT_CURSOR_DISPLAY_REASONING + display_reasoning: bool = DEFAULT_DISPLAY_REASONING + collapsible_reasoning: bool = DEFAULT_COLLAPSIBLE_REASONING cors: bool = DEFAULT_CORS verbose: bool = DEFAULT_VERBOSE ngrok: bool = DEFAULT_NGROK @@ -250,9 +261,17 @@ class ProxyConfig: setting_value(settings, "reasoning_cache_max_rows"), DEFAULT_REASONING_CACHE_MAX_ROWS, ), - cursor_display_reasoning=as_bool( + display_reasoning=as_bool( setting_value(settings, "display_reasoning"), - DEFAULT_CURSOR_DISPLAY_REASONING, + DEFAULT_DISPLAY_REASONING, + ), + collapsible_reasoning=as_bool( + setting_value_any( + settings, + "collasible_reasoning", + "collapsible_reasoning", + ), + DEFAULT_COLLAPSIBLE_REASONING, ), cors=as_bool( setting_value(settings, "cors"), diff --git a/src/deepseek_cursor_proxy/server.py b/src/deepseek_cursor_proxy/server.py index 3120ba2..7356e4f 100644 --- a/src/deepseek_cursor_proxy/server.py +++ b/src/deepseek_cursor_proxy/server.py @@ -625,8 +625,8 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler): accumulator = StreamAccumulator() usage: dict[str, Any] | None = None display_adapter = ( - CursorReasoningDisplayAdapter() - if self.config.cursor_display_reasoning + CursorReasoningDisplayAdapter(self.config.collapsible_reasoning) + if self.config.display_reasoning else None ) scope = ( @@ -849,7 +849,29 @@ def build_arg_parser() -> argparse.ArgumentParser: "--display-reasoning", action=argparse.BooleanOptionalAction, default=None, - 
help="Mirror reasoning_content into Cursor-visible content", + help="Mirror reasoning_content into Cursor-visible content", + ) + parser.add_argument( + "--collapsible-reasoning", + action=argparse.BooleanOptionalAction, + default=None, + help="Use Markdown details for mirrored reasoning when display is enabled", + ) + parser.add_argument( + "--collasible-reasoning", + "--collasible-resoning", + dest="collapsible_reasoning", + action="store_true", + default=argparse.SUPPRESS, + help=argparse.SUPPRESS, + ) + parser.add_argument( + "--no-collasible-reasoning", + "--no-collasible-resoning", + dest="collapsible_reasoning", + action="store_false", + default=argparse.SUPPRESS, + help=argparse.SUPPRESS, ) parser.add_argument( "--cors", @@ -1181,7 +1203,9 @@ def main(argv: list[str] | None = None) -> int: if args.trace_dir is not None: updates["trace_dir"] = args.trace_dir if args.display_reasoning is not None: - updates["cursor_display_reasoning"] = args.display_reasoning + updates["display_reasoning"] = args.display_reasoning + if args.collapsible_reasoning is not None: + updates["collapsible_reasoning"] = args.collapsible_reasoning if args.cors is not None: updates["cors"] = args.cors if args.request_timeout is not None: @@ -1231,12 +1255,14 @@ def main(argv: list[str] | None = None) -> int: ) LOG.info( ( - "thinking=%s reasoning_effort=%s cursor_display_reasoning=%s " - "missing_reasoning_strategy=%s reasoning_content_path=%s" + "thinking=%s reasoning_effort=%s display_reasoning=%s " + "collapsible_reasoning=%s missing_reasoning_strategy=%s " + "reasoning_content_path=%s" ), config.thinking, config.reasoning_effort, - config.cursor_display_reasoning, + config.display_reasoning, + config.collapsible_reasoning, config.missing_reasoning_strategy, config.reasoning_content_path, ) diff --git a/src/deepseek_cursor_proxy/streaming.py b/src/deepseek_cursor_proxy/streaming.py index 520be63..9157d1c 100644 --- a/src/deepseek_cursor_proxy/streaming.py +++ 
b/src/deepseek_cursor_proxy/streaming.py @@ -9,6 +9,8 @@ from .reasoning_store import ReasoningStore THINKING_BLOCK_START = "\n" THINKING_BLOCK_END = "\n\n\n" +COLLAPSIBLE_THINKING_BLOCK_START = "
<details>\n<summary>Thinking</summary>\n\n" +COLLAPSIBLE_THINKING_BLOCK_END = "\n</details>
\n\n" @dataclass @@ -212,9 +214,15 @@ class StreamAccumulator: class CursorReasoningDisplayAdapter: """Mirror reasoning_content into content for Cursor's visible thinking UI path.""" - def __init__(self) -> None: + def __init__(self, collapsible: bool = True) -> None: self._open_choices: set[int] = set() self._last_chunk_metadata: dict[str, Any] = {} + self._block_start = ( + COLLAPSIBLE_THINKING_BLOCK_START if collapsible else THINKING_BLOCK_START + ) + self._block_end = ( + COLLAPSIBLE_THINKING_BLOCK_END if collapsible else THINKING_BLOCK_END + ) def rewrite_chunk(self, chunk: dict[str, Any]) -> None: self._remember_chunk_metadata(chunk) @@ -235,7 +243,7 @@ class CursorReasoningDisplayAdapter: reasoning_content = delta.get("reasoning_content") if isinstance(reasoning_content, str) and reasoning_content: if index not in self._open_choices: - mirrored_parts.append(THINKING_BLOCK_START) + mirrored_parts.append(self._block_start) self._open_choices.add(index) mirrored_parts.append(reasoning_content) @@ -246,7 +254,7 @@ class CursorReasoningDisplayAdapter: or raw_choice.get("finish_reason") is not None ) if should_close: - mirrored_parts.append(THINKING_BLOCK_END) + mirrored_parts.append(self._block_end) self._open_choices.discard(index) if not mirrored_parts: @@ -262,7 +270,7 @@ class CursorReasoningDisplayAdapter: choices = [ { "index": index, - "delta": {"content": THINKING_BLOCK_END}, + "delta": {"content": self._block_end}, "finish_reason": None, } for index in sorted(self._open_choices) diff --git a/src/deepseek_cursor_proxy/transform.py b/src/deepseek_cursor_proxy/transform.py index 3dda5a5..1ea53ee 100644 --- a/src/deepseek_cursor_proxy/transform.py +++ b/src/deepseek_cursor_proxy/transform.py @@ -70,8 +70,16 @@ EFFORT_ALIASES = { } CURSOR_THINKING_BLOCK_RE = re.compile( - r"<(?:think|thinking)>[\s\S]*?(?:|$)\s*", - re.IGNORECASE, + r""" + (?: + <(?:think|thinking)\b[^>]*>[\s\S]*?(?:|\Z) + | + ]*>\s* + ]*>\s*Thinking\s* + [\s\S]*?(?:|\Z) + )\s* + """, + 
re.IGNORECASE | re.VERBOSE, ) RECOVERY_NOTICE_TEXT = "[deepseek-cursor-proxy] Refreshed reasoning_content history." diff --git a/tests/test_config.py b/tests/test_config.py index 956d2c9..68651e8 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -8,6 +8,7 @@ import unittest from unittest.mock import patch from deepseek_cursor_proxy.config import ( + DEFAULT_COLLAPSIBLE_REASONING, DEFAULT_MISSING_REASONING_STRATEGY, DEFAULT_NGROK, DEFAULT_PORT, @@ -39,6 +40,10 @@ class ConfigTests(unittest.TestCase): home / ".deepseek-cursor-proxy" / "reasoning_content.sqlite3", ) self.assertEqual(ProxyConfig().ngrok, DEFAULT_NGROK) + self.assertEqual( + ProxyConfig().collapsible_reasoning, + DEFAULT_COLLAPSIBLE_REASONING, + ) self.assertIsNone(ProxyConfig().trace_dir) def test_missing_default_config_file_is_populated(self) -> None: @@ -67,9 +72,18 @@ class ConfigTests(unittest.TestCase): config_text, ) self.assertIn(f"ngrok: {str(DEFAULT_NGROK).lower()}", config_text) + self.assertIn( + "collasible_reasoning: " + f"{str(DEFAULT_COLLAPSIBLE_REASONING).lower()}", + config_text, + ) self.assertEqual(stat.S_IMODE(config_path.stat().st_mode), 0o600) self.assertEqual(config.upstream_model, DEFAULT_UPSTREAM_MODEL) self.assertEqual(config.ngrok, DEFAULT_NGROK) + self.assertEqual( + config.collapsible_reasoning, + DEFAULT_COLLAPSIBLE_REASONING, + ) self.assertEqual( config.missing_reasoning_strategy, DEFAULT_MISSING_REASONING_STRATEGY ) @@ -117,6 +131,7 @@ class ConfigTests(unittest.TestCase): "max_request_body_bytes: 1234", "cors: true", "display_reasoning: false", + "collasible_reasoning: false", f"reasoning_content_path: {reasoning_content_path}", "missing_reasoning_strategy: reject", "reasoning_cache_max_age_seconds: 60", @@ -139,7 +154,8 @@ class ConfigTests(unittest.TestCase): self.assertEqual(config.request_timeout, 123.5) self.assertEqual(config.max_request_body_bytes, 1234) self.assertTrue(config.cors) - self.assertFalse(config.cursor_display_reasoning) + 
self.assertFalse(config.display_reasoning) + self.assertFalse(config.collapsible_reasoning) self.assertEqual(config.reasoning_content_path, reasoning_content_path) self.assertEqual(config.missing_reasoning_strategy, "reject") self.assertEqual(config.reasoning_cache_max_age_seconds, 60) @@ -155,6 +171,7 @@ class ConfigTests(unittest.TestCase): "missing_reasoning_strategy: maybe", "port: nope", "verbose: maybe", + "collasible_reasoning: maybe", ] ), encoding="utf-8", @@ -169,6 +186,10 @@ class ConfigTests(unittest.TestCase): self.assertEqual(config.port, DEFAULT_PORT) self.assertEqual(config.ngrok, DEFAULT_NGROK) self.assertEqual(config.verbose, DEFAULT_VERBOSE) + self.assertEqual( + config.collapsible_reasoning, + DEFAULT_COLLAPSIBLE_REASONING, + ) def test_relative_reasoning_content_path_in_config_is_relative_to_config_file( self, @@ -204,7 +225,16 @@ class ConfigTests(unittest.TestCase): config = ProxyConfig.from_file(config_path=config_path) - self.assertFalse(config.cursor_display_reasoning) + self.assertFalse(config.display_reasoning) + + def test_collapsible_reasoning_can_use_corrected_config_key(self) -> None: + with TemporaryDirectory() as temp_dir: + config_path = Path(temp_dir) / "config.yaml" + config_path.write_text("collapsible_reasoning: false\n", encoding="utf-8") + + config = ProxyConfig.from_file(config_path=config_path) + + self.assertFalse(config.collapsible_reasoning) def test_invalid_yaml_config_raises_value_error(self) -> None: with TemporaryDirectory() as temp_dir: diff --git a/tests/test_proxy_end_to_end.py b/tests/test_proxy_end_to_end.py index 9e5c61c..18d5cf8 100644 --- a/tests/test_proxy_end_to_end.py +++ b/tests/test_proxy_end_to_end.py @@ -1024,12 +1024,15 @@ class ReasoningStreamingProxyTests(unittest.TestCase): for line in body.splitlines() if line.startswith("data: {") ] - self.assertEqual(chunks[0]["choices"][0]["delta"]["content"], "\nNeed ") + self.assertEqual( + chunks[0]["choices"][0]["delta"]["content"], + "
\nThinking\n\nNeed ", + ) self.assertEqual(chunks[0]["choices"][0]["delta"]["reasoning_content"], "Need ") self.assertEqual(chunks[1]["choices"][0]["delta"]["content"], "context.") self.assertEqual( chunks[2]["choices"][0]["delta"]["content"], - "\n\n\n" + FINAL_CONTENT, + "\n
\n\n" + FINAL_CONTENT, ) stored_message = { @@ -1085,7 +1088,7 @@ class ReasoningStreamingProxyTests(unittest.TestCase): trace["upstream"]["stream"]["chunks"][0]["line"], ) self.assertIn( - "", + "
", trace["cursor_response"]["stream"]["chunks"][0]["line"], ) self.assertEqual( @@ -1116,7 +1119,7 @@ class ReasoningStreamingProxyTests(unittest.TestCase): ] self.assertEqual( chunks[2]["choices"][0]["delta"]["content"], - "\n\n\n" + RECOVERY_NOTICE_CONTENT + FINAL_CONTENT, + "\n
\n\n" + RECOVERY_NOTICE_CONTENT + FINAL_CONTENT, ) diff --git a/tests/test_server.py b/tests/test_server.py index 577ca41..69dd7da 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -80,6 +80,7 @@ class ServerTests(unittest.TestCase): "--no-ngrok", "--no-verbose", "--no-display-reasoning", + "--no-collasible-resoning", "--cors", "--trace-dir", "/tmp/dcp-traces", @@ -89,6 +90,7 @@ class ServerTests(unittest.TestCase): self.assertFalse(args.ngrok) self.assertFalse(args.verbose) self.assertFalse(args.display_reasoning) + self.assertFalse(args.collapsible_reasoning) self.assertTrue(args.cors) self.assertEqual(args.trace_dir, Path("/tmp/dcp-traces")) @@ -206,6 +208,48 @@ class ServerTests(unittest.TestCase): "\n".join(captured.output), ) + def test_collapsible_reasoning_has_no_effect_when_display_is_disabled( + self, + ) -> None: + wfile = BytesIO() + handler = make_proxy_handler(wfile) + handler.server.config = ProxyConfig( + display_reasoning=False, + collapsible_reasoning=True, + ) + chunk = { + "id": "chatcmpl-stream", + "model": "deepseek-v4-pro", + "choices": [ + { + "index": 0, + "delta": {"reasoning_content": "Need context."}, + } + ], + } + response = FakeStreamingResponse( + [ + f"data: {json.dumps(chunk)}\n\n".encode("utf-8"), + b"data: [DONE]\n\n", + ] + ) + + try: + sent = handler._proxy_streaming_response( + response, + "deepseek-v4-pro", + [{"role": "user", "content": "hi"}], + "cache-namespace", + ) + finally: + handler.server.reasoning_store.close() + + body = wfile.getvalue().decode("utf-8") + self.assertTrue(sent.sent) + self.assertIn("reasoning_content", body) + self.assertNotIn("
", body) + self.assertNotIn("", body) + if __name__ == "__main__": unittest.main() diff --git a/tests/test_streaming.py b/tests/test_streaming.py index ade94c0..d2019b1 100644 --- a/tests/test_streaming.py +++ b/tests/test_streaming.py @@ -298,7 +298,7 @@ class StreamAccumulatorTests(unittest.TestCase): class CursorReasoningDisplayAdapterTests(unittest.TestCase): - def test_mirrors_reasoning_content_into_think_tagged_content(self) -> None: + def test_mirrors_reasoning_content_into_details_content(self) -> None: adapter = CursorReasoningDisplayAdapter() reasoning_chunk = { "id": "chatcmpl-stream", @@ -329,8 +329,43 @@ class CursorReasoningDisplayAdapterTests(unittest.TestCase): reasoning_delta = reasoning_chunk["choices"][0]["delta"] answer_delta = answer_chunk["choices"][0]["delta"] self.assertEqual(reasoning_delta["reasoning_content"], "Need context.") - self.assertEqual(reasoning_delta["content"], "\nNeed context.") - self.assertEqual(answer_delta["content"], "\n\n\nFinal answer.") + self.assertEqual( + reasoning_delta["content"], + "
\nThinking\n\nNeed context.", + ) + self.assertEqual(answer_delta["content"], "\n
\n\nFinal answer.") + + def test_can_mirror_reasoning_content_into_legacy_think_content(self) -> None: + adapter = CursorReasoningDisplayAdapter(collapsible=False) + reasoning_chunk = { + "choices": [ + { + "index": 0, + "delta": {"reasoning_content": "Need context."}, + "finish_reason": None, + } + ], + } + answer_chunk = { + "choices": [ + { + "index": 0, + "delta": {"content": "Final answer."}, + "finish_reason": None, + } + ], + } + + adapter.rewrite_chunk(reasoning_chunk) + adapter.rewrite_chunk(answer_chunk) + + self.assertEqual( + reasoning_chunk["choices"][0]["delta"]["content"], "\nNeed context." + ) + self.assertEqual( + answer_chunk["choices"][0]["delta"]["content"], + "\n\n\nFinal answer.", + ) def test_closes_thinking_block_before_tool_calls(self) -> None: adapter = CursorReasoningDisplayAdapter() @@ -364,7 +399,9 @@ class CursorReasoningDisplayAdapterTests(unittest.TestCase): adapter.rewrite_chunk(tool_chunk) - self.assertEqual(tool_chunk["choices"][0]["delta"]["content"], "\n
\n\n") + self.assertEqual( + tool_chunk["choices"][0]["delta"]["content"], "\n
\n\n" + ) def test_flush_chunk_closes_unfinished_thinking_block_at_done(self) -> None: adapter = CursorReasoningDisplayAdapter() @@ -388,7 +425,7 @@ class CursorReasoningDisplayAdapterTests(unittest.TestCase): assert closing_chunk is not None self.assertEqual(closing_chunk["model"], "deepseek-v4-pro") self.assertEqual( - closing_chunk["choices"][0]["delta"]["content"], "\n
\n\n" + closing_chunk["choices"][0]["delta"]["content"], "\n\n\n" ) self.assertIsNone(adapter.flush_chunk("deepseek-v4-pro")) diff --git a/tests/test_transform.py b/tests/test_transform.py index ffcb01c..6e8aac2 100644 --- a/tests/test_transform.py +++ b/tests/test_transform.py @@ -60,6 +60,27 @@ class TransformTests(unittest.TestCase): ), "Final answer.", ) + self.assertEqual( + strip_cursor_thinking_blocks( + "
\n" + "Thinking\n\n" + "Need context.\n" + "
\n\n" + "Final answer." + ), + "Final answer.", + ) + + def test_preserves_regular_markdown_details_in_assistant_content(self) -> None: + content = ( + "
\n" + "Example\n\n" + "Visible details.\n" + "
\n\n" + "Final answer." + ) + + self.assertEqual(strip_cursor_thinking_blocks(content), content) def test_prepares_assistant_content_without_mirrored_thinking_blocks( self, @@ -70,7 +91,13 @@ class TransformTests(unittest.TestCase): {"role": "user", "content": "hello"}, { "role": "assistant", - "content": "\nHidden.\n\n\nVisible answer.", + "content": ( + "
\n" + "Thinking\n\n" + "Hidden.\n" + "
\n\n" + "Visible answer." + ), }, {"role": "user", "content": "continue"}, ], @@ -736,7 +763,12 @@ class TransformTests(unittest.TestCase): *prior, { "role": "assistant", - "content": "\nNeed to call the file tool.\n\n\n", + "content": ( + "
\n" + "Thinking\n\n" + "Need to call the file tool.\n" + "
\n\n" + ), "tool_calls": [tool_call], }, ],