feat(proxy): mirror reasoning as think tags for cursor (#2)
parent
27f332616a
commit
1717331057
|
|
@ -10,6 +10,7 @@ DEEPSEEK_MODEL=deepseek-v4-pro
|
||||||
DEEPSEEK_BASE_URL=https://api.deepseek.com
|
DEEPSEEK_BASE_URL=https://api.deepseek.com
|
||||||
DEEPSEEK_THINKING=enabled
|
DEEPSEEK_THINKING=enabled
|
||||||
DEEPSEEK_REASONING_EFFORT=high
|
DEEPSEEK_REASONING_EFFORT=high
|
||||||
|
CURSOR_DISPLAY_REASONING=true
|
||||||
|
|
||||||
PROXY_HOST=127.0.0.1
|
PROXY_HOST=127.0.0.1
|
||||||
PROXY_PORT=9000
|
PROXY_PORT=9000
|
||||||
|
|
|
||||||
17
README.md
17
README.md
|
|
@ -2,11 +2,19 @@
|
||||||
|
|
||||||
A simple proxy that caches and restores DeepSeek `reasoning_content` across tool-call turns in Cursor, making thinking models like `deepseek-v4-pro` and `deepseek-v4-flash` work correctly.
|
A simple proxy that caches and restores DeepSeek `reasoning_content` across tool-call turns in Cursor, making thinking models like `deepseek-v4-pro` and `deepseek-v4-flash` work correctly.
|
||||||
|
|
||||||
|
## What It Does
|
||||||
|
|
||||||
|
- Caches DeepSeek `reasoning_content` from regular and streamed responses, then restores it on later tool-call turns when Cursor omits it.
|
||||||
|
- Mirrors streamed `reasoning_content` into Cursor-visible `<think>...</think>` text so thinking tokens are shown in Cursor BYOK/proxy chats. Cursor currently renders this as normal chat text, not as a native collapsible Thinking block.
|
||||||
|
- Provides other compatibility fixes for running Cursor with the DeepSeek official API.
|
||||||
|
|
||||||
## Why This Exists
|
## Why This Exists
|
||||||
|
|
||||||
DeepSeek thinking mode returns `reasoning_content` separately from final `content`. After an assistant turn with tool calls, DeepSeek requires that same `reasoning_content` to be sent back in later requests. Cursor can omit it in custom OpenAI-compatible flows, causing `The reasoning_content in the thinking mode must be passed back to the API.` This proxy caches reasoning by conversation prefix, message signature, and tool-call IDs, then restores it before forwarding to DeepSeek.
|
DeepSeek thinking mode returns `reasoning_content` separately from final `content`. After an assistant turn with tool calls, DeepSeek requires that same `reasoning_content` to be sent back in later requests. Cursor can omit it in custom OpenAI-compatible flows, causing `The reasoning_content in the thinking mode must be passed back to the API.` This proxy caches reasoning by conversation prefix, message signature, and tool-call IDs, then restores it before forwarding to DeepSeek.
|
||||||
|
|
||||||
Thi repo fixes the following error:
|
For streamed responses, the proxy also mirrors DeepSeek `reasoning_content` into Cursor-visible `<think>...</think>` content while leaving the original `reasoning_content` field intact. This lets Cursor display the thinking text in OpenAI-compatible BYOK/proxy flows, and the proxy strips those display-only tags from later assistant history before replaying it to DeepSeek.
|
||||||
|
|
||||||
|
This repo fixes the following error:
|
||||||
|
|
||||||

|

|
||||||
|
|
||||||
|
|
@ -41,6 +49,7 @@ Edit `~/.deepseek-cursor-proxy/.env`:
|
||||||
```bash
|
```bash
|
||||||
DEEPSEEK_API_KEY=sk-your-deepseek-key
|
DEEPSEEK_API_KEY=sk-your-deepseek-key
|
||||||
PROXY_API_KEY=cursor-local-token
|
PROXY_API_KEY=cursor-local-token
|
||||||
|
CURSOR_DISPLAY_REASONING=true
|
||||||
```
|
```
|
||||||
|
|
||||||
Keep `PROXY_API_KEY` set when using ngrok because the proxy will be reachable from the public internet.
|
Keep `PROXY_API_KEY` set when using ngrok because the proxy will be reachable from the public internet.
|
||||||
|
|
@ -91,6 +100,12 @@ Run without ngrok for local curl testing:
|
||||||
PROXY_NGROK=false deepseek-cursor-proxy --port 9000 --verbose
|
PROXY_NGROK=false deepseek-cursor-proxy --port 9000 --verbose
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Disable the Cursor display mirror if you only want raw OpenAI-compatible response fields:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
CURSOR_DISPLAY_REASONING=false deepseek-cursor-proxy --verbose
|
||||||
|
```
|
||||||
|
|
||||||
Log full request bodies only when needed:
|
Log full request bodies only when needed:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
|
|
||||||
|
|
@ -129,6 +129,7 @@ class ProxyConfig:
|
||||||
reasoning_effort: str = "high"
|
reasoning_effort: str = "high"
|
||||||
request_timeout: float = 300.0
|
request_timeout: float = 300.0
|
||||||
reasoning_content_path: Path = field(default_factory=default_reasoning_content_path)
|
reasoning_content_path: Path = field(default_factory=default_reasoning_content_path)
|
||||||
|
cursor_display_reasoning: bool = True
|
||||||
verbose: bool = False
|
verbose: bool = False
|
||||||
log_bodies: bool = False
|
log_bodies: bool = False
|
||||||
ngrok: bool = False
|
ngrok: bool = False
|
||||||
|
|
@ -167,6 +168,7 @@ class ProxyConfig:
|
||||||
("REASONING_CONTENT_PATH",),
|
("REASONING_CONTENT_PATH",),
|
||||||
default_reasoning_content_path(),
|
default_reasoning_content_path(),
|
||||||
),
|
),
|
||||||
|
cursor_display_reasoning=env_bool(values, "CURSOR_DISPLAY_REASONING", True),
|
||||||
verbose=env_bool(values, "PROXY_VERBOSE", False),
|
verbose=env_bool(values, "PROXY_VERBOSE", False),
|
||||||
log_bodies=env_bool(values, "PROXY_LOG_BODIES", False),
|
log_bodies=env_bool(values, "PROXY_LOG_BODIES", False),
|
||||||
ngrok=env_bool(values, "PROXY_NGROK", False),
|
ngrok=env_bool(values, "PROXY_NGROK", False),
|
||||||
|
|
|
||||||
|
|
@ -21,7 +21,7 @@ from .config import (
|
||||||
default_reasoning_content_path,
|
default_reasoning_content_path,
|
||||||
)
|
)
|
||||||
from .reasoning_store import ReasoningStore, conversation_scope
|
from .reasoning_store import ReasoningStore, conversation_scope
|
||||||
from .streaming import StreamAccumulator
|
from .streaming import CursorReasoningDisplayAdapter, StreamAccumulator
|
||||||
from .tunnel import NgrokTunnel, local_tunnel_target
|
from .tunnel import NgrokTunnel, local_tunnel_target
|
||||||
from .transform import prepare_upstream_request, rewrite_response_body
|
from .transform import prepare_upstream_request, rewrite_response_body
|
||||||
|
|
||||||
|
|
@ -319,16 +319,20 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler):
|
||||||
self.close_connection = True
|
self.close_connection = True
|
||||||
|
|
||||||
accumulator = StreamAccumulator()
|
accumulator = StreamAccumulator()
|
||||||
|
display_adapter = (
|
||||||
|
CursorReasoningDisplayAdapter()
|
||||||
|
if self.config.cursor_display_reasoning
|
||||||
|
else None
|
||||||
|
)
|
||||||
scope = conversation_scope(request_messages)
|
scope = conversation_scope(request_messages)
|
||||||
finalized = False
|
finalized = False
|
||||||
while True:
|
while True:
|
||||||
line = response.readline()
|
line = response.readline()
|
||||||
if not line:
|
if not line:
|
||||||
break
|
break
|
||||||
rewritten = self._rewrite_sse_line(line, original_model, accumulator, scope)
|
rewritten, finalized = self._rewrite_sse_line(
|
||||||
if rewritten is None:
|
line, original_model, accumulator, scope, display_adapter
|
||||||
finalized = True
|
)
|
||||||
rewritten = b"data: [DONE]\n\n"
|
|
||||||
self.wfile.write(rewritten)
|
self.wfile.write(rewritten)
|
||||||
self.wfile.flush()
|
self.wfile.flush()
|
||||||
if finalized:
|
if finalized:
|
||||||
|
|
@ -347,10 +351,11 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler):
|
||||||
original_model: str,
|
original_model: str,
|
||||||
accumulator: StreamAccumulator,
|
accumulator: StreamAccumulator,
|
||||||
scope: str,
|
scope: str,
|
||||||
) -> bytes | None:
|
display_adapter: CursorReasoningDisplayAdapter | None,
|
||||||
|
) -> tuple[bytes, bool]:
|
||||||
stripped = line.strip()
|
stripped = line.strip()
|
||||||
if not stripped.startswith(b"data:"):
|
if not stripped.startswith(b"data:"):
|
||||||
return line
|
return line, False
|
||||||
|
|
||||||
data = stripped[len(b"data:") :].strip()
|
data = stripped[len(b"data:") :].strip()
|
||||||
if data == b"[DONE]":
|
if data == b"[DONE]":
|
||||||
|
|
@ -359,15 +364,22 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler):
|
||||||
stored = accumulator.store_reasoning(self.reasoning_store, scope)
|
stored = accumulator.store_reasoning(self.reasoning_store, scope)
|
||||||
if stored:
|
if stored:
|
||||||
LOG.info("stored %s streaming reasoning cache key(s)", stored)
|
LOG.info("stored %s streaming reasoning cache key(s)", stored)
|
||||||
return None
|
if display_adapter is None:
|
||||||
|
return b"data: [DONE]\n\n", True
|
||||||
|
closing_chunk = display_adapter.flush_chunk(original_model)
|
||||||
|
if closing_chunk is None:
|
||||||
|
return b"data: [DONE]\n\n", True
|
||||||
|
return sse_data(closing_chunk) + b"data: [DONE]\n\n", True
|
||||||
|
|
||||||
try:
|
try:
|
||||||
chunk = json.loads(data.decode("utf-8"))
|
chunk = json.loads(data.decode("utf-8"))
|
||||||
except (json.JSONDecodeError, UnicodeDecodeError):
|
except (json.JSONDecodeError, UnicodeDecodeError):
|
||||||
return line
|
return line, False
|
||||||
|
|
||||||
if isinstance(chunk, dict):
|
if isinstance(chunk, dict):
|
||||||
accumulator.ingest_chunk(chunk)
|
accumulator.ingest_chunk(chunk)
|
||||||
|
if display_adapter is not None:
|
||||||
|
display_adapter.rewrite_chunk(chunk)
|
||||||
if "model" in chunk:
|
if "model" in chunk:
|
||||||
chunk["model"] = original_model
|
chunk["model"] = original_model
|
||||||
ending = b"\r\n" if line.endswith(b"\r\n") else b"\n"
|
ending = b"\r\n" if line.endswith(b"\r\n") else b"\n"
|
||||||
|
|
@ -377,8 +389,8 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler):
|
||||||
"utf-8"
|
"utf-8"
|
||||||
)
|
)
|
||||||
+ ending
|
+ ending
|
||||||
)
|
), False
|
||||||
return line
|
return line, False
|
||||||
|
|
||||||
|
|
||||||
def build_arg_parser() -> argparse.ArgumentParser:
|
def build_arg_parser() -> argparse.ArgumentParser:
|
||||||
|
|
@ -424,6 +436,11 @@ def build_arg_parser() -> argparse.ArgumentParser:
|
||||||
action="store_true",
|
action="store_true",
|
||||||
help="Log normalized upstream request bodies",
|
help="Log normalized upstream request bodies",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--no-cursor-display-reasoning",
|
||||||
|
action="store_true",
|
||||||
|
help="Do not mirror reasoning_content into Cursor-visible <think> content",
|
||||||
|
)
|
||||||
return parser
|
return parser
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -448,6 +465,14 @@ def log_bytes(label: str, body: bytes) -> None:
|
||||||
log_json(label, payload)
|
log_json(label, payload)
|
||||||
|
|
||||||
|
|
||||||
|
def sse_data(payload: dict[str, Any]) -> bytes:
    """Serialize ``payload`` as one server-sent-event ``data:`` frame.

    The JSON is written compactly (``,`` and ``:`` separators without spaces),
    non-ASCII characters are left unescaped, and the result is UTF-8 encoded.
    The frame ends with the blank line that terminates an SSE event.
    """
    encoded = json.dumps(
        payload, ensure_ascii=False, separators=(",", ":")
    ).encode("utf-8")
    return b"".join((b"data: ", encoded, b"\n\n"))
|
||||||
|
|
||||||
|
|
||||||
def summarize_chat_payload(payload: dict[str, Any]) -> str:
|
def summarize_chat_payload(payload: dict[str, Any]) -> str:
|
||||||
messages = payload.get("messages")
|
messages = payload.get("messages")
|
||||||
tools = payload.get("tools")
|
tools = payload.get("tools")
|
||||||
|
|
@ -498,6 +523,8 @@ def main(argv: list[str] | None = None) -> int:
|
||||||
updates["verbose"] = True
|
updates["verbose"] = True
|
||||||
if args.log_bodies:
|
if args.log_bodies:
|
||||||
updates["log_bodies"] = True
|
updates["log_bodies"] = True
|
||||||
|
if args.no_cursor_display_reasoning:
|
||||||
|
updates["cursor_display_reasoning"] = False
|
||||||
if updates:
|
if updates:
|
||||||
config = replace(config, **updates)
|
config = replace(config, **updates)
|
||||||
|
|
||||||
|
|
@ -519,9 +546,10 @@ def main(argv: list[str] | None = None) -> int:
|
||||||
config.upstream_model,
|
config.upstream_model,
|
||||||
)
|
)
|
||||||
LOG.info(
|
LOG.info(
|
||||||
"thinking=%s reasoning_effort=%s reasoning_content_path=%s",
|
"thinking=%s reasoning_effort=%s cursor_display_reasoning=%s reasoning_content_path=%s",
|
||||||
config.thinking,
|
config.thinking,
|
||||||
config.reasoning_effort,
|
config.reasoning_effort,
|
||||||
|
config.cursor_display_reasoning,
|
||||||
config.reasoning_content_path,
|
config.reasoning_content_path,
|
||||||
)
|
)
|
||||||
if config.verbose:
|
if config.verbose:
|
||||||
|
|
|
||||||
|
|
@ -1,11 +1,16 @@
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
|
import time
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from .reasoning_store import ReasoningStore
|
from .reasoning_store import ReasoningStore
|
||||||
|
|
||||||
|
|
||||||
|
THINKING_BLOCK_START = "<think>\n"
|
||||||
|
THINKING_BLOCK_END = "\n</think>\n\n"
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class StreamingChoice:
|
class StreamingChoice:
|
||||||
role: str = "assistant"
|
role: str = "assistant"
|
||||||
|
|
@ -109,3 +114,80 @@ class StreamAccumulator:
|
||||||
function["arguments"] = (function.get("arguments") or "") + str(
|
function["arguments"] = (function.get("arguments") or "") + str(
|
||||||
function_delta["arguments"]
|
function_delta["arguments"]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class CursorReasoningDisplayAdapter:
|
||||||
|
"""Mirror reasoning_content into content for Cursor's visible thinking UI path."""
|
||||||
|
|
||||||
|
def __init__(self) -> None:
|
||||||
|
self._open_choices: set[int] = set()
|
||||||
|
self._last_chunk_metadata: dict[str, Any] = {}
|
||||||
|
|
||||||
|
def rewrite_chunk(self, chunk: dict[str, Any]) -> None:
|
||||||
|
self._remember_chunk_metadata(chunk)
|
||||||
|
choices = chunk.get("choices")
|
||||||
|
if not isinstance(choices, list):
|
||||||
|
return
|
||||||
|
|
||||||
|
for raw_choice in choices:
|
||||||
|
if not isinstance(raw_choice, dict):
|
||||||
|
continue
|
||||||
|
index = int(raw_choice.get("index") or 0)
|
||||||
|
delta = raw_choice.get("delta")
|
||||||
|
if not isinstance(delta, dict):
|
||||||
|
delta = {}
|
||||||
|
raw_choice["delta"] = delta
|
||||||
|
|
||||||
|
mirrored_parts: list[str] = []
|
||||||
|
reasoning_content = delta.get("reasoning_content")
|
||||||
|
if isinstance(reasoning_content, str) and reasoning_content:
|
||||||
|
if index not in self._open_choices:
|
||||||
|
mirrored_parts.append(THINKING_BLOCK_START)
|
||||||
|
self._open_choices.add(index)
|
||||||
|
mirrored_parts.append(reasoning_content)
|
||||||
|
|
||||||
|
existing_content = delta.get("content")
|
||||||
|
should_close = index in self._open_choices and (
|
||||||
|
bool(existing_content)
|
||||||
|
or bool(delta.get("tool_calls"))
|
||||||
|
or raw_choice.get("finish_reason") is not None
|
||||||
|
)
|
||||||
|
if should_close:
|
||||||
|
mirrored_parts.append(THINKING_BLOCK_END)
|
||||||
|
self._open_choices.discard(index)
|
||||||
|
|
||||||
|
if not mirrored_parts:
|
||||||
|
continue
|
||||||
|
if isinstance(existing_content, str):
|
||||||
|
mirrored_parts.append(existing_content)
|
||||||
|
delta["content"] = "".join(mirrored_parts)
|
||||||
|
|
||||||
|
def flush_chunk(self, model: str) -> dict[str, Any] | None:
|
||||||
|
if not self._open_choices:
|
||||||
|
return None
|
||||||
|
|
||||||
|
choices = [
|
||||||
|
{
|
||||||
|
"index": index,
|
||||||
|
"delta": {"content": THINKING_BLOCK_END},
|
||||||
|
"finish_reason": None,
|
||||||
|
}
|
||||||
|
for index in sorted(self._open_choices)
|
||||||
|
]
|
||||||
|
self._open_choices.clear()
|
||||||
|
|
||||||
|
chunk: dict[str, Any] = {
|
||||||
|
"id": self._last_chunk_metadata.get("id", "chatcmpl-reasoning-close"),
|
||||||
|
"object": self._last_chunk_metadata.get("object", "chat.completion.chunk"),
|
||||||
|
"created": self._last_chunk_metadata.get("created", int(time.time())),
|
||||||
|
"model": model,
|
||||||
|
"choices": choices,
|
||||||
|
}
|
||||||
|
return chunk
|
||||||
|
|
||||||
|
def _remember_chunk_metadata(self, chunk: dict[str, Any]) -> None:
|
||||||
|
metadata = {
|
||||||
|
key: chunk[key] for key in ("id", "object", "created") if key in chunk
|
||||||
|
}
|
||||||
|
if metadata:
|
||||||
|
self._last_chunk_metadata.update(metadata)
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@ from __future__ import annotations
|
||||||
|
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
import json
|
import json
|
||||||
|
import re
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from .config import ProxyConfig
|
from .config import ProxyConfig
|
||||||
|
|
@ -60,6 +61,11 @@ EFFORT_ALIASES = {
|
||||||
"xhigh": "max",
|
"xhigh": "max",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
CURSOR_THINKING_BLOCK_RE = re.compile(
|
||||||
|
r"<(?:think|thinking)>[\s\S]*?(?:</(?:think|thinking)>|$)\s*",
|
||||||
|
re.IGNORECASE,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
class PreparedRequest:
|
class PreparedRequest:
|
||||||
|
|
@ -102,6 +108,10 @@ def extract_text_content(content: Any) -> str | None:
|
||||||
return str(content)
|
return str(content)
|
||||||
|
|
||||||
|
|
||||||
|
def strip_cursor_thinking_blocks(content: str) -> str:
    """Return ``content`` with display-only thinking blocks removed.

    Every match of ``CURSOR_THINKING_BLOCK_RE`` is deleted, then any leading
    carriage returns / newlines left at the start of the text are dropped.
    """
    without_blocks = CURSOR_THINKING_BLOCK_RE.sub("", content)
    return without_blocks.lstrip("\r\n")
|
||||||
|
|
||||||
|
|
||||||
def normalize_tool_call(tool_call: Any) -> dict[str, Any]:
|
def normalize_tool_call(tool_call: Any) -> dict[str, Any]:
|
||||||
if not isinstance(tool_call, dict):
|
if not isinstance(tool_call, dict):
|
||||||
tool_call = {}
|
tool_call = {}
|
||||||
|
|
@ -190,6 +200,8 @@ def normalize_message(
|
||||||
normalized["content"] = extract_text_content(normalized["content"]) or ""
|
normalized["content"] = extract_text_content(normalized["content"]) or ""
|
||||||
elif normalized["role"] in {"assistant", "tool", "system", "user"}:
|
elif normalized["role"] in {"assistant", "tool", "system", "user"}:
|
||||||
normalized["content"] = ""
|
normalized["content"] = ""
|
||||||
|
if normalized["role"] == "assistant" and isinstance(normalized.get("content"), str):
|
||||||
|
normalized["content"] = strip_cursor_thinking_blocks(normalized["content"])
|
||||||
|
|
||||||
if normalized.get("tool_calls"):
|
if normalized.get("tool_calls"):
|
||||||
normalized["tool_calls"] = [
|
normalized["tool_calls"] = [
|
||||||
|
|
|
||||||
|
|
@ -109,6 +109,17 @@ class ConfigTests(unittest.TestCase):
|
||||||
self.assertTrue(config.log_bodies)
|
self.assertTrue(config.log_bodies)
|
||||||
self.assertTrue(config.ngrok)
|
self.assertTrue(config.ngrok)
|
||||||
|
|
||||||
|
def test_cursor_reasoning_display_can_be_disabled_from_env(self) -> None:
    # CURSOR_DISPLAY_REASONING=false in the environment must turn the flag off.
    environment = {
        "DEEPSEEK_API_KEY": "key",
        "CURSOR_DISPLAY_REASONING": "false",
    }
    config = ProxyConfig.from_env(
        env=environment,
        env_file_path=Path("/does/not/exist"),
    )

    self.assertFalse(config.cursor_display_reasoning)
|
||||||
|
|
||||||
def test_config_path_can_be_overridden_from_environment(self) -> None:
|
def test_config_path_can_be_overridden_from_environment(self) -> None:
|
||||||
with TemporaryDirectory() as temp_dir:
|
with TemporaryDirectory() as temp_dir:
|
||||||
first_env_path = Path(temp_dir) / "first.env"
|
first_env_path = Path(temp_dir) / "first.env"
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,11 @@ from urllib.error import HTTPError
|
||||||
from urllib.request import Request, urlopen
|
from urllib.request import Request, urlopen
|
||||||
|
|
||||||
from deepseek_cursor_proxy.config import ProxyConfig
|
from deepseek_cursor_proxy.config import ProxyConfig
|
||||||
from deepseek_cursor_proxy.reasoning_store import ReasoningStore
|
from deepseek_cursor_proxy.reasoning_store import (
|
||||||
|
ReasoningStore,
|
||||||
|
conversation_scope,
|
||||||
|
message_signature,
|
||||||
|
)
|
||||||
from deepseek_cursor_proxy.server import DeepSeekProxyHandler, DeepSeekProxyServer
|
from deepseek_cursor_proxy.server import DeepSeekProxyHandler, DeepSeekProxyServer
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -184,6 +188,68 @@ class SlowAfterDoneStreamingDeepSeekHandler(BaseHTTPRequestHandler):
|
||||||
time.sleep(2)
|
time.sleep(2)
|
||||||
|
|
||||||
|
|
||||||
|
class ReasoningStreamingDeepSeekHandler(BaseHTTPRequestHandler):
    """Fake upstream that streams two reasoning deltas, an answer, then stop."""

    def log_message(self, fmt: str, *args: object) -> None:
        # Intentionally does nothing so requests are not logged during tests.
        return

    def do_POST(self) -> None:
        """Reply with a fixed SSE stream followed by the ``[DONE]`` marker."""
        self.send_response(200)
        self.send_header("Content-Type", "text/event-stream")
        self.end_headers()

        # (delta, finish_reason) for each streamed event, in emission order.
        steps = (
            ({"role": "assistant", "reasoning_content": "Need "}, None),
            ({"reasoning_content": "context."}, None),
            ({"content": FINAL_CONTENT}, None),
            ({}, "stop"),
        )
        for delta, finish_reason in steps:
            event = {
                "id": "chatcmpl-stream",
                "object": "chat.completion.chunk",
                "created": 1,
                "model": "deepseek-v4-pro",
                "choices": [
                    {
                        "index": 0,
                        "delta": delta,
                        "finish_reason": finish_reason,
                    }
                ],
            }
            self.wfile.write(f"data: {json.dumps(event)}\n\n".encode("utf-8"))
        self.wfile.write(b"data: [DONE]\n\n")
        self.wfile.flush()
|
||||||
|
|
||||||
|
|
||||||
def tool_call_response() -> dict:
|
def tool_call_response() -> dict:
|
||||||
return {
|
return {
|
||||||
"id": "chatcmpl-tool",
|
"id": "chatcmpl-tool",
|
||||||
|
|
@ -555,6 +621,79 @@ class StreamingProxyTests(unittest.TestCase):
|
||||||
self.assertIn("data: [DONE]", body)
|
self.assertIn("data: [DONE]", body)
|
||||||
|
|
||||||
|
|
||||||
|
class ReasoningStreamingProxyTests(unittest.TestCase):
    """Run a streamed request through the proxy against a fake upstream."""

    def setUp(self) -> None:
        # Fake upstream first, so its URL can be handed to the proxy config.
        upstream_server = ThreadingHTTPServer(
            ("127.0.0.1", 0), ReasoningStreamingDeepSeekHandler
        )
        self.upstream = ServerFixture(upstream_server).start()
        self.store = ReasoningStore(":memory:")

        proxy_server = DeepSeekProxyServer(("127.0.0.1", 0), DeepSeekProxyHandler)
        proxy_server.config = ProxyConfig(
            upstream_api_key="upstream-key",
            proxy_api_key="cursor-local-token",
            upstream_base_url=self.upstream.url,
            upstream_model="deepseek-v4-pro",
        )
        proxy_server.reasoning_store = self.store
        self.proxy = ServerFixture(proxy_server).start()

    def tearDown(self) -> None:
        # Close in the same order as before: proxy, upstream, then the store.
        self.proxy.close()
        self.upstream.close()
        self.store.close()

    def test_streaming_proxy_mirrors_reasoning_for_cursor_display(
        self,
    ) -> None:
        request_messages = [{"role": "user", "content": "stream reasoning"}]
        request_body = json.dumps(
            {
                "model": "deepseek-v4-pro",
                "stream": True,
                "messages": request_messages,
            }
        ).encode("utf-8")
        request = Request(
            f"{self.proxy.url}/v1/chat/completions",
            data=request_body,
            method="POST",
            headers={
                "Authorization": "Bearer cursor-local-token",
                "Content-Type": "application/json",
            },
        )

        with urlopen(request, timeout=2) as response:
            body = response.read().decode("utf-8")

        # Keep only the JSON events; this drops the "[DONE]" marker line.
        chunks = []
        for line in body.splitlines():
            if line.startswith("data: {"):
                chunks.append(json.loads(line.removeprefix("data: ")))

        def delta_of(position: int) -> dict:
            return chunks[position]["choices"][0]["delta"]

        self.assertEqual(delta_of(0)["content"], "<think>\nNeed ")
        self.assertEqual(delta_of(0)["reasoning_content"], "Need ")
        self.assertEqual(delta_of(1)["content"], "context.")
        self.assertEqual(
            delta_of(2)["content"],
            "\n</think>\n\n" + FINAL_CONTENT,
        )

        # The cached entry is keyed on the un-mirrored content.
        stored_message = {
            "role": "assistant",
            "content": FINAL_CONTENT,
            "reasoning_content": "Need context.",
        }
        cache_key = (
            "scope:"
            + conversation_scope(request_messages)
            + ":signature:"
            + message_signature(stored_message)
        )
        self.assertEqual(self.store.get(cache_key), "Need context.")
|
||||||
|
|
||||||
|
|
||||||
def first_cursor_request() -> dict:
|
def first_cursor_request() -> dict:
|
||||||
return {
|
return {
|
||||||
"model": "deepseek-v4-pro",
|
"model": "deepseek-v4-pro",
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,10 @@ from __future__ import annotations
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
from deepseek_cursor_proxy.reasoning_store import ReasoningStore, conversation_scope
|
from deepseek_cursor_proxy.reasoning_store import ReasoningStore, conversation_scope
|
||||||
from deepseek_cursor_proxy.streaming import StreamAccumulator
|
from deepseek_cursor_proxy.streaming import (
|
||||||
|
CursorReasoningDisplayAdapter,
|
||||||
|
StreamAccumulator,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class StreamAccumulatorTests(unittest.TestCase):
|
class StreamAccumulatorTests(unittest.TestCase):
|
||||||
|
|
@ -103,5 +106,101 @@ class StreamAccumulatorTests(unittest.TestCase):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class CursorReasoningDisplayAdapterTests(unittest.TestCase):
    """Unit tests for the think-tag mirroring done on streamed chunks."""

    def test_mirrors_reasoning_content_into_think_tagged_content(self) -> None:
        adapter = CursorReasoningDisplayAdapter()
        reasoning_choice = {
            "index": 0,
            "delta": {"reasoning_content": "Need context."},
            "finish_reason": None,
        }
        reasoning_chunk = {
            "id": "chatcmpl-stream",
            "object": "chat.completion.chunk",
            "created": 1,
            "model": "deepseek-v4-pro",
            "choices": [reasoning_choice],
        }
        answer_choice = {
            "index": 0,
            "delta": {"content": "Final answer."},
            "finish_reason": None,
        }
        answer_chunk = {"choices": [answer_choice]}

        for chunk in (reasoning_chunk, answer_chunk):
            adapter.rewrite_chunk(chunk)

        reasoning_delta = reasoning_chunk["choices"][0]["delta"]
        answer_delta = answer_chunk["choices"][0]["delta"]
        # The original field is kept and the mirrored text opens the block.
        self.assertEqual(reasoning_delta["reasoning_content"], "Need context.")
        self.assertEqual(reasoning_delta["content"], "<think>\nNeed context.")
        # The first answer text closes the block before the answer itself.
        self.assertEqual(answer_delta["content"], "\n</think>\n\nFinal answer.")

    def test_closes_thinking_block_before_tool_calls(self) -> None:
        adapter = CursorReasoningDisplayAdapter()
        opening_chunk = {
            "choices": [
                {
                    "index": 0,
                    "delta": {"reasoning_content": "Need a tool."},
                }
            ]
        }
        adapter.rewrite_chunk(opening_chunk)

        tool_call = {
            "index": 0,
            "id": "call_1",
            "type": "function",
            "function": {"name": "lookup", "arguments": "{}"},
        }
        tool_chunk = {
            "choices": [
                {
                    "index": 0,
                    "delta": {"tool_calls": [tool_call]},
                }
            ]
        }

        adapter.rewrite_chunk(tool_chunk)

        tool_delta = tool_chunk["choices"][0]["delta"]
        self.assertEqual(tool_delta["content"], "\n</think>\n\n")

    def test_flush_chunk_closes_unfinished_thinking_block_at_done(self) -> None:
        adapter = CursorReasoningDisplayAdapter()
        unfinished_chunk = {
            "id": "chatcmpl-stream",
            "object": "chat.completion.chunk",
            "created": 1,
            "choices": [
                {
                    "index": 0,
                    "delta": {"reasoning_content": "Still thinking."},
                }
            ],
        }
        adapter.rewrite_chunk(unfinished_chunk)

        closing_chunk = adapter.flush_chunk("deepseek-v4-pro")

        self.assertIsNotNone(closing_chunk)
        assert closing_chunk is not None
        self.assertEqual(closing_chunk["model"], "deepseek-v4-pro")
        closing_delta = closing_chunk["choices"][0]["delta"]
        self.assertEqual(closing_delta["content"], "\n</think>\n\n")
        # A second flush has nothing left to close.
        self.assertIsNone(adapter.flush_chunk("deepseek-v4-pro"))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,7 @@ from deepseek_cursor_proxy.transform import (
|
||||||
extract_text_content,
|
extract_text_content,
|
||||||
prepare_upstream_request,
|
prepare_upstream_request,
|
||||||
rewrite_response_body,
|
rewrite_response_body,
|
||||||
|
strip_cursor_thinking_blocks,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -31,6 +32,37 @@ class TransformTests(unittest.TestCase):
|
||||||
"hello\n[image_url omitted by DeepSeek text proxy]\nworld",
|
"hello\n[image_url omitted by DeepSeek text proxy]\nworld",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def test_strips_cursor_display_thinking_blocks_from_assistant_content(
    self,
) -> None:
    # A leading think block and the blank line after it are both removed.
    mirrored = "<think>\nNeed context.\n</think>\n\nFinal answer."

    stripped = strip_cursor_thinking_blocks(mirrored)

    self.assertEqual(stripped, "Final answer.")
|
||||||
|
|
||||||
|
def test_prepares_assistant_content_without_mirrored_thinking_blocks(
    self,
) -> None:
    # Assistant history replayed with a mirrored think block in its content.
    assistant_turn = {
        "role": "assistant",
        "content": "<think>\nHidden.\n</think>\n\nVisible answer.",
    }
    payload = {
        "model": "deepseek-v4-pro",
        "messages": [
            {"role": "user", "content": "hello"},
            assistant_turn,
            {"role": "user", "content": "continue"},
        ],
    }

    prepared = prepare_upstream_request(
        payload, ProxyConfig(upstream_api_key="key"), self.store
    )

    forwarded_assistant = prepared.payload["messages"][1]
    self.assertEqual(forwarded_assistant["content"], "Visible answer.")
|
||||||
|
|
||||||
def test_prepares_thinking_request_and_converts_legacy_functions(self) -> None:
|
def test_prepares_thinking_request_and_converts_legacy_functions(self) -> None:
|
||||||
payload = {
|
payload = {
|
||||||
"model": "deepseek-v4-flash",
|
"model": "deepseek-v4-flash",
|
||||||
|
|
@ -349,6 +381,51 @@ class TransformTests(unittest.TestCase):
|
||||||
"Need to call the file tool.",
|
"Need to call the file tool.",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def test_restores_reasoning_when_cursor_history_contains_mirrored_think_block(
    self,
) -> None:
    prior = [{"role": "user", "content": "inspect repo"}]
    tool_call = {
        "id": "call_original",
        "type": "function",
        "function": {
            "name": "read_file",
            "arguments": '{"path":"README.md"}',
        },
    }
    # Cache the assistant turn with empty content and its reasoning.
    cached_turn = {
        "role": "assistant",
        "content": "",
        "reasoning_content": "Need to call the file tool.",
        "tool_calls": [tool_call],
    }
    self.store.store_assistant_message(cached_turn, conversation_scope(prior))

    # Replay the same turn with the reasoning mirrored into content and no
    # reasoning_content field.
    replayed_turn = {
        "role": "assistant",
        "content": "<think>\nNeed to call the file tool.\n</think>\n\n",
        "tool_calls": [tool_call],
    }
    prepared = prepare_upstream_request(
        {
            "model": "deepseek-v4-pro",
            "messages": [*prior, replayed_turn],
        },
        ProxyConfig(upstream_api_key="key"),
        self.store,
    )

    self.assertEqual(prepared.patched_reasoning_messages, 1)
    forwarded_turn = prepared.payload["messages"][1]
    self.assertEqual(forwarded_turn["content"], "")
    self.assertEqual(
        forwarded_turn["reasoning_content"],
        "Need to call the file tool.",
    )
|
||||||
|
|
||||||
def test_adds_fallback_reasoning_for_uncached_assistant_tool_call(self) -> None:
|
def test_adds_fallback_reasoning_for_uncached_assistant_tool_call(self) -> None:
|
||||||
payload = {
|
payload = {
|
||||||
"model": "deepseek-v4-pro",
|
"model": "deepseek-v4-pro",
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue