refactor(proxy): audit thinking-mode protocol and refactor test suite (#33)

main
Yixing Lao 2026-05-01 19:48:08 +08:00 committed by GitHub
parent b65f0dd8a2
commit be0310751c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 2223 additions and 2894 deletions

3
.gitignore vendored
View File

@ -1,3 +1,6 @@
# AIs
.claude/
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[codz]

View File

@ -134,7 +134,7 @@ Select `deepseek-v4-pro` in Cursor and use chat or agent mode as usual.
## How It Works
- **Core fix:** DeepSeek's [thinking mode](https://api-docs.deepseek.com/guides/thinking_mode#tool-calls) requires `reasoning_content` from assistant tool-call messages to be passed back in subsequent requests, but Cursor omits this field, causing a 400 error. The proxy (`Cursor → ngrok → proxy → DeepSeek API`) stores `reasoning_content` from every DeepSeek response in a local SQLite cache, keyed by message signature, tool-call ID, and tool-call function signature, and patches outgoing requests with missing `reasoning_content` before they reach DeepSeek. On a cold cache (proxy restart, model switch), it logs and drops unrecoverable history, continues from the latest user request, and prefixes the next Cursor response with a notice.
- **Core fix:** DeepSeek [thinking-mode tool calls](https://api-docs.deepseek.com/guides/thinking_mode#tool-calls) require the complete **multi-round** `reasoning_content` chain to be sent back in later requests. Cursor omits that field, causing a 400 error. The proxy (`Cursor -> ngrok -> proxy -> DeepSeek API`) stores DeepSeek's original `reasoning_content` and patches missing blocks back into outgoing tool-call history.
- **Multi-conversation isolation:** To avoid collisions across concurrent conversations, the proxy scopes cache keys by a SHA-256 hash of the canonical conversation prefix (roles, content, and tool calls, excluding `reasoning_content`) plus the upstream model, configuration, and an API-key hash. Different threads get different scopes, so reused tool-call IDs do not collide. Byte-identical cloned histories produce identical scopes.
- **Context caching compatibility:** The proxy preserves compatibility by never injecting synthetic thread IDs, timestamps, or cache-control messages. It restores `reasoning_content` as the exact original string, so repeated prefixes remain intact for [DeepSeek context cache](https://api-docs.deepseek.com/guides/kv_cache). Cache hit rates are logged in the terminal output.
- **Additional compatibility fixes:** Beyond reasoning repair, the proxy converts legacy `functions`/`function_call` fields to `tools`/`tool_choice`, preserves required and named tool-choice semantics, normalizes `reasoning_effort` aliases, strips mirrored thinking display blocks from assistant content, flattens multi-part content arrays to plain text, and mirrors `reasoning_content` into Cursor-visible Markdown details blocks.

View File

@ -172,8 +172,6 @@ def settings_from_config(
def normalize_thinking(value: Any) -> str:
    """Coerce a configured thinking mode to ``"enabled"`` or ``"disabled"``.

    The value is stringified with ``as_str`` (falling back to
    ``DEFAULT_THINKING``), stripped and lower-cased.  Only the two modes
    offered by the ``--thinking`` CLI flag are returned as-is; every other
    value, including the retired ``pass-through`` spellings, resolves to
    ``DEFAULT_THINKING``.

    NOTE(review): the request builder appears to forward this value verbatim
    as the upstream ``thinking.type``, so returning anything other than the
    two supported modes would produce a request DeepSeek rejects -- confirm
    against ``prepare_upstream_request``.
    """
    thinking = as_str(value, DEFAULT_THINKING).strip().lower()
    if thinking in {"enabled", "disabled"}:
        return thinking
    return DEFAULT_THINKING

View File

@ -540,6 +540,8 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler):
scope=record_response_scope,
prior_messages=record_response_messages,
recording_contexts=record_response_contexts,
display_reasoning=self.config.display_reasoning,
collapsible_reasoning=self.config.collapsible_reasoning,
)
except (json.JSONDecodeError, UnicodeDecodeError) as exc:
LOG.warning("failed to rewrite upstream JSON response: %s", exc)
@ -812,7 +814,7 @@ def build_arg_parser() -> argparse.ArgumentParser:
)
parser.add_argument(
"--thinking",
choices=["enabled", "disabled", "pass-through"],
choices=["enabled", "disabled"],
help="DeepSeek thinking mode, default from config or enabled",
)
parser.add_argument(

View File

@ -292,3 +292,34 @@ class CursorReasoningDisplayAdapter:
}
if metadata:
self._last_chunk_metadata.update(metadata)
def fold_reasoning_into_content(
    response_payload: dict[str, Any],
    collapsible: bool,
) -> None:
    """Prepend each choice's reasoning to its visible content, in place.

    Used for non-streaming responses so they match the streaming
    ``<details>`` layout.  For every well-formed choice whose message carries
    a non-empty string ``reasoning_content``, ``content`` is rewritten to
    ``<block start> + reasoning + <block end> + original content``.  A
    non-string ``content`` is treated as empty.  Choices or messages that are
    not dicts, and messages without usable reasoning, are left untouched.

    Args:
        response_payload: Parsed chat-completion response; mutated in place.
        collapsible: Selects the collapsible block delimiters when true,
            the plain thinking-block delimiters otherwise.
    """
    if collapsible:
        opener = COLLAPSIBLE_THINKING_BLOCK_START
        closer = COLLAPSIBLE_THINKING_BLOCK_END
    else:
        opener = THINKING_BLOCK_START
        closer = THINKING_BLOCK_END

    choices = response_payload.get("choices")
    if not isinstance(choices, list):
        return

    for choice in choices:
        message = choice.get("message") if isinstance(choice, dict) else None
        if not isinstance(message, dict):
            continue

        reasoning = message.get("reasoning_content")
        if not (isinstance(reasoning, str) and reasoning):
            continue

        visible = message.get("content")
        if not isinstance(visible, str):
            visible = ""
        message["content"] = "".join((opener, reasoning, closer, visible))

View File

@ -3,6 +3,7 @@ from __future__ import annotations
from dataclasses import dataclass, field
import hashlib
import json
import logging
import re
from typing import Any
@ -15,6 +16,10 @@ from .reasoning_store import (
tool_call_signature,
turn_context_signature,
)
from .streaming import fold_reasoning_into_content
LOG = logging.getLogger("deepseek_cursor_proxy")
SUPPORTED_REQUEST_FIELDS = {
@ -35,6 +40,13 @@ SUPPORTED_REQUEST_FIELDS = {
"frequency_penalty",
"logprobs",
"top_logprobs",
# Standard OpenAI Chat Completions fields that DeepSeek either honors or
# safely ignores. Cursor and most OpenAI SDKs send these unconditionally,
# so forwarding keeps clients happy and avoids log spam.
"user",
"seed",
"n",
"logit_bias",
}
MESSAGE_FIELDS = {
@ -83,10 +95,6 @@ CURSOR_THINKING_BLOCK_RE = re.compile(
)
RECOVERY_NOTICE_TEXT = "[deepseek-cursor-proxy] Refreshed reasoning_content history."
LEGACY_RECOVERY_NOTICE_TEXT = (
"Note: recovered this DeepSeek chat because older tool-call reasoning "
"was unavailable; continuing with recent context only."
)
RECOVERY_NOTICE_CONTENT = f"{RECOVERY_NOTICE_TEXT}\n\n"
RECOVERY_SYSTEM_CONTENT = (
"deepseek-cursor-proxy recovered this request because older DeepSeek "
@ -460,10 +468,33 @@ def has_recovery_notice(message: dict[str, Any]) -> bool:
return (
message.get("role") == "assistant"
and isinstance(content, str)
and content.startswith((RECOVERY_NOTICE_TEXT, LEGACY_RECOVERY_NOTICE_TEXT))
and content.startswith(RECOVERY_NOTICE_TEXT)
)
def strip_recovery_notice_for_upstream(
    messages: list[dict[str, Any]],
) -> list[dict[str, Any]]:
    """Return ``messages`` with the proxy's recovery notice removed.

    Cursor echoes the recovery notice back in later turns.  The proxy uses it
    as a boundary marker, but DeepSeek must not see proxy-generated prose.

    Only assistant messages whose string content starts with
    ``RECOVERY_NOTICE_TEXT`` are affected: each is shallow-copied and the
    copy's content loses the notice plus any newline characters that follow
    it.  All other messages are passed through as the same objects.  The
    input list and its dicts are never mutated, so cache scopes and recording
    contexts computed from them keep matching the with-prefix history that
    Cursor will send next time.
    """
    notice_length = len(RECOVERY_NOTICE_TEXT)

    def _without_notice(message: dict[str, Any]) -> dict[str, Any]:
        if message.get("role") != "assistant":
            return message
        content = message.get("content")
        if not (isinstance(content, str) and content.startswith(RECOVERY_NOTICE_TEXT)):
            return message
        return {**message, "content": content[notice_length:].lstrip("\r\n")}

    return [_without_notice(message) for message in messages]
def leading_system_messages(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
leading_messages: list[dict[str, Any]] = []
for message in messages:
@ -628,6 +659,11 @@ def assistant_needs_reasoning_for_tool_context(
def upstream_model_for(original_model: str, config: ProxyConfig) -> str:
    """Pick the model name to send upstream.

    Names starting with ``deepseek-`` are forwarded unchanged.  Anything else
    is replaced by ``config.upstream_model``, and the substitution is logged
    at WARNING level so a misconfigured client is visible.
    """
    if not original_model.startswith("deepseek-"):
        fallback = config.upstream_model
        LOG.warning(
            "rewriting non-DeepSeek model %r to configured fallback %r",
            original_model,
            fallback,
        )
        return fallback
    return original_model
@ -688,6 +724,16 @@ def prepare_upstream_request(
prepared = {
key: value for key, value in payload.items() if key in SUPPORTED_REQUEST_FIELDS
}
dropped_fields = sorted(
key
for key in payload.keys()
if key not in SUPPORTED_REQUEST_FIELDS
and key not in {"max_completion_tokens", "functions", "function_call"}
)
if dropped_fields:
LOG.warning(
"dropping unsupported request field(s): %s", ", ".join(dropped_fields)
)
if "max_tokens" not in prepared and "max_completion_tokens" in payload:
prepared["max_tokens"] = payload["max_completion_tokens"]
@ -719,14 +765,9 @@ def prepare_upstream_request(
if tool_choice is not None:
prepared["tool_choice"] = tool_choice
if config.thinking != "pass-through":
prepared["thinking"] = {"type": config.thinking}
thinking = prepared.get("thinking")
thinking_enabled = isinstance(thinking, dict) and thinking.get("type") == "enabled"
thinking_disabled = (
isinstance(thinking, dict) and thinking.get("type") == "disabled"
)
prepared["thinking"] = {"type": config.thinking}
thinking_enabled = config.thinking == "enabled"
thinking_disabled = config.thinking == "disabled"
if thinking_enabled:
prepared["reasoning_effort"] = normalize_reasoning_effort(
prepared.get("reasoning_effort") or config.reasoning_effort
@ -797,12 +838,12 @@ def prepare_upstream_request(
keep_reasoning=not thinking_disabled,
)
reasoning_diagnostics.extend(latest_diagnostics)
prepared["messages"] = messages
active_record_response_scope = conversation_scope(messages, cache_namespace)
record_response_contexts = response_recording_contexts(
(record_response_scope, record_response_messages),
(active_record_response_scope, messages),
)
prepared["messages"] = strip_recovery_notice_for_upstream(messages)
return PreparedRequest(
payload=prepared,
@ -874,6 +915,8 @@ def rewrite_response_body(
scope: str | None = None,
prior_messages: list[dict[str, Any]] | None = None,
recording_contexts: list[tuple[str, list[dict[str, Any]]]] | None = None,
display_reasoning: bool = False,
collapsible_reasoning: bool = True,
) -> bytes:
response_payload = json.loads(body.decode("utf-8"))
if isinstance(response_payload, dict):
@ -888,6 +931,8 @@ def rewrite_response_body(
prior_messages=prior_messages,
recording_contexts=recording_contexts,
)
if display_reasoning:
fold_reasoning_into_content(response_payload, collapsible_reasoning)
if "model" in response_payload:
response_payload["model"] = original_model
return json.dumps(

View File

@ -121,7 +121,7 @@ class ConfigTests(unittest.TestCase):
[
"base_url: https://example.com/v1/",
"model: deepseek-v4-flash",
"thinking: pass_through",
"thinking: disabled",
"reasoning_effort: max",
"port: 9100",
"host: 0.0.0.0",
@ -145,7 +145,7 @@ class ConfigTests(unittest.TestCase):
self.assertEqual(config.upstream_base_url, "https://example.com/v1")
self.assertEqual(config.upstream_model, "deepseek-v4-flash")
self.assertEqual(config.thinking, "pass-through")
self.assertEqual(config.thinking, "disabled")
self.assertEqual(config.reasoning_effort, "max")
self.assertEqual(config.host, "0.0.0.0")
self.assertEqual(config.port, 9100)

1327
tests/test_protocol.py Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,24 +1,45 @@
"""Server boundary, CLI, and operational tests.
Pure helper tests (gzip, summarize) and stub-handler tests (client
disconnect) live near the top. The bottom of the file boots a real proxy +
tiny upstream to exercise things that need the HTTP layer: bearer token
forwarding, oversized body, missing-bearer rejection, logging modes, and
streaming connection close.
"""
from __future__ import annotations
from dataclasses import replace
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
from io import BytesIO
import gzip
import json
from pathlib import Path
import threading
import time
from types import SimpleNamespace
import unittest
import zlib
from urllib.error import HTTPError
from urllib.request import Request, urlopen
from deepseek_cursor_proxy.config import ProxyConfig
from deepseek_cursor_proxy.reasoning_store import ReasoningStore
from deepseek_cursor_proxy.server import (
DeepSeekProxyHandler,
DeepSeekProxyServer,
build_arg_parser,
read_response_body,
summarize_chat_payload,
)
class FakeResponse:
# ---------------------------------------------------------------------------
# Stubs for fast in-process tests of internal handler methods
# ---------------------------------------------------------------------------
class _FakeResponse:
def __init__(self, body: bytes, encoding: str = "", status: int = 200) -> None:
self._body = BytesIO(body)
self.headers = {"Content-Encoding": encoding} if encoding else {}
@ -28,7 +49,7 @@ class FakeResponse:
return self._body.read()
class FakeStreamingResponse:
class _FakeStreamingResponse:
status = 200
headers = {"Content-Type": "text/event-stream"}
@ -43,7 +64,7 @@ class FakeStreamingResponse:
return self._lines.pop(0)
class FailingStreamingResponse:
class _FailingStreamingResponse:
status = 200
headers = {"Content-Type": "text/event-stream"}
@ -51,7 +72,7 @@ class FailingStreamingResponse:
raise OSError("record layer failure")
class BrokenPipeWfile:
class _BrokenPipeWfile:
def write(self, body: bytes) -> None:
raise BrokenPipeError("test disconnect")
@ -59,10 +80,10 @@ class BrokenPipeWfile:
raise BrokenPipeError("test disconnect")
def make_proxy_handler(wfile: object) -> DeepSeekProxyHandler:
def _make_handler_stub(wfile: object, **config: object) -> DeepSeekProxyHandler:
handler = object.__new__(DeepSeekProxyHandler)
handler.server = SimpleNamespace(
config=ProxyConfig(),
config=ProxyConfig(**config),
reasoning_store=ReasoningStore(":memory:"),
)
handler.wfile = wfile
@ -73,8 +94,13 @@ def make_proxy_handler(wfile: object) -> DeepSeekProxyHandler:
return handler
class ServerTests(unittest.TestCase):
def test_cli_boolean_overrides_have_on_and_off_forms(self) -> None:
# ---------------------------------------------------------------------------
# CLI / pure helpers
# ---------------------------------------------------------------------------
class CliAndHelperTests(unittest.TestCase):
def test_cli_boolean_flags_have_on_and_off_forms(self) -> None:
args = build_arg_parser().parse_args(
[
"--no-ngrok",
@ -86,7 +112,6 @@ class ServerTests(unittest.TestCase):
"/tmp/dcp-traces",
]
)
self.assertFalse(args.ngrok)
self.assertFalse(args.verbose)
self.assertFalse(args.display_reasoning)
@ -94,19 +119,17 @@ class ServerTests(unittest.TestCase):
self.assertTrue(args.cors)
self.assertEqual(args.trace_dir, Path("/tmp/dcp-traces"))
def test_read_response_body_handles_gzip(self) -> None:
body = gzip.compress(b'{"ok":true}')
self.assertEqual(read_response_body(FakeResponse(body, "gzip")), b'{"ok":true}')
def test_read_response_body_handles_deflate(self) -> None:
body = zlib.compress(b'{"ok":true}')
def test_read_response_body_decodes_gzip_and_deflate(self) -> None:
self.assertEqual(
read_response_body(FakeResponse(body, "deflate")), b'{"ok":true}'
read_response_body(_FakeResponse(gzip.compress(b'{"ok":1}'), "gzip")),
b'{"ok":1}',
)
self.assertEqual(
read_response_body(_FakeResponse(zlib.compress(b'{"ok":1}'), "deflate")),
b'{"ok":1}',
)
def test_summarize_chat_payload_does_not_include_message_content(self) -> None:
def test_summarize_chat_payload_omits_message_content(self) -> None:
summary = summarize_chat_payload(
{
"model": "deepseek-v4-pro",
@ -116,18 +139,22 @@ class ServerTests(unittest.TestCase):
"tool_choice": "auto",
}
)
self.assertIn("model='deepseek-v4-pro'", summary)
self.assertIn("stream=True", summary)
self.assertIn("messages=1", summary)
self.assertIn("tools=1", summary)
self.assertNotIn("secret prompt", summary)
# ---------------------------------------------------------------------------
# Client-disconnect / upstream-failure stubs (no real HTTP needed)
# ---------------------------------------------------------------------------
class HandlerStubTests(unittest.TestCase):
def test_regular_response_handles_client_disconnect(self) -> None:
handler = make_proxy_handler(BrokenPipeWfile())
handler = _make_handler_stub(_BrokenPipeWfile())
body = json.dumps(
{
"id": "chatcmpl-test",
"id": "x",
"object": "chat.completion",
"model": "deepseek-v4-pro",
"choices": [
@ -139,116 +166,324 @@ class ServerTests(unittest.TestCase):
],
}
).encode("utf-8")
try:
with self.assertLogs("deepseek_cursor_proxy", level="WARNING") as captured:
sent = handler._proxy_regular_response(
FakeResponse(body),
result = handler._proxy_regular_response(
_FakeResponse(body),
"deepseek-v4-pro",
[{"role": "user", "content": "hi"}],
"cache-namespace",
"ns",
)
finally:
handler.server.reasoning_store.close()
self.assertFalse(sent.sent)
self.assertFalse(result.sent)
self.assertIn("sending upstream response body", "\n".join(captured.output))
def test_streaming_response_stops_on_client_disconnect(self) -> None:
handler = make_proxy_handler(BrokenPipeWfile())
handler = _make_handler_stub(_BrokenPipeWfile())
chunk = {
"id": "chatcmpl-stream",
"id": "stream",
"model": "deepseek-v4-pro",
"choices": [
{
"index": 0,
"delta": {"role": "assistant", "content": "hello"},
}
],
"choices": [{"index": 0, "delta": {"role": "assistant", "content": "hi"}}],
}
response = FakeStreamingResponse(
response = _FakeStreamingResponse(
[
f"data: {json.dumps(chunk)}\n\n".encode("utf-8"),
b"data: [DONE]\n\n",
]
)
try:
with self.assertLogs("deepseek_cursor_proxy", level="WARNING") as captured:
sent = handler._proxy_streaming_response(
result = handler._proxy_streaming_response(
response,
"deepseek-v4-pro",
[{"role": "user", "content": "hi"}],
"cache-namespace",
"ns",
)
finally:
handler.server.reasoning_store.close()
self.assertFalse(sent.sent)
self.assertFalse(result.sent)
self.assertEqual(response.readline_calls, 1)
self.assertIn("sending streaming response chunk", "\n".join(captured.output))
def test_streaming_response_handles_upstream_read_failure(self) -> None:
handler = make_proxy_handler(BytesIO())
handler = _make_handler_stub(BytesIO())
try:
with self.assertLogs("deepseek_cursor_proxy", level="WARNING") as captured:
sent = handler._proxy_streaming_response(
FailingStreamingResponse(),
result = handler._proxy_streaming_response(
_FailingStreamingResponse(),
"deepseek-v4-pro",
[{"role": "user", "content": "hi"}],
"cache-namespace",
"ns",
)
finally:
handler.server.reasoning_store.close()
self.assertFalse(sent.sent)
self.assertFalse(result.sent)
self.assertIn(
"upstream streaming response read failed",
"\n".join(captured.output),
"upstream streaming response read failed", "\n".join(captured.output)
)
def test_collapsible_reasoning_has_no_effect_when_display_is_disabled(
self,
) -> None:
def test_collapsible_reasoning_no_effect_when_display_disabled(self) -> None:
wfile = BytesIO()
handler = make_proxy_handler(wfile)
handler.server.config = ProxyConfig(
display_reasoning=False,
collapsible_reasoning=True,
handler = _make_handler_stub(
wfile, display_reasoning=False, collapsible_reasoning=True
)
chunk = {
"id": "chatcmpl-stream",
"id": "stream",
"model": "deepseek-v4-pro",
"choices": [
{
"index": 0,
"delta": {"reasoning_content": "Need context."},
}
],
"choices": [{"index": 0, "delta": {"reasoning_content": "Need context."}}],
}
response = FakeStreamingResponse(
response = _FakeStreamingResponse(
[
f"data: {json.dumps(chunk)}\n\n".encode("utf-8"),
b"data: [DONE]\n\n",
]
)
try:
sent = handler._proxy_streaming_response(
handler._proxy_streaming_response(
response,
"deepseek-v4-pro",
[{"role": "user", "content": "hi"}],
"cache-namespace",
"ns",
)
finally:
handler.server.reasoning_store.close()
body = wfile.getvalue().decode("utf-8")
self.assertTrue(sent.sent)
self.assertIn("reasoning_content", body)
self.assertNotIn("<details>", body)
self.assertNotIn("<think>", body)
# ---------------------------------------------------------------------------
# HTTP-level boundary tests: real proxy + tiny upstream
# ---------------------------------------------------------------------------
class _PlainFakeUpstream(BaseHTTPRequestHandler):
    """Upstream test double: records every POST and replies with canned data.

    All state lives on the class so a test can configure it before issuing a
    request and inspect it afterwards.  Streaming requests get one content
    chunk followed by ``[DONE]``; other requests get ``response`` as JSON.
    """

    # Parsed JSON body of every POST, in arrival order.
    requests: list[dict[str, object]] = []
    # ``Authorization`` header of every POST ("" when absent).
    auth_headers: list[str] = []
    # Seconds to keep a streaming connection open after ``[DONE]`` was sent.
    delay_after_done: float = 0.0
    # JSON document returned for non-streaming requests.
    response: dict[str, object] = {}

    def log_message(self, fmt: str, *args: object) -> None:
        # Silence the default per-request stderr logging.
        return

    def do_POST(self) -> None:
        recorder = type(self)
        body_size = int(self.headers.get("Content-Length") or 0)
        payload = json.loads(self.rfile.read(body_size).decode("utf-8"))
        recorder.requests.append(payload)
        recorder.auth_headers.append(self.headers.get("Authorization", ""))

        if payload.get("stream"):
            self._reply_stream()
        else:
            self._reply_json()

    def _reply_stream(self) -> None:
        """Send a two-event SSE stream, then optionally linger."""
        self.send_response(200)
        self.send_header("Content-Type", "text/event-stream")
        self.end_headers()
        self.wfile.write(
            b'data: {"choices":[{"index":0,"delta":{"content":"x"}}]}\n\n'
        )
        self.wfile.write(b"data: [DONE]\n\n")
        self.wfile.flush()
        linger = type(self).delay_after_done
        if linger:
            time.sleep(linger)

    def _reply_json(self) -> None:
        """Send the configured ``response`` as a JSON document."""
        encoded = json.dumps(type(self).response).encode("utf-8")
        self.send_response(200)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(encoded)))
        self.end_headers()
        self.wfile.write(encoded)
# Canned non-streaming chat completion served by `_PlainFakeUpstream`.
# `HttpBoundaryTests.setUp` installs a shallow copy (`dict(_BASE_RESPONSE)`)
# before every test, so the nested `choices`/`usage` values are shared between
# tests and must not be mutated in place.
_BASE_RESPONSE: dict[str, object] = {
    "id": "x",
    "object": "chat.completion",
    "created": 1,
    "model": "deepseek-v4-pro",
    "choices": [
        {
            "index": 0,
            "finish_reason": "stop",
            "message": {"role": "assistant", "content": "ok"},
        }
    ],
    "usage": {
        "prompt_tokens": 20,
        "completion_tokens": 5,
        "total_tokens": 25,
        "prompt_cache_hit_tokens": 12,
        "prompt_cache_miss_tokens": 8,
        "completion_tokens_details": {"reasoning_tokens": 3},
    },
}
class _Fixture:
    """Serve an HTTP server from a daemon thread until ``close()`` is called."""

    def __init__(self, server: ThreadingHTTPServer) -> None:
        self.server = server
        # Daemon thread: a test that forgets close() cannot hang interpreter exit.
        self.thread = threading.Thread(target=server.serve_forever, daemon=True)
        self.thread.start()

    @property
    def url(self) -> str:
        """Base ``http://host:port`` URL of the bound server socket."""
        address = self.server.server_address
        host, port = address
        return "http://%s:%s" % (host, port)

    def close(self) -> None:
        """Stop serving, release the listening socket, then reap the thread."""
        server = self.server
        server.shutdown()
        server.server_close()
        self.thread.join(timeout=5)
def _post(url: str, payload: dict, api_key: str = "sk-test") -> tuple[int, dict]:
    """POST ``payload`` as JSON and return ``(status, decoded JSON body)``.

    The request carries ``Authorization: Bearer <api_key>``.  HTTP error
    statuses are not raised to the caller: the error response is decoded the
    same way and returned with its status code, so tests can assert on
    rejections directly.

    Args:
        url: Full endpoint URL.
        payload: JSON-serializable request body.
        api_key: Bearer token placed in the ``Authorization`` header.

    Returns:
        The HTTP status code and the parsed JSON response body.
    """
    request = Request(
        url,
        data=json.dumps(payload).encode("utf-8"),
        method="POST",
        headers={
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        },
    )
    try:
        with urlopen(request, timeout=5) as response:
            return response.status, json.loads(response.read().decode("utf-8"))
    except HTTPError as exc:
        # HTTPError doubles as the (still open) error response; close it once
        # the body has been read so the connection is not left dangling.
        try:
            return exc.code, json.loads(exc.read().decode("utf-8"))
        finally:
            exc.close()
class HttpBoundaryTests(unittest.TestCase):
    """Real-HTTP tests that don't fit the protocol suite: things the proxy
    must do at the HTTP boundary regardless of what DeepSeek answers."""

    def setUp(self) -> None:
        # Reset the class-level recorder so nothing leaks between tests.
        _PlainFakeUpstream.requests = []
        _PlainFakeUpstream.auth_headers = []
        _PlainFakeUpstream.delay_after_done = 0.0
        _PlainFakeUpstream.response = dict(_BASE_RESPONSE)

        # Each resource registers its own cleanup as soon as it exists:
        # unittest skips tearDown() when setUp() raises, but always runs
        # cleanups.  They run last-in-first-out, i.e. proxy, upstream, store.
        self.store = ReasoningStore(":memory:")
        self.addCleanup(self.store.close)

        self.upstream = _Fixture(
            ThreadingHTTPServer(("127.0.0.1", 0), _PlainFakeUpstream)
        )
        self.addCleanup(self.upstream.close)

        config = ProxyConfig(
            upstream_base_url=self.upstream.url,
            upstream_model="deepseek-v4-pro",
            ngrok=False,
        )
        proxy = DeepSeekProxyServer(("127.0.0.1", 0), DeepSeekProxyHandler)
        proxy.config = config
        proxy.reasoning_store = self.store
        self.proxy = _Fixture(proxy)
        self.addCleanup(self.proxy.close)

    def _request(self) -> dict:
        """Smallest valid chat-completion request."""
        return {
            "model": "deepseek-v4-pro",
            "messages": [{"role": "user", "content": "hi"}],
        }

    def test_rejects_missing_bearer_token(self) -> None:
        request = Request(
            f"{self.proxy.url}/v1/chat/completions",
            data=json.dumps(self._request()).encode("utf-8"),
            method="POST",
            headers={"Content-Type": "application/json"},
        )
        with self.assertRaises(HTTPError) as caught:
            urlopen(request, timeout=5)
        # The exception owns the open error response; release it.
        self.addCleanup(caught.exception.close)
        self.assertEqual(caught.exception.code, 401)
        self.assertEqual(_PlainFakeUpstream.requests, [])

    def test_rejects_oversized_request_body(self) -> None:
        self.proxy.server.config = replace(
            self.proxy.server.config, max_request_body_bytes=10
        )
        status, payload = _post(
            f"{self.proxy.url}/v1/chat/completions", self._request()
        )
        self.assertEqual(status, 413)
        self.assertIn("too large", payload["error"]["message"])
        self.assertEqual(_PlainFakeUpstream.requests, [])

    def test_forwards_bearer_token_to_upstream(self) -> None:
        status, _ = _post(
            f"{self.proxy.url}/v1/chat/completions",
            self._request(),
            api_key="sk-from-cursor",
        )
        self.assertEqual(status, 200)
        self.assertEqual(_PlainFakeUpstream.auth_headers[0], "Bearer sk-from-cursor")

    def test_streaming_response_closes_after_done_when_upstream_lingers(
        self,
    ) -> None:
        """Cursor relies on the proxy ending the SSE stream at [DONE], even
        if the upstream socket stays open."""
        _PlainFakeUpstream.delay_after_done = 2.0
        request = Request(
            f"{self.proxy.url}/v1/chat/completions",
            data=json.dumps(
                {
                    "model": "deepseek-v4-pro",
                    "stream": True,
                    "messages": [{"role": "user", "content": "stream"}],
                }
            ).encode("utf-8"),
            method="POST",
            headers={
                "Authorization": "Bearer sk-test",
                "Content-Type": "application/json",
            },
        )
        started = time.monotonic()
        with urlopen(request, timeout=1) as response:
            body = response.read().decode("utf-8")
        # The upstream lingers for 2 s; finishing in under 1 s proves the
        # proxy closed the client stream on its own.
        self.assertLess(time.monotonic() - started, 1.0)
        self.assertIn("data: [DONE]", body)

    def test_normal_logging_summarizes_without_bodies_or_keys(self) -> None:
        with self.assertLogs("deepseek_cursor_proxy", level="INFO") as captured:
            status, _ = _post(
                f"{self.proxy.url}/v1/chat/completions",
                self._request(),
                api_key="sk-from-cursor",
            )
            # `└ stats` is emitted on the handler thread *after* the response
            # body hits the socket, so the client may return before it lands.
            deadline = time.monotonic() + 2
            while time.monotonic() < deadline and not any(
                "└ stats" in record for record in captured.output
            ):
                time.sleep(0.01)
        output = "\n".join(captured.output)
        self.assertEqual(status, 200)
        # Single-line stage records keep the log readable.
        for marker in ("┌ cursor", "├ context", "├ send", "└ stats"):
            self.assertIn(marker, output)
        self.assertNotIn("hi", output.split("┌ cursor")[1].split("\n")[0])
        self.assertNotIn("sk-from-cursor", output)

    def test_verbose_logging_includes_bodies_but_redacts_api_key(self) -> None:
        self.proxy.server.config = replace(self.proxy.server.config, verbose=True)
        with self.assertLogs("deepseek_cursor_proxy", level="INFO") as captured:
            _post(
                f"{self.proxy.url}/v1/chat/completions",
                self._request(),
                api_key="sk-from-cursor",
            )
        output = "\n".join(captured.output)
        self.assertIn("cursor request body", output)
        self.assertIn("upstream request body", output)
        self.assertNotIn("sk-from-cursor", output)

    def test_healthz_returns_ok(self) -> None:
        with urlopen(f"{self.proxy.url}/healthz", timeout=2) as response:
            self.assertEqual(response.status, 200)
            self.assertEqual(json.loads(response.read())["ok"], True)
if __name__ == "__main__":

View File

@ -6,6 +6,7 @@ from deepseek_cursor_proxy.reasoning_store import ReasoningStore, conversation_s
from deepseek_cursor_proxy.streaming import (
CursorReasoningDisplayAdapter,
StreamAccumulator,
fold_reasoning_into_content,
)
@ -430,5 +431,44 @@ class CursorReasoningDisplayAdapterTests(unittest.TestCase):
self.assertIsNone(adapter.flush_chunk("deepseek-v4-pro"))
class FoldReasoningTests(unittest.TestCase):
    """Checks for `fold_reasoning_into_content` on non-streaming payloads."""

    @staticmethod
    def _payload(reasoning: str) -> dict:
        """One-choice response whose assistant message answers ``"answer"``."""
        message = {
            "role": "assistant",
            "content": "answer",
            "reasoning_content": reasoning,
        }
        return {"choices": [{"index": 0, "message": message}]}

    def test_fold_reasoning_into_non_streaming_content(self) -> None:
        """Non-streaming responses mirror reasoning_content into a visible
        <details> block, matching the streaming layout."""
        payload = self._payload("thinking")
        fold_reasoning_into_content(payload, collapsible=True)
        folded = payload["choices"][0]["message"]["content"]
        self.assertEqual(
            folded,
            "<details>\n<summary>Thinking</summary>\n\nthinking\n</details>\n\nanswer",
        )

    def test_fold_reasoning_skips_empty_reasoning(self) -> None:
        """Empty reasoning leaves the visible content untouched."""
        payload = self._payload("")
        fold_reasoning_into_content(payload, collapsible=True)
        self.assertEqual(payload["choices"][0]["message"]["content"], "answer")
if __name__ == "__main__":
unittest.main()

View File

@ -1,14 +1,25 @@
"""Trace writer tests, both as a unit (writes/redacts files) and integrated
through the proxy (captures real request flow on disk)."""
from __future__ import annotations
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
import json
from pathlib import Path
import stat
import threading
from tempfile import TemporaryDirectory
import time
import unittest
from urllib.request import Request, urlopen
from deepseek_cursor_proxy.config import ProxyConfig
from deepseek_cursor_proxy.reasoning_store import ReasoningStore
from deepseek_cursor_proxy.server import DeepSeekProxyHandler, DeepSeekProxyServer
from deepseek_cursor_proxy.trace import TraceWriter
class TraceWriterTests(unittest.TestCase):
class TraceWriterUnitTests(unittest.TestCase):
def test_writes_manifest_and_numbered_request_files(self) -> None:
with TemporaryDirectory() as temp_dir:
writer = TraceWriter(temp_dir)
@ -47,17 +58,244 @@ class TraceWriterTests(unittest.TestCase):
headers={"Authorization": "Bearer sk-secret"},
)
trace.finish("completed", http_status=200)
payload = json.loads(trace.path.read_text(encoding="utf-8"))
serialized = json.dumps(payload)
serialized = trace.path.read_text(encoding="utf-8")
self.assertNotIn("sk-secret", serialized)
payload = json.loads(serialized)
self.assertEqual(
payload["request"]["headers"]["Authorization"]["present"],
True,
payload["request"]["headers"]["Authorization"]["present"], True
)
self.assertIn("sha256", payload["request"]["headers"]["Authorization"])
# ---------------------------------------------------------------------------
# Integration: trace writer attached to a running proxy.
# ---------------------------------------------------------------------------
class _CannedUpstream(BaseHTTPRequestHandler):
    """Upstream test double with two canned answers, chosen per request.

    A POST whose JSON body has a truthy ``stream`` field receives a short SSE
    stream (reasoning chunk, content chunk with usage, stop chunk, ``[DONE]``).
    Any other POST receives a non-streaming tool-call completion that carries
    ``reasoning_content``.  Every parsed request body is recorded on the class.
    """

    # Parsed JSON body of every POST, in arrival order.
    requests: list[dict[str, object]] = []

    # SSE events for streaming requests; each entry is written separately.
    _STREAM_EVENTS: tuple[bytes, ...] = (
        b'data: {"id":"s","object":"chat.completion.chunk","choices":'
        b'[{"index":0,"delta":{"role":"assistant","reasoning_content":"think"},'
        b'"finish_reason":null}]}\n\n',
        b'data: {"id":"s","object":"chat.completion.chunk","choices":'
        b'[{"index":0,"delta":{"content":"answer"},"finish_reason":null}],'
        b'"usage":{"completion_tokens_details":{"reasoning_tokens":1}}}\n\n',
        b'data: {"id":"s","object":"chat.completion.chunk",'
        b'"choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}\n\n',
        b"data: [DONE]\n\n",
    )

    def log_message(self, fmt: str, *args: object) -> None:
        # Silence the default per-request stderr logging.
        return

    def do_POST(self) -> None:
        body_size = int(self.headers.get("Content-Length") or 0)
        payload = json.loads(self.rfile.read(body_size).decode("utf-8"))
        type(self).requests.append(payload)

        if payload.get("stream"):
            self.send_response(200)
            self.send_header("Content-Type", "text/event-stream")
            self.end_headers()
            for event in self._STREAM_EVENTS:
                self.wfile.write(event)
            self.wfile.flush()
            return

        tool_call = {
            "id": "call_date",
            "type": "function",
            "function": {
                "name": "get_date",
                "arguments": "{}",
            },
        }
        assistant_message = {
            "role": "assistant",
            "content": "",
            "reasoning_content": "I need the date.",
            "tool_calls": [tool_call],
        }
        completion = {
            "id": "tool",
            "object": "chat.completion",
            "model": "deepseek-v4-pro",
            "choices": [
                {
                    "index": 0,
                    "finish_reason": "tool_calls",
                    "message": assistant_message,
                }
            ],
        }
        encoded = json.dumps(completion).encode("utf-8")
        self.send_response(200)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(encoded)))
        self.end_headers()
        self.wfile.write(encoded)
class _Fixture:
    """Own a background thread that serves ``server`` until ``close()``."""

    def __init__(self, server: ThreadingHTTPServer) -> None:
        self.server = server
        # Daemon thread: a test that forgets close() cannot hang interpreter exit.
        self.thread = threading.Thread(target=server.serve_forever, daemon=True)
        self.thread.start()

    @property
    def url(self) -> str:
        """Base ``http://host:port`` URL of the bound server socket."""
        bound = self.server.server_address
        host, port = bound
        return "http://%s:%s" % (host, port)

    def close(self) -> None:
        """Stop the serve loop, close the socket, and wait for the thread."""
        served = self.server
        served.shutdown()
        served.server_close()
        self.thread.join(timeout=5)
def _read_single_trace(session_dir: Path, timeout: float = 2.0) -> dict:
    """Return the parsed JSON of the only ``request-*.json`` in ``session_dir``.

    The directory is polled every 10 ms until at least one matching file
    exists or ``timeout`` seconds have elapsed, because the trace may be
    written after the HTTP client has already received its response.

    Args:
        session_dir: Directory the trace writer stores request files in.
        timeout: Maximum number of seconds to wait for a file to appear.
            ``0`` checks exactly once.

    Returns:
        The decoded JSON document of the single trace file.

    Raises:
        AssertionError: If zero or more than one trace file is present once
            polling stops.
    """
    deadline = time.monotonic() + timeout
    while True:
        files = sorted(session_dir.glob("request-*.json"))
        if files or time.monotonic() >= deadline:
            break
        time.sleep(0.01)
    if len(files) != 1:
        raise AssertionError(f"expected one trace, found {files}")
    return json.loads(files[0].read_text(encoding="utf-8"))
class TraceIntegrationTests(unittest.TestCase):
    """Run real requests through a proxy with a trace writer attached and
    check what ends up in the trace file on disk."""

    def setUp(self) -> None:
        _CannedUpstream.requests = []

        # Each resource registers its own cleanup as soon as it exists:
        # unittest skips tearDown() when setUp() raises, but always runs
        # cleanups.  They run last-in-first-out, i.e. proxy, upstream, store,
        # temp dir.
        self.temp_dir = TemporaryDirectory()
        self.addCleanup(self.temp_dir.cleanup)

        self.store = ReasoningStore(":memory:")
        self.addCleanup(self.store.close)

        self.upstream = _Fixture(ThreadingHTTPServer(("127.0.0.1", 0), _CannedUpstream))
        self.addCleanup(self.upstream.close)

        self.writer = TraceWriter(self.temp_dir.name)
        config = ProxyConfig(
            upstream_base_url=self.upstream.url,
            upstream_model="deepseek-v4-pro",
            ngrok=False,
        )
        proxy = DeepSeekProxyServer(("127.0.0.1", 0), DeepSeekProxyHandler)
        proxy.config = config
        proxy.reasoning_store = self.store
        proxy.trace_writer = self.writer
        self.proxy = _Fixture(proxy)
        self.addCleanup(self.proxy.close)

    def _post(self, payload: dict) -> dict:
        """POST ``payload`` to the proxy and return the decoded JSON reply."""
        request = Request(
            f"{self.proxy.url}/v1/chat/completions",
            data=json.dumps(payload).encode("utf-8"),
            method="POST",
            headers={
                "Authorization": "Bearer sk-from-cursor",
                "Content-Type": "application/json",
            },
        )
        with urlopen(request, timeout=5) as response:
            return json.loads(response.read())

    def test_captures_non_streaming_replay_without_api_key(self) -> None:
        self._post(
            {
                "model": "deepseek-v4-pro",
                "messages": [{"role": "user", "content": "What is tomorrow's date?"}],
            }
        )
        trace = _read_single_trace(self.writer.session_dir)
        serialized = json.dumps(trace)
        self.assertEqual(trace["completion"]["status"], "completed")
        self.assertEqual(
            trace["request"]["body"]["messages"][0]["content"],
            "What is tomorrow's date?",
        )
        self.assertEqual(
            trace["upstream"]["response"]["body"]["json"]["choices"][0]["message"][
                "reasoning_content"
            ],
            "I need the date.",
        )
        self.assertNotIn("sk-from-cursor", serialized)

    def test_captures_streaming_replay_chunks(self) -> None:
        # The streamed reply is SSE rather than JSON, so the request is issued
        # directly instead of through self._post().
        request = Request(
            f"{self.proxy.url}/v1/chat/completions",
            data=json.dumps(
                {
                    "model": "deepseek-v4-pro",
                    "stream": True,
                    "messages": [{"role": "user", "content": "stream"}],
                }
            ).encode("utf-8"),
            method="POST",
            headers={
                "Authorization": "Bearer sk-test",
                "Content-Type": "application/json",
            },
        )
        with urlopen(request, timeout=2) as response:
            response.read()
        trace = _read_single_trace(self.writer.session_dir)
        self.assertEqual(trace["completion"]["status"], "completed")
        self.assertIn(
            "reasoning_content",
            trace["upstream"]["stream"]["chunks"][0]["line"],
        )
        self.assertIn(
            "<details>", trace["cursor_response"]["stream"]["chunks"][0]["line"]
        )

    def test_captures_recovery_diagnostics(self) -> None:
        """A request that triggers cold-cache recovery records the recovery
        steps + diagnostic counters in the trace."""
        self._post(
            {
                "model": "deepseek-v4-pro",
                "messages": [
                    {"role": "user", "content": "old"},
                    {
                        "role": "assistant",
                        "content": "",
                        "tool_calls": [
                            {
                                "id": "call_x",
                                "type": "function",
                                "function": {"name": "f", "arguments": "{}"},
                            }
                        ],
                    },
                    {"role": "tool", "tool_call_id": "call_x", "content": "result"},
                    {"role": "user", "content": "new"},
                ],
            }
        )
        trace = _read_single_trace(self.writer.session_dir)
        self.assertEqual(
            trace["transform"]["recovery_steps"][0]["strategy"], "latest_user"
        )
        self.assertGreaterEqual(
            len(
                [
                    item
                    for item in trace["transform"]["reasoning_diagnostics"]
                    if item["missing"]
                ]
            ),
            1,
        )
if __name__ == "__main__":
unittest.main()

File diff suppressed because it is too large Load Diff