refactor(proxy): audit thinking-mode protocol and refactor test suite (#33)
parent
b65f0dd8a2
commit
be0310751c
|
|
@ -1,3 +1,6 @@
|
||||||
|
# AIs
|
||||||
|
.claude/
|
||||||
|
|
||||||
# Byte-compiled / optimized / DLL files
|
# Byte-compiled / optimized / DLL files
|
||||||
__pycache__/
|
__pycache__/
|
||||||
*.py[codz]
|
*.py[codz]
|
||||||
|
|
|
||||||
|
|
@ -134,7 +134,7 @@ Select `deepseek-v4-pro` in Cursor and use chat or agent mode as usual.
|
||||||
|
|
||||||
## How It Works
|
## How It Works
|
||||||
|
|
||||||
- **Core fix:** DeepSeek's [thinking mode](https://api-docs.deepseek.com/guides/thinking_mode#tool-calls) requires `reasoning_content` from assistant tool-call messages to be passed back in subsequent requests, but Cursor omits this field, causing a 400 error. The proxy (`Cursor → ngrok → proxy → DeepSeek API`) stores `reasoning_content` from every DeepSeek response in a local SQLite cache, keyed by message signature, tool-call ID, and tool-call function signature, and patches outgoing requests with missing `reasoning_content` before they reach DeepSeek. On a cold cache (proxy restart, model switch), it logs and drops unrecoverable history, continues from the latest user request, and prefixes the next Cursor response with a notice.
|
- **Core fix:** DeepSeek [thinking-mode tool calls](https://api-docs.deepseek.com/guides/thinking_mode#tool-calls) require the complete **multi-round** `reasoning_content` chain to be sent back in later requests. Cursor omits that field, causing a 400 error. The proxy (`Cursor -> ngrok -> proxy -> DeepSeek API`) stores DeepSeek's original `reasoning_content` and patches missing blocks back into outgoing tool-call history.
|
||||||
- **Multi-conversation isolation:** To avoid collisions across concurrent conversations, the proxy scopes cache keys by a SHA-256 hash of the canonical conversation prefix (roles, content, and tool calls, excluding `reasoning_content`) plus the upstream model, configuration, and an API-key hash. Different threads get different scopes, so reused tool-call IDs do not collide. Byte-identical cloned histories produce identical scopes.
|
- **Multi-conversation isolation:** To avoid collisions across concurrent conversations, the proxy scopes cache keys by a SHA-256 hash of the canonical conversation prefix (roles, content, and tool calls, excluding `reasoning_content`) plus the upstream model, configuration, and an API-key hash. Different threads get different scopes, so reused tool-call IDs do not collide. Byte-identical cloned histories produce identical scopes.
|
||||||
- **Context caching compatibility:** The proxy preserves compatibility by never injecting synthetic thread IDs, timestamps, or cache-control messages. It restores `reasoning_content` as the exact original string, so repeated prefixes remain intact for [DeepSeek context cache](https://api-docs.deepseek.com/guides/kv_cache). Cache hit rates are logged in the terminal output.
|
- **Context caching compatibility:** The proxy preserves compatibility by never injecting synthetic thread IDs, timestamps, or cache-control messages. It restores `reasoning_content` as the exact original string, so repeated prefixes remain intact for [DeepSeek context cache](https://api-docs.deepseek.com/guides/kv_cache). Cache hit rates are logged in the terminal output.
|
||||||
- **Additional compatibility fixes:** Beyond reasoning repair, the proxy converts legacy `functions`/`function_call` fields to `tools`/`tool_choice`, preserves required and named tool-choice semantics, normalizes `reasoning_effort` aliases, strips mirrored thinking display blocks from assistant content, flattens multi-part content arrays to plain text, and mirrors `reasoning_content` into Cursor-visible Markdown details blocks.
|
- **Additional compatibility fixes:** Beyond reasoning repair, the proxy converts legacy `functions`/`function_call` fields to `tools`/`tool_choice`, preserves required and named tool-choice semantics, normalizes `reasoning_effort` aliases, strips mirrored thinking display blocks from assistant content, flattens multi-part content arrays to plain text, and mirrors `reasoning_content` into Cursor-visible Markdown details blocks.
|
||||||
|
|
|
||||||
|
|
@ -172,8 +172,6 @@ def settings_from_config(
|
||||||
|
|
||||||
def normalize_thinking(value: Any) -> str:
|
def normalize_thinking(value: Any) -> str:
|
||||||
thinking = as_str(value, DEFAULT_THINKING).strip().lower()
|
thinking = as_str(value, DEFAULT_THINKING).strip().lower()
|
||||||
if thinking in {"passthrough", "pass-through", "pass_through"}:
|
|
||||||
return "pass-through"
|
|
||||||
if thinking in {"enabled", "disabled"}:
|
if thinking in {"enabled", "disabled"}:
|
||||||
return thinking
|
return thinking
|
||||||
return DEFAULT_THINKING
|
return DEFAULT_THINKING
|
||||||
|
|
|
||||||
|
|
@ -540,6 +540,8 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler):
|
||||||
scope=record_response_scope,
|
scope=record_response_scope,
|
||||||
prior_messages=record_response_messages,
|
prior_messages=record_response_messages,
|
||||||
recording_contexts=record_response_contexts,
|
recording_contexts=record_response_contexts,
|
||||||
|
display_reasoning=self.config.display_reasoning,
|
||||||
|
collapsible_reasoning=self.config.collapsible_reasoning,
|
||||||
)
|
)
|
||||||
except (json.JSONDecodeError, UnicodeDecodeError) as exc:
|
except (json.JSONDecodeError, UnicodeDecodeError) as exc:
|
||||||
LOG.warning("failed to rewrite upstream JSON response: %s", exc)
|
LOG.warning("failed to rewrite upstream JSON response: %s", exc)
|
||||||
|
|
@ -812,7 +814,7 @@ def build_arg_parser() -> argparse.ArgumentParser:
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--thinking",
|
"--thinking",
|
||||||
choices=["enabled", "disabled", "pass-through"],
|
choices=["enabled", "disabled"],
|
||||||
help="DeepSeek thinking mode, default from config or enabled",
|
help="DeepSeek thinking mode, default from config or enabled",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
|
|
|
||||||
|
|
@ -292,3 +292,34 @@ class CursorReasoningDisplayAdapter:
|
||||||
}
|
}
|
||||||
if metadata:
|
if metadata:
|
||||||
self._last_chunk_metadata.update(metadata)
|
self._last_chunk_metadata.update(metadata)
|
||||||
|
|
||||||
|
|
||||||
|
def fold_reasoning_into_content(
    response_payload: dict[str, Any],
    collapsible: bool,
) -> None:
    """Prepend each choice's reasoning to its visible ``content``, in place.

    Used for non-streaming responses so the reasoning shows up in the same
    delimited layout the streaming path produces.

    For every entry of ``response_payload["choices"]`` that is a dict holding
    a dict ``message`` with a non-empty string ``reasoning_content``, the
    message's ``content`` becomes
    ``<block start> + reasoning + <block end> + original content``.
    A non-string original content (for example ``None``) contributes an
    empty string. Payloads whose ``choices`` is not a list are left as-is.

    Args:
        response_payload: Decoded response body; mutated in place.
        collapsible: Selects the ``COLLAPSIBLE_THINKING_BLOCK_*`` delimiters
            when true, the plain ``THINKING_BLOCK_*`` delimiters otherwise.
    """
    if collapsible:
        opener = COLLAPSIBLE_THINKING_BLOCK_START
        closer = COLLAPSIBLE_THINKING_BLOCK_END
    else:
        opener = THINKING_BLOCK_START
        closer = THINKING_BLOCK_END

    choices = response_payload.get("choices")
    if not isinstance(choices, list):
        return

    for entry in choices:
        # Malformed choices (non-dict entry or non-dict message) are skipped.
        message = entry.get("message") if isinstance(entry, dict) else None
        if not isinstance(message, dict):
            continue

        reasoning = message.get("reasoning_content")
        if not (isinstance(reasoning, str) and reasoning):
            continue

        visible = message.get("content")
        tail = visible if isinstance(visible, str) else ""
        message["content"] = opener + reasoning + closer + tail
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@ from __future__ import annotations
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
import hashlib
|
import hashlib
|
||||||
import json
|
import json
|
||||||
|
import logging
|
||||||
import re
|
import re
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
|
|
@ -15,6 +16,10 @@ from .reasoning_store import (
|
||||||
tool_call_signature,
|
tool_call_signature,
|
||||||
turn_context_signature,
|
turn_context_signature,
|
||||||
)
|
)
|
||||||
|
from .streaming import fold_reasoning_into_content
|
||||||
|
|
||||||
|
|
||||||
|
LOG = logging.getLogger("deepseek_cursor_proxy")
|
||||||
|
|
||||||
|
|
||||||
SUPPORTED_REQUEST_FIELDS = {
|
SUPPORTED_REQUEST_FIELDS = {
|
||||||
|
|
@ -35,6 +40,13 @@ SUPPORTED_REQUEST_FIELDS = {
|
||||||
"frequency_penalty",
|
"frequency_penalty",
|
||||||
"logprobs",
|
"logprobs",
|
||||||
"top_logprobs",
|
"top_logprobs",
|
||||||
|
# Standard OpenAI Chat Completions fields that DeepSeek either honors or
|
||||||
|
# safely ignores. Cursor and most OpenAI SDKs send these unconditionally,
|
||||||
|
# so forwarding keeps clients happy and avoids log spam.
|
||||||
|
"user",
|
||||||
|
"seed",
|
||||||
|
"n",
|
||||||
|
"logit_bias",
|
||||||
}
|
}
|
||||||
|
|
||||||
MESSAGE_FIELDS = {
|
MESSAGE_FIELDS = {
|
||||||
|
|
@ -83,10 +95,6 @@ CURSOR_THINKING_BLOCK_RE = re.compile(
|
||||||
)
|
)
|
||||||
|
|
||||||
RECOVERY_NOTICE_TEXT = "[deepseek-cursor-proxy] Refreshed reasoning_content history."
|
RECOVERY_NOTICE_TEXT = "[deepseek-cursor-proxy] Refreshed reasoning_content history."
|
||||||
LEGACY_RECOVERY_NOTICE_TEXT = (
|
|
||||||
"Note: recovered this DeepSeek chat because older tool-call reasoning "
|
|
||||||
"was unavailable; continuing with recent context only."
|
|
||||||
)
|
|
||||||
RECOVERY_NOTICE_CONTENT = f"{RECOVERY_NOTICE_TEXT}\n\n"
|
RECOVERY_NOTICE_CONTENT = f"{RECOVERY_NOTICE_TEXT}\n\n"
|
||||||
RECOVERY_SYSTEM_CONTENT = (
|
RECOVERY_SYSTEM_CONTENT = (
|
||||||
"deepseek-cursor-proxy recovered this request because older DeepSeek "
|
"deepseek-cursor-proxy recovered this request because older DeepSeek "
|
||||||
|
|
@ -460,10 +468,33 @@ def has_recovery_notice(message: dict[str, Any]) -> bool:
|
||||||
return (
|
return (
|
||||||
message.get("role") == "assistant"
|
message.get("role") == "assistant"
|
||||||
and isinstance(content, str)
|
and isinstance(content, str)
|
||||||
and content.startswith((RECOVERY_NOTICE_TEXT, LEGACY_RECOVERY_NOTICE_TEXT))
|
and content.startswith(RECOVERY_NOTICE_TEXT)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def strip_recovery_notice_for_upstream(
    messages: list[dict[str, Any]],
) -> list[dict[str, Any]]:
    """Build the upstream view of ``messages`` without the recovery notice.

    Cursor sends the proxy's own recovery notice back in later turns. The
    proxy relies on that prefix as a boundary marker, but the text is
    proxy-generated and must not be forwarded to DeepSeek.

    Only assistant messages whose string ``content`` starts with
    ``RECOVERY_NOTICE_TEXT`` are affected: each is replaced by a shallow copy
    whose content has the notice, and any CR/LF characters directly after it,
    removed. All other messages are passed through as the same objects.

    The input list and its dicts are never mutated, so cache scopes and
    recording contexts computed from them keep matching the with-prefix
    history Cursor will send next time.

    Args:
        messages: Conversation history as received from the client.

    Returns:
        A new list suitable for the upstream request body.
    """

    def _for_upstream(message: dict[str, Any]) -> dict[str, Any]:
        if message.get("role") != "assistant":
            return message
        text = message.get("content")
        if not (isinstance(text, str) and text.startswith(RECOVERY_NOTICE_TEXT)):
            return message
        remainder = text[len(RECOVERY_NOTICE_TEXT) :].lstrip("\r\n")
        # "content" already exists, so its position in the copy is preserved.
        return {**message, "content": remainder}

    return [_for_upstream(message) for message in messages]
|
||||||
|
|
||||||
|
|
||||||
def leading_system_messages(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
def leading_system_messages(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||||||
leading_messages: list[dict[str, Any]] = []
|
leading_messages: list[dict[str, Any]] = []
|
||||||
for message in messages:
|
for message in messages:
|
||||||
|
|
@ -628,6 +659,11 @@ def assistant_needs_reasoning_for_tool_context(
|
||||||
def upstream_model_for(original_model: str, config: ProxyConfig) -> str:
|
def upstream_model_for(original_model: str, config: ProxyConfig) -> str:
|
||||||
if original_model.startswith("deepseek-"):
|
if original_model.startswith("deepseek-"):
|
||||||
return original_model
|
return original_model
|
||||||
|
LOG.warning(
|
||||||
|
"rewriting non-DeepSeek model %r to configured fallback %r",
|
||||||
|
original_model,
|
||||||
|
config.upstream_model,
|
||||||
|
)
|
||||||
return config.upstream_model
|
return config.upstream_model
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -688,6 +724,16 @@ def prepare_upstream_request(
|
||||||
prepared = {
|
prepared = {
|
||||||
key: value for key, value in payload.items() if key in SUPPORTED_REQUEST_FIELDS
|
key: value for key, value in payload.items() if key in SUPPORTED_REQUEST_FIELDS
|
||||||
}
|
}
|
||||||
|
dropped_fields = sorted(
|
||||||
|
key
|
||||||
|
for key in payload.keys()
|
||||||
|
if key not in SUPPORTED_REQUEST_FIELDS
|
||||||
|
and key not in {"max_completion_tokens", "functions", "function_call"}
|
||||||
|
)
|
||||||
|
if dropped_fields:
|
||||||
|
LOG.warning(
|
||||||
|
"dropping unsupported request field(s): %s", ", ".join(dropped_fields)
|
||||||
|
)
|
||||||
if "max_tokens" not in prepared and "max_completion_tokens" in payload:
|
if "max_tokens" not in prepared and "max_completion_tokens" in payload:
|
||||||
prepared["max_tokens"] = payload["max_completion_tokens"]
|
prepared["max_tokens"] = payload["max_completion_tokens"]
|
||||||
|
|
||||||
|
|
@ -719,14 +765,9 @@ def prepare_upstream_request(
|
||||||
if tool_choice is not None:
|
if tool_choice is not None:
|
||||||
prepared["tool_choice"] = tool_choice
|
prepared["tool_choice"] = tool_choice
|
||||||
|
|
||||||
if config.thinking != "pass-through":
|
|
||||||
prepared["thinking"] = {"type": config.thinking}
|
prepared["thinking"] = {"type": config.thinking}
|
||||||
|
thinking_enabled = config.thinking == "enabled"
|
||||||
thinking = prepared.get("thinking")
|
thinking_disabled = config.thinking == "disabled"
|
||||||
thinking_enabled = isinstance(thinking, dict) and thinking.get("type") == "enabled"
|
|
||||||
thinking_disabled = (
|
|
||||||
isinstance(thinking, dict) and thinking.get("type") == "disabled"
|
|
||||||
)
|
|
||||||
if thinking_enabled:
|
if thinking_enabled:
|
||||||
prepared["reasoning_effort"] = normalize_reasoning_effort(
|
prepared["reasoning_effort"] = normalize_reasoning_effort(
|
||||||
prepared.get("reasoning_effort") or config.reasoning_effort
|
prepared.get("reasoning_effort") or config.reasoning_effort
|
||||||
|
|
@ -797,12 +838,12 @@ def prepare_upstream_request(
|
||||||
keep_reasoning=not thinking_disabled,
|
keep_reasoning=not thinking_disabled,
|
||||||
)
|
)
|
||||||
reasoning_diagnostics.extend(latest_diagnostics)
|
reasoning_diagnostics.extend(latest_diagnostics)
|
||||||
prepared["messages"] = messages
|
|
||||||
active_record_response_scope = conversation_scope(messages, cache_namespace)
|
active_record_response_scope = conversation_scope(messages, cache_namespace)
|
||||||
record_response_contexts = response_recording_contexts(
|
record_response_contexts = response_recording_contexts(
|
||||||
(record_response_scope, record_response_messages),
|
(record_response_scope, record_response_messages),
|
||||||
(active_record_response_scope, messages),
|
(active_record_response_scope, messages),
|
||||||
)
|
)
|
||||||
|
prepared["messages"] = strip_recovery_notice_for_upstream(messages)
|
||||||
|
|
||||||
return PreparedRequest(
|
return PreparedRequest(
|
||||||
payload=prepared,
|
payload=prepared,
|
||||||
|
|
@ -874,6 +915,8 @@ def rewrite_response_body(
|
||||||
scope: str | None = None,
|
scope: str | None = None,
|
||||||
prior_messages: list[dict[str, Any]] | None = None,
|
prior_messages: list[dict[str, Any]] | None = None,
|
||||||
recording_contexts: list[tuple[str, list[dict[str, Any]]]] | None = None,
|
recording_contexts: list[tuple[str, list[dict[str, Any]]]] | None = None,
|
||||||
|
display_reasoning: bool = False,
|
||||||
|
collapsible_reasoning: bool = True,
|
||||||
) -> bytes:
|
) -> bytes:
|
||||||
response_payload = json.loads(body.decode("utf-8"))
|
response_payload = json.loads(body.decode("utf-8"))
|
||||||
if isinstance(response_payload, dict):
|
if isinstance(response_payload, dict):
|
||||||
|
|
@ -888,6 +931,8 @@ def rewrite_response_body(
|
||||||
prior_messages=prior_messages,
|
prior_messages=prior_messages,
|
||||||
recording_contexts=recording_contexts,
|
recording_contexts=recording_contexts,
|
||||||
)
|
)
|
||||||
|
if display_reasoning:
|
||||||
|
fold_reasoning_into_content(response_payload, collapsible_reasoning)
|
||||||
if "model" in response_payload:
|
if "model" in response_payload:
|
||||||
response_payload["model"] = original_model
|
response_payload["model"] = original_model
|
||||||
return json.dumps(
|
return json.dumps(
|
||||||
|
|
|
||||||
|
|
@ -121,7 +121,7 @@ class ConfigTests(unittest.TestCase):
|
||||||
[
|
[
|
||||||
"base_url: https://example.com/v1/",
|
"base_url: https://example.com/v1/",
|
||||||
"model: deepseek-v4-flash",
|
"model: deepseek-v4-flash",
|
||||||
"thinking: pass_through",
|
"thinking: disabled",
|
||||||
"reasoning_effort: max",
|
"reasoning_effort: max",
|
||||||
"port: 9100",
|
"port: 9100",
|
||||||
"host: 0.0.0.0",
|
"host: 0.0.0.0",
|
||||||
|
|
@ -145,7 +145,7 @@ class ConfigTests(unittest.TestCase):
|
||||||
|
|
||||||
self.assertEqual(config.upstream_base_url, "https://example.com/v1")
|
self.assertEqual(config.upstream_base_url, "https://example.com/v1")
|
||||||
self.assertEqual(config.upstream_model, "deepseek-v4-flash")
|
self.assertEqual(config.upstream_model, "deepseek-v4-flash")
|
||||||
self.assertEqual(config.thinking, "pass-through")
|
self.assertEqual(config.thinking, "disabled")
|
||||||
self.assertEqual(config.reasoning_effort, "max")
|
self.assertEqual(config.reasoning_effort, "max")
|
||||||
self.assertEqual(config.host, "0.0.0.0")
|
self.assertEqual(config.host, "0.0.0.0")
|
||||||
self.assertEqual(config.port, 9100)
|
self.assertEqual(config.port, 9100)
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
|
@ -1,24 +1,45 @@
|
||||||
|
"""Server boundary, CLI, and operational tests.
|
||||||
|
|
||||||
|
Pure helper tests (gzip, summarize) and stub-handler tests (client
|
||||||
|
disconnect) live near the top. The bottom of the file boots a real proxy +
|
||||||
|
tiny upstream to exercise things that need the HTTP layer: bearer token
|
||||||
|
forwarding, oversized body, missing-bearer rejection, logging modes, and
|
||||||
|
streaming connection close.
|
||||||
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import replace
|
||||||
|
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
import gzip
|
import gzip
|
||||||
import json
|
import json
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
from types import SimpleNamespace
|
from types import SimpleNamespace
|
||||||
import unittest
|
import unittest
|
||||||
import zlib
|
import zlib
|
||||||
|
from urllib.error import HTTPError
|
||||||
|
from urllib.request import Request, urlopen
|
||||||
|
|
||||||
from deepseek_cursor_proxy.config import ProxyConfig
|
from deepseek_cursor_proxy.config import ProxyConfig
|
||||||
from deepseek_cursor_proxy.reasoning_store import ReasoningStore
|
from deepseek_cursor_proxy.reasoning_store import ReasoningStore
|
||||||
from deepseek_cursor_proxy.server import (
|
from deepseek_cursor_proxy.server import (
|
||||||
DeepSeekProxyHandler,
|
DeepSeekProxyHandler,
|
||||||
|
DeepSeekProxyServer,
|
||||||
build_arg_parser,
|
build_arg_parser,
|
||||||
read_response_body,
|
read_response_body,
|
||||||
summarize_chat_payload,
|
summarize_chat_payload,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class FakeResponse:
|
# ---------------------------------------------------------------------------
|
||||||
|
# Stubs for fast in-process tests of internal handler methods
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class _FakeResponse:
|
||||||
def __init__(self, body: bytes, encoding: str = "", status: int = 200) -> None:
|
def __init__(self, body: bytes, encoding: str = "", status: int = 200) -> None:
|
||||||
self._body = BytesIO(body)
|
self._body = BytesIO(body)
|
||||||
self.headers = {"Content-Encoding": encoding} if encoding else {}
|
self.headers = {"Content-Encoding": encoding} if encoding else {}
|
||||||
|
|
@ -28,7 +49,7 @@ class FakeResponse:
|
||||||
return self._body.read()
|
return self._body.read()
|
||||||
|
|
||||||
|
|
||||||
class FakeStreamingResponse:
|
class _FakeStreamingResponse:
|
||||||
status = 200
|
status = 200
|
||||||
headers = {"Content-Type": "text/event-stream"}
|
headers = {"Content-Type": "text/event-stream"}
|
||||||
|
|
||||||
|
|
@ -43,7 +64,7 @@ class FakeStreamingResponse:
|
||||||
return self._lines.pop(0)
|
return self._lines.pop(0)
|
||||||
|
|
||||||
|
|
||||||
class FailingStreamingResponse:
|
class _FailingStreamingResponse:
|
||||||
status = 200
|
status = 200
|
||||||
headers = {"Content-Type": "text/event-stream"}
|
headers = {"Content-Type": "text/event-stream"}
|
||||||
|
|
||||||
|
|
@ -51,7 +72,7 @@ class FailingStreamingResponse:
|
||||||
raise OSError("record layer failure")
|
raise OSError("record layer failure")
|
||||||
|
|
||||||
|
|
||||||
class BrokenPipeWfile:
|
class _BrokenPipeWfile:
|
||||||
def write(self, body: bytes) -> None:
|
def write(self, body: bytes) -> None:
|
||||||
raise BrokenPipeError("test disconnect")
|
raise BrokenPipeError("test disconnect")
|
||||||
|
|
||||||
|
|
@ -59,10 +80,10 @@ class BrokenPipeWfile:
|
||||||
raise BrokenPipeError("test disconnect")
|
raise BrokenPipeError("test disconnect")
|
||||||
|
|
||||||
|
|
||||||
def make_proxy_handler(wfile: object) -> DeepSeekProxyHandler:
|
def _make_handler_stub(wfile: object, **config: object) -> DeepSeekProxyHandler:
|
||||||
handler = object.__new__(DeepSeekProxyHandler)
|
handler = object.__new__(DeepSeekProxyHandler)
|
||||||
handler.server = SimpleNamespace(
|
handler.server = SimpleNamespace(
|
||||||
config=ProxyConfig(),
|
config=ProxyConfig(**config),
|
||||||
reasoning_store=ReasoningStore(":memory:"),
|
reasoning_store=ReasoningStore(":memory:"),
|
||||||
)
|
)
|
||||||
handler.wfile = wfile
|
handler.wfile = wfile
|
||||||
|
|
@ -73,8 +94,13 @@ def make_proxy_handler(wfile: object) -> DeepSeekProxyHandler:
|
||||||
return handler
|
return handler
|
||||||
|
|
||||||
|
|
||||||
class ServerTests(unittest.TestCase):
|
# ---------------------------------------------------------------------------
|
||||||
def test_cli_boolean_overrides_have_on_and_off_forms(self) -> None:
|
# CLI / pure helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class CliAndHelperTests(unittest.TestCase):
|
||||||
|
def test_cli_boolean_flags_have_on_and_off_forms(self) -> None:
|
||||||
args = build_arg_parser().parse_args(
|
args = build_arg_parser().parse_args(
|
||||||
[
|
[
|
||||||
"--no-ngrok",
|
"--no-ngrok",
|
||||||
|
|
@ -86,7 +112,6 @@ class ServerTests(unittest.TestCase):
|
||||||
"/tmp/dcp-traces",
|
"/tmp/dcp-traces",
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
self.assertFalse(args.ngrok)
|
self.assertFalse(args.ngrok)
|
||||||
self.assertFalse(args.verbose)
|
self.assertFalse(args.verbose)
|
||||||
self.assertFalse(args.display_reasoning)
|
self.assertFalse(args.display_reasoning)
|
||||||
|
|
@ -94,19 +119,17 @@ class ServerTests(unittest.TestCase):
|
||||||
self.assertTrue(args.cors)
|
self.assertTrue(args.cors)
|
||||||
self.assertEqual(args.trace_dir, Path("/tmp/dcp-traces"))
|
self.assertEqual(args.trace_dir, Path("/tmp/dcp-traces"))
|
||||||
|
|
||||||
def test_read_response_body_handles_gzip(self) -> None:
|
def test_read_response_body_decodes_gzip_and_deflate(self) -> None:
|
||||||
body = gzip.compress(b'{"ok":true}')
|
|
||||||
|
|
||||||
self.assertEqual(read_response_body(FakeResponse(body, "gzip")), b'{"ok":true}')
|
|
||||||
|
|
||||||
def test_read_response_body_handles_deflate(self) -> None:
|
|
||||||
body = zlib.compress(b'{"ok":true}')
|
|
||||||
|
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
read_response_body(FakeResponse(body, "deflate")), b'{"ok":true}'
|
read_response_body(_FakeResponse(gzip.compress(b'{"ok":1}'), "gzip")),
|
||||||
|
b'{"ok":1}',
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
read_response_body(_FakeResponse(zlib.compress(b'{"ok":1}'), "deflate")),
|
||||||
|
b'{"ok":1}',
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_summarize_chat_payload_does_not_include_message_content(self) -> None:
|
def test_summarize_chat_payload_omits_message_content(self) -> None:
|
||||||
summary = summarize_chat_payload(
|
summary = summarize_chat_payload(
|
||||||
{
|
{
|
||||||
"model": "deepseek-v4-pro",
|
"model": "deepseek-v4-pro",
|
||||||
|
|
@ -116,18 +139,22 @@ class ServerTests(unittest.TestCase):
|
||||||
"tool_choice": "auto",
|
"tool_choice": "auto",
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
self.assertIn("model='deepseek-v4-pro'", summary)
|
self.assertIn("model='deepseek-v4-pro'", summary)
|
||||||
self.assertIn("stream=True", summary)
|
|
||||||
self.assertIn("messages=1", summary)
|
self.assertIn("messages=1", summary)
|
||||||
self.assertIn("tools=1", summary)
|
|
||||||
self.assertNotIn("secret prompt", summary)
|
self.assertNotIn("secret prompt", summary)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Client-disconnect / upstream-failure stubs (no real HTTP needed)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class HandlerStubTests(unittest.TestCase):
|
||||||
def test_regular_response_handles_client_disconnect(self) -> None:
|
def test_regular_response_handles_client_disconnect(self) -> None:
|
||||||
handler = make_proxy_handler(BrokenPipeWfile())
|
handler = _make_handler_stub(_BrokenPipeWfile())
|
||||||
body = json.dumps(
|
body = json.dumps(
|
||||||
{
|
{
|
||||||
"id": "chatcmpl-test",
|
"id": "x",
|
||||||
"object": "chat.completion",
|
"object": "chat.completion",
|
||||||
"model": "deepseek-v4-pro",
|
"model": "deepseek-v4-pro",
|
||||||
"choices": [
|
"choices": [
|
||||||
|
|
@ -139,116 +166,324 @@ class ServerTests(unittest.TestCase):
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
).encode("utf-8")
|
).encode("utf-8")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with self.assertLogs("deepseek_cursor_proxy", level="WARNING") as captured:
|
with self.assertLogs("deepseek_cursor_proxy", level="WARNING") as captured:
|
||||||
sent = handler._proxy_regular_response(
|
result = handler._proxy_regular_response(
|
||||||
FakeResponse(body),
|
_FakeResponse(body),
|
||||||
"deepseek-v4-pro",
|
"deepseek-v4-pro",
|
||||||
[{"role": "user", "content": "hi"}],
|
[{"role": "user", "content": "hi"}],
|
||||||
"cache-namespace",
|
"ns",
|
||||||
)
|
)
|
||||||
finally:
|
finally:
|
||||||
handler.server.reasoning_store.close()
|
handler.server.reasoning_store.close()
|
||||||
|
self.assertFalse(result.sent)
|
||||||
self.assertFalse(sent.sent)
|
|
||||||
self.assertIn("sending upstream response body", "\n".join(captured.output))
|
self.assertIn("sending upstream response body", "\n".join(captured.output))
|
||||||
|
|
||||||
def test_streaming_response_stops_on_client_disconnect(self) -> None:
|
def test_streaming_response_stops_on_client_disconnect(self) -> None:
|
||||||
handler = make_proxy_handler(BrokenPipeWfile())
|
handler = _make_handler_stub(_BrokenPipeWfile())
|
||||||
chunk = {
|
chunk = {
|
||||||
"id": "chatcmpl-stream",
|
"id": "stream",
|
||||||
"model": "deepseek-v4-pro",
|
"model": "deepseek-v4-pro",
|
||||||
"choices": [
|
"choices": [{"index": 0, "delta": {"role": "assistant", "content": "hi"}}],
|
||||||
{
|
|
||||||
"index": 0,
|
|
||||||
"delta": {"role": "assistant", "content": "hello"},
|
|
||||||
}
|
}
|
||||||
],
|
response = _FakeStreamingResponse(
|
||||||
}
|
|
||||||
response = FakeStreamingResponse(
|
|
||||||
[
|
[
|
||||||
f"data: {json.dumps(chunk)}\n\n".encode("utf-8"),
|
f"data: {json.dumps(chunk)}\n\n".encode("utf-8"),
|
||||||
b"data: [DONE]\n\n",
|
b"data: [DONE]\n\n",
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with self.assertLogs("deepseek_cursor_proxy", level="WARNING") as captured:
|
with self.assertLogs("deepseek_cursor_proxy", level="WARNING") as captured:
|
||||||
sent = handler._proxy_streaming_response(
|
result = handler._proxy_streaming_response(
|
||||||
response,
|
response,
|
||||||
"deepseek-v4-pro",
|
"deepseek-v4-pro",
|
||||||
[{"role": "user", "content": "hi"}],
|
[{"role": "user", "content": "hi"}],
|
||||||
"cache-namespace",
|
"ns",
|
||||||
)
|
)
|
||||||
finally:
|
finally:
|
||||||
handler.server.reasoning_store.close()
|
handler.server.reasoning_store.close()
|
||||||
|
self.assertFalse(result.sent)
|
||||||
self.assertFalse(sent.sent)
|
|
||||||
self.assertEqual(response.readline_calls, 1)
|
self.assertEqual(response.readline_calls, 1)
|
||||||
self.assertIn("sending streaming response chunk", "\n".join(captured.output))
|
self.assertIn("sending streaming response chunk", "\n".join(captured.output))
|
||||||
|
|
||||||
def test_streaming_response_handles_upstream_read_failure(self) -> None:
|
def test_streaming_response_handles_upstream_read_failure(self) -> None:
|
||||||
handler = make_proxy_handler(BytesIO())
|
handler = _make_handler_stub(BytesIO())
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with self.assertLogs("deepseek_cursor_proxy", level="WARNING") as captured:
|
with self.assertLogs("deepseek_cursor_proxy", level="WARNING") as captured:
|
||||||
sent = handler._proxy_streaming_response(
|
result = handler._proxy_streaming_response(
|
||||||
FailingStreamingResponse(),
|
_FailingStreamingResponse(),
|
||||||
"deepseek-v4-pro",
|
"deepseek-v4-pro",
|
||||||
[{"role": "user", "content": "hi"}],
|
[{"role": "user", "content": "hi"}],
|
||||||
"cache-namespace",
|
"ns",
|
||||||
)
|
)
|
||||||
finally:
|
finally:
|
||||||
handler.server.reasoning_store.close()
|
handler.server.reasoning_store.close()
|
||||||
|
self.assertFalse(result.sent)
|
||||||
self.assertFalse(sent.sent)
|
|
||||||
self.assertIn(
|
self.assertIn(
|
||||||
"upstream streaming response read failed",
|
"upstream streaming response read failed", "\n".join(captured.output)
|
||||||
"\n".join(captured.output),
|
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_collapsible_reasoning_has_no_effect_when_display_is_disabled(
|
def test_collapsible_reasoning_no_effect_when_display_disabled(self) -> None:
|
||||||
self,
|
|
||||||
) -> None:
|
|
||||||
wfile = BytesIO()
|
wfile = BytesIO()
|
||||||
handler = make_proxy_handler(wfile)
|
handler = _make_handler_stub(
|
||||||
handler.server.config = ProxyConfig(
|
wfile, display_reasoning=False, collapsible_reasoning=True
|
||||||
display_reasoning=False,
|
|
||||||
collapsible_reasoning=True,
|
|
||||||
)
|
)
|
||||||
chunk = {
|
chunk = {
|
||||||
"id": "chatcmpl-stream",
|
"id": "stream",
|
||||||
"model": "deepseek-v4-pro",
|
"model": "deepseek-v4-pro",
|
||||||
"choices": [
|
"choices": [{"index": 0, "delta": {"reasoning_content": "Need context."}}],
|
||||||
{
|
|
||||||
"index": 0,
|
|
||||||
"delta": {"reasoning_content": "Need context."},
|
|
||||||
}
|
}
|
||||||
],
|
response = _FakeStreamingResponse(
|
||||||
}
|
|
||||||
response = FakeStreamingResponse(
|
|
||||||
[
|
[
|
||||||
f"data: {json.dumps(chunk)}\n\n".encode("utf-8"),
|
f"data: {json.dumps(chunk)}\n\n".encode("utf-8"),
|
||||||
b"data: [DONE]\n\n",
|
b"data: [DONE]\n\n",
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
sent = handler._proxy_streaming_response(
|
handler._proxy_streaming_response(
|
||||||
response,
|
response,
|
||||||
"deepseek-v4-pro",
|
"deepseek-v4-pro",
|
||||||
[{"role": "user", "content": "hi"}],
|
[{"role": "user", "content": "hi"}],
|
||||||
"cache-namespace",
|
"ns",
|
||||||
)
|
)
|
||||||
finally:
|
finally:
|
||||||
handler.server.reasoning_store.close()
|
handler.server.reasoning_store.close()
|
||||||
|
|
||||||
body = wfile.getvalue().decode("utf-8")
|
body = wfile.getvalue().decode("utf-8")
|
||||||
self.assertTrue(sent.sent)
|
|
||||||
self.assertIn("reasoning_content", body)
|
self.assertIn("reasoning_content", body)
|
||||||
self.assertNotIn("<details>", body)
|
self.assertNotIn("<details>", body)
|
||||||
self.assertNotIn("<think>", body)
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# HTTP-level boundary tests: real proxy + tiny upstream
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class _PlainFakeUpstream(BaseHTTPRequestHandler):
    """Minimal stand-in upstream: logs each POST and answers with canned data.

    State is kept on the class (not the instance) because the HTTP server
    creates a new handler object per request; tests reset these attributes
    before each run.
    """

    # Decoded JSON body of every POST received, in arrival order.
    requests: list[dict[str, object]] = []
    # ``Authorization`` header of every POST ("" when the header is absent).
    auth_headers: list[str] = []
    # Seconds to keep the handler alive after the streamed ``[DONE]`` marker.
    delay_after_done: float = 0.0
    # JSON document returned for non-streaming requests.
    response: dict[str, object] = {}

    def log_message(self, fmt: str, *args: object) -> None:
        # Silence the default per-request stderr logging.
        return

    def do_POST(self) -> None:
        """Record the request, then reply as SSE or JSON per ``stream``."""
        handler_cls = type(self)
        body_size = int(self.headers.get("Content-Length") or 0)
        raw_request = self.rfile.read(body_size)
        payload = json.loads(raw_request.decode("utf-8"))
        handler_cls.requests.append(payload)
        handler_cls.auth_headers.append(self.headers.get("Authorization", ""))

        if payload.get("stream"):
            self._reply_with_stream()
        else:
            self._reply_with_json()

    def _reply_with_stream(self) -> None:
        """Send one content chunk plus ``[DONE]``, then optionally linger."""
        self.send_response(200)
        self.send_header("Content-Type", "text/event-stream")
        self.end_headers()
        for event in (
            b'data: {"choices":[{"index":0,"delta":{"content":"x"}}]}\n\n',
            b"data: [DONE]\n\n",
        ):
            self.wfile.write(event)
        self.wfile.flush()
        # A non-zero delay keeps the handler (and its socket) busy after the
        # final event has already been flushed.
        linger = type(self).delay_after_done
        if linger:
            time.sleep(linger)

    def _reply_with_json(self) -> None:
        """Send the class-level ``response`` as a JSON document."""
        encoded = json.dumps(type(self).response).encode("utf-8")
        self.send_response(200)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(encoded)))
        self.end_headers()
        self.wfile.write(encoded)
|
||||||
|
|
||||||
|
|
||||||
|
# Canned non-streaming chat completion. HttpBoundaryTests.setUp copies it into
# _PlainFakeUpstream.response before every test. The usage section includes
# prompt-cache hit/miss counters and a reasoning-token count.
_BASE_RESPONSE: dict[str, object] = {
    "id": "x",
    "object": "chat.completion",
    "created": 1,
    "model": "deepseek-v4-pro",
    "choices": [
        {
            "index": 0,
            "finish_reason": "stop",
            "message": {"role": "assistant", "content": "ok"},
        },
    ],
    "usage": {
        # prompt side
        "prompt_tokens": 20,
        # completion side
        "completion_tokens": 5,
        "total_tokens": 25,
        "prompt_cache_hit_tokens": 12,
        "prompt_cache_miss_tokens": 8,
        "completion_tokens_details": {"reasoning_tokens": 3},
    },
}
|
||||||
|
|
||||||
|
|
||||||
|
class _Fixture:
    """Serve an HTTP server from a background daemon thread for one test."""

    def __init__(self, server: ThreadingHTTPServer) -> None:
        """Start ``server.serve_forever`` on a new daemon thread."""
        worker = threading.Thread(target=server.serve_forever, daemon=True)
        self.server = server
        self.thread = worker
        worker.start()

    @property
    def url(self) -> str:
        """Base ``http://host:port`` URL built from the server's bound address."""
        host, port = self.server.server_address
        return "http://{}:{}".format(host, port)

    def close(self) -> None:
        """Stop serving, release the socket, and wait briefly for the thread."""
        server = self.server
        server.shutdown()
        server.server_close()
        # Bounded join so a stuck handler cannot hang the test run.
        self.thread.join(timeout=5)
|
||||||
|
|
||||||
|
|
||||||
|
def _post(url: str, payload: dict, api_key: str = "sk-test") -> tuple[int, dict]:
    """POST ``payload`` as JSON with a bearer token and return ``(status, body)``.

    HTTP error statuses are not raised to the caller: the error's status code
    and JSON-decoded body are returned in the same shape as a success.
    """
    request = Request(
        url,
        data=json.dumps(payload).encode("utf-8"),
        method="POST",
        headers={
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        },
    )
    try:
        with urlopen(request, timeout=5) as response:
            status = response.status
            raw = response.read()
    except HTTPError as exc:
        # The error object doubles as the response; read its body the same way.
        status = exc.code
        raw = exc.read()
    return status, json.loads(raw.decode("utf-8"))
|
||||||
|
|
||||||
|
|
||||||
|
class HttpBoundaryTests(unittest.TestCase):
    """Real-HTTP tests that don't fit the protocol suite: things the proxy
    must do at the HTTP boundary regardless of what DeepSeek answers."""

    def setUp(self) -> None:
        """Start a fake upstream and a proxy pointed at it on ephemeral ports."""
        # Handler state lives on the class, so reset it for every test.
        _PlainFakeUpstream.requests = []
        _PlainFakeUpstream.auth_headers = []
        _PlainFakeUpstream.delay_after_done = 0.0
        _PlainFakeUpstream.response = dict(_BASE_RESPONSE)

        # Every resource is registered with addCleanup as soon as it exists so
        # that it is released even when a later setUp step raises (tearDown is
        # skipped in that case). Cleanups run last-in-first-out, which yields
        # the shutdown order proxy -> upstream -> store.
        self.store = ReasoningStore(":memory:")
        self.addCleanup(self.store.close)

        self.upstream = _Fixture(
            ThreadingHTTPServer(("127.0.0.1", 0), _PlainFakeUpstream)
        )
        self.addCleanup(self.upstream.close)

        proxy = DeepSeekProxyServer(("127.0.0.1", 0), DeepSeekProxyHandler)
        proxy.config = ProxyConfig(
            upstream_base_url=self.upstream.url,
            upstream_model="deepseek-v4-pro",
            ngrok=False,
        )
        proxy.reasoning_store = self.store
        self.proxy = _Fixture(proxy)
        self.addCleanup(self.proxy.close)

    def _request(self) -> dict:
        """Return a minimal single-turn chat-completion request body."""
        return {
            "model": "deepseek-v4-pro",
            "messages": [{"role": "user", "content": "hi"}],
        }

    def test_rejects_missing_bearer_token(self) -> None:
        """A request without an Authorization header gets 401 and never
        reaches the upstream."""
        request = Request(
            f"{self.proxy.url}/v1/chat/completions",
            data=json.dumps(self._request()).encode("utf-8"),
            method="POST",
            headers={"Content-Type": "application/json"},
        )
        with self.assertRaises(HTTPError) as caught:
            urlopen(request, timeout=5)
        # HTTPError wraps the open response; close it so the socket is not
        # left for the garbage collector (which emits a ResourceWarning).
        self.addCleanup(caught.exception.close)
        self.assertEqual(caught.exception.code, 401)
        self.assertEqual(_PlainFakeUpstream.requests, [])

    def test_rejects_oversized_request_body(self) -> None:
        """Bodies over ``max_request_body_bytes`` get 413 and are not forwarded."""
        self.proxy.server.config = replace(
            self.proxy.server.config, max_request_body_bytes=10
        )
        status, payload = _post(
            f"{self.proxy.url}/v1/chat/completions", self._request()
        )
        self.assertEqual(status, 413)
        self.assertIn("too large", payload["error"]["message"])
        self.assertEqual(_PlainFakeUpstream.requests, [])

    def test_forwards_bearer_token_to_upstream(self) -> None:
        """The client's bearer token is passed through to the upstream."""
        status, _ = _post(
            f"{self.proxy.url}/v1/chat/completions",
            self._request(),
            api_key="sk-from-cursor",
        )
        self.assertEqual(status, 200)
        self.assertEqual(_PlainFakeUpstream.auth_headers[0], "Bearer sk-from-cursor")

    def test_streaming_response_closes_after_done_when_upstream_lingers(
        self,
    ) -> None:
        """Cursor relies on the proxy ending the SSE stream at [DONE], even
        if the upstream socket stays open."""
        # The upstream keeps its handler alive for 2s after [DONE]; the client
        # below must still finish reading in under 1s.
        _PlainFakeUpstream.delay_after_done = 2.0
        request = Request(
            f"{self.proxy.url}/v1/chat/completions",
            data=json.dumps(
                {
                    "model": "deepseek-v4-pro",
                    "stream": True,
                    "messages": [{"role": "user", "content": "stream"}],
                }
            ).encode("utf-8"),
            method="POST",
            headers={
                "Authorization": "Bearer sk-test",
                "Content-Type": "application/json",
            },
        )
        started = time.monotonic()
        with urlopen(request, timeout=1) as response:
            body = response.read().decode("utf-8")
        self.assertLess(time.monotonic() - started, 1.0)
        self.assertIn("data: [DONE]", body)

    def test_normal_logging_summarizes_without_bodies_or_keys(self) -> None:
        """Default logging emits the stage markers but neither the message
        text on the cursor line nor the API key."""
        with self.assertLogs("deepseek_cursor_proxy", level="INFO") as captured:
            status, _ = _post(
                f"{self.proxy.url}/v1/chat/completions",
                self._request(),
                api_key="sk-from-cursor",
            )
            # `└ stats` is emitted on the handler thread *after* the response
            # body hits the socket, so the client may return before it lands.
            deadline = time.monotonic() + 2
            while time.monotonic() < deadline and not any(
                "└ stats" in record for record in captured.output
            ):
                time.sleep(0.01)
        output = "\n".join(captured.output)
        self.assertEqual(status, 200)
        # Single-line stage records keep the log readable.
        for marker in ("┌ cursor", "├ context", "├ send", "└ stats"):
            self.assertIn(marker, output)
        # Only the remainder of the `┌ cursor` line is checked for the prompt.
        cursor_line = output.split("┌ cursor")[1].split("\n")[0]
        self.assertNotIn("hi", cursor_line)
        self.assertNotIn("sk-from-cursor", output)

    def test_verbose_logging_includes_bodies_but_redacts_api_key(self) -> None:
        """Verbose mode logs both request bodies yet still hides the API key."""
        self.proxy.server.config = replace(self.proxy.server.config, verbose=True)
        with self.assertLogs("deepseek_cursor_proxy", level="INFO") as captured:
            _post(
                f"{self.proxy.url}/v1/chat/completions",
                self._request(),
                api_key="sk-from-cursor",
            )
        output = "\n".join(captured.output)
        self.assertIn("cursor request body", output)
        self.assertIn("upstream request body", output)
        self.assertNotIn("sk-from-cursor", output)

    def test_healthz_returns_ok(self) -> None:
        """``GET /healthz`` answers 200 with ``{"ok": true}``."""
        with urlopen(f"{self.proxy.url}/healthz", timeout=2) as response:
            self.assertEqual(response.status, 200)
            self.assertEqual(json.loads(response.read())["ok"], True)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@ from deepseek_cursor_proxy.reasoning_store import ReasoningStore, conversation_s
|
||||||
from deepseek_cursor_proxy.streaming import (
|
from deepseek_cursor_proxy.streaming import (
|
||||||
CursorReasoningDisplayAdapter,
|
CursorReasoningDisplayAdapter,
|
||||||
StreamAccumulator,
|
StreamAccumulator,
|
||||||
|
fold_reasoning_into_content,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -430,5 +431,44 @@ class CursorReasoningDisplayAdapterTests(unittest.TestCase):
|
||||||
self.assertIsNone(adapter.flush_chunk("deepseek-v4-pro"))
|
self.assertIsNone(adapter.flush_chunk("deepseek-v4-pro"))
|
||||||
|
|
||||||
|
|
||||||
|
class FoldReasoningTests(unittest.TestCase):
    """Checks ``fold_reasoning_into_content`` on non-streaming payloads."""

    @staticmethod
    def _payload_with_reasoning(reasoning: str) -> dict:
        """Build a one-choice completion whose content is ``"answer"``."""
        message = {
            "role": "assistant",
            "content": "answer",
            "reasoning_content": reasoning,
        }
        return {"choices": [{"index": 0, "message": message}]}

    def test_fold_reasoning_into_non_streaming_content(self) -> None:
        """With collapsible=True, reasoning is prepended to the content as a
        ``<details>`` block titled "Thinking"."""
        payload = self._payload_with_reasoning("thinking")
        fold_reasoning_into_content(payload, collapsible=True)
        folded = payload["choices"][0]["message"]["content"]
        self.assertEqual(
            folded,
            "<details>\n<summary>Thinking</summary>\n\nthinking\n</details>\n\nanswer",
        )

    def test_fold_reasoning_skips_empty_reasoning(self) -> None:
        """An empty reasoning string leaves the content untouched."""
        payload = self._payload_with_reasoning("")
        fold_reasoning_into_content(payload, collapsible=True)
        folded = payload["choices"][0]["message"]["content"]
        self.assertEqual(folded, "answer")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
||||||
|
|
@ -1,14 +1,25 @@
|
||||||
|
"""Trace writer tests, both as a unit (writes/redacts files) and integrated
|
||||||
|
through the proxy (captures real request flow on disk)."""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
|
||||||
import json
|
import json
|
||||||
|
from pathlib import Path
|
||||||
import stat
|
import stat
|
||||||
|
import threading
|
||||||
from tempfile import TemporaryDirectory
|
from tempfile import TemporaryDirectory
|
||||||
|
import time
|
||||||
import unittest
|
import unittest
|
||||||
|
from urllib.request import Request, urlopen
|
||||||
|
|
||||||
|
from deepseek_cursor_proxy.config import ProxyConfig
|
||||||
|
from deepseek_cursor_proxy.reasoning_store import ReasoningStore
|
||||||
|
from deepseek_cursor_proxy.server import DeepSeekProxyHandler, DeepSeekProxyServer
|
||||||
from deepseek_cursor_proxy.trace import TraceWriter
|
from deepseek_cursor_proxy.trace import TraceWriter
|
||||||
|
|
||||||
|
|
||||||
class TraceWriterTests(unittest.TestCase):
|
class TraceWriterUnitTests(unittest.TestCase):
|
||||||
def test_writes_manifest_and_numbered_request_files(self) -> None:
|
def test_writes_manifest_and_numbered_request_files(self) -> None:
|
||||||
with TemporaryDirectory() as temp_dir:
|
with TemporaryDirectory() as temp_dir:
|
||||||
writer = TraceWriter(temp_dir)
|
writer = TraceWriter(temp_dir)
|
||||||
|
|
@ -47,17 +58,244 @@ class TraceWriterTests(unittest.TestCase):
|
||||||
headers={"Authorization": "Bearer sk-secret"},
|
headers={"Authorization": "Bearer sk-secret"},
|
||||||
)
|
)
|
||||||
trace.finish("completed", http_status=200)
|
trace.finish("completed", http_status=200)
|
||||||
|
serialized = trace.path.read_text(encoding="utf-8")
|
||||||
payload = json.loads(trace.path.read_text(encoding="utf-8"))
|
|
||||||
serialized = json.dumps(payload)
|
|
||||||
|
|
||||||
self.assertNotIn("sk-secret", serialized)
|
self.assertNotIn("sk-secret", serialized)
|
||||||
|
payload = json.loads(serialized)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
payload["request"]["headers"]["Authorization"]["present"],
|
payload["request"]["headers"]["Authorization"]["present"], True
|
||||||
True,
|
|
||||||
)
|
)
|
||||||
self.assertIn("sha256", payload["request"]["headers"]["Authorization"])
|
self.assertIn("sha256", payload["request"]["headers"]["Authorization"])
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Integration: trace writer attached to a running proxy.
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class _CannedUpstream(BaseHTTPRequestHandler):
    """Fake upstream with two canned replies, selected by the ``stream`` flag.

    Streaming requests receive an SSE sequence (reasoning chunk, content
    chunk with usage, stop chunk, ``[DONE]``); every other request receives a
    JSON completion containing reasoning and one ``get_date`` tool call.
    """

    # Decoded JSON body of every POST received; shared across handler objects.
    requests: list[dict[str, object]] = []

    def log_message(self, fmt: str, *args: object) -> None:
        # Silence the default per-request stderr logging.
        return

    def do_POST(self) -> None:
        """Record the request body, then send the matching canned reply."""
        size = int(self.headers.get("Content-Length") or 0)
        payload = json.loads(self.rfile.read(size).decode("utf-8"))
        type(self).requests.append(payload)

        if payload.get("stream"):
            self._send_reasoning_stream()
        else:
            self._send_tool_call_completion()

    def _send_reasoning_stream(self) -> None:
        """Write the four canned SSE events and flush."""
        events = (
            (
                b'data: {"id":"s","object":"chat.completion.chunk","choices":'
                b'[{"index":0,"delta":{"role":"assistant","reasoning_content":"think"},'
                b'"finish_reason":null}]}\n\n'
            ),
            (
                b'data: {"id":"s","object":"chat.completion.chunk","choices":'
                b'[{"index":0,"delta":{"content":"answer"},"finish_reason":null}],'
                b'"usage":{"completion_tokens_details":{"reasoning_tokens":1}}}\n\n'
            ),
            (
                b'data: {"id":"s","object":"chat.completion.chunk",'
                b'"choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}\n\n'
            ),
            b"data: [DONE]\n\n",
        )
        self.send_response(200)
        self.send_header("Content-Type", "text/event-stream")
        self.end_headers()
        for event in events:
            self.wfile.write(event)
        self.wfile.flush()

    def _send_tool_call_completion(self) -> None:
        """Write the canned JSON completion that requests ``get_date``."""
        tool_call = {
            "id": "call_date",
            "type": "function",
            "function": {"name": "get_date", "arguments": "{}"},
        }
        message = {
            "role": "assistant",
            "content": "",
            "reasoning_content": "I need the date.",
            "tool_calls": [tool_call],
        }
        completion = {
            "id": "tool",
            "object": "chat.completion",
            "model": "deepseek-v4-pro",
            "choices": [
                {"index": 0, "finish_reason": "tool_calls", "message": message}
            ],
        }
        encoded = json.dumps(completion).encode("utf-8")
        self.send_response(200)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(encoded)))
        self.end_headers()
        self.wfile.write(encoded)
|
||||||
|
|
||||||
|
|
||||||
|
class _Fixture:
    """Serve an HTTP server from a background daemon thread for one test."""

    def __init__(self, server: ThreadingHTTPServer) -> None:
        """Start ``server.serve_forever`` on a new daemon thread."""
        worker = threading.Thread(target=server.serve_forever, daemon=True)
        self.server = server
        self.thread = worker
        worker.start()

    @property
    def url(self) -> str:
        """Base ``http://host:port`` URL built from the server's bound address."""
        host, port = self.server.server_address
        return "http://{}:{}".format(host, port)

    def close(self) -> None:
        """Stop serving, release the socket, and wait briefly for the thread."""
        server = self.server
        server.shutdown()
        server.server_close()
        # Bounded join so a stuck handler cannot hang the test run.
        self.thread.join(timeout=5)
|
||||||
|
|
||||||
|
|
||||||
|
def _read_single_trace(session_dir: Path) -> dict:
    """Return the parsed contents of the only ``request-*.json`` in the directory.

    Polls for up to two seconds until at least one matching file exists.

    Raises:
        AssertionError: if the number of matching files is not exactly one.
    """
    give_up_at = time.monotonic() + 2
    while True:
        found = sorted(session_dir.glob("request-*.json"))
        if found or time.monotonic() >= give_up_at:
            break
        time.sleep(0.01)
    if len(found) != 1:
        raise AssertionError(f"expected one trace, found {found}")
    trace_file = found[0]
    return json.loads(trace_file.read_text(encoding="utf-8"))
|
||||||
|
|
||||||
|
|
||||||
|
class TraceIntegrationTests(unittest.TestCase):
    """Run real requests through a proxy with a trace writer attached and
    inspect the trace file written to a temporary directory."""

    def setUp(self) -> None:
        """Start a canned upstream and a tracing proxy on ephemeral ports."""
        # Handler state lives on the class, so reset it for every test.
        _CannedUpstream.requests = []

        # Every resource is registered with addCleanup as soon as it exists so
        # that it is released even when a later setUp step raises (tearDown is
        # skipped in that case). Cleanups run last-in-first-out, which yields
        # the shutdown order proxy -> upstream -> store -> temp dir.
        self.temp_dir = TemporaryDirectory()
        self.addCleanup(self.temp_dir.cleanup)
        self.writer = TraceWriter(self.temp_dir.name)

        self.store = ReasoningStore(":memory:")
        self.addCleanup(self.store.close)

        self.upstream = _Fixture(ThreadingHTTPServer(("127.0.0.1", 0), _CannedUpstream))
        self.addCleanup(self.upstream.close)

        proxy = DeepSeekProxyServer(("127.0.0.1", 0), DeepSeekProxyHandler)
        proxy.config = ProxyConfig(
            upstream_base_url=self.upstream.url,
            upstream_model="deepseek-v4-pro",
            ngrok=False,
        )
        proxy.reasoning_store = self.store
        proxy.trace_writer = self.writer
        self.proxy = _Fixture(proxy)
        self.addCleanup(self.proxy.close)

    def _post(self, payload: dict) -> dict:
        """POST ``payload`` to the proxy's chat endpoint; return the JSON reply."""
        request = Request(
            f"{self.proxy.url}/v1/chat/completions",
            data=json.dumps(payload).encode("utf-8"),
            method="POST",
            headers={
                "Authorization": "Bearer sk-from-cursor",
                "Content-Type": "application/json",
            },
        )
        with urlopen(request, timeout=5) as response:
            return json.loads(response.read())

    def test_captures_non_streaming_replay_without_api_key(self) -> None:
        """The trace holds the request and upstream reasoning, not the key."""
        self._post(
            {
                "model": "deepseek-v4-pro",
                "messages": [{"role": "user", "content": "What is tomorrow's date?"}],
            }
        )
        trace = _read_single_trace(self.writer.session_dir)
        serialized = json.dumps(trace)
        self.assertEqual(trace["completion"]["status"], "completed")
        self.assertEqual(
            trace["request"]["body"]["messages"][0]["content"],
            "What is tomorrow's date?",
        )
        upstream_message = trace["upstream"]["response"]["body"]["json"]["choices"][0][
            "message"
        ]
        self.assertEqual(upstream_message["reasoning_content"], "I need the date.")
        self.assertNotIn("sk-from-cursor", serialized)

    def test_captures_streaming_replay_chunks(self) -> None:
        """Streaming traces record both the upstream chunks and the chunks
        sent back to the client."""
        request = Request(
            f"{self.proxy.url}/v1/chat/completions",
            data=json.dumps(
                {
                    "model": "deepseek-v4-pro",
                    "stream": True,
                    "messages": [{"role": "user", "content": "stream"}],
                }
            ).encode("utf-8"),
            method="POST",
            headers={
                "Authorization": "Bearer sk-test",
                "Content-Type": "application/json",
            },
        )
        with urlopen(request, timeout=2) as response:
            response.read()
        trace = _read_single_trace(self.writer.session_dir)
        self.assertEqual(trace["completion"]["status"], "completed")
        # Upstream side keeps the raw reasoning field...
        self.assertIn(
            "reasoning_content",
            trace["upstream"]["stream"]["chunks"][0]["line"],
        )
        # ...while the first chunk sent to the client carries a <details> tag.
        self.assertIn(
            "<details>", trace["cursor_response"]["stream"]["chunks"][0]["line"]
        )

    def test_captures_recovery_diagnostics(self) -> None:
        """A request that triggers cold-cache recovery records the recovery
        steps + diagnostic counters in the trace."""
        # History contains an assistant tool call without reasoning_content.
        self._post(
            {
                "model": "deepseek-v4-pro",
                "messages": [
                    {"role": "user", "content": "old"},
                    {
                        "role": "assistant",
                        "content": "",
                        "tool_calls": [
                            {
                                "id": "call_x",
                                "type": "function",
                                "function": {"name": "f", "arguments": "{}"},
                            }
                        ],
                    },
                    {"role": "tool", "tool_call_id": "call_x", "content": "result"},
                    {"role": "user", "content": "new"},
                ],
            }
        )
        trace = _read_single_trace(self.writer.session_dir)
        self.assertEqual(
            trace["transform"]["recovery_steps"][0]["strategy"], "latest_user"
        )
        missing = [
            item
            for item in trace["transform"]["reasoning_diagnostics"]
            if item["missing"]
        ]
        self.assertGreaterEqual(len(missing), 1)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue