refactor(proxy): audit thinking-mode protocol and refactor test suite (#33)
parent
b65f0dd8a2
commit
be0310751c
|
|
@ -1,3 +1,6 @@
|
|||
# AIs
|
||||
.claude/
|
||||
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[codz]
|
||||
|
|
|
|||
|
|
@ -134,7 +134,7 @@ Select `deepseek-v4-pro` in Cursor and use chat or agent mode as usual.
|
|||
|
||||
## How It Works
|
||||
|
||||
- **Core fix:** DeepSeek's [thinking mode](https://api-docs.deepseek.com/guides/thinking_mode#tool-calls) requires `reasoning_content` from assistant tool-call messages to be passed back in subsequent requests, but Cursor omits this field, causing a 400 error. The proxy (`Cursor → ngrok → proxy → DeepSeek API`) stores `reasoning_content` from every DeepSeek response in a local SQLite cache, keyed by message signature, tool-call ID, and tool-call function signature, and patches outgoing requests with missing `reasoning_content` before they reach DeepSeek. On a cold cache (proxy restart, model switch), it logs and drops unrecoverable history, continues from the latest user request, and prefixes the next Cursor response with a notice.
|
||||
- **Core fix:** DeepSeek [thinking-mode tool calls](https://api-docs.deepseek.com/guides/thinking_mode#tool-calls) require the complete **multi-round** `reasoning_content` chain to be sent back in later requests. Cursor omits that field, causing a 400 error. The proxy (`Cursor -> ngrok -> proxy -> DeepSeek API`) stores DeepSeek's original `reasoning_content` and patches missing blocks back into outgoing tool-call history.
|
||||
- **Multi-conversation isolation:** To avoid collisions across concurrent conversations, the proxy scopes cache keys by a SHA-256 hash of the canonical conversation prefix (roles, content, and tool calls, excluding `reasoning_content`) plus the upstream model, configuration, and an API-key hash. Different threads get different scopes, so reused tool-call IDs do not collide. Byte-identical cloned histories produce identical scopes.
|
||||
- **Context caching compatibility:** The proxy preserves compatibility by never injecting synthetic thread IDs, timestamps, or cache-control messages. It restores `reasoning_content` as the exact original string, so repeated prefixes remain intact for [DeepSeek context cache](https://api-docs.deepseek.com/guides/kv_cache). Cache hit rates are logged in the terminal output.
|
||||
- **Additional compatibility fixes:** Beyond reasoning repair, the proxy converts legacy `functions`/`function_call` fields to `tools`/`tool_choice`, preserves required and named tool-choice semantics, normalizes `reasoning_effort` aliases, strips mirrored thinking display blocks from assistant content, flattens multi-part content arrays to plain text, and mirrors `reasoning_content` into Cursor-visible Markdown details blocks.
|
||||
|
|
|
|||
|
|
@ -172,8 +172,6 @@ def settings_from_config(
|
|||
|
||||
def normalize_thinking(value: Any) -> str:
    """Normalize a configured thinking mode to ``"enabled"`` or ``"disabled"``.

    The raw value is passed through ``as_str`` with ``DEFAULT_THINKING`` as
    its second argument (presumably the fallback for non-string input --
    confirm against ``as_str``), then stripped and lower-cased.

    Only the two modes accepted by the ``--thinking`` CLI flag are returned
    as-is. Every other spelling, including the retired ``pass-through``
    aliases, resolves to ``DEFAULT_THINKING`` so a value the request builder
    does not recognize is never produced here.
    """
    thinking = as_str(value, DEFAULT_THINKING).strip().lower()
    if thinking in {"enabled", "disabled"}:
        return thinking
    return DEFAULT_THINKING
|
||||
|
|
|
|||
|
|
@ -540,6 +540,8 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler):
|
|||
scope=record_response_scope,
|
||||
prior_messages=record_response_messages,
|
||||
recording_contexts=record_response_contexts,
|
||||
display_reasoning=self.config.display_reasoning,
|
||||
collapsible_reasoning=self.config.collapsible_reasoning,
|
||||
)
|
||||
except (json.JSONDecodeError, UnicodeDecodeError) as exc:
|
||||
LOG.warning("failed to rewrite upstream JSON response: %s", exc)
|
||||
|
|
@ -812,7 +814,7 @@ def build_arg_parser() -> argparse.ArgumentParser:
|
|||
)
|
||||
parser.add_argument(
|
||||
"--thinking",
|
||||
choices=["enabled", "disabled", "pass-through"],
|
||||
choices=["enabled", "disabled"],
|
||||
help="DeepSeek thinking mode, default from config or enabled",
|
||||
)
|
||||
parser.add_argument(
|
||||
|
|
|
|||
|
|
@ -292,3 +292,34 @@ class CursorReasoningDisplayAdapter:
|
|||
}
|
||||
if metadata:
|
||||
self._last_chunk_metadata.update(metadata)
|
||||
|
||||
|
||||
def fold_reasoning_into_content(
    response_payload: dict[str, Any],
    collapsible: bool,
) -> None:
    """Mirror ``reasoning_content`` into the visible ``content`` field.

    Intended for non-streaming responses, matching the streaming
    ``<details>`` layout. The payload is modified in place; nothing is
    returned.

    Args:
        response_payload: Decoded chat-completion response. If ``choices`` is
            missing or not a list, the payload is left untouched.
        collapsible: Selects the collapsible thinking-block markers when
            true, the plain thinking-block markers otherwise.

    For each choice that is a dict holding a dict ``message`` with a
    non-empty string ``reasoning_content``, ``message["content"]`` becomes
    opening marker + reasoning + closing marker + the previous content.
    Previous content that is not a string contributes an empty string.
    Choices that do not match this shape are skipped.
    """
    choices = response_payload.get("choices")
    if not isinstance(choices, list):
        return

    if collapsible:
        opening = COLLAPSIBLE_THINKING_BLOCK_START
        closing = COLLAPSIBLE_THINKING_BLOCK_END
    else:
        opening = THINKING_BLOCK_START
        closing = THINKING_BLOCK_END

    for choice in choices:
        if not isinstance(choice, dict):
            continue
        message = choice.get("message")
        if not isinstance(message, dict):
            continue
        reasoning = message.get("reasoning_content")
        if not isinstance(reasoning, str) or not reasoning:
            continue
        visible = message.get("content")
        if not isinstance(visible, str):
            # Non-string content is replaced rather than concatenated.
            visible = ""
        message["content"] = opening + reasoning + closing + visible
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ from __future__ import annotations
|
|||
from dataclasses import dataclass, field
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from typing import Any
|
||||
|
||||
|
|
@ -15,6 +16,10 @@ from .reasoning_store import (
|
|||
tool_call_signature,
|
||||
turn_context_signature,
|
||||
)
|
||||
from .streaming import fold_reasoning_into_content
|
||||
|
||||
|
||||
LOG = logging.getLogger("deepseek_cursor_proxy")
|
||||
|
||||
|
||||
SUPPORTED_REQUEST_FIELDS = {
|
||||
|
|
@ -35,6 +40,13 @@ SUPPORTED_REQUEST_FIELDS = {
|
|||
"frequency_penalty",
|
||||
"logprobs",
|
||||
"top_logprobs",
|
||||
# Standard OpenAI Chat Completions fields that DeepSeek either honors or
|
||||
# safely ignores. Cursor and most OpenAI SDKs send these unconditionally,
|
||||
# so forwarding keeps clients happy and avoids log spam.
|
||||
"user",
|
||||
"seed",
|
||||
"n",
|
||||
"logit_bias",
|
||||
}
|
||||
|
||||
MESSAGE_FIELDS = {
|
||||
|
|
@ -83,10 +95,6 @@ CURSOR_THINKING_BLOCK_RE = re.compile(
|
|||
)
|
||||
|
||||
RECOVERY_NOTICE_TEXT = "[deepseek-cursor-proxy] Refreshed reasoning_content history."
|
||||
LEGACY_RECOVERY_NOTICE_TEXT = (
|
||||
"Note: recovered this DeepSeek chat because older tool-call reasoning "
|
||||
"was unavailable; continuing with recent context only."
|
||||
)
|
||||
RECOVERY_NOTICE_CONTENT = f"{RECOVERY_NOTICE_TEXT}\n\n"
|
||||
RECOVERY_SYSTEM_CONTENT = (
|
||||
"deepseek-cursor-proxy recovered this request because older DeepSeek "
|
||||
|
|
@ -460,10 +468,33 @@ def has_recovery_notice(message: dict[str, Any]) -> bool:
|
|||
return (
|
||||
message.get("role") == "assistant"
|
||||
and isinstance(content, str)
|
||||
and content.startswith((RECOVERY_NOTICE_TEXT, LEGACY_RECOVERY_NOTICE_TEXT))
|
||||
and content.startswith(RECOVERY_NOTICE_TEXT)
|
||||
)
|
||||
|
||||
|
||||
def strip_recovery_notice_for_upstream(
    messages: list[dict[str, Any]],
) -> list[dict[str, Any]]:
    """Return a copy of ``messages`` without the proxy's recovery notice.

    Cursor echoes the proxy's recovery notice back in later turns. The notice
    serves as a boundary marker for the proxy, but DeepSeek must not see
    proxy-generated prose.

    Args:
        messages: Conversation history as received from the client.

    Returns:
        A new list. Assistant messages whose string content starts with
        ``RECOVERY_NOTICE_TEXT`` are replaced by shallow copies with that
        prefix, and any carriage returns / newlines right after it, removed.
        Every other message is the same object as in the input.

    The input list and its messages are never mutated, so cache scopes and
    recording contexts keep matching the with-prefix history that Cursor will
    send next time.
    """
    upstream_messages: list[dict[str, Any]] = []
    for message in messages:
        content = message.get("content")
        carries_notice = (
            message.get("role") == "assistant"
            and isinstance(content, str)
            and content.startswith(RECOVERY_NOTICE_TEXT)
        )
        if not carries_notice:
            upstream_messages.append(message)
            continue
        # Copy before editing: the caller still needs the original content.
        cleaned = dict(message)
        cleaned["content"] = content[len(RECOVERY_NOTICE_TEXT) :].lstrip("\r\n")
        upstream_messages.append(cleaned)
    return upstream_messages
|
||||
|
||||
|
||||
def leading_system_messages(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||||
leading_messages: list[dict[str, Any]] = []
|
||||
for message in messages:
|
||||
|
|
@ -628,6 +659,11 @@ def assistant_needs_reasoning_for_tool_context(
|
|||
def upstream_model_for(original_model: str, config: ProxyConfig) -> str:
    """Pick the model name to send upstream.

    Args:
        original_model: Model name from the client request.
        config: Proxy configuration; only ``upstream_model`` is read.

    Returns:
        ``original_model`` unchanged when it starts with ``"deepseek-"``,
        otherwise ``config.upstream_model``. The rewrite is logged at WARNING
        level so the substitution is visible to the operator.
    """
    if not original_model.startswith("deepseek-"):
        LOG.warning(
            "rewriting non-DeepSeek model %r to configured fallback %r",
            original_model,
            config.upstream_model,
        )
        return config.upstream_model
    return original_model
|
||||
|
||||
|
||||
|
|
@ -688,6 +724,16 @@ def prepare_upstream_request(
|
|||
prepared = {
|
||||
key: value for key, value in payload.items() if key in SUPPORTED_REQUEST_FIELDS
|
||||
}
|
||||
dropped_fields = sorted(
|
||||
key
|
||||
for key in payload.keys()
|
||||
if key not in SUPPORTED_REQUEST_FIELDS
|
||||
and key not in {"max_completion_tokens", "functions", "function_call"}
|
||||
)
|
||||
if dropped_fields:
|
||||
LOG.warning(
|
||||
"dropping unsupported request field(s): %s", ", ".join(dropped_fields)
|
||||
)
|
||||
if "max_tokens" not in prepared and "max_completion_tokens" in payload:
|
||||
prepared["max_tokens"] = payload["max_completion_tokens"]
|
||||
|
||||
|
|
@ -719,14 +765,9 @@ def prepare_upstream_request(
|
|||
if tool_choice is not None:
|
||||
prepared["tool_choice"] = tool_choice
|
||||
|
||||
if config.thinking != "pass-through":
|
||||
prepared["thinking"] = {"type": config.thinking}
|
||||
|
||||
thinking = prepared.get("thinking")
|
||||
thinking_enabled = isinstance(thinking, dict) and thinking.get("type") == "enabled"
|
||||
thinking_disabled = (
|
||||
isinstance(thinking, dict) and thinking.get("type") == "disabled"
|
||||
)
|
||||
thinking_enabled = config.thinking == "enabled"
|
||||
thinking_disabled = config.thinking == "disabled"
|
||||
if thinking_enabled:
|
||||
prepared["reasoning_effort"] = normalize_reasoning_effort(
|
||||
prepared.get("reasoning_effort") or config.reasoning_effort
|
||||
|
|
@ -797,12 +838,12 @@ def prepare_upstream_request(
|
|||
keep_reasoning=not thinking_disabled,
|
||||
)
|
||||
reasoning_diagnostics.extend(latest_diagnostics)
|
||||
prepared["messages"] = messages
|
||||
active_record_response_scope = conversation_scope(messages, cache_namespace)
|
||||
record_response_contexts = response_recording_contexts(
|
||||
(record_response_scope, record_response_messages),
|
||||
(active_record_response_scope, messages),
|
||||
)
|
||||
prepared["messages"] = strip_recovery_notice_for_upstream(messages)
|
||||
|
||||
return PreparedRequest(
|
||||
payload=prepared,
|
||||
|
|
@ -874,6 +915,8 @@ def rewrite_response_body(
|
|||
scope: str | None = None,
|
||||
prior_messages: list[dict[str, Any]] | None = None,
|
||||
recording_contexts: list[tuple[str, list[dict[str, Any]]]] | None = None,
|
||||
display_reasoning: bool = False,
|
||||
collapsible_reasoning: bool = True,
|
||||
) -> bytes:
|
||||
response_payload = json.loads(body.decode("utf-8"))
|
||||
if isinstance(response_payload, dict):
|
||||
|
|
@ -888,6 +931,8 @@ def rewrite_response_body(
|
|||
prior_messages=prior_messages,
|
||||
recording_contexts=recording_contexts,
|
||||
)
|
||||
if display_reasoning:
|
||||
fold_reasoning_into_content(response_payload, collapsible_reasoning)
|
||||
if "model" in response_payload:
|
||||
response_payload["model"] = original_model
|
||||
return json.dumps(
|
||||
|
|
|
|||
|
|
@ -121,7 +121,7 @@ class ConfigTests(unittest.TestCase):
|
|||
[
|
||||
"base_url: https://example.com/v1/",
|
||||
"model: deepseek-v4-flash",
|
||||
"thinking: pass_through",
|
||||
"thinking: disabled",
|
||||
"reasoning_effort: max",
|
||||
"port: 9100",
|
||||
"host: 0.0.0.0",
|
||||
|
|
@ -145,7 +145,7 @@ class ConfigTests(unittest.TestCase):
|
|||
|
||||
self.assertEqual(config.upstream_base_url, "https://example.com/v1")
|
||||
self.assertEqual(config.upstream_model, "deepseek-v4-flash")
|
||||
self.assertEqual(config.thinking, "pass-through")
|
||||
self.assertEqual(config.thinking, "disabled")
|
||||
self.assertEqual(config.reasoning_effort, "max")
|
||||
self.assertEqual(config.host, "0.0.0.0")
|
||||
self.assertEqual(config.port, 9100)
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
|
@ -1,24 +1,45 @@
|
|||
"""Server boundary, CLI, and operational tests.
|
||||
|
||||
Pure helper tests (gzip, summarize) and stub-handler tests (client
|
||||
disconnect) live near the top. The bottom of the file boots a real proxy +
|
||||
tiny upstream to exercise things that need the HTTP layer: bearer token
|
||||
forwarding, oversized body, missing-bearer rejection, logging modes, and
|
||||
streaming connection close.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import replace
|
||||
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
|
||||
from io import BytesIO
|
||||
import gzip
|
||||
import json
|
||||
from pathlib import Path
|
||||
import threading
|
||||
import time
|
||||
from types import SimpleNamespace
|
||||
import unittest
|
||||
import zlib
|
||||
from urllib.error import HTTPError
|
||||
from urllib.request import Request, urlopen
|
||||
|
||||
from deepseek_cursor_proxy.config import ProxyConfig
|
||||
from deepseek_cursor_proxy.reasoning_store import ReasoningStore
|
||||
from deepseek_cursor_proxy.server import (
|
||||
DeepSeekProxyHandler,
|
||||
DeepSeekProxyServer,
|
||||
build_arg_parser,
|
||||
read_response_body,
|
||||
summarize_chat_payload,
|
||||
)
|
||||
|
||||
|
||||
class FakeResponse:
|
||||
# ---------------------------------------------------------------------------
|
||||
# Stubs for fast in-process tests of internal handler methods
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class _FakeResponse:
|
||||
def __init__(self, body: bytes, encoding: str = "", status: int = 200) -> None:
|
||||
self._body = BytesIO(body)
|
||||
self.headers = {"Content-Encoding": encoding} if encoding else {}
|
||||
|
|
@ -28,7 +49,7 @@ class FakeResponse:
|
|||
return self._body.read()
|
||||
|
||||
|
||||
class FakeStreamingResponse:
|
||||
class _FakeStreamingResponse:
|
||||
status = 200
|
||||
headers = {"Content-Type": "text/event-stream"}
|
||||
|
||||
|
|
@ -43,7 +64,7 @@ class FakeStreamingResponse:
|
|||
return self._lines.pop(0)
|
||||
|
||||
|
||||
class FailingStreamingResponse:
|
||||
class _FailingStreamingResponse:
|
||||
status = 200
|
||||
headers = {"Content-Type": "text/event-stream"}
|
||||
|
||||
|
|
@ -51,7 +72,7 @@ class FailingStreamingResponse:
|
|||
raise OSError("record layer failure")
|
||||
|
||||
|
||||
class BrokenPipeWfile:
|
||||
class _BrokenPipeWfile:
|
||||
def write(self, body: bytes) -> None:
|
||||
raise BrokenPipeError("test disconnect")
|
||||
|
||||
|
|
@ -59,10 +80,10 @@ class BrokenPipeWfile:
|
|||
raise BrokenPipeError("test disconnect")
|
||||
|
||||
|
||||
def make_proxy_handler(wfile: object) -> DeepSeekProxyHandler:
|
||||
def _make_handler_stub(wfile: object, **config: object) -> DeepSeekProxyHandler:
|
||||
handler = object.__new__(DeepSeekProxyHandler)
|
||||
handler.server = SimpleNamespace(
|
||||
config=ProxyConfig(),
|
||||
config=ProxyConfig(**config),
|
||||
reasoning_store=ReasoningStore(":memory:"),
|
||||
)
|
||||
handler.wfile = wfile
|
||||
|
|
@ -73,8 +94,13 @@ def make_proxy_handler(wfile: object) -> DeepSeekProxyHandler:
|
|||
return handler
|
||||
|
||||
|
||||
class ServerTests(unittest.TestCase):
|
||||
def test_cli_boolean_overrides_have_on_and_off_forms(self) -> None:
|
||||
# ---------------------------------------------------------------------------
|
||||
# CLI / pure helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class CliAndHelperTests(unittest.TestCase):
|
||||
def test_cli_boolean_flags_have_on_and_off_forms(self) -> None:
|
||||
args = build_arg_parser().parse_args(
|
||||
[
|
||||
"--no-ngrok",
|
||||
|
|
@ -86,7 +112,6 @@ class ServerTests(unittest.TestCase):
|
|||
"/tmp/dcp-traces",
|
||||
]
|
||||
)
|
||||
|
||||
self.assertFalse(args.ngrok)
|
||||
self.assertFalse(args.verbose)
|
||||
self.assertFalse(args.display_reasoning)
|
||||
|
|
@ -94,19 +119,17 @@ class ServerTests(unittest.TestCase):
|
|||
self.assertTrue(args.cors)
|
||||
self.assertEqual(args.trace_dir, Path("/tmp/dcp-traces"))
|
||||
|
||||
def test_read_response_body_handles_gzip(self) -> None:
|
||||
body = gzip.compress(b'{"ok":true}')
|
||||
|
||||
self.assertEqual(read_response_body(FakeResponse(body, "gzip")), b'{"ok":true}')
|
||||
|
||||
def test_read_response_body_handles_deflate(self) -> None:
|
||||
body = zlib.compress(b'{"ok":true}')
|
||||
|
||||
def test_read_response_body_decodes_gzip_and_deflate(self) -> None:
|
||||
self.assertEqual(
|
||||
read_response_body(FakeResponse(body, "deflate")), b'{"ok":true}'
|
||||
read_response_body(_FakeResponse(gzip.compress(b'{"ok":1}'), "gzip")),
|
||||
b'{"ok":1}',
|
||||
)
|
||||
self.assertEqual(
|
||||
read_response_body(_FakeResponse(zlib.compress(b'{"ok":1}'), "deflate")),
|
||||
b'{"ok":1}',
|
||||
)
|
||||
|
||||
def test_summarize_chat_payload_does_not_include_message_content(self) -> None:
|
||||
def test_summarize_chat_payload_omits_message_content(self) -> None:
|
||||
summary = summarize_chat_payload(
|
||||
{
|
||||
"model": "deepseek-v4-pro",
|
||||
|
|
@ -116,18 +139,22 @@ class ServerTests(unittest.TestCase):
|
|||
"tool_choice": "auto",
|
||||
}
|
||||
)
|
||||
|
||||
self.assertIn("model='deepseek-v4-pro'", summary)
|
||||
self.assertIn("stream=True", summary)
|
||||
self.assertIn("messages=1", summary)
|
||||
self.assertIn("tools=1", summary)
|
||||
self.assertNotIn("secret prompt", summary)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Client-disconnect / upstream-failure stubs (no real HTTP needed)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class HandlerStubTests(unittest.TestCase):
|
||||
def test_regular_response_handles_client_disconnect(self) -> None:
|
||||
handler = make_proxy_handler(BrokenPipeWfile())
|
||||
handler = _make_handler_stub(_BrokenPipeWfile())
|
||||
body = json.dumps(
|
||||
{
|
||||
"id": "chatcmpl-test",
|
||||
"id": "x",
|
||||
"object": "chat.completion",
|
||||
"model": "deepseek-v4-pro",
|
||||
"choices": [
|
||||
|
|
@ -139,116 +166,324 @@ class ServerTests(unittest.TestCase):
|
|||
],
|
||||
}
|
||||
).encode("utf-8")
|
||||
|
||||
try:
|
||||
with self.assertLogs("deepseek_cursor_proxy", level="WARNING") as captured:
|
||||
sent = handler._proxy_regular_response(
|
||||
FakeResponse(body),
|
||||
result = handler._proxy_regular_response(
|
||||
_FakeResponse(body),
|
||||
"deepseek-v4-pro",
|
||||
[{"role": "user", "content": "hi"}],
|
||||
"cache-namespace",
|
||||
"ns",
|
||||
)
|
||||
finally:
|
||||
handler.server.reasoning_store.close()
|
||||
|
||||
self.assertFalse(sent.sent)
|
||||
self.assertFalse(result.sent)
|
||||
self.assertIn("sending upstream response body", "\n".join(captured.output))
|
||||
|
||||
def test_streaming_response_stops_on_client_disconnect(self) -> None:
|
||||
handler = make_proxy_handler(BrokenPipeWfile())
|
||||
handler = _make_handler_stub(_BrokenPipeWfile())
|
||||
chunk = {
|
||||
"id": "chatcmpl-stream",
|
||||
"id": "stream",
|
||||
"model": "deepseek-v4-pro",
|
||||
"choices": [
|
||||
{
|
||||
"index": 0,
|
||||
"delta": {"role": "assistant", "content": "hello"},
|
||||
"choices": [{"index": 0, "delta": {"role": "assistant", "content": "hi"}}],
|
||||
}
|
||||
],
|
||||
}
|
||||
response = FakeStreamingResponse(
|
||||
response = _FakeStreamingResponse(
|
||||
[
|
||||
f"data: {json.dumps(chunk)}\n\n".encode("utf-8"),
|
||||
b"data: [DONE]\n\n",
|
||||
]
|
||||
)
|
||||
|
||||
try:
|
||||
with self.assertLogs("deepseek_cursor_proxy", level="WARNING") as captured:
|
||||
sent = handler._proxy_streaming_response(
|
||||
result = handler._proxy_streaming_response(
|
||||
response,
|
||||
"deepseek-v4-pro",
|
||||
[{"role": "user", "content": "hi"}],
|
||||
"cache-namespace",
|
||||
"ns",
|
||||
)
|
||||
finally:
|
||||
handler.server.reasoning_store.close()
|
||||
|
||||
self.assertFalse(sent.sent)
|
||||
self.assertFalse(result.sent)
|
||||
self.assertEqual(response.readline_calls, 1)
|
||||
self.assertIn("sending streaming response chunk", "\n".join(captured.output))
|
||||
|
||||
def test_streaming_response_handles_upstream_read_failure(self) -> None:
|
||||
handler = make_proxy_handler(BytesIO())
|
||||
|
||||
handler = _make_handler_stub(BytesIO())
|
||||
try:
|
||||
with self.assertLogs("deepseek_cursor_proxy", level="WARNING") as captured:
|
||||
sent = handler._proxy_streaming_response(
|
||||
FailingStreamingResponse(),
|
||||
result = handler._proxy_streaming_response(
|
||||
_FailingStreamingResponse(),
|
||||
"deepseek-v4-pro",
|
||||
[{"role": "user", "content": "hi"}],
|
||||
"cache-namespace",
|
||||
"ns",
|
||||
)
|
||||
finally:
|
||||
handler.server.reasoning_store.close()
|
||||
|
||||
self.assertFalse(sent.sent)
|
||||
self.assertFalse(result.sent)
|
||||
self.assertIn(
|
||||
"upstream streaming response read failed",
|
||||
"\n".join(captured.output),
|
||||
"upstream streaming response read failed", "\n".join(captured.output)
|
||||
)
|
||||
|
||||
def test_collapsible_reasoning_has_no_effect_when_display_is_disabled(
|
||||
self,
|
||||
) -> None:
|
||||
def test_collapsible_reasoning_no_effect_when_display_disabled(self) -> None:
|
||||
wfile = BytesIO()
|
||||
handler = make_proxy_handler(wfile)
|
||||
handler.server.config = ProxyConfig(
|
||||
display_reasoning=False,
|
||||
collapsible_reasoning=True,
|
||||
handler = _make_handler_stub(
|
||||
wfile, display_reasoning=False, collapsible_reasoning=True
|
||||
)
|
||||
chunk = {
|
||||
"id": "chatcmpl-stream",
|
||||
"id": "stream",
|
||||
"model": "deepseek-v4-pro",
|
||||
"choices": [
|
||||
{
|
||||
"index": 0,
|
||||
"delta": {"reasoning_content": "Need context."},
|
||||
"choices": [{"index": 0, "delta": {"reasoning_content": "Need context."}}],
|
||||
}
|
||||
],
|
||||
}
|
||||
response = FakeStreamingResponse(
|
||||
response = _FakeStreamingResponse(
|
||||
[
|
||||
f"data: {json.dumps(chunk)}\n\n".encode("utf-8"),
|
||||
b"data: [DONE]\n\n",
|
||||
]
|
||||
)
|
||||
|
||||
try:
|
||||
sent = handler._proxy_streaming_response(
|
||||
handler._proxy_streaming_response(
|
||||
response,
|
||||
"deepseek-v4-pro",
|
||||
[{"role": "user", "content": "hi"}],
|
||||
"cache-namespace",
|
||||
"ns",
|
||||
)
|
||||
finally:
|
||||
handler.server.reasoning_store.close()
|
||||
|
||||
body = wfile.getvalue().decode("utf-8")
|
||||
self.assertTrue(sent.sent)
|
||||
self.assertIn("reasoning_content", body)
|
||||
self.assertNotIn("<details>", body)
|
||||
self.assertNotIn("<think>", body)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# HTTP-level boundary tests: real proxy + tiny upstream
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class _PlainFakeUpstream(BaseHTTPRequestHandler):
    """Returns a fixed plain response and records every request."""

    # Class-level state is shared by every handler instance; tests reset it
    # before each run.
    requests: list[dict[str, object]] = []
    auth_headers: list[str] = []
    # Seconds to keep the streaming connection open after [DONE] was written.
    delay_after_done: float = 0.0
    # JSON body returned for non-streaming requests.
    response: dict[str, object] = {}

    def log_message(self, fmt: str, *args: object) -> None:
        """Discard the base handler's per-request log output."""
        return

    def do_POST(self) -> None:
        """Record the request, then reply with an SSE stream or a JSON body."""
        length = int(self.headers.get("Content-Length") or 0)
        payload = json.loads(self.rfile.read(length).decode("utf-8"))
        cls = self.__class__
        cls.requests.append(payload)
        cls.auth_headers.append(self.headers.get("Authorization", ""))

        if payload.get("stream"):
            self.send_response(200)
            self.send_header("Content-Type", "text/event-stream")
            self.end_headers()
            self.wfile.write(
                b'data: {"choices":[{"index":0,"delta":{"content":"x"}}]}\n\n'
            )
            self.wfile.write(b"data: [DONE]\n\n")
            self.wfile.flush()
            # Optionally linger so the handler does not return right after
            # [DONE] has been flushed.
            if cls.delay_after_done:
                time.sleep(cls.delay_after_done)
            return

        body = json.dumps(cls.response).encode("utf-8")
        self.send_response(200)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)
|
||||
|
||||
|
||||
_BASE_RESPONSE: dict[str, object] = {
|
||||
"id": "x",
|
||||
"object": "chat.completion",
|
||||
"created": 1,
|
||||
"model": "deepseek-v4-pro",
|
||||
"choices": [
|
||||
{
|
||||
"index": 0,
|
||||
"finish_reason": "stop",
|
||||
"message": {"role": "assistant", "content": "ok"},
|
||||
}
|
||||
],
|
||||
"usage": {
|
||||
"prompt_tokens": 20,
|
||||
"completion_tokens": 5,
|
||||
"total_tokens": 25,
|
||||
"prompt_cache_hit_tokens": 12,
|
||||
"prompt_cache_miss_tokens": 8,
|
||||
"completion_tokens_details": {"reasoning_tokens": 3},
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
class _Fixture:
    """Run an HTTP server on a daemon thread for the duration of a test."""

    def __init__(self, server: ThreadingHTTPServer) -> None:
        """Start ``server.serve_forever`` on a background daemon thread."""
        self.server = server
        self.thread = threading.Thread(target=server.serve_forever, daemon=True)
        self.thread.start()

    @property
    def url(self) -> str:
        """Base ``http://host:port`` URL built from the server's bound address."""
        host, port = self.server.server_address
        return f"http://{host}:{port}"

    def close(self) -> None:
        """Stop serving, close the listening socket, and join the thread."""
        self.server.shutdown()
        self.server.server_close()
        # Bounded join so a stuck handler cannot hang the test run.
        self.thread.join(timeout=5)
|
||||
|
||||
|
||||
def _post(url: str, payload: dict, api_key: str = "sk-test") -> tuple[int, dict]:
    """POST ``payload`` as JSON with a bearer token and decode the JSON reply.

    Args:
        url: Full request URL.
        payload: JSON-serializable request body.
        api_key: Value sent as ``Authorization: Bearer <api_key>``.

    Returns:
        ``(status, body)``. HTTP error responses are returned the same way
        (with the error's status code) instead of raising, so callers can
        assert on failure statuses directly.
    """
    request = Request(
        url,
        data=json.dumps(payload).encode("utf-8"),
        method="POST",
        headers={
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        },
    )
    try:
        with urlopen(request, timeout=5) as response:
            return response.status, json.loads(response.read().decode("utf-8"))
    except HTTPError as exc:
        # HTTPError wraps the open error response; close it once the body
        # has been read so the connection is not left dangling.
        try:
            return exc.code, json.loads(exc.read().decode("utf-8"))
        finally:
            exc.close()
|
||||
|
||||
|
||||
class HttpBoundaryTests(unittest.TestCase):
    """Real-HTTP tests that don't fit the protocol suite: things the proxy
    must do at the HTTP boundary regardless of what DeepSeek answers."""

    def setUp(self) -> None:
        """Boot a fake upstream and a proxy pointed at it, both on free ports."""
        # The fake upstream keeps its state on the class, so reset it first.
        _PlainFakeUpstream.requests = []
        _PlainFakeUpstream.auth_headers = []
        _PlainFakeUpstream.delay_after_done = 0.0
        _PlainFakeUpstream.response = dict(_BASE_RESPONSE)
        self.upstream = _Fixture(
            ThreadingHTTPServer(("127.0.0.1", 0), _PlainFakeUpstream)
        )
        self.store = ReasoningStore(":memory:")
        proxy = DeepSeekProxyServer(("127.0.0.1", 0), DeepSeekProxyHandler)
        proxy.config = ProxyConfig(
            upstream_base_url=self.upstream.url,
            upstream_model="deepseek-v4-pro",
            ngrok=False,
        )
        proxy.reasoning_store = self.store
        self.proxy = _Fixture(proxy)

    def tearDown(self) -> None:
        """Shut down the proxy, then the upstream, then the store."""
        self.proxy.close()
        self.upstream.close()
        self.store.close()

    def _request(self) -> dict:
        """Minimal non-streaming chat request used by most tests."""
        return {
            "model": "deepseek-v4-pro",
            "messages": [{"role": "user", "content": "hi"}],
        }

    def _chat_url(self) -> str:
        """Chat-completions endpoint of the proxy under test."""
        return f"{self.proxy.url}/v1/chat/completions"

    def test_rejects_missing_bearer_token(self) -> None:
        request = Request(
            self._chat_url(),
            data=json.dumps(self._request()).encode("utf-8"),
            method="POST",
            headers={"Content-Type": "application/json"},
        )
        with self.assertRaises(HTTPError) as caught:
            urlopen(request, timeout=5)
        # The caught HTTPError still holds the open error response.
        self.addCleanup(caught.exception.close)
        self.assertEqual(caught.exception.code, 401)
        self.assertEqual(_PlainFakeUpstream.requests, [])

    def test_rejects_oversized_request_body(self) -> None:
        self.proxy.server.config = replace(
            self.proxy.server.config, max_request_body_bytes=10
        )
        status, payload = _post(self._chat_url(), self._request())
        self.assertEqual(status, 413)
        self.assertIn("too large", payload["error"]["message"])
        self.assertEqual(_PlainFakeUpstream.requests, [])

    def test_forwards_bearer_token_to_upstream(self) -> None:
        status, _ = _post(
            self._chat_url(),
            self._request(),
            api_key="sk-from-cursor",
        )
        self.assertEqual(status, 200)
        self.assertEqual(_PlainFakeUpstream.auth_headers[0], "Bearer sk-from-cursor")

    def test_streaming_response_closes_after_done_when_upstream_lingers(
        self,
    ) -> None:
        """Cursor relies on the proxy ending the SSE stream at [DONE], even
        if the upstream socket stays open."""
        _PlainFakeUpstream.delay_after_done = 2.0
        request = Request(
            self._chat_url(),
            data=json.dumps(
                {
                    "model": "deepseek-v4-pro",
                    "stream": True,
                    "messages": [{"role": "user", "content": "stream"}],
                }
            ).encode("utf-8"),
            method="POST",
            headers={
                "Authorization": "Bearer sk-test",
                "Content-Type": "application/json",
            },
        )
        started = time.monotonic()
        with urlopen(request, timeout=1) as response:
            body = response.read().decode("utf-8")
        # The upstream lingers for 2s; finishing under 1s shows the proxy
        # ended the stream on its own.
        self.assertLess(time.monotonic() - started, 1.0)
        self.assertIn("data: [DONE]", body)

    def test_normal_logging_summarizes_without_bodies_or_keys(self) -> None:
        with self.assertLogs("deepseek_cursor_proxy", level="INFO") as captured:
            status, _ = _post(
                self._chat_url(),
                self._request(),
                api_key="sk-from-cursor",
            )
            # `└ stats` is emitted on the handler thread *after* the response
            # body hits the socket, so the client may return before it lands.
            deadline = time.monotonic() + 2
            while time.monotonic() < deadline and not any(
                "└ stats" in record for record in captured.output
            ):
                time.sleep(0.01)
        output = "\n".join(captured.output)
        self.assertEqual(status, 200)
        # Single-line stage records keep the log readable.
        for marker in ("┌ cursor", "├ context", "├ send", "└ stats"):
            self.assertIn(marker, output)
        self.assertNotIn("hi", output.split("┌ cursor")[1].split("\n")[0])
        self.assertNotIn("sk-from-cursor", output)

    def test_verbose_logging_includes_bodies_but_redacts_api_key(self) -> None:
        self.proxy.server.config = replace(self.proxy.server.config, verbose=True)
        with self.assertLogs("deepseek_cursor_proxy", level="INFO") as captured:
            _post(
                self._chat_url(),
                self._request(),
                api_key="sk-from-cursor",
            )
        output = "\n".join(captured.output)
        self.assertIn("cursor request body", output)
        self.assertIn("upstream request body", output)
        self.assertNotIn("sk-from-cursor", output)

    def test_healthz_returns_ok(self) -> None:
        with urlopen(f"{self.proxy.url}/healthz", timeout=2) as response:
            self.assertEqual(response.status, 200)
            self.assertEqual(json.loads(response.read())["ok"], True)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ from deepseek_cursor_proxy.reasoning_store import ReasoningStore, conversation_s
|
|||
from deepseek_cursor_proxy.streaming import (
|
||||
CursorReasoningDisplayAdapter,
|
||||
StreamAccumulator,
|
||||
fold_reasoning_into_content,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -430,5 +431,44 @@ class CursorReasoningDisplayAdapterTests(unittest.TestCase):
|
|||
self.assertIsNone(adapter.flush_chunk("deepseek-v4-pro"))
|
||||
|
||||
|
||||
class FoldReasoningTests(unittest.TestCase):
    """Exercises fold_reasoning_into_content on non-streaming payloads."""

    @staticmethod
    def _payload_with_reasoning(reasoning: str) -> dict:
        """Build a one-choice completion whose visible content is "answer"
        and whose ``reasoning_content`` is *reasoning*."""
        message = {
            "role": "assistant",
            "content": "answer",
            "reasoning_content": reasoning,
        }
        return {"choices": [{"index": 0, "message": message}]}

    def test_fold_reasoning_into_non_streaming_content(self) -> None:
        """Non-streaming responses mirror reasoning_content into a visible
        <details> block, matching the streaming layout."""
        payload = self._payload_with_reasoning("thinking")
        fold_reasoning_into_content(payload, collapsible=True)
        folded = payload["choices"][0]["message"]["content"]
        self.assertEqual(
            folded,
            "<details>\n<summary>Thinking</summary>\n\nthinking\n</details>\n\nanswer",
        )

    def test_fold_reasoning_skips_empty_reasoning(self) -> None:
        """An empty reasoning string leaves the content untouched."""
        payload = self._payload_with_reasoning("")
        fold_reasoning_into_content(payload, collapsible=True)
        folded = payload["choices"][0]["message"]["content"]
        self.assertEqual(folded, "answer")
|
||||
|
||||
|
||||
# Run the suite when this module is executed directly.
if __name__ == "__main__":
    unittest.main()
|
||||
|
|
|
|||
|
|
@ -1,14 +1,25 @@
|
|||
"""Trace writer tests, both as a unit (writes/redacts files) and integrated
|
||||
through the proxy (captures real request flow on disk)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
|
||||
import json
|
||||
from pathlib import Path
|
||||
import stat
|
||||
import threading
|
||||
from tempfile import TemporaryDirectory
|
||||
import time
|
||||
import unittest
|
||||
from urllib.request import Request, urlopen
|
||||
|
||||
from deepseek_cursor_proxy.config import ProxyConfig
|
||||
from deepseek_cursor_proxy.reasoning_store import ReasoningStore
|
||||
from deepseek_cursor_proxy.server import DeepSeekProxyHandler, DeepSeekProxyServer
|
||||
from deepseek_cursor_proxy.trace import TraceWriter
|
||||
|
||||
|
||||
class TraceWriterTests(unittest.TestCase):
|
||||
class TraceWriterUnitTests(unittest.TestCase):
|
||||
def test_writes_manifest_and_numbered_request_files(self) -> None:
|
||||
with TemporaryDirectory() as temp_dir:
|
||||
writer = TraceWriter(temp_dir)
|
||||
|
|
@ -47,17 +58,244 @@ class TraceWriterTests(unittest.TestCase):
|
|||
headers={"Authorization": "Bearer sk-secret"},
|
||||
)
|
||||
trace.finish("completed", http_status=200)
|
||||
|
||||
payload = json.loads(trace.path.read_text(encoding="utf-8"))
|
||||
serialized = json.dumps(payload)
|
||||
|
||||
serialized = trace.path.read_text(encoding="utf-8")
|
||||
self.assertNotIn("sk-secret", serialized)
|
||||
payload = json.loads(serialized)
|
||||
self.assertEqual(
|
||||
payload["request"]["headers"]["Authorization"]["present"],
|
||||
True,
|
||||
payload["request"]["headers"]["Authorization"]["present"], True
|
||||
)
|
||||
self.assertIn("sha256", payload["request"]["headers"]["Authorization"])
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Integration: trace writer attached to a running proxy.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class _CannedUpstream(BaseHTTPRequestHandler):
    """Fake upstream that answers every POST with a fixed reply.

    A request whose JSON body has a truthy ``stream`` field receives a
    four-event SSE stream (reasoning delta, content delta with usage, stop
    chunk, ``[DONE]``). Any other request receives a JSON completion holding
    one ``get_date`` tool call together with its ``reasoning_content``.
    Every decoded request body is appended to the class-level ``requests``
    list.
    """

    # Shared by all handler instances so tests can inspect what was received.
    requests: list[dict[str, object]] = []

    # SSE events written, one ``write`` call each, for streaming requests.
    _STREAM_EVENTS = (
        b'data: {"id":"s","object":"chat.completion.chunk","choices":'
        b'[{"index":0,"delta":{"role":"assistant","reasoning_content":"think"},'
        b'"finish_reason":null}]}\n\n',
        b'data: {"id":"s","object":"chat.completion.chunk","choices":'
        b'[{"index":0,"delta":{"content":"answer"},"finish_reason":null}],'
        b'"usage":{"completion_tokens_details":{"reasoning_tokens":1}}}\n\n',
        b'data: {"id":"s","object":"chat.completion.chunk",'
        b'"choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}\n\n',
        b"data: [DONE]\n\n",
    )

    # Completion returned for non-streaming requests. Key order matters
    # because the dict is serialized verbatim.
    _TOOL_CALL_COMPLETION = {
        "id": "tool",
        "object": "chat.completion",
        "model": "deepseek-v4-pro",
        "choices": [
            {
                "index": 0,
                "finish_reason": "tool_calls",
                "message": {
                    "role": "assistant",
                    "content": "",
                    "reasoning_content": "I need the date.",
                    "tool_calls": [
                        {
                            "id": "call_date",
                            "type": "function",
                            "function": {
                                "name": "get_date",
                                "arguments": "{}",
                            },
                        }
                    ],
                },
            }
        ],
    }

    def log_message(self, fmt: str, *args: object) -> None:
        """Suppress the base handler's per-request logging."""
        return

    def do_POST(self) -> None:
        """Record the request body, then send the matching canned reply."""
        body_size = int(self.headers.get("Content-Length") or 0)
        incoming = json.loads(self.rfile.read(body_size).decode("utf-8"))
        type(self).requests.append(incoming)

        if incoming.get("stream"):
            self._reply_with_stream()
        else:
            self._reply_with_tool_call()

    def _reply_with_stream(self) -> None:
        """Send the canned SSE events and flush the socket."""
        self.send_response(200)
        self.send_header("Content-Type", "text/event-stream")
        self.end_headers()
        for event in self._STREAM_EVENTS:
            self.wfile.write(event)
        self.wfile.flush()

    def _reply_with_tool_call(self) -> None:
        """Send the canned tool-call completion as a JSON body."""
        encoded = json.dumps(self._TOOL_CALL_COMPLETION).encode("utf-8")
        self.send_response(200)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(encoded)))
        self.end_headers()
        self.wfile.write(encoded)
|
||||
|
||||
|
||||
class _Fixture:
    """Serves an HTTP server from a daemon thread until ``close`` is called."""

    def __init__(self, server: ThreadingHTTPServer) -> None:
        """Keep *server* and immediately start serving it in the background."""
        self.server = server
        worker = threading.Thread(target=server.serve_forever, daemon=True)
        self.thread = worker
        worker.start()

    @property
    def url(self) -> str:
        """Base ``http://host:port`` URL built from the server's bound address."""
        host, port = self.server.server_address
        return "http://{}:{}".format(host, port)

    def close(self) -> None:
        """Stop the serve loop, release the socket, then wait for the thread."""
        server = self.server
        server.shutdown()
        server.server_close()
        # Bounded wait so a stuck thread cannot hang the test run.
        self.thread.join(timeout=5)
|
||||
|
||||
|
||||
def _read_single_trace(session_dir: Path, timeout: float = 2.0) -> dict:
    """Wait for exactly one ``request-*.json`` trace file and return it parsed.

    The directory is polled every 10 ms until a matching file shows up or
    *timeout* seconds elapse. A file that is present but does not yet decode
    as JSON is retried until the same deadline, in case it is read before
    its writer has finished.

    Args:
        session_dir: Directory that the trace files are written into.
        timeout: Maximum number of seconds to wait; defaults to the
            previously hard-coded two seconds.

    Returns:
        The decoded JSON content of the single trace file.

    Raises:
        AssertionError: If no trace file appears in time, or if more than
            one is present.
        json.JSONDecodeError: If the file still fails to decode once the
            deadline has passed.
    """
    deadline = time.monotonic() + timeout
    while True:
        files = sorted(session_dir.glob("request-*.json"))
        expired = time.monotonic() >= deadline
        if len(files) > 1 or (not files and expired):
            raise AssertionError(f"expected one trace, found {files}")
        if files:
            try:
                return json.loads(files[0].read_text(encoding="utf-8"))
            except json.JSONDecodeError:
                # Possibly a partially written file: retry until the deadline.
                if expired:
                    raise
        time.sleep(0.01)
|
||||
|
||||
|
||||
class TraceIntegrationTests(unittest.TestCase):
    """Drives a live proxy that has a TraceWriter attached and inspects the
    trace file written for each request."""

    def setUp(self) -> None:
        """Start the canned upstream, then a proxy wired to it."""
        _CannedUpstream.requests = []
        upstream_server = ThreadingHTTPServer(("127.0.0.1", 0), _CannedUpstream)
        self.upstream = _Fixture(upstream_server)
        self.store = ReasoningStore(":memory:")
        self.temp_dir = TemporaryDirectory()
        self.writer = TraceWriter(self.temp_dir.name)

        proxy_server = DeepSeekProxyServer(("127.0.0.1", 0), DeepSeekProxyHandler)
        proxy_server.config = ProxyConfig(
            upstream_base_url=self.upstream.url,
            upstream_model="deepseek-v4-pro",
            ngrok=False,
        )
        proxy_server.reasoning_store = self.store
        proxy_server.trace_writer = self.writer
        self.proxy = _Fixture(proxy_server)

    def tearDown(self) -> None:
        """Shut everything down in reverse order of creation."""
        self.proxy.close()
        self.upstream.close()
        self.store.close()
        self.temp_dir.cleanup()

    def _chat_request(self, payload: dict, api_key: str) -> Request:
        """Build a JSON POST to the proxy's chat-completions endpoint."""
        return Request(
            f"{self.proxy.url}/v1/chat/completions",
            data=json.dumps(payload).encode("utf-8"),
            method="POST",
            headers={
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/json",
            },
        )

    def _post(self, payload: dict) -> dict:
        """POST *payload* through the proxy and return the decoded JSON reply."""
        request = self._chat_request(payload, "sk-from-cursor")
        with urlopen(request, timeout=5) as response:
            return json.loads(response.read())

    def test_captures_non_streaming_replay_without_api_key(self) -> None:
        """The trace keeps the request and upstream bodies but not the key."""
        question = "What is tomorrow's date?"
        self._post(
            {
                "model": "deepseek-v4-pro",
                "messages": [{"role": "user", "content": question}],
            }
        )
        trace = _read_single_trace(self.writer.session_dir)
        serialized = json.dumps(trace)

        self.assertEqual(trace["completion"]["status"], "completed")
        recorded_messages = trace["request"]["body"]["messages"]
        self.assertEqual(recorded_messages[0]["content"], "What is tomorrow's date?")
        upstream_message = trace["upstream"]["response"]["body"]["json"]["choices"][
            0
        ]["message"]
        self.assertEqual(upstream_message["reasoning_content"], "I need the date.")
        self.assertNotIn("sk-from-cursor", serialized)

    def test_captures_streaming_replay_chunks(self) -> None:
        """Both the upstream and the Cursor-facing stream chunks are recorded."""
        request = self._chat_request(
            {
                "model": "deepseek-v4-pro",
                "stream": True,
                "messages": [{"role": "user", "content": "stream"}],
            },
            "sk-test",
        )
        with urlopen(request, timeout=2) as response:
            # Drain the stream; only the trace on disk is inspected.
            response.read()

        trace = _read_single_trace(self.writer.session_dir)
        self.assertEqual(trace["completion"]["status"], "completed")
        first_upstream_chunk = trace["upstream"]["stream"]["chunks"][0]
        self.assertIn("reasoning_content", first_upstream_chunk["line"])
        first_cursor_chunk = trace["cursor_response"]["stream"]["chunks"][0]
        self.assertIn("<details>", first_cursor_chunk["line"])

    def test_captures_recovery_diagnostics(self) -> None:
        """A request that triggers cold-cache recovery records the recovery
        steps + diagnostic counters in the trace."""
        orphaned_tool_call = {
            "role": "assistant",
            "content": "",
            "tool_calls": [
                {
                    "id": "call_x",
                    "type": "function",
                    "function": {"name": "f", "arguments": "{}"},
                }
            ],
        }
        self._post(
            {
                "model": "deepseek-v4-pro",
                "messages": [
                    {"role": "user", "content": "old"},
                    orphaned_tool_call,
                    {"role": "tool", "tool_call_id": "call_x", "content": "result"},
                    {"role": "user", "content": "new"},
                ],
            }
        )
        transform = _read_single_trace(self.writer.session_dir)["transform"]
        self.assertEqual(transform["recovery_steps"][0]["strategy"], "latest_user")
        flagged_missing = [
            entry for entry in transform["reasoning_diagnostics"] if entry["missing"]
        ]
        self.assertGreaterEqual(len(flagged_missing), 1)
|
||||
|
||||
|
||||
# Run the suite when this module is executed directly.
if __name__ == "__main__":
    unittest.main()
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue