fix(server): honor missing-reasoning reject mode (#34)

main
Yixing Lao 2026-05-01 22:32:27 +08:00 committed by GitHub
parent be0310751c
commit 7bdf177e0f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 502 additions and 2 deletions

View File

@ -106,6 +106,7 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler):
)
if request_path not in {"/chat/completions", "/v1/chat/completions"}:
LOG.warning("rejected unsupported POST path=%s status=404", request_path)
self._record_request_body_for_trace(trace)
self._send_json(
404,
{"error": {"message": "Only /v1/chat/completions is supported"}},
@ -119,6 +120,7 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler):
"rejected request path=%s status=401 reason=missing_bearer_token",
request_path,
)
self._record_request_body_for_trace(trace)
self._send_json(
401,
{"error": {"message": "Missing Authorization bearer token"}},
@ -161,7 +163,10 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler):
if trace is not None:
trace.record_transform(prepared)
log_context_summary(prepared)
if prepared.missing_reasoning_messages:
if (
prepared.missing_reasoning_messages
and self.config.missing_reasoning_strategy == "reject"
):
LOG.warning(
(
"strict missing-reasoning mode rejected request path=%s "
@ -470,6 +475,32 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler):
raise ValueError("Request body must be a JSON object")
return payload
def _record_request_body_for_trace(self, trace: TraceRequest | None) -> None:
    """Copy the incoming request body into *trace* before the request is rejected.

    Used on early-rejection paths so the trace still shows what the client
    sent. The body is read from ``self.rfile`` according to the
    ``Content-Length`` header; when it cannot or should not be read, an
    "omitted" record with the reason is written instead.

    Whenever the body is not fully consumed (unparseable or negative length,
    oversized body, failed read) the connection is marked for closing, so
    leftover body bytes are never parsed as the start of the next request on
    a persistent connection.

    Args:
        trace: The per-request trace, or ``None`` when tracing is disabled,
            in which case nothing is read or recorded.
    """
    if trace is None:
        return
    try:
        length = int(self.headers.get("Content-Length") or 0)
    except ValueError:
        trace.record_cursor_body_omitted(reason="invalid_content_length")
        # Body size is unknown, so any bytes the client sent stay unread.
        self.close_connection = True
        return
    if length < 0:
        trace.record_cursor_body_omitted(
            reason="invalid_content_length", body_bytes=length
        )
        # Same situation as an unparseable header: the body cannot be skipped.
        self.close_connection = True
        return
    if length > self.config.max_request_body_bytes:
        trace.record_cursor_body_omitted(reason="body_too_large", body_bytes=length)
        # The oversized body is deliberately left unread.
        self.close_connection = True
        return
    try:
        raw_body = self.rfile.read(length)
    except OSError as exc:
        trace.record_cursor_body_omitted(
            reason=f"read_failed:{exc}", body_bytes=length
        )
        # A failed read leaves the stream at an unknown offset.
        self.close_connection = True
        return
    trace.record_cursor_body_bytes(raw_body)
def _upstream_headers(self, stream: bool, authorization: str) -> dict[str, str]:
headers = {
"Authorization": authorization,

View File

@ -206,7 +206,7 @@ class TraceWriter:
"pid": os.getpid(),
"base_dir": str(self.base_dir),
"session_dir": str(self.session_dir),
"format": "one JSON file per proxied POST request",
"format": "one JSON file per traced POST request",
},
)
@ -224,6 +224,26 @@ class TraceRequest:
self.data["request"]["body"] = payload
self.data["request"]["summary"] = payload_summary(payload)
def record_cursor_body_bytes(self, body: bytes) -> None:
    """Store a raw request body in the trace's ``request`` section.

    The byte length is always recorded. The bytes are decoded as UTF-8
    (undecodable sequences are replaced) and parsed as JSON: on success the
    parsed value becomes ``body`` and, for JSON objects only, a ``summary``
    is added; on a JSON syntax error the decoded text is kept under
    ``{"text": ...}``.
    """
    request_entry = self.data["request"]
    request_entry["body_bytes"] = len(body)
    decoded = body.decode("utf-8", errors="replace")
    try:
        parsed = json.loads(decoded)
    except json.JSONDecodeError:
        # Not JSON: keep the text so the trace still shows what was sent.
        request_entry["body"] = {"text": decoded}
    else:
        request_entry["body"] = parsed
        # Only JSON objects are summarised.
        if isinstance(parsed, dict):
            request_entry["summary"] = payload_summary(parsed)
def record_cursor_body_omitted(
    self, *, reason: str, body_bytes: int | None = None
) -> None:
    """Note in the trace that the request body was not captured.

    Writes ``request.body_omitted`` as a dict holding ``reason`` and, only
    when a value other than ``None`` is given, ``body_bytes``.
    """
    details: dict[str, Any] = (
        {"reason": reason}
        if body_bytes is None
        else {"reason": reason, "body_bytes": body_bytes}
    )
    self.data["request"]["body_omitted"] = details
def record_transform(self, prepared: Any) -> None:
self.data["transform"] = {
"original_model": prepared.original_model,

View File

@ -658,6 +658,43 @@ class RecoveryTests(_StrictUpstreamCase):
continue
self.assertNotIn("deepseek-cursor-proxy", message.get("content", ""))
def test_recover_mode_does_not_short_circuit_with_409(self) -> None:
    """`recover` mode forwards an unrecoverable history instead of 409.

    A payload with no user message leaves the recovery loop unable to drop
    anything (`dropped_messages == 0`), so `missing_indexes` stays
    populated. In that case the proxy must NOT answer 409 - it must forward
    to upstream and relay whatever DeepSeek decides. 409 is reserved for
    `reject` mode.
    """
    pending_tool_call = {
        "id": CALL_ID_1,
        "type": "function",
        "function": {"name": "get_date", "arguments": "{}"},
    }
    # History without any user turn: system, assistant tool call, tool result.
    history = [
        {"role": "system", "content": "Be brief."},
        {"role": "assistant", "content": "", "tool_calls": [pending_tool_call]},
        {"role": "tool", "tool_call_id": CALL_ID_1, "content": "2026-04-24"},
    ]
    endpoint = f"{self.proxy.url}/v1/chat/completions"
    status, _ = _post(endpoint, {"model": "deepseek-v4-pro", "messages": history})

    # The strict upstream answers the missing-reasoning history with 400;
    # what matters here is that the proxy did not pre-empt it with a 409.
    self.assertNotEqual(status, 409)
    self.assertEqual(status, 400)
    self.assertEqual(len(StrictFakeDeepSeek.requests), 1)
# ---------------------------------------------------------------------------
# Streaming behaviour

View File

@ -11,6 +11,7 @@ import threading
from tempfile import TemporaryDirectory
import time
import unittest
from urllib.error import HTTPError
from urllib.request import Request, urlopen
from deepseek_cursor_proxy.config import ProxyConfig
@ -207,6 +208,36 @@ class TraceIntegrationTests(unittest.TestCase):
with urlopen(request, timeout=5) as response:
return json.loads(response.read())
def test_traces_unsupported_post_path_with_body(self) -> None:
    """A POST to an unsupported path is rejected with 404 but still traced,
    including its JSON body, and nothing is sent upstream."""
    encoded_body = json.dumps(
        {
            "model": "gpt-4o-mini",
            "messages": [{"role": "user", "content": "summarize"}],
        }
    ).encode("utf-8")
    request = Request(
        f"{self.proxy.url}/v1/summarize",
        data=encoded_body,
        method="POST",
        headers={
            "Authorization": "Bearer sk-from-cursor",
            "Content-Type": "application/json",
        },
    )

    with self.assertRaises(HTTPError) as captured:
        urlopen(request, timeout=5)
    rejection = captured.exception
    self.assertEqual(rejection.code, 404)
    # Drain the error response body.
    rejection.read()

    trace = _read_single_trace(self.writer.session_dir)
    traced_request = trace["request"]
    self.assertEqual(traced_request["method"], "POST")
    self.assertEqual(traced_request["path"], "/v1/summarize")
    self.assertEqual(traced_request["body"]["model"], "gpt-4o-mini")
    self.assertEqual(traced_request["summary"]["model"], "gpt-4o-mini")

    completion = trace["completion"]
    self.assertEqual(completion["status"], "rejected")
    self.assertEqual(completion["http_status"], 404)

    # No transform was recorded and the upstream was never contacted.
    self.assertEqual(trace["transform"], {})
    self.assertEqual(_CannedUpstream.requests, [])
def test_captures_non_streaming_replay_without_api_key(self) -> None:
self._post(
{

View File

@ -23,12 +23,26 @@ from deepseek_cursor_proxy.transform import (
extract_text_content,
normalize_reasoning_effort,
prepare_upstream_request,
reasoning_cache_namespace,
rewrite_response_body,
strip_cursor_thinking_blocks,
strip_recovery_notice_for_upstream,
)
def _default_cache_namespace() -> str:
    """Return the reasoning-cache namespace the tests treat as the default.

    Built from a default ``ProxyConfig`` with the fixed arguments
    ``"deepseek-v4-pro"``, ``{"type": "enabled"}`` and ``"high"``
    (presumably model, thinking setting and reasoning effort - confirm
    against ``reasoning_cache_namespace``).
    """
    default_config = ProxyConfig()
    thinking = {"type": "enabled"}
    return reasoning_cache_namespace(
        default_config, "deepseek-v4-pro", thinking, "high"
    )
def _cache_scope(messages: list[dict]) -> str:
    """Return the conversation scope of *messages* under the default
    cache namespace."""
    namespace = _default_cache_namespace()
    return conversation_scope(messages, namespace)
class ContentHelpersTests(unittest.TestCase):
def test_extract_text_content_flattens_multipart_array(self) -> None:
content = [
@ -334,5 +348,372 @@ class ResponseRewriteTests(unittest.TestCase):
self.assertEqual(usage["prompt_cache_miss_tokens"], 4)
class CrossModeAndModelTests(unittest.TestCase):
    """Regression coverage for PR #28's cross-mode/model context preservation
    (Pro/Flash family normalization, portable turn-scoped keys, recovery
    boundary continuation). Originally shipped with PR #28 in test_transform.py
    and dropped by PR #33's test refactor; restored from commit 5f14da3."""

    # Each test gets a fresh in-memory ReasoningStore, closed again afterwards.
    def setUp(self) -> None:
        self.store = ReasoningStore(":memory:")

    def tearDown(self) -> None:
        self.store.close()

    def test_deepseek_pro_and_flash_share_reasoning_namespace(self) -> None:
        # Namespaces computed for the pro and flash models with otherwise
        # identical arguments must be equal.
        config = ProxyConfig()
        namespace_pro = reasoning_cache_namespace(
            config,
            "deepseek-v4-pro",
            {"type": "enabled"},
            "high",
            "Bearer key-a",
        )
        namespace_flash = reasoning_cache_namespace(
            config,
            "deepseek-v4-flash",
            {"type": "enabled"},
            "high",
            "Bearer key-a",
        )
        self.assertEqual(namespace_pro, namespace_flash)
        prior = [{"role": "user", "content": "read README"}]
        tool_call = {
            "id": "call_shared",
            "type": "function",
            "function": {
                "name": "read_file",
                "arguments": '{"path":"README.md"}',
            },
        }
        # Store the assistant turn (with reasoning) under the pro namespace.
        self.store.store_assistant_message(
            {
                "role": "assistant",
                "content": "",
                "reasoning_content": "Shared DeepSeek reasoning.",
                "tool_calls": [tool_call],
            },
            conversation_scope(prior, namespace_pro),
            namespace_pro,
            prior,
        )
        # Replay the same turn without reasoning_content, this time as a
        # flash-model request with the same authorization.
        prepared = prepare_upstream_request(
            {
                "model": "deepseek-v4-flash",
                "messages": [
                    *prior,
                    {"role": "assistant", "content": "", "tool_calls": [tool_call]},
                ],
            },
            config,
            self.store,
            authorization="Bearer key-a",
        )
        # The reasoning stored for pro must be patched into the flash request.
        self.assertEqual(prepared.missing_reasoning_messages, 0)
        self.assertEqual(
            prepared.payload["messages"][1]["reasoning_content"],
            "Shared DeepSeek reasoning.",
        )

    def test_strict_hit_backfills_portable_cache_for_mode_switch(self) -> None:
        # The two histories differ only in their system prompt.
        agent_prior = [
            {"role": "system", "content": "Agent mode."},
            {"role": "user", "content": "set up the task"},
            {"role": "user", "content": "read README"},
        ]
        plan_prior = [
            {"role": "system", "content": "Plan mode."},
            {"role": "user", "content": "set up the task"},
            {"role": "user", "content": "read README"},
        ]
        tool_call = {
            "id": "call_mode_switch",
            "type": "function",
            "function": {"name": "read_file", "arguments": '{"path":"README.md"}'},
        }
        assistant_message = {
            "role": "assistant",
            "content": "",
            "reasoning_content": "Need README before answering.",
            "tool_calls": [tool_call],
        }
        # Store under Agent scope only — no portable aliases yet.
        self.store.store_assistant_message(
            assistant_message,
            _cache_scope(agent_prior),
        )
        # Agent re-request: strict scope hit, should backfill portable.
        strict_prepared = prepare_upstream_request(
            {
                "model": "deepseek-v4-pro",
                "messages": [
                    *agent_prior,
                    {"role": "assistant", "content": "", "tool_calls": [tool_call]},
                ],
            },
            ProxyConfig(),
            self.store,
        )
        # Plan re-request: scope changed (different system prompt) but the
        # turn signature still matches, so the portable alias hits.
        portable_prepared = prepare_upstream_request(
            {
                "model": "deepseek-v4-pro",
                "messages": [
                    *plan_prior,
                    {"role": "assistant", "content": "", "tool_calls": [tool_call]},
                ],
            },
            ProxyConfig(),
            self.store,
        )
        self.assertEqual(strict_prepared.patched_reasoning_messages, 1)
        self.assertEqual(portable_prepared.patched_reasoning_messages, 1)
        self.assertEqual(portable_prepared.missing_reasoning_messages, 0)
        # Index 3 is the assistant turn that follows the three prior messages.
        self.assertEqual(
            portable_prepared.payload["messages"][3]["reasoning_content"],
            "Need README before answering.",
        )
        # The last diagnostic must report a hit kind with a "portable_" prefix.
        self.assertTrue(
            str(portable_prepared.reasoning_diagnostics[-1]["hit_kind"]).startswith(
                "portable_"
            )
        )

    def test_portable_turn_cache_restores_final_assistant_after_tool_result(
        self,
    ) -> None:
        # Same user message in both modes; plan_user is a separate copy.
        agent_user = {"role": "user", "content": "look up project state"}
        plan_user = dict(agent_user)
        tool_call = {
            "id": "call_project_state",
            "type": "function",
            "function": {"name": "lookup", "arguments": '{"query":"state"}'},
        }
        tool_result = {
            "role": "tool",
            "tool_call_id": "call_project_state",
            "content": '{"state":"ready"}',
        }
        tool_assistant = {
            "role": "assistant",
            "content": "",
            "reasoning_content": "Need the project state.",
            "tool_calls": [tool_call],
        }
        final_assistant = {
            "role": "assistant",
            "content": "The project is ready.",
            "reasoning_content": "The tool result is enough to answer.",
        }
        agent_initial_prior = [
            {"role": "system", "content": "Agent mode."},
            agent_user,
        ]
        agent_final_prior = [*agent_initial_prior, tool_assistant, tool_result]
        # Store both assistant turns under the Agent-mode history: the tool
        # call after the initial prior, the final answer after the tool result.
        self.store.store_assistant_message(
            tool_assistant,
            _cache_scope(agent_initial_prior),
            _default_cache_namespace(),
            agent_initial_prior,
        )
        self.store.store_assistant_message(
            final_assistant,
            _cache_scope(agent_final_prior),
            _default_cache_namespace(),
            agent_final_prior,
        )
        # Replay under a Plan-mode system prompt with reasoning_content
        # removed from both assistant turns, using the "reject" strategy.
        prepared = prepare_upstream_request(
            {
                "model": "deepseek-v4-pro",
                "messages": [
                    {"role": "system", "content": "Plan mode."},
                    plan_user,
                    {"role": "assistant", "content": "", "tool_calls": [tool_call]},
                    tool_result,
                    {"role": "assistant", "content": "The project is ready."},
                    {"role": "user", "content": "continue"},
                ],
            },
            ProxyConfig(missing_reasoning_strategy="reject"),
            self.store,
        )
        # Both assistant turns must be patched; index 4 is the final answer.
        self.assertEqual(prepared.missing_reasoning_messages, 0)
        self.assertEqual(prepared.patched_reasoning_messages, 2)
        self.assertEqual(
            prepared.payload["messages"][4]["reasoning_content"],
            "The tool result is enough to answer.",
        )

    def test_portable_turn_cache_isolated_for_reused_tool_call_id(self) -> None:
        # Two different conversations both happen to reuse the same
        # tool_call.id. Cache must NOT cross-contaminate.
        tool_call = {
            "id": "call_reused",
            "type": "function",
            "function": {"name": "lookup", "arguments": "{}"},
        }
        assistant_a = {
            "role": "assistant",
            "content": "",
            "reasoning_content": "Reasoning for thread A.",
            "tool_calls": [tool_call],
        }
        assistant_b = {
            "role": "assistant",
            "content": "",
            "reasoning_content": "Reasoning for thread B.",
            "tool_calls": [tool_call],
        }
        # The threads share the system prompt and differ only in user text.
        prior_a = [
            {"role": "system", "content": "Agent mode."},
            {"role": "user", "content": "thread A"},
        ]
        prior_b = [
            {"role": "system", "content": "Agent mode."},
            {"role": "user", "content": "thread B"},
        ]
        self.store.store_assistant_message(
            assistant_a,
            _cache_scope(prior_a),
            _default_cache_namespace(),
            prior_a,
        )
        self.store.store_assistant_message(
            assistant_b,
            _cache_scope(prior_b),
            _default_cache_namespace(),
            prior_b,
        )
        # Plan-mode replay of thread A — should retrieve A's reasoning, not B's.
        prepared = prepare_upstream_request(
            {
                "model": "deepseek-v4-pro",
                "messages": [
                    {"role": "system", "content": "Plan mode."},
                    {"role": "user", "content": "thread A"},
                    {"role": "assistant", "content": "", "tool_calls": [tool_call]},
                ],
            },
            ProxyConfig(),
            self.store,
        )
        self.assertEqual(
            prepared.payload["messages"][2]["reasoning_content"],
            "Reasoning for thread A.",
        )

    def test_recovered_response_is_recorded_under_pre_recovery_scope(self) -> None:
        old_tool_call = {
            "id": "call_old",
            "type": "function",
            "function": {
                "name": "read_file",
                "arguments": '{"path":"README.md"}',
            },
        }
        new_tool_call = {
            "id": "call_new",
            "type": "function",
            "function": {"name": "lookup", "arguments": '{"query":"new"}'},
        }
        # The assistant tool-call turn carries no reasoning_content and the
        # store is empty, so the first request needs recovery.
        first_payload = {
            "model": "deepseek-v4-pro",
            "messages": [
                {"role": "user", "content": "old model turn"},
                {"role": "assistant", "content": "", "tool_calls": [old_tool_call]},
                {"role": "tool", "tool_call_id": "call_old", "content": "old result"},
                {"role": "user", "content": "continue with DeepSeek"},
            ],
        }
        first_recovered = prepare_upstream_request(
            first_payload,
            ProxyConfig(missing_reasoning_strategy="recover"),
            self.store,
        )
        self.assertEqual(first_recovered.recovered_reasoning_messages, 1)
        # Simulate DeepSeek's response to the recovered request.
        response_body = json.dumps(
            {
                "id": "chatcmpl-test",
                "object": "chat.completion",
                "model": "deepseek-v4-pro",
                "choices": [
                    {
                        "index": 0,
                        "finish_reason": "tool_calls",
                        "message": {
                            "role": "assistant",
                            "content": "",
                            "reasoning_content": "Need the new lookup.",
                            "tool_calls": [new_tool_call],
                        },
                    }
                ],
            }
        ).encode()
        rewritten = rewrite_response_body(
            response_body,
            "deepseek-v4-pro",
            self.store,
            first_recovered.payload["messages"],
            first_recovered.cache_namespace,
            content_prefix=first_recovered.recovery_notice,
            recording_contexts=first_recovered.record_response_contexts,
        )
        recovered_assistant = json.loads(rewritten)["choices"][0]["message"]
        # Reasoning must be recorded under BOTH scopes — pre-recovery (so
        # subsequent Cursor requests echoing the with-prefix history hit) and
        # post-recovery (so an immediate continuation also hits).
        self.assertEqual(len(first_recovered.record_response_contexts), 2)
        for scope, _messages in first_recovered.record_response_contexts:
            self.assertEqual(
                self.store.get(
                    f"scope:{scope}:signature:{message_signature(recovered_assistant)}"
                ),
                "Need the new lookup.",
            )
        # Drop the reasoning before echoing the message back in the next
        # request (presumably mirroring what Cursor sends - confirm).
        recovered_assistant.pop("reasoning_content", None)
        # Cursor's next request echoes the recovered assistant + tool result.
        # The proxy should detect the recovery boundary, retire the prefix,
        # and continue cleanly without recovering again.
        second_payload = {
            "model": "deepseek-v4-pro",
            "messages": [
                *first_payload["messages"],
                recovered_assistant,
                {"role": "tool", "tool_call_id": "call_new", "content": "new result"},
            ],
        }
        second_prepared = prepare_upstream_request(
            second_payload,
            ProxyConfig(missing_reasoning_strategy="recover"),
            self.store,
        )
        self.assertEqual(second_prepared.missing_reasoning_messages, 0)
        self.assertEqual(second_prepared.recovered_reasoning_messages, 0)
        self.assertEqual(second_prepared.recovery_dropped_messages, 0)
        self.assertTrue(second_prepared.continued_recovery_boundary)
        self.assertGreater(second_prepared.retired_prefix_messages, 0)
        self.assertEqual(
            second_prepared.payload["messages"][2]["reasoning_content"],
            "Need the new lookup.",
        )
if __name__ == "__main__":
unittest.main()