fix(server): honor missing-reasoning reject mode (#34)

main
Yixing Lao 2026-05-01 22:32:27 +08:00 committed by GitHub
parent be0310751c
commit 7bdf177e0f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 502 additions and 2 deletions

View File

@ -106,6 +106,7 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler):
) )
if request_path not in {"/chat/completions", "/v1/chat/completions"}: if request_path not in {"/chat/completions", "/v1/chat/completions"}:
LOG.warning("rejected unsupported POST path=%s status=404", request_path) LOG.warning("rejected unsupported POST path=%s status=404", request_path)
self._record_request_body_for_trace(trace)
self._send_json( self._send_json(
404, 404,
{"error": {"message": "Only /v1/chat/completions is supported"}}, {"error": {"message": "Only /v1/chat/completions is supported"}},
@ -119,6 +120,7 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler):
"rejected request path=%s status=401 reason=missing_bearer_token", "rejected request path=%s status=401 reason=missing_bearer_token",
request_path, request_path,
) )
self._record_request_body_for_trace(trace)
self._send_json( self._send_json(
401, 401,
{"error": {"message": "Missing Authorization bearer token"}}, {"error": {"message": "Missing Authorization bearer token"}},
@ -161,7 +163,10 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler):
if trace is not None: if trace is not None:
trace.record_transform(prepared) trace.record_transform(prepared)
log_context_summary(prepared) log_context_summary(prepared)
if prepared.missing_reasoning_messages: if (
prepared.missing_reasoning_messages
and self.config.missing_reasoning_strategy == "reject"
):
LOG.warning( LOG.warning(
( (
"strict missing-reasoning mode rejected request path=%s " "strict missing-reasoning mode rejected request path=%s "
@ -470,6 +475,32 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler):
raise ValueError("Request body must be a JSON object") raise ValueError("Request body must be a JSON object")
return payload return payload
def _record_request_body_for_trace(self, trace: TraceRequest | None) -> None:
    """Record the raw request body on ``trace`` for a request rejected early.

    The visible call sites invoke this just before sending a 404 or 401
    response. The body is read from ``self.rfile`` according to the
    ``Content-Length`` header (a missing or empty header counts as 0) and
    passed to ``trace.record_cursor_body_bytes``. When the body cannot or
    should not be read, ``trace.record_cursor_body_omitted`` is called with
    a reason instead.

    Whenever the body is left unread (unparseable or negative length,
    oversized body, failed read) ``self.close_connection`` is set so the
    leftover bytes are never interpreted as the next request on a
    persistent connection.

    Args:
        trace: The trace record for this request, or ``None`` when tracing
            is disabled, in which case nothing is read or recorded.
    """
    if trace is None:
        return
    try:
        length = int(self.headers.get("Content-Length") or 0)
    except ValueError:
        trace.record_cursor_body_omitted(reason="invalid_content_length")
        # The body extent is unknown, so the stream cannot be resynchronised.
        self.close_connection = True
        return
    if length < 0:
        trace.record_cursor_body_omitted(
            reason="invalid_content_length", body_bytes=length
        )
        # A negative length would make read() block until EOF; drop the link.
        self.close_connection = True
        return
    if length > self.config.max_request_body_bytes:
        trace.record_cursor_body_omitted(reason="body_too_large", body_bytes=length)
        self.close_connection = True
        return
    try:
        raw_body = self.rfile.read(length)
    except OSError as exc:
        trace.record_cursor_body_omitted(
            reason=f"read_failed:{exc}", body_bytes=length
        )
        # A failed read leaves the stream in an unknown position.
        self.close_connection = True
        return
    trace.record_cursor_body_bytes(raw_body)
def _upstream_headers(self, stream: bool, authorization: str) -> dict[str, str]: def _upstream_headers(self, stream: bool, authorization: str) -> dict[str, str]:
headers = { headers = {
"Authorization": authorization, "Authorization": authorization,

View File

@ -206,7 +206,7 @@ class TraceWriter:
"pid": os.getpid(), "pid": os.getpid(),
"base_dir": str(self.base_dir), "base_dir": str(self.base_dir),
"session_dir": str(self.session_dir), "session_dir": str(self.session_dir),
"format": "one JSON file per proxied POST request", "format": "one JSON file per traced POST request",
}, },
) )
@ -224,6 +224,26 @@ class TraceRequest:
self.data["request"]["body"] = payload self.data["request"]["body"] = payload
self.data["request"]["summary"] = payload_summary(payload) self.data["request"]["summary"] = payload_summary(payload)
def record_cursor_body_bytes(self, body: bytes) -> None:
    """Store a raw request body in the trace, parsed as JSON when possible.

    Always records the byte length under ``request.body_bytes``. The body is
    decoded as UTF-8 with undecodable bytes replaced, then parsed as JSON.
    On success the parsed value is stored under ``request.body`` and, when
    it is a JSON object, a summary is stored under ``request.summary``.
    When parsing fails the decoded text is stored as ``{"text": ...}``.

    Args:
        body: The raw bytes read from the request.
    """
    request = self.data["request"]
    request["body_bytes"] = len(body)
    text = body.decode("utf-8", errors="replace")
    try:
        payload = json.loads(text)
    except (json.JSONDecodeError, RecursionError):
        # Extremely deep nesting raises RecursionError rather than
        # JSONDecodeError; tracing must not crash on a hostile body, so
        # treat it like any other unparseable payload.
        request["body"] = {"text": text}
        return
    request["body"] = payload
    if isinstance(payload, dict):
        request["summary"] = payload_summary(payload)
def record_cursor_body_omitted(
self, *, reason: str, body_bytes: int | None = None
) -> None:
omitted: dict[str, Any] = {"reason": reason}
if body_bytes is not None:
omitted["body_bytes"] = body_bytes
self.data["request"]["body_omitted"] = omitted
def record_transform(self, prepared: Any) -> None: def record_transform(self, prepared: Any) -> None:
self.data["transform"] = { self.data["transform"] = {
"original_model": prepared.original_model, "original_model": prepared.original_model,

View File

@ -658,6 +658,43 @@ class RecoveryTests(_StrictUpstreamCase):
continue continue
self.assertNotIn("deepseek-cursor-proxy", message.get("content", "")) self.assertNotIn("deepseek-cursor-proxy", message.get("content", ""))
def test_recover_mode_does_not_short_circuit_with_409(self) -> None:
    """In `recover` mode, a history without any user message gives the
    recovery loop nothing to drop (`dropped_messages == 0`), so
    `missing_indexes` stays populated. The proxy must NOT answer 409 in
    that case -- it must forward the request upstream and relay whatever
    DeepSeek decides. 409 is reserved for `reject` mode."""
    pending_tool_call = {
        "id": CALL_ID_1,
        "type": "function",
        "function": {"name": "get_date", "arguments": "{}"},
    }
    history = [
        {"role": "system", "content": "Be brief."},
        {
            "role": "assistant",
            "content": "",
            "tool_calls": [pending_tool_call],
        },
        {
            "role": "tool",
            "tool_call_id": CALL_ID_1,
            "content": "2026-04-24",
        },
    ]
    endpoint = f"{self.proxy.url}/v1/chat/completions"
    status, _ = _post(endpoint, {"model": "deepseek-v4-pro", "messages": history})
    # The strict upstream answers the missing-reasoning history with 400;
    # what matters here is that the proxy did not pre-empt it with a 409.
    self.assertNotEqual(status, 409)
    self.assertEqual(status, 400)
    self.assertEqual(len(StrictFakeDeepSeek.requests), 1)
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Streaming behaviour # Streaming behaviour

View File

@ -11,6 +11,7 @@ import threading
from tempfile import TemporaryDirectory from tempfile import TemporaryDirectory
import time import time
import unittest import unittest
from urllib.error import HTTPError
from urllib.request import Request, urlopen from urllib.request import Request, urlopen
from deepseek_cursor_proxy.config import ProxyConfig from deepseek_cursor_proxy.config import ProxyConfig
@ -207,6 +208,36 @@ class TraceIntegrationTests(unittest.TestCase):
with urlopen(request, timeout=5) as response: with urlopen(request, timeout=5) as response:
return json.loads(response.read()) return json.loads(response.read())
def test_traces_unsupported_post_path_with_body(self) -> None:
    # A POST to a path the proxy does not serve must still produce a trace
    # containing the request body, and must never reach the upstream.
    body = json.dumps(
        {
            "model": "gpt-4o-mini",
            "messages": [{"role": "user", "content": "summarize"}],
        }
    ).encode("utf-8")
    request = Request(
        f"{self.proxy.url}/v1/summarize",
        data=body,
        method="POST",
        headers={
            "Authorization": "Bearer sk-from-cursor",
            "Content-Type": "application/json",
        },
    )

    with self.assertRaises(HTTPError) as captured:
        urlopen(request, timeout=5)
    error = captured.exception
    try:
        self.assertEqual(error.code, 404)
        error.read()
    finally:
        # HTTPError doubles as the response object; close it so the
        # underlying connection is released even if an assertion fails.
        error.close()

    trace = _read_single_trace(self.writer.session_dir)
    self.assertEqual(trace["request"]["method"], "POST")
    self.assertEqual(trace["request"]["path"], "/v1/summarize")
    self.assertEqual(trace["request"]["body"]["model"], "gpt-4o-mini")
    self.assertEqual(trace["request"]["summary"]["model"], "gpt-4o-mini")
    self.assertEqual(trace["completion"]["status"], "rejected")
    self.assertEqual(trace["completion"]["http_status"], 404)
    self.assertEqual(trace["transform"], {})
    self.assertEqual(_CannedUpstream.requests, [])
def test_captures_non_streaming_replay_without_api_key(self) -> None: def test_captures_non_streaming_replay_without_api_key(self) -> None:
self._post( self._post(
{ {

View File

@ -23,12 +23,26 @@ from deepseek_cursor_proxy.transform import (
extract_text_content, extract_text_content,
normalize_reasoning_effort, normalize_reasoning_effort,
prepare_upstream_request, prepare_upstream_request,
reasoning_cache_namespace,
rewrite_response_body, rewrite_response_body,
strip_cursor_thinking_blocks, strip_cursor_thinking_blocks,
strip_recovery_notice_for_upstream, strip_recovery_notice_for_upstream,
) )
def _default_cache_namespace() -> str:
    """Return the reasoning-cache namespace for the fixed test defaults.

    Calls ``reasoning_cache_namespace`` with a fresh ``ProxyConfig()`` and
    the constant arguments used throughout these tests.
    """
    fixed_args = ("deepseek-v4-pro", {"type": "enabled"}, "high")
    return reasoning_cache_namespace(ProxyConfig(), *fixed_args)
def _cache_scope(messages: list[dict]) -> str:
    """Return the conversation scope of ``messages`` under the default namespace."""
    namespace = _default_cache_namespace()
    return conversation_scope(messages, namespace)
class ContentHelpersTests(unittest.TestCase): class ContentHelpersTests(unittest.TestCase):
def test_extract_text_content_flattens_multipart_array(self) -> None: def test_extract_text_content_flattens_multipart_array(self) -> None:
content = [ content = [
@ -334,5 +348,372 @@ class ResponseRewriteTests(unittest.TestCase):
self.assertEqual(usage["prompt_cache_miss_tokens"], 4) self.assertEqual(usage["prompt_cache_miss_tokens"], 4)
class CrossModeAndModelTests(unittest.TestCase):
    """Regression coverage for PR #28's cross-mode/model context preservation
    (Pro/Flash family normalization, portable turn-scoped keys, recovery
    boundary continuation). Originally shipped with PR #28 in test_transform.py
    and dropped by PR #33's test refactor; restored from commit 5f14da3."""

    def setUp(self) -> None:
        # Each test gets its own in-memory store.
        self.store = ReasoningStore(":memory:")

    def tearDown(self) -> None:
        self.store.close()

    def test_deepseek_pro_and_flash_share_reasoning_namespace(self) -> None:
        config = ProxyConfig()
        bearer = "Bearer key-a"
        namespaces = {
            model: reasoning_cache_namespace(
                config,
                model,
                {"type": "enabled"},
                "high",
                bearer,
            )
            for model in ("deepseek-v4-pro", "deepseek-v4-flash")
        }
        pro_namespace = namespaces["deepseek-v4-pro"]
        self.assertEqual(pro_namespace, namespaces["deepseek-v4-flash"])

        # Store reasoning under the pro namespace ...
        history = [{"role": "user", "content": "read README"}]
        shared_call = {
            "id": "call_shared",
            "type": "function",
            "function": {
                "name": "read_file",
                "arguments": '{"path":"README.md"}',
            },
        }
        stored_turn = {
            "role": "assistant",
            "content": "",
            "reasoning_content": "Shared DeepSeek reasoning.",
            "tool_calls": [shared_call],
        }
        self.store.store_assistant_message(
            stored_turn,
            conversation_scope(history, pro_namespace),
            pro_namespace,
            history,
        )

        # ... then replay the same turn while asking for the flash model.
        flash_request = {
            "model": "deepseek-v4-flash",
            "messages": [
                *history,
                {"role": "assistant", "content": "", "tool_calls": [shared_call]},
            ],
        }
        prepared = prepare_upstream_request(
            flash_request,
            config,
            self.store,
            authorization=bearer,
        )

        self.assertEqual(prepared.missing_reasoning_messages, 0)
        restored = prepared.payload["messages"][1]["reasoning_content"]
        self.assertEqual(restored, "Shared DeepSeek reasoning.")

    def test_strict_hit_backfills_portable_cache_for_mode_switch(self) -> None:
        def history_for(system_prompt: str) -> list[dict]:
            # Fresh dicts per call so the two histories share no objects.
            return [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": "set up the task"},
                {"role": "user", "content": "read README"},
            ]

        agent_prior = history_for("Agent mode.")
        plan_prior = history_for("Plan mode.")
        mode_switch_call = {
            "id": "call_mode_switch",
            "type": "function",
            "function": {"name": "read_file", "arguments": '{"path":"README.md"}'},
        }
        stored_turn = {
            "role": "assistant",
            "content": "",
            "reasoning_content": "Need README before answering.",
            "tool_calls": [mode_switch_call],
        }

        # Only the Agent scope is populated; no portable aliases exist yet.
        self.store.store_assistant_message(
            stored_turn,
            _cache_scope(agent_prior),
        )

        # Replaying under the Agent history is a strict scope hit, which is
        # expected to backfill the portable entry.
        agent_request = {
            "model": "deepseek-v4-pro",
            "messages": [
                *agent_prior,
                {"role": "assistant", "content": "", "tool_calls": [mode_switch_call]},
            ],
        }
        strict_prepared = prepare_upstream_request(
            agent_request,
            ProxyConfig(),
            self.store,
        )

        # The Plan history has a different system prompt (so a different
        # scope), but the turn signature still matches the portable alias.
        plan_request = {
            "model": "deepseek-v4-pro",
            "messages": [
                *plan_prior,
                {"role": "assistant", "content": "", "tool_calls": [mode_switch_call]},
            ],
        }
        portable_prepared = prepare_upstream_request(
            plan_request,
            ProxyConfig(),
            self.store,
        )

        self.assertEqual(strict_prepared.patched_reasoning_messages, 1)
        self.assertEqual(portable_prepared.patched_reasoning_messages, 1)
        self.assertEqual(portable_prepared.missing_reasoning_messages, 0)
        self.assertEqual(
            portable_prepared.payload["messages"][3]["reasoning_content"],
            "Need README before answering.",
        )
        last_hit_kind = str(portable_prepared.reasoning_diagnostics[-1]["hit_kind"])
        self.assertTrue(last_hit_kind.startswith("portable_"))

    def test_portable_turn_cache_restores_final_assistant_after_tool_result(
        self,
    ) -> None:
        agent_user = {"role": "user", "content": "look up project state"}
        plan_user = dict(agent_user)
        state_call = {
            "id": "call_project_state",
            "type": "function",
            "function": {"name": "lookup", "arguments": '{"query":"state"}'},
        }
        tool_result = {
            "role": "tool",
            "tool_call_id": "call_project_state",
            "content": '{"state":"ready"}',
        }
        tool_assistant = {
            "role": "assistant",
            "content": "",
            "reasoning_content": "Need the project state.",
            "tool_calls": [state_call],
        }
        final_assistant = {
            "role": "assistant",
            "content": "The project is ready.",
            "reasoning_content": "The tool result is enough to answer.",
        }
        agent_initial_prior = [
            {"role": "system", "content": "Agent mode."},
            agent_user,
        ]
        agent_final_prior = [*agent_initial_prior, tool_assistant, tool_result]

        # Record both assistant turns of the Agent-mode conversation.
        recorded_turns = (
            (tool_assistant, agent_initial_prior),
            (final_assistant, agent_final_prior),
        )
        for turn, prior in recorded_turns:
            self.store.store_assistant_message(
                turn,
                _cache_scope(prior),
                _default_cache_namespace(),
                prior,
            )

        # Replay the conversation under a Plan-mode system prompt.
        plan_request = {
            "model": "deepseek-v4-pro",
            "messages": [
                {"role": "system", "content": "Plan mode."},
                plan_user,
                {"role": "assistant", "content": "", "tool_calls": [state_call]},
                tool_result,
                {"role": "assistant", "content": "The project is ready."},
                {"role": "user", "content": "continue"},
            ],
        }
        prepared = prepare_upstream_request(
            plan_request,
            ProxyConfig(missing_reasoning_strategy="reject"),
            self.store,
        )

        self.assertEqual(prepared.missing_reasoning_messages, 0)
        self.assertEqual(prepared.patched_reasoning_messages, 2)
        self.assertEqual(
            prepared.payload["messages"][4]["reasoning_content"],
            "The tool result is enough to answer.",
        )

    def test_portable_turn_cache_isolated_for_reused_tool_call_id(self) -> None:
        # Two unrelated conversations reuse one tool_call.id; their cached
        # reasoning must NOT leak from one thread into the other.
        reused_call = {
            "id": "call_reused",
            "type": "function",
            "function": {"name": "lookup", "arguments": "{}"},
        }
        assistant_a = {
            "role": "assistant",
            "content": "",
            "reasoning_content": "Reasoning for thread A.",
            "tool_calls": [reused_call],
        }
        assistant_b = {
            "role": "assistant",
            "content": "",
            "reasoning_content": "Reasoning for thread B.",
            "tool_calls": [reused_call],
        }
        prior_a = [
            {"role": "system", "content": "Agent mode."},
            {"role": "user", "content": "thread A"},
        ]
        prior_b = [
            {"role": "system", "content": "Agent mode."},
            {"role": "user", "content": "thread B"},
        ]
        for turn, prior in ((assistant_a, prior_a), (assistant_b, prior_b)):
            self.store.store_assistant_message(
                turn,
                _cache_scope(prior),
                _default_cache_namespace(),
                prior,
            )

        # A Plan-mode replay of thread A must get A's reasoning back, not B's.
        replay_request = {
            "model": "deepseek-v4-pro",
            "messages": [
                {"role": "system", "content": "Plan mode."},
                {"role": "user", "content": "thread A"},
                {"role": "assistant", "content": "", "tool_calls": [reused_call]},
            ],
        }
        prepared = prepare_upstream_request(
            replay_request,
            ProxyConfig(),
            self.store,
        )

        self.assertEqual(
            prepared.payload["messages"][2]["reasoning_content"],
            "Reasoning for thread A.",
        )

    def test_recovered_response_is_recorded_under_pre_recovery_scope(self) -> None:
        old_tool_call = {
            "id": "call_old",
            "type": "function",
            "function": {
                "name": "read_file",
                "arguments": '{"path":"README.md"}',
            },
        }
        new_tool_call = {
            "id": "call_new",
            "type": "function",
            "function": {"name": "lookup", "arguments": '{"query":"new"}'},
        }
        first_payload = {
            "model": "deepseek-v4-pro",
            "messages": [
                {"role": "user", "content": "old model turn"},
                {"role": "assistant", "content": "", "tool_calls": [old_tool_call]},
                {"role": "tool", "tool_call_id": "call_old", "content": "old result"},
                {"role": "user", "content": "continue with DeepSeek"},
            ],
        }
        first_recovered = prepare_upstream_request(
            first_payload,
            ProxyConfig(missing_reasoning_strategy="recover"),
            self.store,
        )
        self.assertEqual(first_recovered.recovered_reasoning_messages, 1)

        # Stand-in for the upstream reply to the recovered request.
        upstream_reply = {
            "id": "chatcmpl-test",
            "object": "chat.completion",
            "model": "deepseek-v4-pro",
            "choices": [
                {
                    "index": 0,
                    "finish_reason": "tool_calls",
                    "message": {
                        "role": "assistant",
                        "content": "",
                        "reasoning_content": "Need the new lookup.",
                        "tool_calls": [new_tool_call],
                    },
                }
            ],
        }
        response_body = json.dumps(upstream_reply).encode()
        rewritten = rewrite_response_body(
            response_body,
            "deepseek-v4-pro",
            self.store,
            first_recovered.payload["messages"],
            first_recovered.cache_namespace,
            content_prefix=first_recovered.recovery_notice,
            recording_contexts=first_recovered.record_response_contexts,
        )
        recovered_assistant = json.loads(rewritten)["choices"][0]["message"]

        # The reasoning has to be stored under BOTH scopes: the pre-recovery
        # one (later requests echoing the full history must hit) and the
        # post-recovery one (an immediate continuation must hit as well).
        self.assertEqual(len(first_recovered.record_response_contexts), 2)
        for scope, _messages in first_recovered.record_response_contexts:
            cached = self.store.get(
                f"scope:{scope}:signature:{message_signature(recovered_assistant)}"
            )
            self.assertEqual(cached, "Need the new lookup.")
        recovered_assistant.pop("reasoning_content", None)

        # The follow-up request echoes the recovered assistant turn plus its
        # tool result. The proxy is expected to recognise the recovery
        # boundary, retire the prefix, and carry on without recovering again.
        second_payload = {
            "model": "deepseek-v4-pro",
            "messages": [
                *first_payload["messages"],
                recovered_assistant,
                {"role": "tool", "tool_call_id": "call_new", "content": "new result"},
            ],
        }
        second_prepared = prepare_upstream_request(
            second_payload,
            ProxyConfig(missing_reasoning_strategy="recover"),
            self.store,
        )

        self.assertEqual(second_prepared.missing_reasoning_messages, 0)
        self.assertEqual(second_prepared.recovered_reasoning_messages, 0)
        self.assertEqual(second_prepared.recovery_dropped_messages, 0)
        self.assertTrue(second_prepared.continued_recovery_boundary)
        self.assertGreater(second_prepared.retired_prefix_messages, 0)
        self.assertEqual(
            second_prepared.payload["messages"][2]["reasoning_content"],
            "Need the new lookup.",
        )
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()