fix(server): honor missing-reasoning reject mode (#34)
parent
be0310751c
commit
7bdf177e0f
|
|
@ -106,6 +106,7 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler):
|
|||
)
|
||||
if request_path not in {"/chat/completions", "/v1/chat/completions"}:
|
||||
LOG.warning("rejected unsupported POST path=%s status=404", request_path)
|
||||
self._record_request_body_for_trace(trace)
|
||||
self._send_json(
|
||||
404,
|
||||
{"error": {"message": "Only /v1/chat/completions is supported"}},
|
||||
|
|
@ -119,6 +120,7 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler):
|
|||
"rejected request path=%s status=401 reason=missing_bearer_token",
|
||||
request_path,
|
||||
)
|
||||
self._record_request_body_for_trace(trace)
|
||||
self._send_json(
|
||||
401,
|
||||
{"error": {"message": "Missing Authorization bearer token"}},
|
||||
|
|
@ -161,7 +163,10 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler):
|
|||
if trace is not None:
|
||||
trace.record_transform(prepared)
|
||||
log_context_summary(prepared)
|
||||
if prepared.missing_reasoning_messages:
|
||||
if (
|
||||
prepared.missing_reasoning_messages
|
||||
and self.config.missing_reasoning_strategy == "reject"
|
||||
):
|
||||
LOG.warning(
|
||||
(
|
||||
"strict missing-reasoning mode rejected request path=%s "
|
||||
|
|
@ -470,6 +475,32 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler):
|
|||
raise ValueError("Request body must be a JSON object")
|
||||
return payload
|
||||
|
||||
def _record_request_body_for_trace(self, trace: TraceRequest | None) -> None:
    """Capture the body of an early-rejected request into ``trace``.

    Used on reject paths that answer before the normal body parsing runs
    (for example the unsupported-path 404 and the missing-bearer 401), so
    the trace still shows what the client sent.

    Args:
        trace: Trace record for the current request, or ``None`` when
            tracing is disabled. With ``None`` nothing is read or recorded.

    The body is read only when ``Content-Length`` is a non-negative integer
    no larger than ``self.config.max_request_body_bytes``. In every case
    where the body is left unread (unparseable length, negative length,
    oversized body, failed read) an "omitted" entry is recorded and the
    connection is flagged for closing, so unread bytes cannot be
    misinterpreted as the start of the next request on a persistent
    connection.
    """
    if trace is None:
        return
    try:
        length = int(self.headers.get("Content-Length") or 0)
    except ValueError:
        trace.record_cursor_body_omitted(reason="invalid_content_length")
        # The size is unknown, so the body cannot be drained safely.
        self.close_connection = True
        return
    if length < 0:
        trace.record_cursor_body_omitted(
            reason="invalid_content_length", body_bytes=length
        )
        # A negative length is just as unusable as an unparseable one.
        self.close_connection = True
        return
    if length > self.config.max_request_body_bytes:
        trace.record_cursor_body_omitted(reason="body_too_large", body_bytes=length)
        # Deliberately not reading the oversized body; drop the connection.
        self.close_connection = True
        return
    try:
        raw_body = self.rfile.read(length)
    except OSError as exc:
        trace.record_cursor_body_omitted(
            reason=f"read_failed:{exc}", body_bytes=length
        )
        # The stream is in an unknown state after a failed read.
        self.close_connection = True
        return
    trace.record_cursor_body_bytes(raw_body)
|
||||
|
||||
def _upstream_headers(self, stream: bool, authorization: str) -> dict[str, str]:
|
||||
headers = {
|
||||
"Authorization": authorization,
|
||||
|
|
|
|||
|
|
@ -206,7 +206,7 @@ class TraceWriter:
|
|||
"pid": os.getpid(),
|
||||
"base_dir": str(self.base_dir),
|
||||
"session_dir": str(self.session_dir),
|
||||
"format": "one JSON file per proxied POST request",
|
||||
"format": "one JSON file per traced POST request",
|
||||
},
|
||||
)
|
||||
|
||||
|
|
@ -224,6 +224,26 @@ class TraceRequest:
|
|||
self.data["request"]["body"] = payload
|
||||
self.data["request"]["summary"] = payload_summary(payload)
|
||||
|
||||
def record_cursor_body_bytes(self, body: bytes) -> None:
    """Store a raw request body in the trace's ``request`` section.

    Always records the byte length. The bytes are decoded as UTF-8 with
    undecodable sequences replaced, then parsed as JSON:

    * invalid JSON is kept verbatim as ``{"text": <decoded text>}``;
    * valid JSON is stored as parsed, and a ``summary`` is added only when
      the top-level value is a JSON object.
    """
    request = self.data["request"]
    request["body_bytes"] = len(body)
    decoded = body.decode("utf-8", errors="replace")
    try:
        parsed = json.loads(decoded)
    except json.JSONDecodeError:
        # Not JSON: keep the decoded text so the trace is still useful.
        request["body"] = {"text": decoded}
    else:
        request["body"] = parsed
        if isinstance(parsed, dict):
            request["summary"] = payload_summary(parsed)
|
||||
|
||||
def record_cursor_body_omitted(
    self, *, reason: str, body_bytes: int | None = None
) -> None:
    """Note in the trace that the request body was not captured.

    Args:
        reason: Short machine-readable explanation of why it was omitted.
        body_bytes: Declared body size, included only when not ``None``.
    """
    if body_bytes is None:
        details = {"reason": reason}
    else:
        details = {"reason": reason, "body_bytes": body_bytes}
    self.data["request"]["body_omitted"] = details
|
||||
|
||||
def record_transform(self, prepared: Any) -> None:
|
||||
self.data["transform"] = {
|
||||
"original_model": prepared.original_model,
|
||||
|
|
|
|||
|
|
@ -658,6 +658,43 @@ class RecoveryTests(_StrictUpstreamCase):
|
|||
continue
|
||||
self.assertNotIn("deepseek-cursor-proxy", message.get("content", ""))
|
||||
|
||||
def test_recover_mode_does_not_short_circuit_with_409(self) -> None:
    """`recover` mode must forward instead of answering 409 itself.

    The payload has no user message, so the recovery loop cannot drop
    anything (`dropped_messages == 0`) and `missing_indexes` stays
    populated. Even so, the proxy has to pass the request upstream and
    relay DeepSeek's verdict; 409 is reserved for `reject` mode.
    """
    tool_call = {
        "id": CALL_ID_1,
        "type": "function",
        "function": {"name": "get_date", "arguments": "{}"},
    }
    history = [
        {"role": "system", "content": "Be brief."},
        {"role": "assistant", "content": "", "tool_calls": [tool_call]},
        {"role": "tool", "tool_call_id": CALL_ID_1, "content": "2026-04-24"},
    ]
    endpoint = f"{self.proxy.url}/v1/chat/completions"

    status, _ = _post(endpoint, {"model": "deepseek-v4-pro", "messages": history})

    # The strict upstream answers the missing-reasoning history with 400;
    # what matters here is that the proxy did not pre-empt it with a 409.
    self.assertNotEqual(status, 409)
    self.assertEqual(status, 400)
    self.assertEqual(len(StrictFakeDeepSeek.requests), 1)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Streaming behaviour
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ import threading
|
|||
from tempfile import TemporaryDirectory
|
||||
import time
|
||||
import unittest
|
||||
from urllib.error import HTTPError
|
||||
from urllib.request import Request, urlopen
|
||||
|
||||
from deepseek_cursor_proxy.config import ProxyConfig
|
||||
|
|
@ -207,6 +208,36 @@ class TraceIntegrationTests(unittest.TestCase):
|
|||
with urlopen(request, timeout=5) as response:
|
||||
return json.loads(response.read())
|
||||
|
||||
def test_traces_unsupported_post_path_with_body(self) -> None:
    """A POST to an unsupported path is rejected with 404 yet still traced.

    Verifies that the single trace written for the request records the
    method, path, parsed JSON body and its summary, a ``rejected``
    completion with HTTP status 404, an empty transform section, and that
    nothing was forwarded to the canned upstream.
    """
    body = json.dumps(
        {
            "model": "gpt-4o-mini",
            "messages": [{"role": "user", "content": "summarize"}],
        }
    ).encode("utf-8")
    request = Request(
        f"{self.proxy.url}/v1/summarize",
        data=body,
        method="POST",
        headers={
            "Authorization": "Bearer sk-from-cursor",
            "Content-Type": "application/json",
        },
    )
    with self.assertRaises(HTTPError) as captured:
        # Close the response if the call unexpectedly succeeds.
        urlopen(request, timeout=5).close()

    # HTTPError is also the response object and holds the connection;
    # drain it and always close it so the socket is not leaked.
    error_response = captured.exception
    try:
        self.assertEqual(error_response.code, 404)
        error_response.read()
    finally:
        error_response.close()

    trace = _read_single_trace(self.writer.session_dir)
    self.assertEqual(trace["request"]["method"], "POST")
    self.assertEqual(trace["request"]["path"], "/v1/summarize")
    self.assertEqual(trace["request"]["body"]["model"], "gpt-4o-mini")
    self.assertEqual(trace["request"]["summary"]["model"], "gpt-4o-mini")
    self.assertEqual(trace["completion"]["status"], "rejected")
    self.assertEqual(trace["completion"]["http_status"], 404)
    self.assertEqual(trace["transform"], {})
    self.assertEqual(_CannedUpstream.requests, [])
|
||||
|
||||
def test_captures_non_streaming_replay_without_api_key(self) -> None:
|
||||
self._post(
|
||||
{
|
||||
|
|
|
|||
|
|
@ -23,12 +23,26 @@ from deepseek_cursor_proxy.transform import (
|
|||
extract_text_content,
|
||||
normalize_reasoning_effort,
|
||||
prepare_upstream_request,
|
||||
reasoning_cache_namespace,
|
||||
rewrite_response_body,
|
||||
strip_cursor_thinking_blocks,
|
||||
strip_recovery_notice_for_upstream,
|
||||
)
|
||||
|
||||
|
||||
def _default_cache_namespace() -> str:
    """Return the reasoning-cache namespace used by most tests in this module.

    Built from a default ``ProxyConfig`` and the fixed arguments
    ``"deepseek-v4-pro"``, ``{"type": "enabled"}`` and ``"high"``.
    """
    default_config = ProxyConfig()
    model_name = "deepseek-v4-pro"
    return reasoning_cache_namespace(
        default_config, model_name, {"type": "enabled"}, "high"
    )
|
||||
|
||||
|
||||
def _cache_scope(messages: list[dict]) -> str:
    """Return the conversation scope of ``messages`` in the default namespace."""
    namespace = _default_cache_namespace()
    return conversation_scope(messages, namespace)
|
||||
|
||||
|
||||
class ContentHelpersTests(unittest.TestCase):
|
||||
def test_extract_text_content_flattens_multipart_array(self) -> None:
|
||||
content = [
|
||||
|
|
@ -334,5 +348,372 @@ class ResponseRewriteTests(unittest.TestCase):
|
|||
self.assertEqual(usage["prompt_cache_miss_tokens"], 4)
|
||||
|
||||
|
||||
class CrossModeAndModelTests(unittest.TestCase):
    """Regression tests for cross-mode / cross-model context preservation.

    Covers the behaviour introduced by PR #28: Pro↔Flash family
    normalization, portable turn-scoped keys, and recovery boundary
    continuation. These tests originally shipped with PR #28 in
    test_transform.py, were dropped by PR #33's test refactor, and are
    restored here from commit 5f14da3.
    """

    def setUp(self) -> None:
        # Each test gets a private in-memory store.
        self.store = ReasoningStore(":memory:")

    def tearDown(self) -> None:
        self.store.close()

    def test_deepseek_pro_and_flash_share_reasoning_namespace(self) -> None:
        """Reasoning stored for the Pro model is found by a Flash request."""
        config = ProxyConfig()
        bearer = "Bearer key-a"
        namespaces = {
            model: reasoning_cache_namespace(
                config, model, {"type": "enabled"}, "high", bearer
            )
            for model in ("deepseek-v4-pro", "deepseek-v4-flash")
        }
        namespace_pro = namespaces["deepseek-v4-pro"]
        self.assertEqual(namespace_pro, namespaces["deepseek-v4-flash"])

        prior = [{"role": "user", "content": "read README"}]
        tool_call = {
            "id": "call_shared",
            "type": "function",
            "function": {
                "name": "read_file",
                "arguments": '{"path":"README.md"}',
            },
        }
        stored_assistant = {
            "role": "assistant",
            "content": "",
            "reasoning_content": "Shared DeepSeek reasoning.",
            "tool_calls": [tool_call],
        }
        self.store.store_assistant_message(
            stored_assistant,
            conversation_scope(prior, namespace_pro),
            namespace_pro,
            prior,
        )

        # Replay the same turn against the Flash model without reasoning.
        flash_request = {
            "model": "deepseek-v4-flash",
            "messages": [
                *prior,
                {"role": "assistant", "content": "", "tool_calls": [tool_call]},
            ],
        }
        prepared = prepare_upstream_request(
            flash_request, config, self.store, authorization=bearer
        )

        self.assertEqual(prepared.missing_reasoning_messages, 0)
        restored = prepared.payload["messages"][1]["reasoning_content"]
        self.assertEqual(restored, "Shared DeepSeek reasoning.")

    def test_strict_hit_backfills_portable_cache_for_mode_switch(self) -> None:
        """A strict-scope hit backfills the portable alias used after a mode switch."""

        def prior_for(system_prompt: str) -> list[dict]:
            # Fresh message dicts on every call, so histories never share objects.
            return [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": "set up the task"},
                {"role": "user", "content": "read README"},
            ]

        tool_call = {
            "id": "call_mode_switch",
            "type": "function",
            "function": {"name": "read_file", "arguments": '{"path":"README.md"}'},
        }

        def replay(prior: list[dict]):
            # Re-send the history with the assistant turn stripped of reasoning.
            request = {
                "model": "deepseek-v4-pro",
                "messages": [
                    *prior,
                    {"role": "assistant", "content": "", "tool_calls": [tool_call]},
                ],
            }
            return prepare_upstream_request(request, ProxyConfig(), self.store)

        agent_prior = prior_for("Agent mode.")
        plan_prior = prior_for("Plan mode.")
        assistant_message = {
            "role": "assistant",
            "content": "",
            "reasoning_content": "Need README before answering.",
            "tool_calls": [tool_call],
        }
        # Stored under the Agent scope only; no portable aliases exist yet.
        self.store.store_assistant_message(
            assistant_message, _cache_scope(agent_prior)
        )

        # Agent re-request hits the strict scope and should backfill portable.
        strict_prepared = replay(agent_prior)
        # Plan re-request has a different system prompt (so a different
        # scope), but the turn signature still matches the portable alias.
        portable_prepared = replay(plan_prior)

        self.assertEqual(strict_prepared.patched_reasoning_messages, 1)
        self.assertEqual(portable_prepared.patched_reasoning_messages, 1)
        self.assertEqual(portable_prepared.missing_reasoning_messages, 0)
        self.assertEqual(
            portable_prepared.payload["messages"][3]["reasoning_content"],
            "Need README before answering.",
        )
        last_hit = str(portable_prepared.reasoning_diagnostics[-1]["hit_kind"])
        self.assertTrue(last_hit.startswith("portable_"))

    def test_portable_turn_cache_restores_final_assistant_after_tool_result(
        self,
    ) -> None:
        """Both the tool-call turn and the final answer are restored in Plan mode."""
        agent_user = {"role": "user", "content": "look up project state"}
        plan_user = dict(agent_user)
        tool_call = {
            "id": "call_project_state",
            "type": "function",
            "function": {"name": "lookup", "arguments": '{"query":"state"}'},
        }
        tool_result = {
            "role": "tool",
            "tool_call_id": "call_project_state",
            "content": '{"state":"ready"}',
        }
        tool_assistant = {
            "role": "assistant",
            "content": "",
            "reasoning_content": "Need the project state.",
            "tool_calls": [tool_call],
        }
        final_assistant = {
            "role": "assistant",
            "content": "The project is ready.",
            "reasoning_content": "The tool result is enough to answer.",
        }
        agent_initial_prior = [
            {"role": "system", "content": "Agent mode."},
            agent_user,
        ]
        agent_final_prior = [*agent_initial_prior, tool_assistant, tool_result]

        # Record each assistant turn against the history that preceded it.
        recorded_turns = (
            (tool_assistant, agent_initial_prior),
            (final_assistant, agent_final_prior),
        )
        for assistant, preceding in recorded_turns:
            self.store.store_assistant_message(
                assistant,
                _cache_scope(preceding),
                _default_cache_namespace(),
                preceding,
            )

        plan_request = {
            "model": "deepseek-v4-pro",
            "messages": [
                {"role": "system", "content": "Plan mode."},
                plan_user,
                {"role": "assistant", "content": "", "tool_calls": [tool_call]},
                tool_result,
                {"role": "assistant", "content": "The project is ready."},
                {"role": "user", "content": "continue"},
            ],
        }
        prepared = prepare_upstream_request(
            plan_request,
            ProxyConfig(missing_reasoning_strategy="reject"),
            self.store,
        )

        self.assertEqual(prepared.missing_reasoning_messages, 0)
        self.assertEqual(prepared.patched_reasoning_messages, 2)
        self.assertEqual(
            prepared.payload["messages"][4]["reasoning_content"],
            "The tool result is enough to answer.",
        )

    def test_portable_turn_cache_isolated_for_reused_tool_call_id(self) -> None:
        """Two threads reusing one tool_call.id must not share cached reasoning."""
        tool_call = {
            "id": "call_reused",
            "type": "function",
            "function": {"name": "lookup", "arguments": "{}"},
        }
        # Build both conversations first, then store them in A, B order.
        threads = []
        for label in ("A", "B"):
            assistant = {
                "role": "assistant",
                "content": "",
                "reasoning_content": f"Reasoning for thread {label}.",
                "tool_calls": [tool_call],
            }
            prior = [
                {"role": "system", "content": "Agent mode."},
                {"role": "user", "content": f"thread {label}"},
            ]
            threads.append((assistant, prior))
        for assistant, prior in threads:
            self.store.store_assistant_message(
                assistant,
                _cache_scope(prior),
                _default_cache_namespace(),
                prior,
            )

        # Plan-mode replay of thread A must return A's reasoning, not B's.
        replay_of_a = {
            "model": "deepseek-v4-pro",
            "messages": [
                {"role": "system", "content": "Plan mode."},
                {"role": "user", "content": "thread A"},
                {"role": "assistant", "content": "", "tool_calls": [tool_call]},
            ],
        }
        prepared = prepare_upstream_request(replay_of_a, ProxyConfig(), self.store)

        self.assertEqual(
            prepared.payload["messages"][2]["reasoning_content"],
            "Reasoning for thread A.",
        )

    def test_recovered_response_is_recorded_under_pre_recovery_scope(self) -> None:
        """A recovered response is cached so the follow-up request continues cleanly."""
        old_tool_call = {
            "id": "call_old",
            "type": "function",
            "function": {
                "name": "read_file",
                "arguments": '{"path":"README.md"}',
            },
        }
        new_tool_call = {
            "id": "call_new",
            "type": "function",
            "function": {"name": "lookup", "arguments": '{"query":"new"}'},
        }
        first_payload = {
            "model": "deepseek-v4-pro",
            "messages": [
                {"role": "user", "content": "old model turn"},
                {"role": "assistant", "content": "", "tool_calls": [old_tool_call]},
                {"role": "tool", "tool_call_id": "call_old", "content": "old result"},
                {"role": "user", "content": "continue with DeepSeek"},
            ],
        }
        first_recovered = prepare_upstream_request(
            first_payload,
            ProxyConfig(missing_reasoning_strategy="recover"),
            self.store,
        )
        self.assertEqual(first_recovered.recovered_reasoning_messages, 1)

        # Simulate DeepSeek's response to the recovered request.
        upstream_message = {
            "role": "assistant",
            "content": "",
            "reasoning_content": "Need the new lookup.",
            "tool_calls": [new_tool_call],
        }
        upstream_response = {
            "id": "chatcmpl-test",
            "object": "chat.completion",
            "model": "deepseek-v4-pro",
            "choices": [
                {
                    "index": 0,
                    "finish_reason": "tool_calls",
                    "message": upstream_message,
                }
            ],
        }
        rewritten = rewrite_response_body(
            json.dumps(upstream_response).encode(),
            "deepseek-v4-pro",
            self.store,
            first_recovered.payload["messages"],
            first_recovered.cache_namespace,
            content_prefix=first_recovered.recovery_notice,
            recording_contexts=first_recovered.record_response_contexts,
        )
        recovered_assistant = json.loads(rewritten)["choices"][0]["message"]

        # The reasoning must be stored under BOTH scopes: pre-recovery (so
        # later Cursor requests echoing the with-prefix history hit) and
        # post-recovery (so an immediate continuation also hits).
        contexts = first_recovered.record_response_contexts
        self.assertEqual(len(contexts), 2)
        signature = message_signature(recovered_assistant)
        for scope, _messages in contexts:
            cached = self.store.get(f"scope:{scope}:signature:{signature}")
            self.assertEqual(cached, "Need the new lookup.")
        recovered_assistant.pop("reasoning_content", None)

        # Cursor's next request echoes the recovered assistant plus the tool
        # result. The proxy should detect the recovery boundary, retire the
        # prefix, and continue without recovering a second time.
        second_payload = {
            "model": "deepseek-v4-pro",
            "messages": [
                *first_payload["messages"],
                recovered_assistant,
                {"role": "tool", "tool_call_id": "call_new", "content": "new result"},
            ],
        }
        second_prepared = prepare_upstream_request(
            second_payload,
            ProxyConfig(missing_reasoning_strategy="recover"),
            self.store,
        )

        self.assertEqual(second_prepared.missing_reasoning_messages, 0)
        self.assertEqual(second_prepared.recovered_reasoning_messages, 0)
        self.assertEqual(second_prepared.recovery_dropped_messages, 0)
        self.assertTrue(second_prepared.continued_recovery_boundary)
        self.assertGreater(second_prepared.retired_prefix_messages, 0)
        self.assertEqual(
            second_prepared.payload["messages"][2]["reasoning_content"],
            "Need the new lookup.",
        )
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
|
|
|||
Loading…
Reference in New Issue