fix(server): honor missing-reasoning reject mode (#34)

2026-05-01 22:32:27 +08:00 · 2026-05-01 22:32:27 +08:00 · 7bdf177e0f
parent be0310751c
commit 7bdf177e0f
5 changed files with 502 additions and 2 deletions
--- a/src/deepseek_cursor_proxy/server.py
+++ b/src/deepseek_cursor_proxy/server.py
@ -106,6 +106,7 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler):
            )
        if request_path not in {"/chat/completions", "/v1/chat/completions"}:
            LOG.warning("rejected unsupported POST path=%s status=404", request_path)
+            self._record_request_body_for_trace(trace)
            self._send_json(
                404,
                {"error": {"message": "Only /v1/chat/completions is supported"}},
@ -119,6 +120,7 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler):
                "rejected request path=%s status=401 reason=missing_bearer_token",
                request_path,
            )
+            self._record_request_body_for_trace(trace)
            self._send_json(
                401,
                {"error": {"message": "Missing Authorization bearer token"}},
@ -161,7 +163,10 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler):
        if trace is not None:
            trace.record_transform(prepared)
        log_context_summary(prepared)
-        if prepared.missing_reasoning_messages:
+        if (
+            prepared.missing_reasoning_messages
+            and self.config.missing_reasoning_strategy == "reject"
+        ):
            LOG.warning(
                (
                    "strict missing-reasoning mode rejected request path=%s "
@ -470,6 +475,32 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler):
            raise ValueError("Request body must be a JSON object")
        return payload

+    def _record_request_body_for_trace(self, trace: TraceRequest | None) -> None:
+        """Best-effort capture of the request body for an early-rejected request.
+
+        Reads up to ``Content-Length`` bytes from ``self.rfile`` and hands them
+        to *trace*. When the body is not read (unparseable or negative length,
+        length above ``self.config.max_request_body_bytes``, or a failed read)
+        the reason is recorded instead and the connection is marked for
+        closing: unread body bytes left on the socket would otherwise be
+        parsed as the start of the next request.
+
+        Args:
+            trace: Trace record for the current request, or ``None``, in which
+                case nothing is read or recorded.
+        """
+        if trace is None:
+            return
+        try:
+            length = int(self.headers.get("Content-Length") or 0)
+        except ValueError:
+            trace.record_cursor_body_omitted(reason="invalid_content_length")
+            # Body size is unknown, so the stream cannot be resynchronized.
+            self.close_connection = True
+            return
+        if length < 0:
+            trace.record_cursor_body_omitted(
+                reason="invalid_content_length", body_bytes=length
+            )
+            self.close_connection = True
+            return
+        if length > self.config.max_request_body_bytes:
+            trace.record_cursor_body_omitted(reason="body_too_large", body_bytes=length)
+            self.close_connection = True
+            return
+        try:
+            raw_body = self.rfile.read(length)
+        except OSError as exc:
+            trace.record_cursor_body_omitted(
+                reason=f"read_failed:{exc}", body_bytes=length
+            )
+            # A failed read leaves the stream position undefined.
+            self.close_connection = True
+            return
+        trace.record_cursor_body_bytes(raw_body)
+
    def _upstream_headers(self, stream: bool, authorization: str) -> dict[str, str]:
        headers = {
            "Authorization": authorization,
--- a/src/deepseek_cursor_proxy/trace.py
+++ b/src/deepseek_cursor_proxy/trace.py
@ -206,7 +206,7 @@ class TraceWriter:
                "pid": os.getpid(),
                "base_dir": str(self.base_dir),
                "session_dir": str(self.session_dir),
-                "format": "one JSON file per proxied POST request",
+                "format": "one JSON file per traced POST request",
            },
        )

@ -224,6 +224,26 @@ class TraceRequest:
        self.data["request"]["body"] = payload
        self.data["request"]["summary"] = payload_summary(payload)

+    def record_cursor_body_bytes(self, body: bytes) -> None:
+        """Record a raw request body, parsed as JSON when possible.
+
+        The byte length is always stored under ``request.body_bytes``. A body
+        that parses as JSON is stored as ``request.body`` (with a
+        ``request.summary`` when it is a JSON object); anything else is
+        stored as ``{"text": <decoded text>}``.
+
+        Args:
+            body: Raw body bytes. Invalid UTF-8 sequences are replaced rather
+                than rejected.
+        """
+        self.data["request"]["body_bytes"] = len(body)
+        text = body.decode("utf-8", errors="replace")
+        try:
+            payload = json.loads(text)
+        except (json.JSONDecodeError, RecursionError):
+            # RecursionError: pathologically nested JSON must not crash
+            # tracing of a rejected request; keep the raw text instead.
+            self.data["request"]["body"] = {"text": text}
+            return
+        self.data["request"]["body"] = payload
+        if isinstance(payload, dict):
+            self.data["request"]["summary"] = payload_summary(payload)
+
+    def record_cursor_body_omitted(
+        self, *, reason: str, body_bytes: int | None = None
+    ) -> None:
+        """Note on the trace that the request body was not captured.
+
+        Args:
+            reason: Short machine-readable explanation for the omission.
+            body_bytes: Declared body size, included only when provided.
+        """
+        details: dict[str, Any] = (
+            {"reason": reason}
+            if body_bytes is None
+            else {"reason": reason, "body_bytes": body_bytes}
+        )
+        self.data["request"]["body_omitted"] = details
+
    def record_transform(self, prepared: Any) -> None:
        self.data["transform"] = {
            "original_model": prepared.original_model,
--- a/tests/test_protocol.py
+++ b/tests/test_protocol.py
@ -658,6 +658,43 @@ class RecoveryTests(_StrictUpstreamCase):
                continue
            self.assertNotIn("deepseek-cursor-proxy", message.get("content", ""))

+    def test_recover_mode_does_not_short_circuit_with_409(self) -> None:
+        """`recover` mode must forward an unrecoverable history upstream.
+
+        The payload has no user message, so the recovery loop cannot drop
+        anything (`dropped_messages == 0`) and `missing_indexes` stays
+        populated. The proxy must NOT answer 409 itself in that case; it
+        must forward the request and relay whatever DeepSeek decides.
+        409 is reserved for `reject` mode.
+        """
+        pending_tool_call = {
+            "id": CALL_ID_1,
+            "type": "function",
+            "function": {"name": "get_date", "arguments": "{}"},
+        }
+        history = [
+            {"role": "system", "content": "Be brief."},
+            {"role": "assistant", "content": "", "tool_calls": [pending_tool_call]},
+            {"role": "tool", "tool_call_id": CALL_ID_1, "content": "2026-04-24"},
+        ]
+        status, _ = _post(
+            f"{self.proxy.url}/v1/chat/completions",
+            {"model": "deepseek-v4-pro", "messages": history},
+        )
+        # The strict upstream answers the missing-reasoning history with 400;
+        # what matters here is that the proxy did not pre-empt it with 409.
+        self.assertNotEqual(status, 409)
+        self.assertEqual(status, 400)
+        # Exactly one request must have reached the fake upstream.
+        self.assertEqual(len(StrictFakeDeepSeek.requests), 1)
+

 # ---------------------------------------------------------------------------
 # Streaming behaviour
--- a/tests/test_trace.py
+++ b/tests/test_trace.py
@ -11,6 +11,7 @@ import threading
 from tempfile import TemporaryDirectory
 import time
 import unittest
+from urllib.error import HTTPError
 from urllib.request import Request, urlopen

 from deepseek_cursor_proxy.config import ProxyConfig
@ -207,6 +208,36 @@ class TraceIntegrationTests(unittest.TestCase):
        with urlopen(request, timeout=5) as response:
            return json.loads(response.read())

+    def test_traces_unsupported_post_path_with_body(self) -> None:
+        """A POST to an unsupported path is rejected with 404 yet still traced."""
+        body = {
+            "model": "gpt-4o-mini",
+            "messages": [{"role": "user", "content": "summarize"}],
+        }
+        request = Request(
+            f"{self.proxy.url}/v1/summarize",
+            data=json.dumps(body).encode("utf-8"),
+            method="POST",
+            headers={
+                "Authorization": "Bearer sk-from-cursor",
+                "Content-Type": "application/json",
+            },
+        )
+        with self.assertRaises(HTTPError) as captured:
+            urlopen(request, timeout=5)
+        error = captured.exception
+        self.assertEqual(error.code, 404)
+        error.read()
+
+        trace = _read_single_trace(self.writer.session_dir)
+        recorded_request = trace["request"]
+        self.assertEqual(recorded_request["method"], "POST")
+        self.assertEqual(recorded_request["path"], "/v1/summarize")
+        self.assertEqual(recorded_request["body"]["model"], "gpt-4o-mini")
+        self.assertEqual(recorded_request["summary"]["model"], "gpt-4o-mini")
+        completion = trace["completion"]
+        self.assertEqual(completion["status"], "rejected")
+        self.assertEqual(completion["http_status"], 404)
+        # Rejected before any transform ran and before anything went upstream.
+        self.assertEqual(trace["transform"], {})
+        self.assertEqual(_CannedUpstream.requests, [])
+
    def test_captures_non_streaming_replay_without_api_key(self) -> None:
        self._post(
            {
--- a/tests/test_transform.py
+++ b/tests/test_transform.py
@ -23,12 +23,26 @@ from deepseek_cursor_proxy.transform import (
    extract_text_content,
    normalize_reasoning_effort,
    prepare_upstream_request,
+    reasoning_cache_namespace,
    rewrite_response_body,
    strip_cursor_thinking_blocks,
    strip_recovery_notice_for_upstream,
 )


+def _default_cache_namespace() -> str:
+    """Return the reasoning-cache namespace used by the tests' default setup."""
+    thinking = {"type": "enabled"}
+    return reasoning_cache_namespace(ProxyConfig(), "deepseek-v4-pro", thinking, "high")
+
+
+def _cache_scope(messages: list[dict]) -> str:
+    """Return the conversation scope of *messages* in the default namespace."""
+    namespace = _default_cache_namespace()
+    return conversation_scope(messages, namespace)
+
+
 class ContentHelpersTests(unittest.TestCase):
    def test_extract_text_content_flattens_multipart_array(self) -> None:
        content = [
@ -334,5 +348,372 @@ class ResponseRewriteTests(unittest.TestCase):
        self.assertEqual(usage["prompt_cache_miss_tokens"], 4)


+class CrossModeAndModelTests(unittest.TestCase):
+    """Regression coverage for PR #28's cross-mode/model context preservation
+    (Pro↔Flash family normalization, portable turn-scoped keys, recovery
+    boundary continuation). Originally shipped with PR #28 in test_transform.py
+    and dropped by PR #33's test refactor; restored from commit 5f14da3."""
+
+    def setUp(self) -> None:
+        self.store = ReasoningStore(":memory:")  # new store for every test
+
+    def tearDown(self) -> None:
+        self.store.close()  # release the store created in setUp
+
+    def test_deepseek_pro_and_flash_share_reasoning_namespace(self) -> None:
+        config = ProxyConfig()
+        namespace_pro = reasoning_cache_namespace(
+            config,
+            "deepseek-v4-pro",
+            {"type": "enabled"},
+            "high",
+            "Bearer key-a",
+        )
+        namespace_flash = reasoning_cache_namespace(
+            config,
+            "deepseek-v4-flash",
+            {"type": "enabled"},
+            "high",
+            "Bearer key-a",
+        )
+        self.assertEqual(namespace_pro, namespace_flash)  # only the model name differs
+
+        prior = [{"role": "user", "content": "read README"}]
+        tool_call = {
+            "id": "call_shared",
+            "type": "function",
+            "function": {
+                "name": "read_file",
+                "arguments": '{"path":"README.md"}',
+            },
+        }
+        self.store.store_assistant_message(  # seed reasoning under the Pro namespace
+            {
+                "role": "assistant",
+                "content": "",
+                "reasoning_content": "Shared DeepSeek reasoning.",
+                "tool_calls": [tool_call],
+            },
+            conversation_scope(prior, namespace_pro),
+            namespace_pro,
+            prior,
+        )
+
+        prepared = prepare_upstream_request(  # same turn, requested as the Flash model
+            {
+                "model": "deepseek-v4-flash",
+                "messages": [
+                    *prior,
+                    {"role": "assistant", "content": "", "tool_calls": [tool_call]},
+                ],
+            },
+            config,
+            self.store,
+            authorization="Bearer key-a",
+        )
+
+        self.assertEqual(prepared.missing_reasoning_messages, 0)
+        self.assertEqual(
+            prepared.payload["messages"][1]["reasoning_content"],
+            "Shared DeepSeek reasoning.",
+        )
+
+    def test_strict_hit_backfills_portable_cache_for_mode_switch(self) -> None:
+        agent_prior = [
+            {"role": "system", "content": "Agent mode."},
+            {"role": "user", "content": "set up the task"},
+            {"role": "user", "content": "read README"},
+        ]
+        plan_prior = [  # identical to agent_prior except for the system prompt
+            {"role": "system", "content": "Plan mode."},
+            {"role": "user", "content": "set up the task"},
+            {"role": "user", "content": "read README"},
+        ]
+        tool_call = {
+            "id": "call_mode_switch",
+            "type": "function",
+            "function": {"name": "read_file", "arguments": '{"path":"README.md"}'},
+        }
+        assistant_message = {
+            "role": "assistant",
+            "content": "",
+            "reasoning_content": "Need README before answering.",
+            "tool_calls": [tool_call],
+        }
+        # Store under Agent scope only — no portable aliases yet.
+        self.store.store_assistant_message(
+            assistant_message,
+            _cache_scope(agent_prior),
+        )
+
+        # Agent re-request: strict scope hit, should backfill portable.
+        strict_prepared = prepare_upstream_request(
+            {
+                "model": "deepseek-v4-pro",
+                "messages": [
+                    *agent_prior,
+                    {"role": "assistant", "content": "", "tool_calls": [tool_call]},
+                ],
+            },
+            ProxyConfig(),
+            self.store,
+        )
+        # Plan re-request: scope changed (different system prompt) but the
+        # turn signature still matches, so the portable alias hits.
+        portable_prepared = prepare_upstream_request(
+            {
+                "model": "deepseek-v4-pro",
+                "messages": [
+                    *plan_prior,
+                    {"role": "assistant", "content": "", "tool_calls": [tool_call]},
+                ],
+            },
+            ProxyConfig(),
+            self.store,
+        )
+
+        self.assertEqual(strict_prepared.patched_reasoning_messages, 1)
+        self.assertEqual(portable_prepared.patched_reasoning_messages, 1)
+        self.assertEqual(portable_prepared.missing_reasoning_messages, 0)
+        self.assertEqual(
+            portable_prepared.payload["messages"][3]["reasoning_content"],
+            "Need README before answering.",
+        )
+        self.assertTrue(  # the last diagnostic must report a "portable_*" hit kind
+            str(portable_prepared.reasoning_diagnostics[-1]["hit_kind"]).startswith(
+                "portable_"
+            )
+        )
+
+    def test_portable_turn_cache_restores_final_assistant_after_tool_result(
+        self,
+    ) -> None:
+        agent_user = {"role": "user", "content": "look up project state"}
+        plan_user = dict(agent_user)  # equal content, separate dict object
+        tool_call = {
+            "id": "call_project_state",
+            "type": "function",
+            "function": {"name": "lookup", "arguments": '{"query":"state"}'},
+        }
+        tool_result = {
+            "role": "tool",
+            "tool_call_id": "call_project_state",
+            "content": '{"state":"ready"}',
+        }
+        tool_assistant = {
+            "role": "assistant",
+            "content": "",
+            "reasoning_content": "Need the project state.",
+            "tool_calls": [tool_call],
+        }
+        final_assistant = {
+            "role": "assistant",
+            "content": "The project is ready.",
+            "reasoning_content": "The tool result is enough to answer.",
+        }
+        agent_initial_prior = [
+            {"role": "system", "content": "Agent mode."},
+            agent_user,
+        ]
+        agent_final_prior = [*agent_initial_prior, tool_assistant, tool_result]
+        self.store.store_assistant_message(  # tool-call turn, stored with its prior messages
+            tool_assistant,
+            _cache_scope(agent_initial_prior),
+            _default_cache_namespace(),
+            agent_initial_prior,
+        )
+        self.store.store_assistant_message(  # final answer, stored after the tool result
+            final_assistant,
+            _cache_scope(agent_final_prior),
+            _default_cache_namespace(),
+            agent_final_prior,
+        )
+
+        prepared = prepare_upstream_request(  # replay under a different system prompt
+            {
+                "model": "deepseek-v4-pro",
+                "messages": [
+                    {"role": "system", "content": "Plan mode."},
+                    plan_user,
+                    {"role": "assistant", "content": "", "tool_calls": [tool_call]},
+                    tool_result,
+                    {"role": "assistant", "content": "The project is ready."},
+                    {"role": "user", "content": "continue"},
+                ],
+            },
+            ProxyConfig(missing_reasoning_strategy="reject"),
+            self.store,
+        )
+
+        self.assertEqual(prepared.missing_reasoning_messages, 0)
+        self.assertEqual(prepared.patched_reasoning_messages, 2)  # both assistant messages
+        self.assertEqual(
+            prepared.payload["messages"][4]["reasoning_content"],
+            "The tool result is enough to answer.",
+        )
+
+    def test_portable_turn_cache_isolated_for_reused_tool_call_id(self) -> None:
+        # Two different conversations both happen to reuse the same
+        # tool_call.id. Cache must NOT cross-contaminate.
+        tool_call = {
+            "id": "call_reused",
+            "type": "function",
+            "function": {"name": "lookup", "arguments": "{}"},
+        }
+        assistant_a = {
+            "role": "assistant",
+            "content": "",
+            "reasoning_content": "Reasoning for thread A.",
+            "tool_calls": [tool_call],
+        }
+        assistant_b = {
+            "role": "assistant",
+            "content": "",
+            "reasoning_content": "Reasoning for thread B.",
+            "tool_calls": [tool_call],
+        }
+        prior_a = [
+            {"role": "system", "content": "Agent mode."},
+            {"role": "user", "content": "thread A"},
+        ]
+        prior_b = [  # differs from prior_a only in the user message
+            {"role": "system", "content": "Agent mode."},
+            {"role": "user", "content": "thread B"},
+        ]
+        self.store.store_assistant_message(
+            assistant_a,
+            _cache_scope(prior_a),
+            _default_cache_namespace(),
+            prior_a,
+        )
+        self.store.store_assistant_message(
+            assistant_b,
+            _cache_scope(prior_b),
+            _default_cache_namespace(),
+            prior_b,
+        )
+
+        # Plan-mode replay of thread A — should retrieve A's reasoning, not B's.
+        prepared = prepare_upstream_request(
+            {
+                "model": "deepseek-v4-pro",
+                "messages": [
+                    {"role": "system", "content": "Plan mode."},
+                    {"role": "user", "content": "thread A"},
+                    {"role": "assistant", "content": "", "tool_calls": [tool_call]},
+                ],
+            },
+            ProxyConfig(),
+            self.store,
+        )
+
+        self.assertEqual(
+            prepared.payload["messages"][2]["reasoning_content"],
+            "Reasoning for thread A.",
+        )
+
+    def test_recovered_response_is_recorded_under_pre_recovery_scope(self) -> None:
+        old_tool_call = {
+            "id": "call_old",
+            "type": "function",
+            "function": {
+                "name": "read_file",
+                "arguments": '{"path":"README.md"}',
+            },
+        }
+        new_tool_call = {
+            "id": "call_new",
+            "type": "function",
+            "function": {"name": "lookup", "arguments": '{"query":"new"}'},
+        }
+        first_payload = {  # the assistant turn carries no reasoning_content
+            "model": "deepseek-v4-pro",
+            "messages": [
+                {"role": "user", "content": "old model turn"},
+                {"role": "assistant", "content": "", "tool_calls": [old_tool_call]},
+                {"role": "tool", "tool_call_id": "call_old", "content": "old result"},
+                {"role": "user", "content": "continue with DeepSeek"},
+            ],
+        }
+        first_recovered = prepare_upstream_request(
+            first_payload,
+            ProxyConfig(missing_reasoning_strategy="recover"),
+            self.store,
+        )
+        self.assertEqual(first_recovered.recovered_reasoning_messages, 1)
+
+        # Simulate DeepSeek's response to the recovered request.
+        response_body = json.dumps(
+            {
+                "id": "chatcmpl-test",
+                "object": "chat.completion",
+                "model": "deepseek-v4-pro",
+                "choices": [
+                    {
+                        "index": 0,
+                        "finish_reason": "tool_calls",
+                        "message": {
+                            "role": "assistant",
+                            "content": "",
+                            "reasoning_content": "Need the new lookup.",
+                            "tool_calls": [new_tool_call],
+                        },
+                    }
+                ],
+            }
+        ).encode()
+        rewritten = rewrite_response_body(
+            response_body,
+            "deepseek-v4-pro",
+            self.store,
+            first_recovered.payload["messages"],
+            first_recovered.cache_namespace,
+            content_prefix=first_recovered.recovery_notice,
+            recording_contexts=first_recovered.record_response_contexts,
+        )
+        recovered_assistant = json.loads(rewritten)["choices"][0]["message"]
+
+        # Reasoning must be recorded under BOTH scopes — pre-recovery (so
+        # subsequent Cursor requests echoing the with-prefix history hit) and
+        # post-recovery (so an immediate continuation also hits).
+        self.assertEqual(len(first_recovered.record_response_contexts), 2)
+        for scope, _messages in first_recovered.record_response_contexts:
+            self.assertEqual(
+                self.store.get(
+                    f"scope:{scope}:signature:{message_signature(recovered_assistant)}"
+                ),
+                "Need the new lookup.",
+            )
+        recovered_assistant.pop("reasoning_content", None)  # echo it back without reasoning
+
+        # Cursor's next request echoes the recovered assistant + tool result.
+        # The proxy should detect the recovery boundary, retire the prefix,
+        # and continue cleanly without recovering again.
+        second_payload = {
+            "model": "deepseek-v4-pro",
+            "messages": [
+                *first_payload["messages"],
+                recovered_assistant,
+                {"role": "tool", "tool_call_id": "call_new", "content": "new result"},
+            ],
+        }
+
+        second_prepared = prepare_upstream_request(
+            second_payload,
+            ProxyConfig(missing_reasoning_strategy="recover"),
+            self.store,
+        )
+
+        self.assertEqual(second_prepared.missing_reasoning_messages, 0)
+        self.assertEqual(second_prepared.recovered_reasoning_messages, 0)  # no second recovery
+        self.assertEqual(second_prepared.recovery_dropped_messages, 0)
+        self.assertTrue(second_prepared.continued_recovery_boundary)
+        self.assertGreater(second_prepared.retired_prefix_messages, 0)
+        self.assertEqual(
+            second_prepared.payload["messages"][2]["reasoning_content"],
+            "Need the new lookup.",
+        )
+
+
 if __name__ == "__main__":
    unittest.main()