fix(server): honor missing-reasoning reject mode (#34)

2026-05-01 22:32:27 +08:00 · 2026-05-01 22:32:27 +08:00 · 7bdf177e0f
parent be0310751c
commit 7bdf177e0f
5 changed files with 502 additions and 2 deletions
--- a/src/deepseek_cursor_proxy/server.py
+++ b/src/deepseek_cursor_proxy/server.py
@ -106,6 +106,7 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler):
            )
        if request_path not in {"/chat/completions", "/v1/chat/completions"}:
            LOG.warning("rejected unsupported POST path=%s status=404", request_path)
            self._record_request_body_for_trace(trace)
            self._send_json(
                404,
                {"error": {"message": "Only /v1/chat/completions is supported"}},
@ -119,6 +120,7 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler):
                "rejected request path=%s status=401 reason=missing_bearer_token",
                request_path,
            )
            self._record_request_body_for_trace(trace)
            self._send_json(
                401,
                {"error": {"message": "Missing Authorization bearer token"}},
@ -161,7 +163,10 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler):
        if trace is not None:
            trace.record_transform(prepared)
        log_context_summary(prepared)
-        if prepared.missing_reasoning_messages:
+        if (
            prepared.missing_reasoning_messages
            and self.config.missing_reasoning_strategy == "reject"
        ):
            LOG.warning(
                (
                    "strict missing-reasoning mode rejected request path=%s "
@ -470,6 +475,32 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler):
            raise ValueError("Request body must be a JSON object")
        return payload
    def _record_request_body_for_trace(self, trace: TraceRequest | None) -> None:
        if trace is None:
            return
        try:
            length = int(self.headers.get("Content-Length") or 0)
        except ValueError:
            trace.record_cursor_body_omitted(reason="invalid_content_length")
            return
        if length < 0:
            trace.record_cursor_body_omitted(
                reason="invalid_content_length", body_bytes=length
            )
            return
        if length > self.config.max_request_body_bytes:
            trace.record_cursor_body_omitted(reason="body_too_large", body_bytes=length)
            self.close_connection = True
            return
        try:
            raw_body = self.rfile.read(length)
        except OSError as exc:
            trace.record_cursor_body_omitted(
                reason=f"read_failed:{exc}", body_bytes=length
            )
            return
        trace.record_cursor_body_bytes(raw_body)
    def _upstream_headers(self, stream: bool, authorization: str) -> dict[str, str]:
        headers = {
            "Authorization": authorization,
--- a/src/deepseek_cursor_proxy/trace.py
+++ b/src/deepseek_cursor_proxy/trace.py
@ -206,7 +206,7 @@ class TraceWriter:
                "pid": os.getpid(),
                "base_dir": str(self.base_dir),
                "session_dir": str(self.session_dir),
-                "format": "one JSON file per proxied POST request",
+                "format": "one JSON file per traced POST request",
            },
        )
@ -224,6 +224,26 @@ class TraceRequest:
        self.data["request"]["body"] = payload
        self.data["request"]["summary"] = payload_summary(payload)
    def record_cursor_body_bytes(self, body: bytes) -> None:
        self.data["request"]["body_bytes"] = len(body)
        text = body.decode("utf-8", errors="replace")
        try:
            payload = json.loads(text)
        except json.JSONDecodeError:
            self.data["request"]["body"] = {"text": text}
            return
        self.data["request"]["body"] = payload
        if isinstance(payload, dict):
            self.data["request"]["summary"] = payload_summary(payload)
    def record_cursor_body_omitted(
        self, *, reason: str, body_bytes: int | None = None
    ) -> None:
        omitted: dict[str, Any] = {"reason": reason}
        if body_bytes is not None:
            omitted["body_bytes"] = body_bytes
        self.data["request"]["body_omitted"] = omitted
    def record_transform(self, prepared: Any) -> None:
        self.data["transform"] = {
            "original_model": prepared.original_model,
--- a/tests/test_protocol.py
+++ b/tests/test_protocol.py
@ -658,6 +658,43 @@ class RecoveryTests(_StrictUpstreamCase):
                continue
            self.assertNotIn("deepseek-cursor-proxy", message.get("content", ""))
    def test_recover_mode_does_not_short_circuit_with_409(self) -> None:
        """In `recover` mode, a payload with no user message leaves the
        recovery loop unable to drop anything (`dropped_messages == 0`),
        so `missing_indexes` stays populated. The proxy must NOT 409 in
        that case — it must forward to upstream and relay whatever
        DeepSeek decides. 409 is reserved for `reject` mode."""
        status, _ = _post(
            f"{self.proxy.url}/v1/chat/completions",
            {
                "model": "deepseek-v4-pro",
                "messages": [
                    {"role": "system", "content": "Be brief."},
                    {
                        "role": "assistant",
                        "content": "",
                        "tool_calls": [
                            {
                                "id": CALL_ID_1,
                                "type": "function",
                                "function": {"name": "get_date", "arguments": "{}"},
                            }
                        ],
                    },
                    {
                        "role": "tool",
                        "tool_call_id": CALL_ID_1,
                        "content": "2026-04-24",
                    },
                ],
            },
        )
        # Strict upstream rejects the missing-reasoning history with 400.
        # The point of this test is the proxy did NOT pre-empt with 409.
        self.assertNotEqual(status, 409)
        self.assertEqual(status, 400)
        self.assertEqual(len(StrictFakeDeepSeek.requests), 1)
 # ---------------------------------------------------------------------------
 # Streaming behaviour
--- a/tests/test_trace.py
+++ b/tests/test_trace.py
@ -11,6 +11,7 @@ import threading
 from tempfile import TemporaryDirectory
 import time
 import unittest
 from urllib.error import HTTPError
 from urllib.request import Request, urlopen
 from deepseek_cursor_proxy.config import ProxyConfig
@ -207,6 +208,36 @@ class TraceIntegrationTests(unittest.TestCase):
        with urlopen(request, timeout=5) as response:
            return json.loads(response.read())
    def test_traces_unsupported_post_path_with_body(self) -> None:
        request = Request(
            f"{self.proxy.url}/v1/summarize",
            data=json.dumps(
                {
                    "model": "gpt-4o-mini",
                    "messages": [{"role": "user", "content": "summarize"}],
                }
            ).encode("utf-8"),
            method="POST",
            headers={
                "Authorization": "Bearer sk-from-cursor",
                "Content-Type": "application/json",
            },
        )
        with self.assertRaises(HTTPError) as captured:
            urlopen(request, timeout=5)
        self.assertEqual(captured.exception.code, 404)
        captured.exception.read()
        trace = _read_single_trace(self.writer.session_dir)
        self.assertEqual(trace["request"]["method"], "POST")
        self.assertEqual(trace["request"]["path"], "/v1/summarize")
        self.assertEqual(trace["request"]["body"]["model"], "gpt-4o-mini")
        self.assertEqual(trace["request"]["summary"]["model"], "gpt-4o-mini")
        self.assertEqual(trace["completion"]["status"], "rejected")
        self.assertEqual(trace["completion"]["http_status"], 404)
        self.assertEqual(trace["transform"], {})
        self.assertEqual(_CannedUpstream.requests, [])
    def test_captures_non_streaming_replay_without_api_key(self) -> None:
        self._post(
            {
--- a/tests/test_transform.py
+++ b/tests/test_transform.py
@ -23,12 +23,26 @@ from deepseek_cursor_proxy.transform import (
    extract_text_content,
    normalize_reasoning_effort,
    prepare_upstream_request,
    reasoning_cache_namespace,
    rewrite_response_body,
    strip_cursor_thinking_blocks,
    strip_recovery_notice_for_upstream,
 )
 def _default_cache_namespace() -> str:
    return reasoning_cache_namespace(
        ProxyConfig(),
        "deepseek-v4-pro",
        {"type": "enabled"},
        "high",
    )
 def _cache_scope(messages: list[dict]) -> str:
    return conversation_scope(messages, _default_cache_namespace())
 class ContentHelpersTests(unittest.TestCase):
    def test_extract_text_content_flattens_multipart_array(self) -> None:
        content = [
@ -334,5 +348,372 @@ class ResponseRewriteTests(unittest.TestCase):
        self.assertEqual(usage["prompt_cache_miss_tokens"], 4)
 class CrossModeAndModelTests(unittest.TestCase):
    """Regression coverage for PR #28's cross-mode/model context preservation
    (Pro↔Flash family normalization, portable turn-scoped keys, recovery
    boundary continuation). Originally shipped with PR #28 in test_transform.py
    and dropped by PR #33's test refactor; restored from commit 5f14da3."""
    def setUp(self) -> None:
        self.store = ReasoningStore(":memory:")
    def tearDown(self) -> None:
        self.store.close()
    def test_deepseek_pro_and_flash_share_reasoning_namespace(self) -> None:
        config = ProxyConfig()
        namespace_pro = reasoning_cache_namespace(
            config,
            "deepseek-v4-pro",
            {"type": "enabled"},
            "high",
            "Bearer key-a",
        )
        namespace_flash = reasoning_cache_namespace(
            config,
            "deepseek-v4-flash",
            {"type": "enabled"},
            "high",
            "Bearer key-a",
        )
        self.assertEqual(namespace_pro, namespace_flash)
        prior = [{"role": "user", "content": "read README"}]
        tool_call = {
            "id": "call_shared",
            "type": "function",
            "function": {
                "name": "read_file",
                "arguments": '{"path":"README.md"}',
            },
        }
        self.store.store_assistant_message(
            {
                "role": "assistant",
                "content": "",
                "reasoning_content": "Shared DeepSeek reasoning.",
                "tool_calls": [tool_call],
            },
            conversation_scope(prior, namespace_pro),
            namespace_pro,
            prior,
        )
        prepared = prepare_upstream_request(
            {
                "model": "deepseek-v4-flash",
                "messages": [
                    *prior,
                    {"role": "assistant", "content": "", "tool_calls": [tool_call]},
                ],
            },
            config,
            self.store,
            authorization="Bearer key-a",
        )
        self.assertEqual(prepared.missing_reasoning_messages, 0)
        self.assertEqual(
            prepared.payload["messages"][1]["reasoning_content"],
            "Shared DeepSeek reasoning.",
        )
    def test_strict_hit_backfills_portable_cache_for_mode_switch(self) -> None:
        agent_prior = [
            {"role": "system", "content": "Agent mode."},
            {"role": "user", "content": "set up the task"},
            {"role": "user", "content": "read README"},
        ]
        plan_prior = [
            {"role": "system", "content": "Plan mode."},
            {"role": "user", "content": "set up the task"},
            {"role": "user", "content": "read README"},
        ]
        tool_call = {
            "id": "call_mode_switch",
            "type": "function",
            "function": {"name": "read_file", "arguments": '{"path":"README.md"}'},
        }
        assistant_message = {
            "role": "assistant",
            "content": "",
            "reasoning_content": "Need README before answering.",
            "tool_calls": [tool_call],
        }
        # Store under Agent scope only — no portable aliases yet.
        self.store.store_assistant_message(
            assistant_message,
            _cache_scope(agent_prior),
        )
        # Agent re-request: strict scope hit, should backfill portable.
        strict_prepared = prepare_upstream_request(
            {
                "model": "deepseek-v4-pro",
                "messages": [
                    *agent_prior,
                    {"role": "assistant", "content": "", "tool_calls": [tool_call]},
                ],
            },
            ProxyConfig(),
            self.store,
        )
        # Plan re-request: scope changed (different system prompt) but the
        # turn signature still matches, so the portable alias hits.
        portable_prepared = prepare_upstream_request(
            {
                "model": "deepseek-v4-pro",
                "messages": [
                    *plan_prior,
                    {"role": "assistant", "content": "", "tool_calls": [tool_call]},
                ],
            },
            ProxyConfig(),
            self.store,
        )
        self.assertEqual(strict_prepared.patched_reasoning_messages, 1)
        self.assertEqual(portable_prepared.patched_reasoning_messages, 1)
        self.assertEqual(portable_prepared.missing_reasoning_messages, 0)
        self.assertEqual(
            portable_prepared.payload["messages"][3]["reasoning_content"],
            "Need README before answering.",
        )
        self.assertTrue(
            str(portable_prepared.reasoning_diagnostics[-1]["hit_kind"]).startswith(
                "portable_"
            )
        )
    def test_portable_turn_cache_restores_final_assistant_after_tool_result(
        self,
    ) -> None:
        agent_user = {"role": "user", "content": "look up project state"}
        plan_user = dict(agent_user)
        tool_call = {
            "id": "call_project_state",
            "type": "function",
            "function": {"name": "lookup", "arguments": '{"query":"state"}'},
        }
        tool_result = {
            "role": "tool",
            "tool_call_id": "call_project_state",
            "content": '{"state":"ready"}',
        }
        tool_assistant = {
            "role": "assistant",
            "content": "",
            "reasoning_content": "Need the project state.",
            "tool_calls": [tool_call],
        }
        final_assistant = {
            "role": "assistant",
            "content": "The project is ready.",
            "reasoning_content": "The tool result is enough to answer.",
        }
        agent_initial_prior = [
            {"role": "system", "content": "Agent mode."},
            agent_user,
        ]
        agent_final_prior = [*agent_initial_prior, tool_assistant, tool_result]
        self.store.store_assistant_message(
            tool_assistant,
            _cache_scope(agent_initial_prior),
            _default_cache_namespace(),
            agent_initial_prior,
        )
        self.store.store_assistant_message(
            final_assistant,
            _cache_scope(agent_final_prior),
            _default_cache_namespace(),
            agent_final_prior,
        )
        prepared = prepare_upstream_request(
            {
                "model": "deepseek-v4-pro",
                "messages": [
                    {"role": "system", "content": "Plan mode."},
                    plan_user,
                    {"role": "assistant", "content": "", "tool_calls": [tool_call]},
                    tool_result,
                    {"role": "assistant", "content": "The project is ready."},
                    {"role": "user", "content": "continue"},
                ],
            },
            ProxyConfig(missing_reasoning_strategy="reject"),
            self.store,
        )
        self.assertEqual(prepared.missing_reasoning_messages, 0)
        self.assertEqual(prepared.patched_reasoning_messages, 2)
        self.assertEqual(
            prepared.payload["messages"][4]["reasoning_content"],
            "The tool result is enough to answer.",
        )
    def test_portable_turn_cache_isolated_for_reused_tool_call_id(self) -> None:
        # Two different conversations both happen to reuse the same
        # tool_call.id. Cache must NOT cross-contaminate.
        tool_call = {
            "id": "call_reused",
            "type": "function",
            "function": {"name": "lookup", "arguments": "{}"},
        }
        assistant_a = {
            "role": "assistant",
            "content": "",
            "reasoning_content": "Reasoning for thread A.",
            "tool_calls": [tool_call],
        }
        assistant_b = {
            "role": "assistant",
            "content": "",
            "reasoning_content": "Reasoning for thread B.",
            "tool_calls": [tool_call],
        }
        prior_a = [
            {"role": "system", "content": "Agent mode."},
            {"role": "user", "content": "thread A"},
        ]
        prior_b = [
            {"role": "system", "content": "Agent mode."},
            {"role": "user", "content": "thread B"},
        ]
        self.store.store_assistant_message(
            assistant_a,
            _cache_scope(prior_a),
            _default_cache_namespace(),
            prior_a,
        )
        self.store.store_assistant_message(
            assistant_b,
            _cache_scope(prior_b),
            _default_cache_namespace(),
            prior_b,
        )
        # Plan-mode replay of thread A — should retrieve A's reasoning, not B's.
        prepared = prepare_upstream_request(
            {
                "model": "deepseek-v4-pro",
                "messages": [
                    {"role": "system", "content": "Plan mode."},
                    {"role": "user", "content": "thread A"},
                    {"role": "assistant", "content": "", "tool_calls": [tool_call]},
                ],
            },
            ProxyConfig(),
            self.store,
        )
        self.assertEqual(
            prepared.payload["messages"][2]["reasoning_content"],
            "Reasoning for thread A.",
        )
    def test_recovered_response_is_recorded_under_pre_recovery_scope(self) -> None:
        old_tool_call = {
            "id": "call_old",
            "type": "function",
            "function": {
                "name": "read_file",
                "arguments": '{"path":"README.md"}',
            },
        }
        new_tool_call = {
            "id": "call_new",
            "type": "function",
            "function": {"name": "lookup", "arguments": '{"query":"new"}'},
        }
        first_payload = {
            "model": "deepseek-v4-pro",
            "messages": [
                {"role": "user", "content": "old model turn"},
                {"role": "assistant", "content": "", "tool_calls": [old_tool_call]},
                {"role": "tool", "tool_call_id": "call_old", "content": "old result"},
                {"role": "user", "content": "continue with DeepSeek"},
            ],
        }
        first_recovered = prepare_upstream_request(
            first_payload,
            ProxyConfig(missing_reasoning_strategy="recover"),
            self.store,
        )
        self.assertEqual(first_recovered.recovered_reasoning_messages, 1)
        # Simulate DeepSeek's response to the recovered request.
        response_body = json.dumps(
            {
                "id": "chatcmpl-test",
                "object": "chat.completion",
                "model": "deepseek-v4-pro",
                "choices": [
                    {
                        "index": 0,
                        "finish_reason": "tool_calls",
                        "message": {
                            "role": "assistant",
                            "content": "",
                            "reasoning_content": "Need the new lookup.",
                            "tool_calls": [new_tool_call],
                        },
                    }
                ],
            }
        ).encode()
        rewritten = rewrite_response_body(
            response_body,
            "deepseek-v4-pro",
            self.store,
            first_recovered.payload["messages"],
            first_recovered.cache_namespace,
            content_prefix=first_recovered.recovery_notice,
            recording_contexts=first_recovered.record_response_contexts,
        )
        recovered_assistant = json.loads(rewritten)["choices"][0]["message"]
        # Reasoning must be recorded under BOTH scopes — pre-recovery (so
        # subsequent Cursor requests echoing the with-prefix history hit) and
        # post-recovery (so an immediate continuation also hits).
        self.assertEqual(len(first_recovered.record_response_contexts), 2)
        for scope, _messages in first_recovered.record_response_contexts:
            self.assertEqual(
                self.store.get(
                    f"scope:{scope}:signature:{message_signature(recovered_assistant)}"
                ),
                "Need the new lookup.",
            )
        recovered_assistant.pop("reasoning_content", None)
        # Cursor's next request echoes the recovered assistant + tool result.
        # The proxy should detect the recovery boundary, retire the prefix,
        # and continue cleanly without recovering again.
        second_payload = {
            "model": "deepseek-v4-pro",
            "messages": [
                *first_payload["messages"],
                recovered_assistant,
                {"role": "tool", "tool_call_id": "call_new", "content": "new result"},
            ],
        }
        second_prepared = prepare_upstream_request(
            second_payload,
            ProxyConfig(missing_reasoning_strategy="recover"),
            self.store,
        )
        self.assertEqual(second_prepared.missing_reasoning_messages, 0)
        self.assertEqual(second_prepared.recovered_reasoning_messages, 0)
        self.assertEqual(second_prepared.recovery_dropped_messages, 0)
        self.assertTrue(second_prepared.continued_recovery_boundary)
        self.assertGreater(second_prepared.retired_prefix_messages, 0)
        self.assertEqual(
            second_prepared.payload["messages"][2]["reasoning_content"],
            "Need the new lookup.",
        )
 if __name__ == "__main__":
    unittest.main()