From 6b2c6e42c147452b339d48050558a28f54ee6217 Mon Sep 17 00:00:00 2001 From: Yixing Lao Date: Sun, 26 Apr 2026 16:43:04 +0800 Subject: [PATCH] feat(reasoning): add recover strategy for missing reasoning content (#13) --- README.md | 26 +- src/deepseek_cursor_proxy/config.py | 71 ++++-- src/deepseek_cursor_proxy/server.py | 178 ++++++++++---- src/deepseek_cursor_proxy/transform.py | 175 +++++++++++--- tests/test_config.py | 49 +++- tests/test_proxy_end_to_end.py | 117 +++++++-- tests/test_server.py | 28 +++ tests/test_transform.py | 317 +++++++++++++++++++++++-- 8 files changed, 813 insertions(+), 148 deletions(-) diff --git a/README.md b/README.md index a6eb35d..d818952 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ Compatibility proxy connecting Cursor to DeepSeek thinking models (`deepseek-v4- ## What It Does -- ✅ Caches DeepSeek `reasoning_content` from regular and streamed responses, then restores it on later tool-call turns when Cursor omits it. If the exact original reasoning is unavailable, the proxy fails closed instead of sending a fake placeholder. See [DeepSeek docs](https://api-docs.deepseek.com/guides/thinking_mode#tool-calls) for more details. +- ✅ Caches DeepSeek `reasoning_content` from regular and streamed responses, then restores it on later tool-call turns when Cursor omits it. If old or mixed-model chat history cannot be repaired exactly, the proxy can recover by continuing from recent context and showing a small Cursor-visible notice. See [DeepSeek docs](https://api-docs.deepseek.com/guides/thinking_mode#tool-calls) for more details. - ✅ Mirrors streamed `reasoning_content` into Cursor-visible `...` text so that thinking tokens are shown in Cursor's UI. For BYOK/proxy mode, Cursor renders this as normal text, not as a native collapsible thinking block. - ✅ Starts an ngrok tunnel so Cursor can reach the local proxy through a public HTTPS URL. - ✅ Provides other compatibility fixes to make DeepSeek models run well in Cursor. @@ -129,7 +129,7 @@ Select `deepseek-v4-pro` in Cursor and use chat or agent mode as usual. DeepSeek's [thinking mode](https://api-docs.deepseek.com/guides/thinking_mode#tool-calls) requires `reasoning_content` from assistant messages in tool-call sequences to be passed back in later requests. Cursor may omit this field, causing DeepSeek to return a 400 error. This proxy sits between Cursor and DeepSeek (`Cursor → ngrok → proxy → DeepSeek API`) and repairs requests when it has the exact original reasoning cached. -- Core fix: every DeepSeek response, streaming or non-streaming, has its `reasoning_content` stored in a local SQLite cache keyed by message signature, tool-call ID, and tool-call function signature. On outgoing thinking-mode requests, the proxy restores missing `reasoning_content` for tool-call-related assistant messages and sends the complete history to DeepSeek. If the cache is cold, such as after a proxy restart, it returns a local error instead of fabricating reasoning. +- Core fix: every DeepSeek response, streaming or non-streaming, has its `reasoning_content` stored in a local SQLite cache keyed by message signature, tool-call ID, and tool-call function signature. On outgoing thinking-mode requests, the proxy restores missing `reasoning_content` for tool-call-related assistant messages and sends the complete history to DeepSeek. If the cache is cold, such as after a proxy restart or model switch, the default recovery mode omits older unrecoverable tool-call history, continues from the latest user request, logs the recovery, and prefixes the next Cursor response with a small notice. - Multi-conversation isolation: cache keys are scoped by a SHA-256 hash of the canonical conversation prefix (roles, content, tool calls, excluding `reasoning_content`) plus the upstream model/configuration and an API-key hash. Concurrent or interleaved threads with different histories get different scopes, so reused tool-call IDs do not collide. Byte-identical cloned histories are indistinguishable unless Cursor sends a differentiating history. - DeepSeek [prefix caching](https://api-docs.deepseek.com/guides/kv_cache) compatibility: the proxy does not inject synthetic thread IDs, timestamps, or cache-control messages into the prompt. When it restores cached reasoning, it restores the exact original string, preserving repeated prefixes for DeepSeek's automatic best-effort context cache. - Additional compatibility fixes: the proxy converts legacy `functions`/`function_call` fields to `tools`/`tool_choice`, preserves required and named tool-choice semantics, normalizes `reasoning_effort` aliases per DeepSeek docs, strips mirrored `` blocks from assistant content, converts multi-part content arrays to plain text, logs DeepSeek prompt-cache usage when available, and mirrors `reasoning_content` into Cursor-visible `...` blocks for thinking display. @@ -150,15 +150,25 @@ Run without ngrok for local curl testing: PROXY_NGROK=false deepseek-cursor-proxy --port 9000 --verbose ``` -If Cursor shows `missing_reasoning_content`, the current chat contains thinking-mode tool-call history whose original DeepSeek `reasoning_content` is not in the local cache. This commonly happens when continuing an older chat after a proxy restart, cache clear, or cache format/config change. The local 409 response includes a diagnostic placeholder so the cause is visible, but that placeholder is not forwarded to DeepSeek in the default safe mode. Start a new chat, or retry from the original tool-call turn while the proxy is running so it can capture the reasoning. +If the current chat contains thinking-mode tool-call history whose original DeepSeek `reasoning_content` is not in the local cache, the default `recover` mode avoids hard failure by dropping older unrecoverable tool-call history, forwarding the latest user request with a system recovery note, logging what happened, and prefixing the next assistant response with: -For debugging an old Cursor history, you can opt into a non-compliant compatibility fallback: - -```bash -deepseek-cursor-proxy --verbose --missing-reasoning-strategy placeholder +```text +[deepseek-cursor-proxy] Recovered this DeepSeek chat because older tool-call reasoning was unavailable; continuing with recent context only. ``` -This inserts a loud placeholder into missing `reasoning_content` fields and forwards the request. It may still be rejected by DeepSeek and should not be used for normal work. +This commonly happens when continuing an older chat after a proxy restart, cache clear, cache format/config change, or switching from another model into DeepSeek. If you run strict debugging mode, the proxy returns `missing_reasoning_content` instead of recovering and the error message tells you to switch back to recover mode. + +The recovery strategy is not a config-file setting. For strict DeepSeek API behavior while debugging, pass the runtime flag: + +```bash +deepseek-cursor-proxy --verbose --missing-reasoning-strategy reject +``` + +To turn automatic recovery back on, restart without that flag or pass: + +```bash +deepseek-cursor-proxy --verbose --missing-reasoning-strategy recover +``` Use another config file: diff --git a/src/deepseek_cursor_proxy/config.py b/src/deepseek_cursor_proxy/config.py index 307da71..8d7e3fb 100644 --- a/src/deepseek_cursor_proxy/config.py +++ b/src/deepseek_cursor_proxy/config.py @@ -15,7 +15,10 @@ REASONING_CONTENT_FILE_NAME = "reasoning_content.sqlite3" TRUE_VALUES = {"1", "true", "yes", "on"} FALSE_VALUES = {"0", "false", "no", "off"} MISSING = object() -DEFAULT_CONFIG_TEXT = """# This file was created automatically at ~/.deepseek-cursor-proxy/config.yaml. +DEFAULT_CONFIG_HEADER = ( + "# This file was created automatically at ~/.deepseek-cursor-proxy/config.yaml." +) +DEFAULT_CONFIG_TEXT = f"""{DEFAULT_CONFIG_HEADER} # API keys are read from Cursor's Authorization header and forwarded upstream. # `model` is the fallback when a request has no model; Cursor's requested @@ -35,7 +38,6 @@ max_request_body_bytes: 20971520 cors: false reasoning_content_path: reasoning_content.sqlite3 -missing_reasoning_strategy: reject reasoning_cache_max_age_seconds: 604800 reasoning_cache_max_rows: 10000 """ @@ -76,6 +78,42 @@ def load_config_file(config_path: str | Path) -> dict[str, Any]: return dict(loaded) +def migrate_default_config_file( + settings: dict[str, Any], + config_path: Path, + live_env: Mapping[str, str], + original_config_path: str | Path | None, +) -> dict[str, Any]: + if original_config_path is not None: + return settings + if "DEEPSEEK_CURSOR_PROXY_CONFIG_PATH" in live_env: + return settings + if config_path != default_config_path(): + return settings + if "missing_reasoning_strategy" not in settings: + return settings + + migrated = dict(settings) + migrated.pop("missing_reasoning_strategy", None) + try: + text = config_path.read_text(encoding="utf-8") + except OSError: + return migrated + if not text.startswith(DEFAULT_CONFIG_HEADER): + return settings + + updated_lines = [ + line + for line in text.splitlines() + if not line.strip().startswith("missing_reasoning_strategy:") + ] + updated_text = "\n".join(updated_lines) + "\n" + if updated_text != text: + config_path.write_text(updated_text, encoding="utf-8") + config_path.chmod(0o600) + return migrated + + def resolve_config_path( env: Mapping[str, str] | None, config_path: str | Path | None ) -> Path: @@ -150,6 +188,7 @@ def settings_and_env( env: Mapping[str, str] | None, config_path: str | Path | None ) -> tuple[dict[str, Any], dict[str, str], Path]: live_env = dict(os.environ if env is None else env) + original_config_path = config_path config_path = resolve_config_path(live_env, config_path) if ( config_path == default_config_path() @@ -157,7 +196,14 @@ def settings_and_env( and not config_path.exists() ): populate_default_config_file(config_path) - return load_config_file(config_path), live_env, config_path + settings = load_config_file(config_path) + settings = migrate_default_config_file( + settings, + config_path, + live_env, + original_config_path, + ) + return settings, live_env, config_path @dataclass(frozen=True) @@ -172,7 +218,7 @@ class ProxyConfig: request_timeout: float = 300.0 max_request_body_bytes: int = 20 * 1024 * 1024 reasoning_content_path: Path = field(default_factory=default_reasoning_content_path) - missing_reasoning_strategy: str = "reject" + missing_reasoning_strategy: str = "recover" reasoning_cache_max_age_seconds: int = 7 * 24 * 60 * 60 reasoning_cache_max_rows: int = 10000 cursor_display_reasoning: bool = True @@ -207,22 +253,6 @@ class ProxyConfig: if thinking not in {"enabled", "disabled", "pass-through"}: thinking = "enabled" - missing_reasoning_strategy = ( - as_str( - setting_value( - settings, - live_env, - "missing_reasoning_strategy", - "MISSING_REASONING_STRATEGY", - ), - "reject", - ) - .strip() - .lower() - ) - if missing_reasoning_strategy not in {"reject", "placeholder"}: - missing_reasoning_strategy = "reject" - return cls( host=as_str( setting_value( @@ -307,7 +337,6 @@ class ProxyConfig: default_reasoning_content_path(), config_dir, ), - missing_reasoning_strategy=missing_reasoning_strategy, reasoning_cache_max_age_seconds=as_int( setting_value( settings, diff --git a/src/deepseek_cursor_proxy/server.py b/src/deepseek_cursor_proxy/server.py index a30e8b8..04d5e91 100644 --- a/src/deepseek_cursor_proxy/server.py +++ b/src/deepseek_cursor_proxy/server.py @@ -3,6 +3,7 @@ from __future__ import annotations import argparse from dataclasses import replace import gzip +from http.client import HTTPException import json import logging from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer @@ -24,7 +25,7 @@ from .reasoning_store import ReasoningStore, conversation_scope from .streaming import CursorReasoningDisplayAdapter, StreamAccumulator from .tunnel import NgrokTunnel, local_tunnel_target from .transform import ( - PLACEHOLDER_REASONING_CONTENT, + RECOVERY_NOTICE_CONTENT, prepare_upstream_request, rewrite_response_body, ) @@ -137,22 +138,32 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler): "restored reasoning_content on %s assistant message(s)", prepared.patched_reasoning_messages, ) - if prepared.placeholder_reasoning_messages: + if prepared.recovered_reasoning_messages: + if prepared.recovery_notice: + LOG.warning( + ( + "recovered request because cached reasoning_content was " + "unavailable for %s assistant message(s); omitted %s " + "older message(s) from forwarded history and will show " + "a Cursor notice" + ), + prepared.recovered_reasoning_messages, + prepared.recovery_dropped_messages, + ) + else: + LOG.info( + ( + "continued recovered request; omitted %s old message(s) " + "before the prior recovery boundary" + ), + prepared.recovery_dropped_messages, + ) + if prepared.missing_reasoning_messages: LOG.warning( ( - "inserted placeholder reasoning_content on %s assistant " - "message(s); this is compatibility mode and may still be " - "rejected by DeepSeek" + "strict missing-reasoning mode rejected request path=%s " + "status=409 reason=missing_reasoning_content count=%s" ), - prepared.placeholder_reasoning_messages, - ) - if prepared.missing_reasoning_messages: - diagnostic_placeholder = ( - f"{PLACEHOLDER_REASONING_CONTENT} " - "[not sent upstream because missing_reasoning_strategy=reject]" - ) - LOG.warning( - "rejected request path=%s status=409 reason=missing_reasoning_content count=%s", request_path, prepared.missing_reasoning_messages, ) @@ -161,26 +172,28 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler): { "error": { "message": ( - "Missing cached DeepSeek reasoning_content for a " - f"thinking-mode tool-call history on " + "deepseek-cursor-proxy is running in strict " + "missing-reasoning mode and cannot automatically " + "recover this thinking-mode tool-call history because " + "cached DeepSeek reasoning_content is missing for " f"{prepared.missing_reasoning_messages} assistant " - "message(s). This usually means the chat has tool-call " - "turns that were not captured by this proxy/cache. Start " - "a new chat or retry from the original tool-call turn." + "message(s). Restart without " + "`--missing-reasoning-strategy reject`, or pass " + "`--missing-reasoning-strategy recover`, so the proxy " + "can recover from partial chat history automatically." ), "type": "missing_reasoning_content", "code": "missing_reasoning_content", "missing_reasoning_messages": prepared.missing_reasoning_messages, - "diagnostic_placeholder": diagnostic_placeholder, } }, ) return LOG.info( - "deepseek send: %s patched=%s placeholder=%s", + "deepseek send: %s patched=%s recovered=%s", compact_request_stats(prepared.payload), prepared.patched_reasoning_messages, - prepared.placeholder_reasoning_messages, + prepared.recovered_reasoning_messages, ) if self.config.verbose: @@ -252,6 +265,7 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler): prepared.original_model, prepared.payload["messages"], prepared.cache_namespace, + prepared.recovery_notice, ) else: sent_response = self._proxy_regular_response( @@ -259,19 +273,21 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler): prepared.original_model, prepared.payload["messages"], prepared.cache_namespace, + prepared.recovery_notice, ) if not sent_response: return LOG.info( ( "request complete status=%s stream=%s elapsed_ms=%s " - "patched_reasoning=%s missing_reasoning=%s" + "patched_reasoning=%s missing_reasoning=%s recovered_reasoning=%s" ), upstream_status, bool(prepared.payload.get("stream")), elapsed_ms(started), prepared.patched_reasoning_messages, prepared.missing_reasoning_messages, + prepared.recovered_reasoning_messages, ) def _cursor_authorization(self) -> str | None: @@ -419,6 +435,7 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler): original_model: str, request_messages: list[dict[str, Any]], cache_namespace: str, + recovery_notice: str | None = None, ) -> bool: body = read_response_body(response) try: @@ -428,6 +445,7 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler): self.reasoning_store, request_messages, cache_namespace, + content_prefix=recovery_notice, ) except (json.JSONDecodeError, UnicodeDecodeError) as exc: LOG.warning("failed to rewrite upstream JSON response: %s", exc) @@ -457,6 +475,7 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler): original_model: str, request_messages: list[dict[str, Any]], cache_namespace: str, + recovery_notice: str | None = None, ) -> bool: sent_headers = self._send_response_headers( getattr(response, "status", 200), @@ -479,12 +498,22 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler): ) scope = conversation_scope(request_messages, cache_namespace) finalized = False + pending_recovery_notice = recovery_notice while True: - line = response.readline() + try: + line = response.readline() + except (HTTPException, OSError) as exc: + LOG.warning("upstream streaming response read failed: %s", exc) + return False if not line: break - rewritten, finalized = self._rewrite_sse_line( - line, original_model, accumulator, scope, display_adapter + rewritten, finalized, pending_recovery_notice = self._rewrite_sse_line( + line, + original_model, + accumulator, + scope, + display_adapter, + pending_recovery_notice, ) if not self._write_to_client( rewritten, "sending streaming response chunk", flush=True @@ -508,10 +537,11 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler): accumulator: StreamAccumulator, scope: str, display_adapter: CursorReasoningDisplayAdapter | None, - ) -> tuple[bytes, bool]: + recovery_notice: str | None = None, + ) -> tuple[bytes, bool, str | None]: stripped = line.strip() if not stripped.startswith(b"data:"): - return line, False + return line, False, recovery_notice data = stripped[len(b"data:") :].strip() if data == b"[DONE]": @@ -520,19 +550,30 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler): stored = accumulator.store_reasoning(self.reasoning_store, scope) if stored: LOG.info("stored %s streaming reasoning cache key(s)", stored) + prefix = b"" if display_adapter is None: - return b"data: [DONE]\n\n", True + if recovery_notice: + prefix += sse_data( + recovery_notice_chunk(original_model, recovery_notice) + ) + return prefix + b"data: [DONE]\n\n", True, None closing_chunk = display_adapter.flush_chunk(original_model) - if closing_chunk is None: - return b"data: [DONE]\n\n", True - return sse_data(closing_chunk) + b"data: [DONE]\n\n", True + if closing_chunk is not None: + prefix += sse_data(closing_chunk) + if recovery_notice: + prefix += sse_data( + recovery_notice_chunk(original_model, recovery_notice) + ) + return prefix + b"data: [DONE]\n\n", True, None try: chunk = json.loads(data.decode("utf-8")) except (json.JSONDecodeError, UnicodeDecodeError): - return line, False + return line, False, recovery_notice if isinstance(chunk, dict): + if recovery_notice and inject_recovery_notice(chunk, recovery_notice): + recovery_notice = None accumulator.ingest_chunk(chunk) stored = accumulator.store_ready_reasoning(self.reasoning_store, scope) if stored: @@ -544,13 +585,17 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler): chunk["model"] = original_model ending = b"\r\n" if line.endswith(b"\r\n") else b"\n" return ( - b"data: " - + json.dumps(chunk, ensure_ascii=False, separators=(",", ":")).encode( - "utf-8" - ) - + ending - ), False - return line, False + ( + b"data: " + + json.dumps( + chunk, ensure_ascii=False, separators=(",", ":") + ).encode("utf-8") + + ending + ), + False, + recovery_notice, + ) + return line, False, recovery_notice def build_arg_parser() -> argparse.ArgumentParser: @@ -602,10 +647,10 @@ def build_arg_parser() -> argparse.ArgumentParser: ) parser.add_argument( "--missing-reasoning-strategy", - choices=["reject", "placeholder"], + choices=["recover", "reject"], help=( - "What to do when required reasoning_content is missing: reject " - "(safe default) or placeholder (unsafe compatibility fallback)" + "What to do when required reasoning_content is missing: " + "recover (friendly default) or reject (strict debugging mode)" ), ) parser.add_argument( @@ -740,6 +785,45 @@ def sse_data(payload: dict[str, Any]) -> bytes: ) +def inject_recovery_notice(chunk: dict[str, Any], notice: str) -> bool: + choices = chunk.get("choices") + if not isinstance(choices, list): + return False + for choice in choices: + if not isinstance(choice, dict): + continue + delta = choice.get("delta") + if not isinstance(delta, dict): + continue + if "content" not in delta and not delta.get("tool_calls"): + continue + existing_content = delta.get("content") + delta["content"] = notice + ( + existing_content if isinstance(existing_content, str) else "" + ) + return True + return False + + +def recovery_notice_chunk( + model: str, + notice: str = RECOVERY_NOTICE_CONTENT, +) -> dict[str, Any]: + return { + "id": "chatcmpl-deepseek-cursor-proxy-recovery", + "object": "chat.completion.chunk", + "created": int(time.time()), + "model": model, + "choices": [ + { + "index": 0, + "delta": {"content": notice}, + "finish_reason": None, + } + ], + } + + def summarize_chat_payload(payload: dict[str, Any]) -> str: messages = payload.get("messages") tools = payload.get("tools") @@ -841,14 +925,6 @@ def main(argv: list[str] | None = None) -> int: config.missing_reasoning_strategy, config.reasoning_content_path, ) - if config.missing_reasoning_strategy == "placeholder": - LOG.warning( - ( - "missing_reasoning_strategy=placeholder is not DeepSeek-compliant; " - "use only to test old Cursor histories whose original reasoning " - "cannot be recovered" - ) - ) if config.verbose: LOG.info("logging mode=verbose metadata=detailed bodies=true") LOG.warning( diff --git a/src/deepseek_cursor_proxy/transform.py b/src/deepseek_cursor_proxy/transform.py index 06787fa..7ba2eb5 100644 --- a/src/deepseek_cursor_proxy/transform.py +++ b/src/deepseek_cursor_proxy/transform.py @@ -67,11 +67,20 @@ CURSOR_THINKING_BLOCK_RE = re.compile( re.IGNORECASE, ) -PLACEHOLDER_REASONING_CONTENT = ( - "[deepseek-cursor-proxy placeholder reasoning_content: original DeepSeek " - "reasoning_content was missing from Cursor history and unavailable in the " - "local cache. This is an opt-in compatibility fallback, not the original " - "model reasoning.]" +RECOVERY_NOTICE_TEXT = ( + "[deepseek-cursor-proxy] Recovered this DeepSeek chat because older " + "tool-call reasoning was unavailable; continuing with recent context only." +) +LEGACY_RECOVERY_NOTICE_TEXT = ( + "Note: recovered this DeepSeek chat because older tool-call reasoning " + "was unavailable; continuing with recent context only." +) +RECOVERY_NOTICE_CONTENT = f"{RECOVERY_NOTICE_TEXT}\n\n" +RECOVERY_SYSTEM_CONTENT = ( + "deepseek-cursor-proxy recovered this request because older DeepSeek " + "thinking-mode tool-call reasoning_content was unavailable. Older " + "unrecoverable tool-call history was omitted; continue using only the " + "remaining recovered context." ) @@ -82,8 +91,10 @@ class PreparedRequest: upstream_model: str cache_namespace: str patched_reasoning_messages: int - placeholder_reasoning_messages: int missing_reasoning_messages: int + recovered_reasoning_messages: int = 0 + recovery_dropped_messages: int = 0 + recovery_notice: str | None = None def normalize_reasoning_effort(value: Any) -> str: @@ -203,8 +214,7 @@ def normalize_message( cache_namespace: str, repair_reasoning: bool, keep_reasoning: bool, - missing_reasoning_strategy: str, -) -> tuple[dict[str, Any], bool, bool, bool]: +) -> tuple[dict[str, Any], bool, bool]: if not isinstance(message, dict): message = {"role": "user", "content": str(message)} normalized = {key: value for key, value in message.items() if key in MESSAGE_FIELDS} @@ -228,7 +238,6 @@ def normalize_message( ] patched = False - placeholder = False missing = False if normalized["role"] == "assistant": if not keep_reasoning: @@ -249,17 +258,13 @@ def normalize_message( normalized["reasoning_content"] = restored patched = True if needs_reasoning and not patched: - if missing_reasoning_strategy == "placeholder": - normalized["reasoning_content"] = PLACEHOLDER_REASONING_CONTENT - placeholder = True - else: - missing = True + missing = True allowed_fields = ROLE_MESSAGE_FIELDS.get(str(normalized["role"]), MESSAGE_FIELDS) normalized = { key: value for key, value in normalized.items() if key in allowed_fields } - return normalized, patched, placeholder, missing + return normalized, patched, missing def normalize_messages( @@ -268,32 +273,102 @@ def normalize_messages( cache_namespace: str, repair_reasoning: bool, keep_reasoning: bool, - missing_reasoning_strategy: str, -) -> tuple[list[dict[str, Any]], int, int, int]: +) -> tuple[list[dict[str, Any]], int, list[int]]: if not isinstance(messages, list): - return [], 0, 0, 0 + return [], 0, [] normalized_messages: list[dict[str, Any]] = [] patched_count = 0 - placeholder_count = 0 - missing_count = 0 + missing_indexes: list[int] = [] for message in messages: - normalized, patched, placeholder, missing = normalize_message( + normalized, patched, missing = normalize_message( message, store, normalized_messages, cache_namespace, repair_reasoning, keep_reasoning, - missing_reasoning_strategy, ) normalized_messages.append(normalized) if patched: patched_count += 1 - if placeholder: - placeholder_count += 1 if missing: - missing_count += 1 - return normalized_messages, patched_count, placeholder_count, missing_count + missing_indexes.append(len(normalized_messages) - 1) + return normalized_messages, patched_count, missing_indexes + + +def has_recovery_notice(message: dict[str, Any]) -> bool: + content = message.get("content") + return ( + message.get("role") == "assistant" + and isinstance(content, str) + and content.startswith((RECOVERY_NOTICE_TEXT, LEGACY_RECOVERY_NOTICE_TEXT)) + ) + + +def leading_system_messages(messages: list[dict[str, Any]]) -> list[dict[str, Any]]: + leading_messages: list[dict[str, Any]] = [] + for message in messages: + if message.get("role") == "system": + leading_messages.append(message) + continue + break + return leading_messages + + +def recover_messages_from_missing_reasoning( + messages: list[dict[str, Any]], + missing_indexes: list[int], +) -> tuple[list[dict[str, Any]], int, str | None]: + recovery_boundary_index = next( + ( + index + for index in range(len(messages) - 1, -1, -1) + if has_recovery_notice(messages[index]) + and any(missing_index < index for missing_index in missing_indexes) + ), + -1, + ) + if recovery_boundary_index != -1: + context_user_index = next( + ( + index + for index in range(recovery_boundary_index - 1, -1, -1) + if messages[index].get("role") == "user" + ), + -1, + ) + leading_messages = leading_system_messages(messages) + recovered_tail = [] + if context_user_index != -1: + recovered_tail.append(messages[context_user_index]) + recovered_tail.extend(messages[recovery_boundary_index:]) + recovered = [ + *leading_messages, + {"role": "system", "content": RECOVERY_SYSTEM_CONTENT}, + *recovered_tail, + ] + kept_context_messages = 1 if context_user_index != -1 else 0 + omitted_messages = ( + recovery_boundary_index - len(leading_messages) - kept_context_messages + ) + return recovered, omitted_messages, None + + last_user_index = next( + ( + index + for index in range(len(messages) - 1, -1, -1) + if messages[index].get("role") == "user" + ), + -1, + ) + if last_user_index == -1: + return messages, 0, None + + recovered = leading_system_messages(messages) + omitted_messages = len(messages) - len(recovered) - 1 + recovered.append({"role": "system", "content": RECOVERY_SYSTEM_CONTENT}) + recovered.append(messages[last_user_index]) + return recovered, omitted_messages, RECOVERY_NOTICE_CONTENT def assistant_needs_reasoning_for_tool_context( @@ -403,14 +478,33 @@ def prepare_upstream_request( prepared.get("reasoning_effort"), authorization, ) - messages, patched_count, placeholder_count, missing_count = normalize_messages( + messages, patched_count, missing_indexes = normalize_messages( payload.get("messages"), store, cache_namespace, repair_reasoning=thinking_enabled, keep_reasoning=not thinking_disabled, - missing_reasoning_strategy=config.missing_reasoning_strategy, ) + recovered_count = 0 + recovery_dropped_messages = 0 + recovery_notice = None + while missing_indexes and config.missing_reasoning_strategy == "recover": + recovered_messages, dropped_messages, notice = ( + recover_messages_from_missing_reasoning(messages, missing_indexes) + ) + if not dropped_messages: + break + recovered_count += len(missing_indexes) + recovery_dropped_messages += dropped_messages + if notice: + recovery_notice = notice + messages, patched_count, missing_indexes = normalize_messages( + recovered_messages, + store, + cache_namespace, + repair_reasoning=thinking_enabled, + keep_reasoning=not thinking_disabled, + ) prepared["messages"] = messages return PreparedRequest( @@ -419,8 +513,10 @@ def prepare_upstream_request( upstream_model=upstream_model, cache_namespace=cache_namespace, patched_reasoning_messages=patched_count, - placeholder_reasoning_messages=placeholder_count, - missing_reasoning_messages=missing_count, + missing_reasoning_messages=len(missing_indexes), + recovered_reasoning_messages=recovered_count, + recovery_dropped_messages=recovery_dropped_messages, + recovery_notice=recovery_notice, ) @@ -452,9 +548,12 @@ def rewrite_response_body( store: ReasoningStore | None, request_messages: list[dict[str, Any]], cache_namespace: str = "", + content_prefix: str | None = None, ) -> bytes: response_payload = json.loads(body.decode("utf-8")) if isinstance(response_payload, dict): + if content_prefix: + prefix_response_content(response_payload, content_prefix) record_response_reasoning( response_payload, store, request_messages, cache_namespace ) @@ -463,3 +562,19 @@ def rewrite_response_body( return json.dumps( response_payload, ensure_ascii=False, separators=(",", ":") ).encode("utf-8") + + +def prefix_response_content(response_payload: dict[str, Any], prefix: str) -> bool: + choices = response_payload.get("choices") + if not isinstance(choices, list): + return False + for choice in choices: + if not isinstance(choice, dict): + continue + message = choice.get("message") + if not isinstance(message, dict): + continue + content = message.get("content") + message["content"] = prefix + (content if isinstance(content, str) else "") + return True + return False diff --git a/tests/test_config.py b/tests/test_config.py index 030c4ed..facd9a2 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -8,6 +8,7 @@ import unittest from unittest.mock import patch from deepseek_cursor_proxy.config import ( + DEFAULT_CONFIG_TEXT, ProxyConfig, default_config_path, default_reasoning_content_path, @@ -43,8 +44,35 @@ class ConfigTests(unittest.TestCase): self.assertIn( "model: deepseek-v4-pro", config_path.read_text(encoding="utf-8") ) + self.assertNotIn( + "missing_reasoning_strategy", + config_path.read_text(encoding="utf-8"), + ) self.assertEqual(stat.S_IMODE(config_path.stat().st_mode), 0o600) self.assertEqual(config.upstream_model, "deepseek-v4-pro") + self.assertEqual(config.missing_reasoning_strategy, "recover") + + def test_legacy_generated_default_config_removes_missing_reasoning_key( + self, + ) -> None: + with TemporaryDirectory() as temp_dir: + home = Path(temp_dir) + + with patch("deepseek_cursor_proxy.config.Path.home", return_value=home): + config_path = default_config_path() + config_path.parent.mkdir(parents=True) + config_path.write_text( + DEFAULT_CONFIG_TEXT + "\nmissing_reasoning_strategy: reject\n", + encoding="utf-8", + ) + + config = ProxyConfig.from_file(env={}, config_path=None) + + self.assertEqual(config.missing_reasoning_strategy, "recover") + self.assertNotIn( + "missing_reasoning_strategy", + config_path.read_text(encoding="utf-8"), + ) def test_missing_explicit_config_file_is_not_populated(self) -> None: with TemporaryDirectory() as temp_dir: @@ -76,6 +104,18 @@ class ConfigTests(unittest.TestCase): self.assertEqual(config.port, 9100) self.assertEqual(config.reasoning_content_path, reasoning_content_path) + def test_missing_reasoning_strategy_config_key_is_ignored(self) -> None: + with TemporaryDirectory() as temp_dir: + config_path = Path(temp_dir) / "config.yaml" + config_path.write_text( + "missing_reasoning_strategy: reject\n", + encoding="utf-8", + ) + + config = ProxyConfig.from_file(env={}, config_path=config_path) + + self.assertEqual(config.missing_reasoning_strategy, "recover") + def test_environment_overrides_config_file(self) -> None: with TemporaryDirectory() as temp_dir: config_path = Path(temp_dir) / "config.yaml" @@ -142,7 +182,6 @@ class ConfigTests(unittest.TestCase): "PROXY_NGROK": "yes", "PROXY_CORS": "true", "PROXY_MAX_REQUEST_BODY_BYTES": "1234", - "MISSING_REASONING_STRATEGY": "placeholder", "REASONING_CACHE_MAX_AGE_SECONDS": "60", "REASONING_CACHE_MAX_ROWS": "50", }, @@ -153,17 +192,17 @@ class ConfigTests(unittest.TestCase): self.assertTrue(config.ngrok) self.assertTrue(config.cors) self.assertEqual(config.max_request_body_bytes, 1234) - self.assertEqual(config.missing_reasoning_strategy, "placeholder") + self.assertEqual(config.missing_reasoning_strategy, "recover") self.assertEqual(config.reasoning_cache_max_age_seconds, 60) self.assertEqual(config.reasoning_cache_max_rows, 50) - def test_invalid_missing_reasoning_strategy_defaults_to_reject(self) -> None: + def test_missing_reasoning_strategy_environment_is_ignored(self) -> None: config = ProxyConfig.from_file( - env={"MISSING_REASONING_STRATEGY": "invent"}, + env={"MISSING_REASONING_STRATEGY": "reject"}, config_path=Path("/does/not/exist"), ) - self.assertEqual(config.missing_reasoning_strategy, "reject") + self.assertEqual(config.missing_reasoning_strategy, "recover") def test_cursor_reasoning_display_can_be_disabled_from_config(self) -> None: with TemporaryDirectory() as temp_dir: diff --git a/tests/test_proxy_end_to_end.py b/tests/test_proxy_end_to_end.py index d1c3b1e..72a18b2 100644 --- a/tests/test_proxy_end_to_end.py +++ b/tests/test_proxy_end_to_end.py @@ -17,7 +17,7 @@ from deepseek_cursor_proxy.reasoning_store import ( ) from deepseek_cursor_proxy.server import DeepSeekProxyHandler, DeepSeekProxyServer from deepseek_cursor_proxy.transform import ( - PLACEHOLDER_REASONING_CONTENT, + RECOVERY_NOTICE_CONTENT, reasoning_cache_namespace, ) @@ -626,9 +626,14 @@ class ProxyEndToEndTests(unittest.TestCase): self.assertIn("too large", payload["error"]["message"]) self.assertEqual(FakeDeepSeekHandler.requests, []) - def test_proxy_rejects_uncached_cursor_tool_history_without_placeholder( + def test_proxy_rejects_uncached_cursor_tool_history_in_strict_mode( self, ) -> None: + self.proxy.server.config = replace( + self.proxy.server.config, + missing_reasoning_strategy="reject", + ) + status, payload = post_json( f"{self.proxy.url}/v1/chat/completions", second_cursor_request(include_reasoning=False), @@ -637,29 +642,79 @@ class ProxyEndToEndTests(unittest.TestCase): self.assertEqual(status, 409) self.assertEqual(payload["error"]["missing_reasoning_messages"], 1) self.assertIn("1 assistant message", payload["error"]["message"]) + self.assertIn("strict missing-reasoning mode", payload["error"]["message"]) self.assertIn( - "not sent upstream", - payload["error"]["diagnostic_placeholder"], + "--missing-reasoning-strategy recover", payload["error"]["message"] ) self.assertEqual(FakeDeepSeekHandler.requests, []) - def test_proxy_can_forward_placeholder_for_uncached_cursor_tool_history( - self, - ) -> None: - self.proxy.server.config = replace( - self.proxy.server.config, - missing_reasoning_strategy="placeholder", - ) - - status, _ = post_json( - f"{self.proxy.url}/v1/chat/completions", - second_cursor_request(include_reasoning=False), - ) + def test_proxy_recovers_uncached_cursor_tool_history(self) -> None: + with self.assertLogs("deepseek_cursor_proxy", level="WARNING") as captured: + status, payload = post_json( + f"{self.proxy.url}/v1/chat/completions", + third_cursor_request_missing_all_reasoning(), + ) self.assertEqual(status, 200) + self.assertTrue( + payload["choices"][0]["message"]["content"].startswith( + RECOVERY_NOTICE_CONTENT + ) + ) self.assertEqual( - FakeDeepSeekHandler.requests[0]["messages"][1]["reasoning_content"], - PLACEHOLDER_REASONING_CONTENT, + [ + message["role"] + for message in FakeDeepSeekHandler.requests[0]["messages"] + ], + ["system", "user"], + ) + self.assertIn( + "recovered this request", + FakeDeepSeekHandler.requests[0]["messages"][0]["content"], + ) + self.assertEqual( + FakeDeepSeekHandler.requests[0]["messages"][1], + {"role": "user", "content": "Thanks, now continue."}, + ) + self.assertIn( + "cached reasoning_content was unavailable", + "\n".join(captured.output), + ) + + def test_proxy_keeps_deepseek_context_after_recovery_boundary(self) -> None: + status, first = post_json( + f"{self.proxy.url}/v1/chat/completions", + third_cursor_request_missing_all_reasoning(), + ) + self.assertEqual(status, 200) + + recovered_assistant = dict(first["choices"][0]["message"]) + recovered_assistant.pop("reasoning_content", None) + payload = third_cursor_request_missing_all_reasoning() + payload["messages"].append(recovered_assistant) + payload["messages"].append( + { + "role": "tool", + "tool_call_id": "call_date", + "content": "2026-04-24", + } + ) + + status, second = post_json(f"{self.proxy.url}/v1/chat/completions", payload) + + self.assertEqual(status, 200) + self.assertFalse( + second["choices"][0]["message"]["content"].startswith( + RECOVERY_NOTICE_CONTENT + ) + ) + second_upstream_messages = FakeDeepSeekHandler.requests[1]["messages"] + self.assertEqual( + [message["role"] for message in second_upstream_messages], + ["system", "user", "assistant", "tool"], + ) + self.assertEqual( + second_upstream_messages[2]["reasoning_content"], TOOL_REASONING ) @@ -893,6 +948,32 @@ class ReasoningStreamingProxyTests(unittest.TestCase): "Need context.", ) + def test_streaming_recovery_notice_is_visible_in_cursor_content(self) -> None: + payload = third_cursor_request_missing_all_reasoning() + payload["stream"] = True + request = Request( + f"{self.proxy.url}/v1/chat/completions", + data=json.dumps(payload).encode("utf-8"), + method="POST", + headers={ + "Authorization": "Bearer sk-cursor-test", + "Content-Type": "application/json", + }, + ) + + with urlopen(request, timeout=2) as response: + body = response.read().decode("utf-8") + + chunks = [ + json.loads(line.removeprefix("data: ")) + for line in body.splitlines() + if line.startswith("data: {") + ] + self.assertEqual( + chunks[2]["choices"][0]["delta"]["content"], + "\n\n\n" + RECOVERY_NOTICE_CONTENT + FINAL_CONTENT, + ) + class StreamingToolRaceProxyTests(unittest.TestCase): def setUp(self) -> None: diff --git a/tests/test_server.py b/tests/test_server.py index 8b1d675..04ced54 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -41,6 +41,14 @@ class FakeStreamingResponse: return self._lines.pop(0) +class FailingStreamingResponse: + status = 200 + headers = {"Content-Type": "text/event-stream"} + + def readline(self) -> bytes: + raise OSError("record layer failure") + + class BrokenPipeWfile: def write(self, body: bytes) -> None: raise BrokenPipeError("test disconnect") @@ -158,6 +166,26 @@ class ServerTests(unittest.TestCase): self.assertEqual(response.readline_calls, 1) self.assertIn("sending streaming response chunk", "\n".join(captured.output)) + def test_streaming_response_handles_upstream_read_failure(self) -> None: + handler = make_proxy_handler(BytesIO()) + + try: + with self.assertLogs("deepseek_cursor_proxy", level="WARNING") as captured: + sent = handler._proxy_streaming_response( + FailingStreamingResponse(), + "deepseek-v4-pro", + [{"role": "user", "content": "hi"}], + "cache-namespace", + ) + finally: + handler.server.reasoning_store.close() + + self.assertFalse(sent) + self.assertIn( + "upstream streaming response read failed", + "\n".join(captured.output), + ) + if __name__ == "__main__": unittest.main() diff --git a/tests/test_transform.py b/tests/test_transform.py index 0fd9b9a..f6cbc81 100644 --- a/tests/test_transform.py +++ b/tests/test_transform.py @@ -4,9 +4,13 @@ import json import unittest from deepseek_cursor_proxy.config import ProxyConfig -from deepseek_cursor_proxy.reasoning_store import ReasoningStore, conversation_scope +from deepseek_cursor_proxy.reasoning_store import ( + ReasoningStore, + conversation_scope, + message_signature, +) from deepseek_cursor_proxy.transform import ( - PLACEHOLDER_REASONING_CONTENT, + RECOVERY_NOTICE_CONTENT, extract_text_content, prepare_upstream_request, reasoning_cache_namespace, @@ -72,7 +76,11 @@ class TransformTests(unittest.TestCase): ], } - prepared = prepare_upstream_request(payload, ProxyConfig(), self.store) + prepared = prepare_upstream_request( + payload, + ProxyConfig(missing_reasoning_strategy="reject"), + self.store, + ) self.assertEqual(prepared.payload["messages"][1]["content"], "Visible answer.") @@ -135,7 +143,11 @@ class TransformTests(unittest.TestCase): "tool_choice": "required", } - prepared = prepare_upstream_request(payload, ProxyConfig(), self.store) + prepared = prepare_upstream_request( + payload, + ProxyConfig(missing_reasoning_strategy="reject"), + self.store, + ) self.assertEqual(prepared.payload["tool_choice"], "required") @@ -201,7 +213,11 @@ class TransformTests(unittest.TestCase): ], } - prepared = prepare_upstream_request(payload, ProxyConfig(), self.store) + prepared = prepare_upstream_request( + payload, + ProxyConfig(missing_reasoning_strategy="reject"), + self.store, + ) self.assertEqual(prepared.patched_reasoning_messages, 1) self.assertEqual( @@ -339,7 +355,11 @@ class TransformTests(unittest.TestCase): ], } - prepared = prepare_upstream_request(payload, ProxyConfig(), self.store) + prepared = prepare_upstream_request( + payload, + ProxyConfig(missing_reasoning_strategy="reject"), + self.store, + ) self.assertEqual(prepared.patched_reasoning_messages, 1) self.assertEqual( @@ -572,16 +592,21 @@ class TransformTests(unittest.TestCase): ], } - prepared = prepare_upstream_request(payload, ProxyConfig(), self.store) + prepared = prepare_upstream_request( + payload, + ProxyConfig(missing_reasoning_strategy="reject"), + self.store, + ) self.assertEqual(prepared.patched_reasoning_messages, 0) self.assertEqual(prepared.missing_reasoning_messages, 1) self.assertNotIn("reasoning_content", prepared.payload["messages"][1]) - def test_can_insert_placeholder_for_uncached_assistant_tool_call(self) -> None: + def test_can_recover_uncached_tool_history_from_latest_user(self) -> None: payload = { "model": "deepseek-v4-pro", "messages": [ + {"role": "system", "content": "Follow project rules."}, {"role": "user", "content": "read README"}, { "role": "assistant", @@ -602,21 +627,227 @@ class TransformTests(unittest.TestCase): "tool_call_id": "call_uncached", "content": "file text", }, + {"role": "user", "content": "continue with the summary"}, ], } prepared = prepare_upstream_request( payload, - ProxyConfig(missing_reasoning_strategy="placeholder"), + ProxyConfig(missing_reasoning_strategy="recover"), self.store, ) - self.assertEqual(prepared.patched_reasoning_messages, 0) - self.assertEqual(prepared.placeholder_reasoning_messages, 1) self.assertEqual(prepared.missing_reasoning_messages, 0) + self.assertEqual(prepared.recovered_reasoning_messages, 1) + self.assertEqual(prepared.recovery_dropped_messages, 3) + self.assertEqual(prepared.recovery_notice, RECOVERY_NOTICE_CONTENT) self.assertEqual( - prepared.payload["messages"][1]["reasoning_content"], - PLACEHOLDER_REASONING_CONTENT, + [message["role"] for message in prepared.payload["messages"]], + ["system", "system", "user"], + ) + self.assertIn( + "recovered this request", prepared.payload["messages"][1]["content"] + ) + self.assertEqual( + prepared.payload["messages"][2], + {"role": "user", "content": "continue with the summary"}, + ) + + def test_recovery_boundary_preserves_later_deepseek_tool_context(self) -> None: + old_tool_call = { + "id": "call_old", + "type": "function", + "function": { + "name": "read_file", + "arguments": '{"path":"README.md"}', + }, + } + new_tool_call = { + "id": "call_new", + "type": "function", + "function": { + "name": "lookup", + "arguments": '{"query":"new"}', + }, + } + first_recovered = prepare_upstream_request( + { + "model": "deepseek-v4-pro", + "messages": [ + {"role": "user", "content": "old model turn"}, + { + "role": "assistant", + "content": "", + "tool_calls": [old_tool_call], + }, + { + "role": "tool", + "tool_call_id": "call_old", + "content": "old result", + }, + {"role": "user", "content": "continue with DeepSeek"}, + ], + }, + ProxyConfig(missing_reasoning_strategy="recover"), + self.store, + ) + recovered_tool_message = { + "role": "assistant", + "content": RECOVERY_NOTICE_CONTENT, + "reasoning_content": "Need the new lookup.", + "tool_calls": [new_tool_call], + } + self.store.store_assistant_message( + recovered_tool_message, + conversation_scope( + first_recovered.payload["messages"], + first_recovered.cache_namespace, + ), + ) + + prepared = prepare_upstream_request( + { + "model": "deepseek-v4-pro", + "messages": [ + {"role": "user", "content": "old model turn"}, + { + "role": "assistant", + "content": "", + "tool_calls": [old_tool_call], + }, + { + "role": "tool", + "tool_call_id": "call_old", + "content": "old result", + }, + {"role": "user", "content": "continue with DeepSeek"}, + { + "role": "assistant", + "content": RECOVERY_NOTICE_CONTENT, + "tool_calls": [new_tool_call], + }, + { + "role": "tool", + "tool_call_id": "call_new", + "content": "new result", + }, + ], + }, + ProxyConfig(missing_reasoning_strategy="recover"), + self.store, + ) + + self.assertEqual(prepared.missing_reasoning_messages, 0) + self.assertIsNone(prepared.recovery_notice) + self.assertEqual( + [message["role"] for message in prepared.payload["messages"]], + ["system", "user", "assistant", "tool"], + ) + self.assertEqual( + prepared.payload["messages"][2]["reasoning_content"], + "Need the new lookup.", + ) + self.assertEqual( + prepared.payload["messages"][3], + { + "role": "tool", + "tool_call_id": "call_new", + "content": "new result", + }, + ) + + def test_recovery_boundary_accepts_legacy_notice_text(self) -> None: + legacy_recovery_notice = ( + "Note: recovered this DeepSeek chat because older tool-call reasoning " + "was unavailable; continuing with recent context only.\n\n" + ) + old_tool_call = { + "id": "call_old", + "type": "function", + "function": { + "name": "read_file", + "arguments": '{"path":"README.md"}', + }, + } + new_tool_call = { + "id": "call_new", + "type": "function", + "function": { + "name": "lookup", + "arguments": '{"query":"new"}', + }, + } + first_recovered = prepare_upstream_request( + { + "model": "deepseek-v4-pro", + "messages": [ + {"role": "user", "content": "old model turn"}, + { + "role": "assistant", + "content": "", + "tool_calls": [old_tool_call], + }, + { + "role": "tool", + "tool_call_id": "call_old", + "content": "old result", + }, + {"role": "user", "content": "continue with DeepSeek"}, + ], + }, + ProxyConfig(missing_reasoning_strategy="recover"), + self.store, + ) + self.store.store_assistant_message( + { + "role": "assistant", + "content": legacy_recovery_notice, + "reasoning_content": "Need the new lookup.", + "tool_calls": [new_tool_call], + }, + conversation_scope( + first_recovered.payload["messages"], + first_recovered.cache_namespace, + ), + ) + + prepared = prepare_upstream_request( + { + "model": "deepseek-v4-pro", + "messages": [ + {"role": "user", "content": "old model turn"}, + { + "role": "assistant", + "content": "", + "tool_calls": [old_tool_call], + }, + { + "role": "tool", + "tool_call_id": "call_old", + "content": "old result", + }, + {"role": "user", "content": "continue with DeepSeek"}, + { + "role": "assistant", + "content": legacy_recovery_notice, + "tool_calls": [new_tool_call], + }, + { + "role": "tool", + "tool_call_id": "call_new", + "content": "new result", + }, + ], + }, + ProxyConfig(missing_reasoning_strategy="recover"), + self.store, + ) + + self.assertEqual(prepared.missing_reasoning_messages, 0) + self.assertIsNone(prepared.recovery_notice) + self.assertEqual( + prepared.payload["messages"][2]["reasoning_content"], + "Need the new lookup.", ) def test_reports_missing_reasoning_for_uncached_assistant_after_tool_result( @@ -651,7 +882,11 @@ class TransformTests(unittest.TestCase): ], } - prepared = prepare_upstream_request(payload, ProxyConfig(), self.store) + prepared = prepare_upstream_request( + payload, + ProxyConfig(missing_reasoning_strategy="reject"), + self.store, + ) self.assertEqual(prepared.missing_reasoning_messages, 1) self.assertNotIn("reasoning_content", prepared.payload["messages"][3]) @@ -707,7 +942,7 @@ class TransformTests(unittest.TestCase): self.assertNotIn("reasoning_content", prepared.payload["messages"][1]) def test_reasoning_cache_is_namespaced_by_authorization(self) -> None: - config = ProxyConfig() + config = ProxyConfig(missing_reasoning_strategy="reject") prior = [{"role": "user", "content": "read README"}] namespace_a = reasoning_cache_namespace( config, @@ -814,6 +1049,58 @@ class TransformTests(unittest.TestCase): "I need to inspect the repo.", ) + def test_rewrite_response_can_prefix_recovery_notice_before_storing( + self, + ) -> None: + body = json.dumps( + { + "id": "chatcmpl-test", + "object": "chat.completion", + "model": "deepseek-v4-pro", + "choices": [ + { + "index": 0, + "finish_reason": "stop", + "message": { + "role": "assistant", + "content": "Summary.", + "reasoning_content": "Tool result is enough.", + }, + } + ], + } + ).encode() + + request_messages = [ + {"role": "user", "content": "read README"}, + {"role": "tool", "tool_call_id": "call_abc", "content": "file text"}, + ] + rewritten = rewrite_response_body( + body, + "deepseek-v4-pro", + self.store, + request_messages, + content_prefix=RECOVERY_NOTICE_CONTENT, + ) + payload = json.loads(rewritten) + stored_message = { + "role": "assistant", + "content": RECOVERY_NOTICE_CONTENT + "Summary.", + "reasoning_content": "Tool result is enough.", + } + + self.assertEqual( + payload["choices"][0]["message"]["content"], + RECOVERY_NOTICE_CONTENT + "Summary.", + ) + self.assertEqual( + self.store.get( + f"scope:{conversation_scope(request_messages)}:signature:" + f"{message_signature(stored_message)}" + ), + "Tool result is enough.", + ) + def test_rewrite_response_preserves_prompt_cache_usage_fields(self) -> None: body = json.dumps( {