From 87bef7b66077576c399bb4a990e78d2efb8a063b Mon Sep 17 00:00:00 2001
From: Yixing Lao <yixing.lao@gmail.com>
Date: Fri, 24 Apr 2026 22:44:26 +0800
Subject: [PATCH] revert(proxy): use fallback instead of fail-closed cache (#7)

---
 README.md                                    |  19 +-
 config.example.yaml                          |   6 -
 src/deepseek_cursor_proxy/config.py          |  46 ----
 src/deepseek_cursor_proxy/reasoning_store.py |  68 +----
 src/deepseek_cursor_proxy/server.py          | 147 ++--------
 src/deepseek_cursor_proxy/streaming.py       |  30 +--
 src/deepseek_cursor_proxy/transform.py       | 150 ++++-------
 tests/test_config.py                         |   8 -
 tests/test_proxy_end_to_end.py               | 211 +--------------
 tests/test_reasoning_store.py                |  46 +---
 tests/test_streaming.py                      |  78 ------
 tests/test_transform.py                      | 266 ++-----------------
 12 files changed, 100 insertions(+), 975 deletions(-)
diff --git a/README.md b/README.md
index 4ab85e5..d85b8dc 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@ Compatibility proxy connecting Cursor to DeepSeek thinking models (`deepseek-v4-
 
 ## What It Does
 
-- ✅ Caches DeepSeek `reasoning_content` from regular and streamed responses, then restores it on later tool-call turns when Cursor omits it. If the exact original reasoning is unavailable, the proxy fails closed instead of sending a fake placeholder. See [DeepSeek docs](https://api-docs.deepseek.com/guides/thinking_mode#tool-calls) for more details.
+- ✅ Caches DeepSeek `reasoning_content` from regular and streamed responses, then restores it on later tool-call turns when Cursor omits it. See [DeepSeek docs](https://api-docs.deepseek.com/guides/thinking_mode#tool-calls) for more details.
 - ✅ Mirrors streamed `reasoning_content` into Cursor-visible `<think>...</think>` text so that thinking tokens are shown in Cursor's UI. For BYOK/proxy mode, Cursor renders this as normal text, not as a native collapsible thinking block.
 - ✅ Starts an ngrok tunnel so Cursor can reach the local proxy through a public HTTPS URL.
 - ✅ Provides other compatibility fixes to make DeepSeek models run well in Cursor.
@@ -53,8 +53,6 @@ In Cursor, add the DeepSeek custom model and point it at this proxy:
 - API Key: your DeepSeek API key
 - Base URL: your ngrok HTTPS URL with the `/v1` API version path
 
-The proxy respects the DeepSeek model name Cursor sends, such as `deepseek-v4-pro` or `deepseek-v4-flash`. The `model` field in `config.yaml` is only the fallback used when a request does not include a model.
-
 For example, if ngrok dashboard shows `https://example.ngrok-free.app`, use:
 
 ```text
@@ -96,15 +94,6 @@ Select `deepseek-v4-pro` in Cursor and use chat or agent mode as usual.
 
 ![Chatting with DeepSeek in Cursor](assets/cursor_chat.png)
 
-## How It Works
-
-DeepSeek's [thinking mode](https://api-docs.deepseek.com/guides/thinking_mode#tool-calls) requires `reasoning_content` from assistant messages in tool-call sequences to be passed back in later requests. Cursor may omit this field, causing DeepSeek to return a 400 error. This proxy sits between Cursor and DeepSeek (`Cursor → ngrok → proxy → DeepSeek API`) and repairs requests when it has the exact original reasoning cached.
-
-- Core fix: every DeepSeek response, streaming or non-streaming, has its `reasoning_content` stored in a local SQLite cache keyed by message signature, tool-call ID, and tool-call function signature. On outgoing thinking-mode requests, the proxy restores missing `reasoning_content` for tool-call-related assistant messages and sends the complete history to DeepSeek. If the cache is cold, such as after a proxy restart, it returns a local error instead of fabricating reasoning.
-- Multi-conversation isolation: cache keys are scoped by a SHA-256 hash of the canonical conversation prefix (roles, content, tool calls, excluding `reasoning_content`) plus the upstream model/configuration and an API-key hash. Concurrent or interleaved threads with different histories get different scopes, so reused tool-call IDs do not collide. Byte-identical cloned histories are indistinguishable unless Cursor sends a differentiating history.
-- DeepSeek [prefix caching](https://api-docs.deepseek.com/guides/kv_cache) compatibility: the proxy does not inject synthetic thread IDs, timestamps, or cache-control messages into the prompt. When it restores cached reasoning, it restores the exact original string, preserving repeated prefixes for DeepSeek's automatic best-effort context cache.
-- Additional compatibility fixes: the proxy converts legacy `functions`/`function_call` fields to `tools`/`tool_choice`, preserves required and named tool-choice semantics, normalizes `reasoning_effort` aliases per DeepSeek docs, strips mirrored `<think>` blocks from assistant content, converts multi-part content arrays to plain text, logs DeepSeek prompt-cache usage when available, and mirrors `reasoning_content` into Cursor-visible `<think>...</think>` blocks for thinking display.
-
 ## Debugging
 
 Run with verbose output:
@@ -125,12 +114,6 @@ Use another config file:
 deepseek-cursor-proxy --config ./dev.config.yaml
 ```
 
-Clear the local reasoning cache:
-
-```bash
-deepseek-cursor-proxy --clear-reasoning-cache
-```
-
 Run tests:
 
 ```bash
diff --git a/config.example.yaml b/config.example.yaml
index 4aa6462..f5c950c 100644
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -1,8 +1,6 @@
 # This file was created automatically at ~/.deepseek-cursor-proxy/config.yaml.
 # API keys are read from Cursor's Authorization header and forwarded upstream.
 
-# `model` is the fallback when a request has no model; Cursor's requested
-# DeepSeek model name is otherwise respected.
 base_url: https://api.deepseek.com
 model: deepseek-v4-pro
 thinking: enabled
@@ -14,9 +12,5 @@ port: 9000
 ngrok: true
 verbose: false
 request_timeout: 300
-max_request_body_bytes: 20971520
-cors: false
 
 reasoning_content_path: reasoning_content.sqlite3
-reasoning_cache_max_age_seconds: 604800
-reasoning_cache_max_rows: 10000
diff --git a/src/deepseek_cursor_proxy/config.py b/src/deepseek_cursor_proxy/config.py
index 50f23f2..dc00a06 100644
--- a/src/deepseek_cursor_proxy/config.py
+++ b/src/deepseek_cursor_proxy/config.py
@@ -18,8 +18,6 @@ MISSING = object()
 DEFAULT_CONFIG_TEXT = """# This file was created automatically at ~/.deepseek-cursor-proxy/config.yaml.
 # API keys are read from Cursor's Authorization header and forwarded upstream.
 
-# `model` is the fallback when a request has no model; Cursor's requested
-# DeepSeek model name is otherwise respected.
 base_url: https://api.deepseek.com
 model: deepseek-v4-pro
 thinking: enabled
@@ -31,12 +29,8 @@ port: 9000
 ngrok: true
 verbose: false
 request_timeout: 300
-max_request_body_bytes: 20971520
-cors: false
 
 reasoning_content_path: reasoning_content.sqlite3
-reasoning_cache_max_age_seconds: 604800
-reasoning_cache_max_rows: 10000
 """
 
 
@@ -169,12 +163,8 @@ class ProxyConfig:
     thinking: str = "enabled"
     reasoning_effort: str = "high"
     request_timeout: float = 300.0
-    max_request_body_bytes: int = 20 * 1024 * 1024
     reasoning_content_path: Path = field(default_factory=default_reasoning_content_path)
-    reasoning_cache_max_age_seconds: int = 7 * 24 * 60 * 60
-    reasoning_cache_max_rows: int = 10000
     cursor_display_reasoning: bool = True
-    cors: bool = False
     verbose: bool = False
     ngrok: bool = False
 
@@ -270,15 +260,6 @@ class ProxyConfig:
                 ),
                 300.0,
             ),
-            max_request_body_bytes=as_int(
-                setting_value(
-                    settings,
-                    live_env,
-                    "max_request_body_bytes",
-                    "PROXY_MAX_REQUEST_BODY_BYTES",
-                ),
-                20 * 1024 * 1024,
-            ),
             reasoning_content_path=as_path(
                 setting_value(
                     settings,
@@ -289,24 +270,6 @@ class ProxyConfig:
                 default_reasoning_content_path(),
                 config_dir,
             ),
-            reasoning_cache_max_age_seconds=as_int(
-                setting_value(
-                    settings,
-                    live_env,
-                    "reasoning_cache_max_age_seconds",
-                    "REASONING_CACHE_MAX_AGE_SECONDS",
-                ),
-                7 * 24 * 60 * 60,
-            ),
-            reasoning_cache_max_rows=as_int(
-                setting_value(
-                    settings,
-                    live_env,
-                    "reasoning_cache_max_rows",
-                    "REASONING_CACHE_MAX_ROWS",
-                ),
-                10000,
-            ),
             cursor_display_reasoning=as_bool(
                 setting_value(
                     settings,
@@ -316,15 +279,6 @@ class ProxyConfig:
                 ),
                 True,
             ),
-            cors=as_bool(
-                setting_value(
-                    settings,
-                    live_env,
-                    "cors",
-                    "PROXY_CORS",
-                ),
-                False,
-            ),
             verbose=as_bool(
                 setting_value(
                     settings,
diff --git a/src/deepseek_cursor_proxy/reasoning_store.py b/src/deepseek_cursor_proxy/reasoning_store.py
index 385c974..14f8997 100644
--- a/src/deepseek_cursor_proxy/reasoning_store.py
+++ b/src/deepseek_cursor_proxy/reasoning_store.py
@@ -76,11 +76,8 @@ def canonical_scope_message(message: dict[str, Any]) -> dict[str, Any]:
     return canonical
 
 
-def conversation_scope(messages: list[dict[str, Any]], namespace: str = "") -> str:
-    scope_messages = [canonical_scope_message(message) for message in messages]
-    payload: Any = scope_messages
-    if namespace:
-        payload = {"namespace": namespace, "messages": scope_messages}
+def conversation_scope(messages: list[dict[str, Any]]) -> str:
+    payload = [canonical_scope_message(message) for message in messages]
     canonical = json.dumps(
         payload, ensure_ascii=False, sort_keys=True, separators=(",", ":")
     )
@@ -88,14 +85,7 @@ def conversation_scope(messages: list[dict[str, Any]], namespace: str = "") -> s
 
 
 class ReasoningStore:
-    def __init__(
-        self,
-        reasoning_content_path: str | Path,
-        max_age_seconds: int | None = None,
-        max_rows: int | None = None,
-    ) -> None:
-        self.max_age_seconds = max_age_seconds
-        self.max_rows = max_rows
+    def __init__(self, reasoning_content_path: str | Path) -> None:
         if str(reasoning_content_path) == ":memory:":
             self.reasoning_content_path: str | Path = ":memory:"
         else:
@@ -120,14 +110,13 @@ class ReasoningStore:
             """
         )
         self._conn.commit()
-        self.prune()
 
     def close(self) -> None:
         with self._lock:
             self._conn.close()
 
     def put(self, key: str, reasoning: str, message: dict[str, Any]) -> None:
-        if not isinstance(reasoning, str):
+        if not reasoning:
             return
         message_json = json.dumps(message, ensure_ascii=False, sort_keys=True)
         with self._lock:
@@ -142,7 +131,6 @@ class ReasoningStore:
                 """,
                 (key, reasoning, message_json, time.time()),
             )
-            self._prune_locked()
             self._conn.commit()
 
     def get(self, key: str) -> str | None:
@@ -159,7 +147,7 @@ class ReasoningStore:
         if message.get("role") != "assistant":
             return 0
         reasoning = message.get("reasoning_content")
-        if not isinstance(reasoning, str):
+        if not isinstance(reasoning, str) or not reasoning:
             return 0
 
         keys = [f"scope:{scope}:signature:{message_signature(message)}"]
@@ -178,11 +166,11 @@ class ReasoningStore:
 
     def lookup_for_message(self, message: dict[str, Any], scope: str) -> str | None:
         reasoning = self.get(f"scope:{scope}:signature:{message_signature(message)}")
-        if reasoning is not None:
+        if reasoning:
             return reasoning
         for tool_call_id in tool_call_ids(message):
             reasoning = self.get(f"scope:{scope}:tool_call:{tool_call_id}")
-            if reasoning is not None:
+            if reasoning:
                 return reasoning
         for tool_call in message.get("tool_calls") or []:
             if not isinstance(tool_call, dict):
@@ -190,46 +178,6 @@ class ReasoningStore:
             reasoning = self.get(
                 f"scope:{scope}:tool_call_signature:{tool_call_signature(tool_call)}"
             )
-            if reasoning is not None:
+            if reasoning:
                 return reasoning
         return None
-
-    def clear(self) -> int:
-        with self._lock:
-            row = self._conn.execute("SELECT COUNT(*) FROM reasoning_cache").fetchone()
-            count = int(row[0] if row else 0)
-            self._conn.execute("DELETE FROM reasoning_cache")
-            self._conn.commit()
-        return count
-
-    def prune(self) -> int:
-        with self._lock:
-            deleted = self._prune_locked()
-            self._conn.commit()
-        return deleted
-
-    def _prune_locked(self) -> int:
-        deleted = 0
-        if self.max_age_seconds is not None and self.max_age_seconds > 0:
-            cutoff = time.time() - self.max_age_seconds
-            cursor = self._conn.execute(
-                "DELETE FROM reasoning_cache WHERE created_at < ?",
-                (cutoff,),
-            )
-            deleted += cursor.rowcount if cursor.rowcount != -1 else 0
-
-        if self.max_rows is not None and self.max_rows > 0:
-            cursor = self._conn.execute(
-                """
-                DELETE FROM reasoning_cache
-                WHERE key NOT IN (
-                    SELECT key
-                    FROM reasoning_cache
-                    ORDER BY created_at DESC
-                    LIMIT ?
-                )
-                """,
-                (self.max_rows,),
-            )
-            deleted += cursor.rowcount if cursor.rowcount != -1 else 0
-        return deleted
diff --git a/src/deepseek_cursor_proxy/server.py b/src/deepseek_cursor_proxy/server.py
index 4fdce0e..339622a 100644
--- a/src/deepseek_cursor_proxy/server.py
+++ b/src/deepseek_cursor_proxy/server.py
@@ -29,10 +29,6 @@ from .transform import prepare_upstream_request, rewrite_response_body
 LOG = logging.getLogger("deepseek_cursor_proxy")
 
 
-class RequestBodyTooLarge(ValueError):
-    pass
-
-
 class DeepSeekProxyServer(ThreadingHTTPServer):
     config: ProxyConfig
     reasoning_store: ReasoningStore
@@ -106,12 +102,6 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler):
 
         try:
             payload = self._read_json_body()
-        except RequestBodyTooLarge as exc:
-            LOG.warning(
-                "rejected request path=%s status=413 reason=%s", request_path, exc
-            )
-            self._send_json(413, {"error": {"message": str(exc)}})
-            return
         except ValueError as exc:
             LOG.warning(
                 "rejected request path=%s status=400 reason=%s", request_path, exc
@@ -124,49 +114,28 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler):
 
         LOG.info("cursor request: %s", summarize_chat_payload(payload))
 
-        prepared = prepare_upstream_request(
-            payload,
-            self.config,
-            self.reasoning_store,
-            authorization=cursor_authorization,
-        )
+        prepared = prepare_upstream_request(payload, self.config, self.reasoning_store)
         if prepared.patched_reasoning_messages:
             LOG.info(
                 "restored reasoning_content on %s assistant message(s)",
                 prepared.patched_reasoning_messages,
             )
-        if prepared.missing_reasoning_messages:
+        if prepared.fallback_reasoning_messages:
             LOG.warning(
-                "rejected request path=%s status=409 reason=missing_reasoning_content count=%s",
-                request_path,
-                prepared.missing_reasoning_messages,
+                "added compatibility reasoning_content placeholder on %s uncached assistant message(s)",
+                prepared.fallback_reasoning_messages,
             )
-            self._send_json(
-                409,
-                {
-                    "error": {
-                        "message": (
-                            "Missing cached DeepSeek reasoning_content for a "
-                            "thinking-mode tool-call history. Retry the tool-call "
-                            "turn so the proxy can capture the original reasoning."
-                        ),
-                        "type": "missing_reasoning_content",
-                        "code": "missing_reasoning_content",
-                    }
-                },
-            )
-            return
 
         if self.config.verbose:
             LOG.info(
                 (
                     "upstream request metadata: original_model=%s upstream_model=%s "
-                    "patched_reasoning=%s missing_reasoning=%s %s"
+                    "patched_reasoning=%s fallback_reasoning=%s %s"
                 ),
                 prepared.original_model,
                 prepared.upstream_model,
                 prepared.patched_reasoning_messages,
-                prepared.missing_reasoning_messages,
+                prepared.fallback_reasoning_messages,
                 summarize_chat_payload(prepared.payload),
             )
 
@@ -222,28 +191,22 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler):
                 )
             if prepared.payload.get("stream"):
                 self._proxy_streaming_response(
-                    response,
-                    prepared.original_model,
-                    prepared.payload["messages"],
-                    prepared.cache_namespace,
+                    response, prepared.original_model, prepared.payload["messages"]
                 )
             else:
                 self._proxy_regular_response(
-                    response,
-                    prepared.original_model,
-                    prepared.payload["messages"],
-                    prepared.cache_namespace,
+                    response, prepared.original_model, prepared.payload["messages"]
                 )
             LOG.info(
                 (
                     "request complete status=%s stream=%s elapsed_ms=%s "
-                    "patched_reasoning=%s missing_reasoning=%s"
+                    "patched_reasoning=%s fallback_reasoning=%s"
                 ),
                 upstream_status,
                 bool(prepared.payload.get("stream")),
                 elapsed_ms(started),
                 prepared.patched_reasoning_messages,
-                prepared.missing_reasoning_messages,
+                prepared.fallback_reasoning_messages,
             )
 
     def _cursor_authorization(self) -> str | None:
@@ -254,8 +217,6 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler):
         return f"Bearer {token.strip()}"
 
     def _send_cors_headers(self) -> None:
-        if not self.config.cors:
-            return
         self.send_header("Access-Control-Allow-Origin", "*")
         self.send_header("Access-Control-Allow-Methods", "POST, GET, OPTIONS")
         self.send_header(
@@ -278,37 +239,18 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler):
 
     def _send_models(self) -> None:
         created = int(time.time())
-        model_ids = list(
-            dict.fromkeys(
-                [
-                    self.config.upstream_model,
-                    "deepseek-v4-pro",
-                    "deepseek-v4-flash",
-                ]
-            )
-        )
         models = [
             {
-                "id": model_id,
+                "id": self.config.upstream_model,
                 "object": "model",
                 "created": created,
                 "owned_by": "deepseek",
             }
-            for model_id in model_ids
         ]
         self._send_json(200, {"object": "list", "data": models})
 
     def _read_json_body(self) -> dict[str, Any]:
-        try:
-            length = int(self.headers.get("Content-Length") or 0)
-        except ValueError as exc:
-            raise ValueError("Invalid Content-Length") from exc
-        if length < 0:
-            raise ValueError("Invalid Content-Length")
-        if length > self.config.max_request_body_bytes:
-            raise RequestBodyTooLarge(
-                f"Request body is too large; limit is {self.config.max_request_body_bytes} bytes"
-            )
+        length = int(self.headers.get("Content-Length") or 0)
         raw_body = self.rfile.read(length)
         if not raw_body:
             raise ValueError("Request body is empty")
@@ -351,20 +293,14 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler):
         response: Any,
         original_model: str,
         request_messages: list[dict[str, Any]],
-        cache_namespace: str,
     ) -> None:
         body = read_response_body(response)
         try:
             body = rewrite_response_body(
-                body,
-                original_model,
-                self.reasoning_store,
-                request_messages,
-                cache_namespace,
+                body, original_model, self.reasoning_store, request_messages
             )
         except (json.JSONDecodeError, UnicodeDecodeError) as exc:
             LOG.warning("failed to rewrite upstream JSON response: %s", exc)
-        log_cache_usage_from_body(body)
 
         if self.config.verbose:
             log_bytes("cursor response body", body)
@@ -383,7 +319,6 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler):
         response: Any,
         original_model: str,
         request_messages: list[dict[str, Any]],
-        cache_namespace: str,
     ) -> None:
         self.send_response(getattr(response, "status", 200))
         self._send_cors_headers()
@@ -399,7 +334,7 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler):
             if self.config.cursor_display_reasoning
             else None
         )
-        scope = conversation_scope(request_messages, cache_namespace)
+        scope = conversation_scope(request_messages)
         finalized = False
         while True:
             line = response.readline()
@@ -453,10 +388,6 @@ class DeepSeekProxyHandler(BaseHTTPRequestHandler):
 
         if isinstance(chunk, dict):
             accumulator.ingest_chunk(chunk)
-            stored = accumulator.store_finished_reasoning(self.reasoning_store, scope)
-            if stored:
-                LOG.info("stored %s streaming reasoning cache key(s)", stored)
-            log_cache_usage(chunk.get("usage"))
             if display_adapter is not None:
                 display_adapter.rewrite_chunk(chunk)
             if "model" in chunk:
@@ -490,7 +421,7 @@ def build_arg_parser() -> argparse.ArgumentParser:
     )
     parser.add_argument(
         "--model",
-        help="Fallback DeepSeek model when the request has no model, default from config, DEEPSEEK_MODEL, or deepseek-v4-pro",
+        help="Upstream DeepSeek model, default from config, DEEPSEEK_MODEL, or deepseek-v4-pro",
     )
     parser.add_argument(
         "--base-url",
@@ -519,11 +450,6 @@ def build_arg_parser() -> argparse.ArgumentParser:
         action="store_true",
         help="Do not mirror reasoning_content into Cursor-visible <think> content",
     )
-    parser.add_argument(
-        "--clear-reasoning-cache",
-        action="store_true",
-        help="Clear the local reasoning_content SQLite cache and exit",
-    )
     return parser
 
 
@@ -548,25 +474,6 @@ def log_bytes(label: str, body: bytes) -> None:
     log_json(label, payload)
 
 
-def log_cache_usage_from_body(body: bytes) -> None:
-    try:
-        payload = json.loads(body.decode("utf-8"))
-    except (json.JSONDecodeError, UnicodeDecodeError):
-        return
-    if isinstance(payload, dict):
-        log_cache_usage(payload.get("usage"))
-
-
-def log_cache_usage(usage: Any) -> None:
-    if not isinstance(usage, dict):
-        return
-    hit = usage.get("prompt_cache_hit_tokens")
-    miss = usage.get("prompt_cache_miss_tokens")
-    if hit is None and miss is None:
-        return
-    LOG.info("deepseek prompt cache: hit_tokens=%s miss_tokens=%s", hit, miss)
-
-
 def sse_data(payload: dict[str, Any]) -> bytes:
     return (
         b"data: "
@@ -602,16 +509,6 @@ def read_response_body(response: Any) -> bytes:
     return body
 
 
-def warn_if_insecure_upstream(url: str) -> None:
-    parsed = urlparse(url)
-    if parsed.scheme != "http":
-        return
-    host = parsed.hostname or ""
-    if host in {"127.0.0.1", "localhost", "::1"}:
-        return
-    LOG.warning("upstream base_url uses plain HTTP; bearer tokens may be exposed")
-
-
 def main(argv: list[str] | None = None) -> int:
     logging.basicConfig(
         level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s"
@@ -642,24 +539,14 @@ def main(argv: list[str] | None = None) -> int:
     if updates:
         config = replace(config, **updates)
 
-    warn_if_insecure_upstream(config.upstream_base_url)
-    store = ReasoningStore(
-        config.reasoning_content_path,
-        max_age_seconds=config.reasoning_cache_max_age_seconds,
-        max_rows=config.reasoning_cache_max_rows,
-    )
-    if args.clear_reasoning_cache:
-        deleted = store.clear()
-        LOG.info("cleared %s reasoning cache row(s)", deleted)
-        store.close()
-        return 0
+    store = ReasoningStore(config.reasoning_content_path)
     server = DeepSeekProxyServer((config.host, config.port), DeepSeekProxyHandler)
     server.config = config
     server.reasoning_store = store
 
     LOG.info("listening on http://%s:%s/v1", config.host, config.port)
     LOG.info(
-        "forwarding to %s/chat/completions default_model=%s",
+        "forwarding to %s/chat/completions as %s",
         config.upstream_base_url,
         config.upstream_model,
     )
diff --git a/src/deepseek_cursor_proxy/streaming.py b/src/deepseek_cursor_proxy/streaming.py
index b00de7e..9221401 100644
--- a/src/deepseek_cursor_proxy/streaming.py
+++ b/src/deepseek_cursor_proxy/streaming.py
@@ -16,7 +16,6 @@ class StreamingChoice:
     role: str = "assistant"
     content: str = ""
     reasoning_content: str = ""
-    has_reasoning_content: bool = False
     tool_calls: list[dict[str, Any]] = field(default_factory=list)
     finish_reason: str | None = None
 
@@ -25,7 +24,7 @@ class StreamingChoice:
             "role": self.role,
             "content": self.content,
         }
-        if self.has_reasoning_content:
+        if self.reasoning_content:
             message["reasoning_content"] = self.reasoning_content
         if self.tool_calls:
             message["tool_calls"] = self.tool_calls
@@ -35,7 +34,6 @@ class StreamingChoice:
 class StreamAccumulator:
     def __init__(self) -> None:
         self.choices: dict[int, StreamingChoice] = {}
-        self._stored_choices: set[int] = set()
 
     def ingest_chunk(self, chunk: dict[str, Any]) -> None:
         choices = chunk.get("choices")
@@ -65,22 +63,14 @@ class StreamAccumulator:
 
             reasoning_content = delta.get("reasoning_content")
             if isinstance(reasoning_content, str):
-                choice.has_reasoning_content = True
                 choice.reasoning_content += reasoning_content
 
             self._merge_tool_call_deltas(choice, delta.get("tool_calls"))
 
     def store_reasoning(self, store: ReasoningStore, scope: str) -> int:
         stored = 0
-        for index, choice in self.choices.items():
-            stored += self._store_choice(index, choice, store, scope)
-        return stored
-
-    def store_finished_reasoning(self, store: ReasoningStore, scope: str) -> int:
-        stored = 0
-        for index, choice in self.choices.items():
-            if choice.finish_reason is not None:
-                stored += self._store_choice(index, choice, store, scope)
+        for choice in self.choices.values():
+            stored += store.store_assistant_message(choice.to_message(), scope)
         return stored
 
     def messages(self) -> list[dict[str, Any]]:
@@ -125,20 +115,6 @@ class StreamAccumulator:
                     function_delta["arguments"]
                 )
 
-    def _store_choice(
-        self,
-        index: int,
-        choice: StreamingChoice,
-        store: ReasoningStore,
-        scope: str,
-    ) -> int:
-        if index in self._stored_choices:
-            return 0
-        stored = store.store_assistant_message(choice.to_message(), scope)
-        if stored:
-            self._stored_choices.add(index)
-        return stored
-
 
 class CursorReasoningDisplayAdapter:
     """Mirror reasoning_content into content for Cursor's visible thinking UI path."""
diff --git a/src/deepseek_cursor_proxy/transform.py b/src/deepseek_cursor_proxy/transform.py
index c7177b1..51945c6 100644
--- a/src/deepseek_cursor_proxy/transform.py
+++ b/src/deepseek_cursor_proxy/transform.py
@@ -1,7 +1,6 @@
 from __future__ import annotations
 
 from dataclasses import dataclass
-import hashlib
 import json
 import re
 from typing import Any
@@ -73,9 +72,8 @@ class PreparedRequest:
     payload: dict[str, Any]
     original_model: str
     upstream_model: str
-    cache_namespace: str
     patched_reasoning_messages: int
-    missing_reasoning_messages: int
+    fallback_reasoning_messages: int
 
 
 def normalize_reasoning_effort(value: Any) -> str:
@@ -160,30 +158,26 @@ def legacy_function_to_tool(function: Any) -> dict[str, Any]:
 
 def convert_function_call(function_call: Any) -> Any:
     if isinstance(function_call, str):
-        if function_call in {"auto", "none", "required"}:
+        if function_call in {"auto", "none"}:
             return function_call
+        if function_call == "required":
+            return "auto"
         return None
     if isinstance(function_call, dict) and function_call.get("name"):
-        return {
-            "type": "function",
-            "function": {"name": str(function_call["name"])},
-        }
+        return "auto"
     return None
 
 
 def normalize_tool_choice(tool_choice: Any) -> Any:
     if isinstance(tool_choice, str):
-        if tool_choice in {"auto", "none", "required"}:
+        if tool_choice in {"auto", "none"}:
             return tool_choice
+        if tool_choice == "required":
+            return "auto"
         return None
     if isinstance(tool_choice, dict):
         if tool_choice.get("type") == "function":
-            function = tool_choice.get("function")
-            if isinstance(function, dict) and function.get("name"):
-                return {
-                    "type": "function",
-                    "function": {"name": str(function["name"])},
-                }
+            return "auto"
         return tool_choice
     return tool_choice
 
@@ -192,9 +186,6 @@ def normalize_message(
     message: Any,
     store: ReasoningStore | None,
     prior_messages: list[dict[str, Any]],
-    cache_namespace: str,
-    repair_reasoning: bool,
-    keep_reasoning: bool,
 ) -> tuple[dict[str, Any], bool, bool]:
     if not isinstance(message, dict):
         message = {"role": "user", "content": str(message)}
@@ -219,62 +210,49 @@ def normalize_message(
         ]
 
     patched = False
-    missing = False
+    fallback = False
     if normalized["role"] == "assistant":
-        if not keep_reasoning:
+        reasoning = normalized.get("reasoning_content")
+        if not isinstance(reasoning, str) or not reasoning:
             normalized.pop("reasoning_content", None)
-        elif repair_reasoning:
-            reasoning = normalized.get("reasoning_content")
-            if not isinstance(reasoning, str):
-                normalized.pop("reasoning_content", None)
-                needs_reasoning = assistant_needs_reasoning_for_tool_context(
-                    normalized, prior_messages
+            if store is not None:
+                restored = store.lookup_for_message(
+                    normalized, conversation_scope(prior_messages)
                 )
-                if needs_reasoning and store is not None:
-                    restored = store.lookup_for_message(
-                        normalized,
-                        conversation_scope(prior_messages, cache_namespace),
-                    )
-                    if restored is not None:
-                        normalized["reasoning_content"] = restored
-                        patched = True
-                if needs_reasoning and not patched:
-                    missing = True
+                if restored:
+                    normalized["reasoning_content"] = restored
+                    patched = True
+            if not patched and assistant_needs_reasoning_for_tool_context(
+                normalized, prior_messages
+            ):
+                normalized["reasoning_content"] = fallback_reasoning_content(normalized)
+                fallback = True
 
     allowed_fields = ROLE_MESSAGE_FIELDS.get(str(normalized["role"]), MESSAGE_FIELDS)
     normalized = {
         key: value for key, value in normalized.items() if key in allowed_fields
     }
-    return normalized, patched, missing
+    return normalized, patched, fallback
 
 
 def normalize_messages(
-    messages: Any,
-    store: ReasoningStore | None,
-    cache_namespace: str,
-    repair_reasoning: bool,
-    keep_reasoning: bool,
+    messages: Any, store: ReasoningStore | None
 ) -> tuple[list[dict[str, Any]], int, int]:
     if not isinstance(messages, list):
         return [], 0, 0
     normalized_messages: list[dict[str, Any]] = []
     patched_count = 0
-    missing_count = 0
+    fallback_count = 0
     for message in messages:
-        normalized, patched, missing = normalize_message(
-            message,
-            store,
-            normalized_messages,
-            cache_namespace,
-            repair_reasoning,
-            keep_reasoning,
+        normalized, patched, fallback = normalize_message(
+            message, store, normalized_messages
         )
         normalized_messages.append(normalized)
         if patched:
             patched_count += 1
-        if missing:
-            missing_count += 1
-    return normalized_messages, patched_count, missing_count
+        if fallback:
+            fallback_count += 1
+    return normalized_messages, patched_count, fallback_count
 
 
 def assistant_needs_reasoning_for_tool_context(
@@ -292,40 +270,22 @@ def assistant_needs_reasoning_for_tool_context(
     return False
 
 
+def fallback_reasoning_content(message: dict[str, Any]) -> str:
+    if message.get("tool_calls"):
+        return "Compatibility placeholder: Cursor omitted DeepSeek reasoning_content for this tool-call turn."
+    return "Compatibility placeholder: Cursor omitted DeepSeek reasoning_content for this tool-result turn."
+
+
 def upstream_model_for(original_model: str, config: ProxyConfig) -> str:
-    if original_model.startswith("deepseek-"):
+    if config.allow_model_passthrough and original_model.startswith("deepseek-"):
         return original_model
     return config.upstream_model
 
 
-def reasoning_cache_namespace(
-    config: ProxyConfig,
-    upstream_model: str,
-    thinking: Any,
-    reasoning_effort: Any,
-    authorization: str | None = None,
-) -> str:
-    auth_hash = ""
-    if authorization:
-        auth_hash = hashlib.sha256(authorization.encode("utf-8")).hexdigest()
-    payload = {
-        "base_url": config.upstream_base_url,
-        "model": upstream_model,
-        "thinking": thinking,
-        "reasoning_effort": reasoning_effort,
-        "authorization_hash": auth_hash,
-    }
-    canonical = json.dumps(
-        payload, ensure_ascii=False, sort_keys=True, separators=(",", ":")
-    )
-    return hashlib.sha256(canonical.encode("utf-8")).hexdigest()
-
-
 def prepare_upstream_request(
     payload: dict[str, Any],
     config: ProxyConfig,
     store: ReasoningStore | None,
-    authorization: str | None = None,
 ) -> PreparedRequest:
     original_model = str(payload.get("model") or config.upstream_model)
     upstream_model = upstream_model_for(original_model, config)
@@ -337,6 +297,10 @@ def prepare_upstream_request(
         prepared["max_tokens"] = payload["max_completion_tokens"]
 
     prepared["model"] = upstream_model
+    messages, patched_count, fallback_count = normalize_messages(
+        payload.get("messages"), store
+    )
+    prepared["messages"] = messages
 
     if "tools" in prepared and isinstance(prepared["tools"], list):
         prepared["tools"] = [normalize_tool(tool) for tool in prepared["tools"]]
@@ -361,37 +325,17 @@ def prepare_upstream_request(
 
     thinking = prepared.get("thinking")
     thinking_enabled = isinstance(thinking, dict) and thinking.get("type") == "enabled"
-    thinking_disabled = (
-        isinstance(thinking, dict) and thinking.get("type") == "disabled"
-    )
     if thinking_enabled:
         prepared["reasoning_effort"] = normalize_reasoning_effort(
             prepared.get("reasoning_effort") or config.reasoning_effort
         )
 
-    cache_namespace = reasoning_cache_namespace(
-        config,
-        upstream_model,
-        prepared.get("thinking"),
-        prepared.get("reasoning_effort"),
-        authorization,
-    )
-    messages, patched_count, missing_count = normalize_messages(
-        payload.get("messages"),
-        store,
-        cache_namespace,
-        repair_reasoning=thinking_enabled,
-        keep_reasoning=not thinking_disabled,
-    )
-    prepared["messages"] = messages
-
     return PreparedRequest(
         payload=prepared,
         original_model=original_model,
         upstream_model=upstream_model,
-        cache_namespace=cache_namespace,
         patched_reasoning_messages=patched_count,
-        missing_reasoning_messages=missing_count,
+        fallback_reasoning_messages=fallback_count,
     )
 
 
@@ -399,7 +343,6 @@ def record_response_reasoning(
     response_payload: dict[str, Any],
     store: ReasoningStore | None,
     request_messages: list[dict[str, Any]],
-    cache_namespace: str = "",
 ) -> int:
     if store is None:
         return 0
@@ -407,7 +350,7 @@ def record_response_reasoning(
     choices = response_payload.get("choices")
     if not isinstance(choices, list):
         return stored
-    scope = conversation_scope(request_messages, cache_namespace)
+    scope = conversation_scope(request_messages)
     for choice in choices:
         if not isinstance(choice, dict):
             continue
@@ -422,13 +365,10 @@ def rewrite_response_body(
     original_model: str,
     store: ReasoningStore | None,
     request_messages: list[dict[str, Any]],
-    cache_namespace: str = "",
 ) -> bytes:
     response_payload = json.loads(body.decode("utf-8"))
     if isinstance(response_payload, dict):
-        record_response_reasoning(
-            response_payload, store, request_messages, cache_namespace
-        )
+        record_response_reasoning(response_payload, store, request_messages)
         if "model" in response_payload:
             response_payload["model"] = original_model
     return json.dumps(
diff --git a/tests/test_config.py b/tests/test_config.py
index 82a4d03..4394c9a 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -140,20 +140,12 @@ class ConfigTests(unittest.TestCase):
             env={
                 "PROXY_VERBOSE": "true",
                 "PROXY_NGROK": "yes",
-                "PROXY_CORS": "true",
-                "PROXY_MAX_REQUEST_BODY_BYTES": "1234",
-                "REASONING_CACHE_MAX_AGE_SECONDS": "60",
-                "REASONING_CACHE_MAX_ROWS": "50",
             },
             config_path=Path("/does/not/exist"),
         )
 
         self.assertTrue(config.verbose)
         self.assertTrue(config.ngrok)
-        self.assertTrue(config.cors)
-        self.assertEqual(config.max_request_body_bytes, 1234)
-        self.assertEqual(config.reasoning_cache_max_age_seconds, 60)
-        self.assertEqual(config.reasoning_cache_max_rows, 50)
 
     def test_cursor_reasoning_display_can_be_disabled_from_config(self) -> None:
         with TemporaryDirectory() as temp_dir:
diff --git a/tests/test_proxy_end_to_end.py b/tests/test_proxy_end_to_end.py
index d45a005..b1aa714 100644
--- a/tests/test_proxy_end_to_end.py
+++ b/tests/test_proxy_end_to_end.py
@@ -16,7 +16,6 @@ from deepseek_cursor_proxy.reasoning_store import (
     message_signature,
 )
 from deepseek_cursor_proxy.server import DeepSeekProxyHandler, DeepSeekProxyServer
-from deepseek_cursor_proxy.transform import reasoning_cache_namespace
 
 
 TOOL_REASONING = "I need the current date before answering."
@@ -254,85 +253,6 @@ class ReasoningStreamingDeepSeekHandler(BaseHTTPRequestHandler):
         self.wfile.flush()
 
 
-class ToolCallStreamingBeforeDoneDeepSeekHandler(BaseHTTPRequestHandler):
-    requests: list[dict] = []
-
-    def log_message(self, fmt: str, *args: object) -> None:
-        return
-
-    def do_POST(self) -> None:
-        length = int(self.headers.get("Content-Length") or 0)
-        payload = json.loads(self.rfile.read(length).decode("utf-8"))
-        self.__class__.requests.append(payload)
-
-        if payload.get("stream"):
-            self.send_response(200)
-            self.send_header("Content-Type", "text/event-stream")
-            self.end_headers()
-            chunks = [
-                {
-                    "id": "chatcmpl-stream-tool",
-                    "object": "chat.completion.chunk",
-                    "created": 1,
-                    "model": "deepseek-v4-pro",
-                    "choices": [
-                        {
-                            "index": 0,
-                            "delta": {
-                                "role": "assistant",
-                                "reasoning_content": "Streamed tool reasoning.",
-                                "tool_calls": [
-                                    {
-                                        "index": 0,
-                                        "id": "call_stream_tool",
-                                        "type": "function",
-                                        "function": {
-                                            "name": "lookup",
-                                            "arguments": "{}",
-                                        },
-                                    }
-                                ],
-                            },
-                            "finish_reason": None,
-                        }
-                    ],
-                },
-                {
-                    "id": "chatcmpl-stream-tool",
-                    "object": "chat.completion.chunk",
-                    "created": 1,
-                    "model": "deepseek-v4-pro",
-                    "choices": [
-                        {"index": 0, "delta": {}, "finish_reason": "tool_calls"}
-                    ],
-                },
-            ]
-            for chunk in chunks:
-                self.wfile.write(f"data: {json.dumps(chunk)}\n\n".encode("utf-8"))
-                self.wfile.flush()
-            time.sleep(1)
-            self.wfile.write(b"data: [DONE]\n\n")
-            self.wfile.flush()
-            return
-
-        messages = payload.get("messages", [])
-        if (
-            len(messages) >= 2
-            and messages[1].get("reasoning_content") == "Streamed tool reasoning."
-        ):
-            self._send_json(200, plain_response("stream follow-up accepted"))
-            return
-        self._send_json(400, {"error": {"message": "missing streamed reasoning"}})
-
-    def _send_json(self, status: int, payload: dict) -> None:
-        body = json.dumps(payload).encode("utf-8")
-        self.send_response(status)
-        self.send_header("Content-Type", "application/json")
-        self.send_header("Content-Length", str(len(body)))
-        self.end_headers()
-        self.wfile.write(body)
-
-
 def tool_call_response() -> dict:
     return {
         "id": "chatcmpl-tool",
@@ -591,21 +511,7 @@ class ProxyEndToEndTests(unittest.TestCase):
         self.assertEqual(caught.exception.code, 401)
         self.assertEqual(FakeDeepSeekHandler.requests, [])
 
-    def test_proxy_rejects_oversized_request_body(self) -> None:
-        self.proxy.server.config = replace(
-            self.proxy.server.config, max_request_body_bytes=10
-        )
-
-        status, payload = post_json(
-            f"{self.proxy.url}/v1/chat/completions",
-            first_cursor_request(),
-        )
-
-        self.assertEqual(status, 413)
-        self.assertIn("too large", payload["error"]["message"])
-        self.assertEqual(FakeDeepSeekHandler.requests, [])
-
-    def test_proxy_rejects_uncached_cursor_tool_history_without_placeholder(
+    def test_proxy_adds_fallback_reasoning_for_uncached_cursor_tool_history(
         self,
     ) -> None:
         status, _ = post_json(
@@ -613,8 +519,9 @@ class ProxyEndToEndTests(unittest.TestCase):
             second_cursor_request(include_reasoning=False),
         )
 
-        self.assertEqual(status, 409)
-        self.assertEqual(FakeDeepSeekHandler.requests, [])
+        self.assertEqual(status, 200)
+        upstream_messages = FakeDeepSeekHandler.requests[0]["messages"]
+        self.assertIn("reasoning_content", upstream_messages[1])
 
 
 class InterleavedConversationTests(unittest.TestCase):
@@ -830,17 +737,10 @@ class ReasoningStreamingProxyTests(unittest.TestCase):
             "content": FINAL_CONTENT,
             "reasoning_content": "Need context.",
         }
-        cache_namespace = reasoning_cache_namespace(
-            self.proxy.server.config,
-            "deepseek-v4-pro",
-            {"type": "enabled"},
-            "high",
-            "Bearer sk-cursor-test",
-        )
         self.assertEqual(
             self.store.get(
                 "scope:"
-                + conversation_scope(request_messages, cache_namespace)
+                + conversation_scope(request_messages)
                 + ":signature:"
                 + message_signature(stored_message)
             ),
@@ -848,107 +748,6 @@ class ReasoningStreamingProxyTests(unittest.TestCase):
         )
 
 
-class StreamingToolRaceProxyTests(unittest.TestCase):
-    def setUp(self) -> None:
-        ToolCallStreamingBeforeDoneDeepSeekHandler.requests = []
-        self.upstream = ServerFixture(
-            ThreadingHTTPServer(
-                ("127.0.0.1", 0), ToolCallStreamingBeforeDoneDeepSeekHandler
-            )
-        ).start()
-        self.store = ReasoningStore(":memory:")
-        proxy = DeepSeekProxyServer(("127.0.0.1", 0), DeepSeekProxyHandler)
-        proxy.config = ProxyConfig(
-            upstream_base_url=self.upstream.url,
-            upstream_model="deepseek-v4-pro",
-        )
-        proxy.reasoning_store = self.store
-        self.proxy = ServerFixture(proxy).start()
-
-    def tearDown(self) -> None:
-        self.proxy.close()
-        self.upstream.close()
-        self.store.close()
-
-    def test_streaming_tool_reasoning_is_available_before_done(self) -> None:
-        request_messages = [{"role": "user", "content": "stream tool"}]
-        request = Request(
-            f"{self.proxy.url}/v1/chat/completions",
-            data=json.dumps(
-                {
-                    "model": "deepseek-v4-pro",
-                    "stream": True,
-                    "messages": request_messages,
-                    "tools": [
-                        {
-                            "type": "function",
-                            "function": {
-                                "name": "lookup",
-                                "parameters": {"type": "object", "properties": {}},
-                            },
-                        }
-                    ],
-                }
-            ).encode("utf-8"),
-            method="POST",
-            headers={
-                "Authorization": "Bearer sk-cursor-test",
-                "Content-Type": "application/json",
-            },
-        )
-
-        with urlopen(request, timeout=3) as response:
-            while True:
-                line = response.readline().decode("utf-8")
-                self.assertNotEqual(line, "")
-                if '"finish_reason":"tool_calls"' in line:
-                    break
-
-            status, payload = post_json(
-                f"{self.proxy.url}/v1/chat/completions",
-                {
-                    "model": "deepseek-v4-pro",
-                    "messages": [
-                        *request_messages,
-                        {
-                            "role": "assistant",
-                            "content": "",
-                            "tool_calls": [
-                                {
-                                    "id": "call_stream_tool",
-                                    "type": "function",
-                                    "function": {
-                                        "name": "lookup",
-                                        "arguments": "{}",
-                                    },
-                                }
-                            ],
-                        },
-                        {
-                            "role": "tool",
-                            "tool_call_id": "call_stream_tool",
-                            "content": "tool result",
-                        },
-                    ],
-                    "tools": [
-                        {
-                            "type": "function",
-                            "function": {
-                                "name": "lookup",
-                                "parameters": {"type": "object", "properties": {}},
-                            },
-                        }
-                    ],
-                },
-            )
-            response.read()
-
-        self.assertEqual(status, 200, payload)
-        self.assertEqual(
-            payload["choices"][0]["message"]["content"], "stream follow-up accepted"
-        )
-
-
 def first_cursor_request() -> dict:
     return {
         "model": "deepseek-v4-pro",
diff --git a/tests/test_reasoning_store.py b/tests/test_reasoning_store.py
index 9a00307..add2d18 100644
--- a/tests/test_reasoning_store.py
+++ b/tests/test_reasoning_store.py
@@ -5,7 +5,7 @@ import stat
 from tempfile import TemporaryDirectory
 import unittest
 
-from deepseek_cursor_proxy.reasoning_store import ReasoningStore, conversation_scope
+from deepseek_cursor_proxy.reasoning_store import ReasoningStore
 
 
 class ReasoningStoreTests(unittest.TestCase):
@@ -21,50 +21,6 @@ class ReasoningStoreTests(unittest.TestCase):
             self.assertTrue(reasoning_content_path.exists())
             self.assertEqual(stat.S_IMODE(reasoning_content_path.stat().st_mode), 0o600)
 
-    def test_store_prunes_to_max_rows_and_can_clear(self) -> None:
-        store = ReasoningStore(":memory:", max_rows=2)
-        try:
-            store.put("a", "reasoning a", {"role": "assistant"})
-            store.put("b", "reasoning b", {"role": "assistant"})
-            store.put("c", "reasoning c", {"role": "assistant"})
-
-            self.assertIsNone(store.get("a"))
-            self.assertEqual(store.get("b"), "reasoning b")
-            self.assertEqual(store.get("c"), "reasoning c")
-            self.assertEqual(store.clear(), 2)
-            self.assertIsNone(store.get("b"))
-            self.assertIsNone(store.get("c"))
-        finally:
-            store.close()
-
-    def test_empty_reasoning_content_is_stored_as_present_value(self) -> None:
-        store = ReasoningStore(":memory:")
-        try:
-            scope = conversation_scope([{"role": "user", "content": "lookup"}])
-            tool_call = {
-                "id": "call_empty",
-                "type": "function",
-                "function": {"name": "lookup", "arguments": "{}"},
-            }
-            message = {
-                "role": "assistant",
-                "content": "",
-                "reasoning_content": "",
-                "tool_calls": [tool_call],
-            }
-
-            self.assertGreater(store.store_assistant_message(message, scope), 0)
-            self.assertEqual(store.get(f"scope:{scope}:tool_call:call_empty"), "")
-            self.assertEqual(
-                store.lookup_for_message(
-                    {"role": "assistant", "content": "", "tool_calls": [tool_call]},
-                    scope,
-                ),
-                "",
-            )
-        finally:
-            store.close()
-
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/tests/test_streaming.py b/tests/test_streaming.py
index 26b7cd3..01ad47d 100644
--- a/tests/test_streaming.py
+++ b/tests/test_streaming.py
@@ -77,84 +77,6 @@ class StreamAccumulatorTests(unittest.TestCase):
         )
         store.close()
 
-    def test_stores_reasoning_when_choice_finishes_before_done(self) -> None:
-        store = ReasoningStore(":memory:")
-        accumulator = StreamAccumulator()
-        accumulator.ingest_chunk(
-            {
-                "choices": [
-                    {
-                        "index": 0,
-                        "delta": {
-                            "role": "assistant",
-                            "reasoning_content": "Need a tool.",
-                            "tool_calls": [
-                                {
-                                    "index": 0,
-                                    "id": "call_stream",
-                                    "type": "function",
-                                    "function": {
-                                        "name": "lookup",
-                                        "arguments": "{}",
-                                    },
-                                }
-                            ],
-                        },
-                        "finish_reason": "tool_calls",
-                    }
-                ]
-            }
-        )
-
-        scope = conversation_scope([{"role": "user", "content": "lookup"}])
-        stored = accumulator.store_finished_reasoning(store, scope)
-
-        self.assertGreater(stored, 0)
-        self.assertEqual(
-            store.get(f"scope:{scope}:tool_call:call_stream"), "Need a tool."
-        )
-        self.assertEqual(accumulator.store_reasoning(store, scope), 0)
-        store.close()
-
-    def test_stores_empty_reasoning_content_when_stream_field_is_present(
-        self,
-    ) -> None:
-        store = ReasoningStore(":memory:")
-        accumulator = StreamAccumulator()
-        accumulator.ingest_chunk(
-            {
-                "choices": [
-                    {
-                        "index": 0,
-                        "delta": {
-                            "role": "assistant",
-                            "reasoning_content": "",
-                            "tool_calls": [
-                                {
-                                    "index": 0,
-                                    "id": "call_empty",
-                                    "type": "function",
-                                    "function": {
-                                        "name": "lookup",
-                                        "arguments": "{}",
-                                    },
-                                }
-                            ],
-                        },
-                        "finish_reason": "tool_calls",
-                    }
-                ]
-            }
-        )
-
-        scope = conversation_scope([{"role": "user", "content": "lookup"}])
-        stored = accumulator.store_finished_reasoning(store, scope)
-
-        self.assertGreater(stored, 0)
-        self.assertEqual(store.get(f"scope:{scope}:tool_call:call_empty"), "")
-        self.assertEqual(accumulator.messages()[0]["reasoning_content"], "")
-        store.close()
-
     def test_returns_accumulated_messages_for_logging(self) -> None:
         accumulator = StreamAccumulator()
         accumulator.ingest_chunk(
diff --git a/tests/test_transform.py b/tests/test_transform.py
index 074862a..910be42 100644
--- a/tests/test_transform.py
+++ b/tests/test_transform.py
@@ -8,25 +8,11 @@ from deepseek_cursor_proxy.reasoning_store import ReasoningStore, conversation_s
 from deepseek_cursor_proxy.transform import (
     extract_text_content,
     prepare_upstream_request,
-    reasoning_cache_namespace,
     rewrite_response_body,
     strip_cursor_thinking_blocks,
 )
 
 
-DEFAULT_CONFIG = ProxyConfig()
-DEFAULT_CACHE_NAMESPACE = reasoning_cache_namespace(
-    DEFAULT_CONFIG,
-    "deepseek-v4-pro",
-    {"type": "enabled"},
-    "high",
-)
-
-
-def cache_scope(messages: list[dict]) -> str:
-    return conversation_scope(messages, DEFAULT_CACHE_NAMESPACE)
-
-
 class TransformTests(unittest.TestCase):
     def setUp(self) -> None:
         self.store = ReasoningStore(":memory:")
@@ -89,30 +75,19 @@ class TransformTests(unittest.TestCase):
         prepared = prepare_upstream_request(payload, config, self.store)
 
         self.assertEqual(prepared.original_model, "deepseek-v4-flash")
-        self.assertEqual(prepared.upstream_model, "deepseek-v4-flash")
-        self.assertEqual(prepared.payload["model"], "deepseek-v4-flash")
+        self.assertEqual(prepared.upstream_model, "deepseek-v4-pro")
+        self.assertEqual(prepared.payload["model"], "deepseek-v4-pro")
         self.assertEqual(prepared.payload["thinking"], {"type": "enabled"})
         self.assertEqual(prepared.payload["reasoning_effort"], "high")
         self.assertEqual(prepared.payload["max_tokens"], 123)
         self.assertEqual(prepared.payload["tools"][0]["type"], "function")
         self.assertEqual(
             prepared.payload["tool_choice"],
-            {"type": "function", "function": {"name": "lookup"}},
+            "auto",
         )
         self.assertNotIn("parallel_tool_calls", prepared.payload)
 
-    def test_uses_config_model_only_when_request_model_is_missing(self) -> None:
-        prepared = prepare_upstream_request(
-            {"messages": [{"role": "user", "content": "hi"}]},
-            ProxyConfig(upstream_model="deepseek-v4-flash"),
-            self.store,
-        )
-
-        self.assertEqual(prepared.original_model, "deepseek-v4-flash")
-        self.assertEqual(prepared.upstream_model, "deepseek-v4-flash")
-        self.assertEqual(prepared.payload["model"], "deepseek-v4-flash")
-
-    def test_preserves_required_tool_choice(self) -> None:
+    def test_normalizes_unsupported_required_tool_choice_to_auto(self) -> None:
         payload = {
             "model": "deepseek-v4-pro",
             "messages": [{"role": "user", "content": "call a tool"}],
@@ -122,25 +97,7 @@ class TransformTests(unittest.TestCase):
 
         prepared = prepare_upstream_request(payload, ProxyConfig(), self.store)
 
-        self.assertEqual(prepared.payload["tool_choice"], "required")
-
-    def test_preserves_named_tool_choice(self) -> None:
-        payload = {
-            "model": "deepseek-v4-pro",
-            "messages": [{"role": "user", "content": "call lookup"}],
-            "tools": [{"type": "function", "function": {"name": "lookup"}}],
-            "tool_choice": {
-                "type": "function",
-                "function": {"name": "lookup"},
-            },
-        }
-
-        prepared = prepare_upstream_request(payload, ProxyConfig(), self.store)
-
-        self.assertEqual(
-            prepared.payload["tool_choice"],
-            {"type": "function", "function": {"name": "lookup"}},
-        )
+        self.assertEqual(prepared.payload["tool_choice"], "auto")
 
     def test_restores_reasoning_content_for_cached_tool_call(self) -> None:
         prior_messages = [{"role": "user", "content": "read README"}]
@@ -160,7 +117,7 @@ class TransformTests(unittest.TestCase):
             ],
         }
         self.store.store_assistant_message(
-            assistant_message, cache_scope(prior_messages)
+            assistant_message, conversation_scope(prior_messages)
         )
 
         payload = {
@@ -194,81 +151,6 @@ class TransformTests(unittest.TestCase):
             "Need the file contents before answering.",
         )
 
-    def test_accepts_empty_reasoning_content_when_present_for_tool_call(
-        self,
-    ) -> None:
-        payload = {
-            "model": "deepseek-v4-pro",
-            "messages": [
-                {"role": "user", "content": "read README"},
-                {
-                    "role": "assistant",
-                    "content": "",
-                    "reasoning_content": "",
-                    "tool_calls": [
-                        {
-                            "id": "call_empty",
-                            "type": "function",
-                            "function": {
-                                "name": "read_file",
-                                "arguments": '{"path":"README.md"}',
-                            },
-                        }
-                    ],
-                },
-                {"role": "tool", "tool_call_id": "call_empty", "content": "file text"},
-            ],
-        }
-
-        prepared = prepare_upstream_request(payload, ProxyConfig(), self.store)
-
-        self.assertEqual(prepared.patched_reasoning_messages, 0)
-        self.assertEqual(prepared.missing_reasoning_messages, 0)
-        self.assertIn("reasoning_content", prepared.payload["messages"][1])
-        self.assertEqual(prepared.payload["messages"][1]["reasoning_content"], "")
-
-    def test_restores_empty_reasoning_content_from_cache(self) -> None:
-        prior_messages = [{"role": "user", "content": "read README"}]
-        tool_call = {
-            "id": "call_empty",
-            "type": "function",
-            "function": {
-                "name": "read_file",
-                "arguments": '{"path":"README.md"}',
-            },
-        }
-        self.store.store_assistant_message(
-            {
-                "role": "assistant",
-                "content": "",
-                "reasoning_content": "",
-                "tool_calls": [tool_call],
-            },
-            cache_scope(prior_messages),
-        )
-
-        prepared = prepare_upstream_request(
-            {
-                "model": "deepseek-v4-pro",
-                "messages": [
-                    *prior_messages,
-                    {"role": "assistant", "content": "", "tool_calls": [tool_call]},
-                    {
-                        "role": "tool",
-                        "tool_call_id": "call_empty",
-                        "content": "file text",
-                    },
-                ],
-            },
-            ProxyConfig(),
-            self.store,
-        )
-
-        self.assertEqual(prepared.patched_reasoning_messages, 1)
-        self.assertEqual(prepared.missing_reasoning_messages, 0)
-        self.assertIn("reasoning_content", prepared.payload["messages"][1])
-        self.assertEqual(prepared.payload["messages"][1]["reasoning_content"], "")
-
     def test_restores_reasoning_content_for_cached_final_tool_turn_message(
         self,
     ) -> None:
@@ -297,7 +179,7 @@ class TransformTests(unittest.TestCase):
             "reasoning_content": "The tool result is enough to answer.",
         }
         self.store.store_assistant_message(
-            assistant_message, cache_scope(prior_messages)
+            assistant_message, conversation_scope(prior_messages)
         )
 
         payload = {
@@ -353,8 +235,8 @@ class TransformTests(unittest.TestCase):
         prior_a = [{"role": "user", "content": "thread A"}]
         prior_b = [{"role": "user", "content": "thread B"}]
 
-        self.store.store_assistant_message(assistant_a, cache_scope(prior_a))
-        self.store.store_assistant_message(assistant_b, cache_scope(prior_b))
+        self.store.store_assistant_message(assistant_a, conversation_scope(prior_a))
+        self.store.store_assistant_message(assistant_b, conversation_scope(prior_b))
 
         payload_a = {
             "model": "deepseek-v4-pro",
@@ -385,7 +267,7 @@ class TransformTests(unittest.TestCase):
 
     def test_exact_message_signature_wins_over_tool_call_id_fallback(self) -> None:
         prior = [{"role": "user", "content": "same conversation prefix"}]
-        scope = cache_scope(prior)
+        scope = conversation_scope(prior)
         first_tool_call = {
             "id": "call_reused",
             "type": "function",
@@ -454,7 +336,7 @@ class TransformTests(unittest.TestCase):
                 }
             ],
         }
-        self.store.store_assistant_message(assistant_message, cache_scope(prior))
+        self.store.store_assistant_message(assistant_message, conversation_scope(prior))
 
         payload = {
             "model": "deepseek-v4-pro",
@@ -504,7 +386,7 @@ class TransformTests(unittest.TestCase):
                 "reasoning_content": "Need to call the file tool.",
                 "tool_calls": [tool_call],
             },
-            cache_scope(prior),
+            conversation_scope(prior),
         )
 
         prepared = prepare_upstream_request(
@@ -530,7 +412,7 @@ class TransformTests(unittest.TestCase):
             "Need to call the file tool.",
         )
 
-    def test_reports_missing_reasoning_for_uncached_assistant_tool_call(self) -> None:
+    def test_adds_fallback_reasoning_for_uncached_assistant_tool_call(self) -> None:
         payload = {
             "model": "deepseek-v4-pro",
             "messages": [
@@ -560,10 +442,10 @@ class TransformTests(unittest.TestCase):
         prepared = prepare_upstream_request(payload, ProxyConfig(), self.store)
 
         self.assertEqual(prepared.patched_reasoning_messages, 0)
-        self.assertEqual(prepared.missing_reasoning_messages, 1)
-        self.assertNotIn("reasoning_content", prepared.payload["messages"][1])
+        self.assertEqual(prepared.fallback_reasoning_messages, 1)
+        self.assertIn("reasoning_content", prepared.payload["messages"][1])
 
-    def test_reports_missing_reasoning_for_uncached_assistant_after_tool_result(
+    def test_adds_fallback_reasoning_for_uncached_assistant_after_tool_result(
         self,
     ) -> None:
         payload = {
@@ -597,10 +479,10 @@ class TransformTests(unittest.TestCase):
 
         prepared = prepare_upstream_request(payload, ProxyConfig(), self.store)
 
-        self.assertEqual(prepared.missing_reasoning_messages, 1)
-        self.assertNotIn("reasoning_content", prepared.payload["messages"][3])
+        self.assertEqual(prepared.fallback_reasoning_messages, 1)
+        self.assertIn("reasoning_content", prepared.payload["messages"][3])
 
-    def test_does_not_report_missing_reasoning_for_plain_chat_history(self) -> None:
+    def test_does_not_add_fallback_reasoning_for_plain_chat_history(self) -> None:
         payload = {
             "model": "deepseek-v4-pro",
             "messages": [
@@ -612,86 +494,7 @@ class TransformTests(unittest.TestCase):
 
         prepared = prepare_upstream_request(payload, ProxyConfig(), self.store)
 
-        self.assertEqual(prepared.missing_reasoning_messages, 0)
-        self.assertNotIn("reasoning_content", prepared.payload["messages"][1])
-
-    def test_does_not_repair_reasoning_when_thinking_is_disabled(self) -> None:
-        payload = {
-            "model": "deepseek-v4-pro",
-            "messages": [
-                {"role": "user", "content": "read README"},
-                {
-                    "role": "assistant",
-                    "content": "",
-                    "reasoning_content": "Should be removed in non-thinking mode.",
-                    "tool_calls": [
-                        {
-                            "id": "call_uncached",
-                            "type": "function",
-                            "function": {
-                                "name": "read_file",
-                                "arguments": '{"path":"README.md"}',
-                            },
-                        }
-                    ],
-                },
-                {
-                    "role": "tool",
-                    "tool_call_id": "call_uncached",
-                    "content": "file text",
-                },
-            ],
-        }
-
-        prepared = prepare_upstream_request(
-            payload, ProxyConfig(thinking="disabled"), self.store
-        )
-
-        self.assertEqual(prepared.missing_reasoning_messages, 0)
-        self.assertNotIn("reasoning_content", prepared.payload["messages"][1])
-
-    def test_reasoning_cache_is_namespaced_by_authorization(self) -> None:
-        config = ProxyConfig()
-        prior = [{"role": "user", "content": "read README"}]
-        namespace_a = reasoning_cache_namespace(
-            config,
-            config.upstream_model,
-            {"type": "enabled"},
-            "high",
-            "Bearer key-a",
-        )
-        tool_call = {
-            "id": "call_123",
-            "type": "function",
-            "function": {
-                "name": "read_file",
-                "arguments": '{"path":"README.md"}',
-            },
-        }
-        self.store.store_assistant_message(
-            {
-                "role": "assistant",
-                "content": "",
-                "reasoning_content": "Reasoning for key A.",
-                "tool_calls": [tool_call],
-            },
-            conversation_scope(prior, namespace_a),
-        )
-
-        prepared = prepare_upstream_request(
-            {
-                "model": "deepseek-v4-pro",
-                "messages": [
-                    *prior,
-                    {"role": "assistant", "content": "", "tool_calls": [tool_call]},
-                ],
-            },
-            config,
-            self.store,
-            authorization="Bearer key-b",
-        )
-
-        self.assertEqual(prepared.missing_reasoning_messages, 1)
+        self.assertEqual(prepared.fallback_reasoning_messages, 0)
         self.assertNotIn("reasoning_content", prepared.payload["messages"][1])
 
     def test_converted_function_message_uses_tool_schema(self) -> None:
@@ -758,35 +561,6 @@ class TransformTests(unittest.TestCase):
             "I need to inspect the repo.",
         )
 
-    def test_rewrite_response_preserves_prompt_cache_usage_fields(self) -> None:
-        body = json.dumps(
-            {
-                "id": "chatcmpl-test",
-                "object": "chat.completion",
-                "model": "deepseek-v4-pro",
-                "choices": [
-                    {
-                        "index": 0,
-                        "finish_reason": "stop",
-                        "message": {"role": "assistant", "content": "ok"},
-                    }
-                ],
-                "usage": {
-                    "prompt_tokens": 10,
-                    "prompt_cache_hit_tokens": 6,
-                    "prompt_cache_miss_tokens": 4,
-                    "completion_tokens": 1,
-                    "total_tokens": 11,
-                },
-            }
-        ).encode()
-
-        rewritten = rewrite_response_body(body, "deepseek-v4-flash", self.store, [])
-        payload = json.loads(rewritten)
-
-        self.assertEqual(payload["usage"]["prompt_cache_hit_tokens"], 6)
-        self.assertEqual(payload["usage"]["prompt_cache_miss_tokens"], 4)
-
 
 if __name__ == "__main__":
     unittest.main()