From 5d294c5c412a7f41d97bc9bfb74dbbe133c8cd2e Mon Sep 17 00:00:00 2001
From: bigemon
Date: Sat, 16 May 2026 14:27:46 +0800
Subject: [PATCH] fix: normalize numeric JSON Schema constraints (minLength/minimum/etc.)

- Add _coerce_numeric_constraint() to fix string-valued numeric constraints
- Add normalize_tool_schema() to strip anyOf/const/$schema/patternProperties
- Log warning when a constraint cannot be coerced (helps diagnose bad schemas)
- Fixes DeepSeek 400 '"1" is not of type integer' for GitNexus MCP tools

Closes: N/A (internal fix)
---
 src/deepseek_cursor_proxy/transform.py | 127 ++++++++++++++++++++++++-
 1 file changed, 126 insertions(+), 1 deletion(-)

diff --git a/src/deepseek_cursor_proxy/transform.py b/src/deepseek_cursor_proxy/transform.py
index 0053d98..9de68ef 100644
--- a/src/deepseek_cursor_proxy/transform.py
+++ b/src/deepseek_cursor_proxy/transform.py
@@ -184,6 +184,120 @@ def normalize_tool_call(tool_call: Any) -> dict[str, Any]:
     return normalized
 
 
+def _coerce_numeric_constraint(
+    prop_schema: dict, prop_name: str = "<unknown>"
+) -> dict:
+    """Return a copy of a schema with string numeric constraints coerced.
+
+    DeepSeek requires minLength/maxLength/minimum/maximum/etc. to be actual
+    numbers, not strings. Schemas generated by some MCP servers export them
+    as strings (e.g. "minLength": "1" instead of "minLength": 1).
+
+    prop_schema is not modified. prop_name only labels the warning that is
+    logged when a value cannot be converted; such a constraint is dropped
+    from the returned copy.
+    """
+    NUMERIC_FIELDS = (
+        ("minimum", float), ("maximum", float),
+        ("exclusiveMinimum", float), ("exclusiveMaximum", float),
+        ("minLength", int), ("maxLength", int),
+        ("minItems", int), ("maxItems", int),
+        ("minProperties", int), ("maxProperties", int),
+        ("multipleOf", float),
+        ("timeout", int), ("timeoutMs", int),
+        ("crossDepth", int), ("maxDepth", int),
+    )
+    TYPE_LABELS = {int: "integer", float: "number"}
+    result = dict(prop_schema)
+    for field, type_fn in NUMERIC_FIELDS:
+        val = result.get(field)
+        if val is None or isinstance(val, (int, float)):
+            continue
+        try:
+            result[field] = type_fn(val)
+        except (ValueError, TypeError):
+            result.pop(field, None)
+            LOG.warning(
+                "Dropping unparseable numeric constraint %s=%r on field %s "
+                "(expected %s, got %r)",
+                field, val, prop_name, TYPE_LABELS[type_fn], type(val).__name__,
+            )
+    return result
+
+
+def normalize_tool_schema(schema: Any) -> dict[str, Any]:
+    """Strip DeepSeek-incompatible JSON Schema constructs from tool parameters.
+
+    Removes/changes:
+    - anyOf / allOf / oneOf (DeepSeek rejects compound schemas)
+    - $schema meta fields / const / patternProperties / definitions / $ref
+    - numeric constraints that are strings instead of numbers (minLength etc.)
+    - enum values whose type doesn't match the declared schema type
+
+    Returns a new dict ({} for non-dict input); the input is not modified.
+    """
+    if not isinstance(schema, dict):
+        return {}
+
+    result = dict(schema)
+
+    # Remove DeepSeek-unsupported keywords
+    for bad_key in ("anyOf", "allOf", "oneOf", "$schema", "const",
+                    "patternProperties", "definitions", "$ref"):
+        result.pop(bad_key, None)
+
+    # Recursively clean nested schemas in properties
+    properties = result.get("properties")
+    if isinstance(properties, dict):
+        # result is only a shallow copy, so collect the cleaned schemas in a
+        # fresh mapping instead of writing into the caller's "properties".
+        cleaned_properties = dict(properties)
+        for prop_name, prop_schema in properties.items():
+            if not isinstance(prop_schema, dict):
+                continue
+            # 1. Coerce string numeric constraints to real numbers
+            cleaned = _coerce_numeric_constraint(prop_schema, prop_name)
+            # 2. Recurse into nested schemas
+            nested = normalize_tool_schema(cleaned)
+            cleaned_properties[prop_name] = nested
+
+            # 3. Fix enum type mismatches
+            declared_type = nested.get("type")
+            enum_values = nested.get("enum")
+            if enum_values and declared_type in ("integer", "number", "boolean"):
+                converted = []
+                for v in enum_values:
+                    try:
+                        if declared_type == "integer":
+                            converted.append(int(v))
+                        elif declared_type == "number":
+                            converted.append(float(v))
+                        elif declared_type == "boolean":
+                            converted.append(bool(v) if not isinstance(v, str) else
+                                             v.lower() in ("true", "1", "yes"))
+                    except (ValueError, TypeError):
+                        converted.append(v)
+                # nested is a fresh dict, so it can be updated directly.
+                nested["enum"] = converted
+        result["properties"] = cleaned_properties
+
+    # Recurse into items schema, coercing its numeric constraints as well
+    items = result.get("items")
+    if isinstance(items, dict):
+        result["items"] = normalize_tool_schema(
+            _coerce_numeric_constraint(items, "items")
+        )
+
+    # Recurse into additionalProperties schema
+    ap = result.get("additionalProperties")
+    if isinstance(ap, dict):
+        result["additionalProperties"] = normalize_tool_schema(
+            _coerce_numeric_constraint(ap, "additionalProperties")
+        )
+
+    return result
+
+
 def normalize_tool(tool: Any) -> dict[str, Any]:
     if not isinstance(tool, dict):
         return {
@@ -194,7 +308,12 @@ def normalize_tool(tool: Any) -> dict[str, Any]:
     normalized["type"] = normalized.get("type") or "function"
     function = normalized.get("function")
     if isinstance(function, dict):
-        normalized["function"] = function
+        # Apply schema normalization to fix DeepSeek-incompatible constructs.
+        # A missing "parameters" key is left absent rather than replaced by {}.
+        params = function.get("parameters")
+        if isinstance(params, dict):
+            normalized["function"] = dict(function)
+            normalized["function"]["parameters"] = normalize_tool_schema(params)
     return normalized
 
 
@@ -771,6 +890,12 @@ def prepare_upstream_request(
 
     if "tools" in prepared and isinstance(prepared["tools"], list):
         prepared["tools"] = [normalize_tool(tool) for tool in prepared["tools"]]
+        # DEBUG: log the mcp_gitnexus_context schema after normalization
+        for t in prepared["tools"]:
+            fn = t.get("function")
+            fname = fn.get("name") if isinstance(fn, dict) else None
+            if isinstance(fname, str) and "gitnexus_context" in fname:
+                LOG.debug("[NORMALIZED] %s params: %s", fname, fn.get("parameters", {}))
     elif isinstance(payload.get("functions"), list):
         prepared["tools"] = [
             legacy_function_to_tool(function) for function in payload["functions"]