fix: normalize numeric JSON Schema constraints (minLength/minimum/etc.)

- Add _coerce_numeric_constraint() to fix string-valued numeric constraints
- Add normalize_tool_schema() to strip anyOf/allOf/oneOf/const/$schema/patternProperties/definitions/$ref
- Log warning when a constraint cannot be coerced (helps diagnose bad schemas)
- Fixes DeepSeek 400 '"1" is not of type integer' for GitNexus MCP tools

Closes: N/A (internal fix)
main
bigemon 2026-05-16 14:27:46 +08:00
parent ea3da01417
commit 5d294c5c41
1 changed file with 108 additions and 1 deletion

View File

@ -184,6 +184,104 @@ def normalize_tool_call(tool_call: Any) -> dict[str, Any]:
return normalized return normalized
# Schema keywords whose values must be real JSON numbers, paired with the
# Python type each one is coerced to. The last four entries are not standard
# JSON Schema keywords; they are kept from the original list unchanged.
_NUMERIC_CONSTRAINT_TYPES = (
    ("minimum", float), ("maximum", float),
    ("exclusiveMinimum", float), ("exclusiveMaximum", float),
    ("minLength", int), ("maxLength", int),
    ("minItems", int), ("maxItems", int),
    ("minProperties", int), ("maxProperties", int),
    ("multipleOf", float),
    ("timeout", int), ("timeoutMs", int),
    ("crossDepth", int), ("maxDepth", int),
)

# JSON Schema type names used in log messages for each coercion target.
_JSON_TYPE_NAMES = {int: "integer", float: "number"}

# Keywords removed from every schema level because DeepSeek rejects them.
_UNSUPPORTED_SCHEMA_KEYS = (
    "anyOf", "allOf", "oneOf", "$schema", "const",
    "patternProperties", "definitions", "$ref",
)

# Declared types for which enum members are converted to the matching
# Python type. Kept as a tuple (not a set) so that an unhashable declared
# type such as ``["integer", "null"]`` can still be tested with ``in``.
_COERCIBLE_ENUM_TYPES = ("integer", "number", "boolean")


def _parse_numeric(val, type_fn):
    """Convert *val* to *type_fn* (``int`` or ``float``).

    Integer targets also accept integral float strings such as ``"1.0"``.
    Non-finite results (``inf`` / ``nan``) are rejected because they cannot
    be represented in standard JSON.

    Raises:
        ValueError: *val* is not a parseable (finite, and for ``int``
            integral) number.
        TypeError: *val* has a type that cannot be converted at all.
    """
    import math  # local import: the file's import block is not modified

    if type_fn is int:
        try:
            return int(val)
        except ValueError:
            # e.g. "1.0" or "1e3": accept only when the value is integral.
            as_float = float(val)
            if not as_float.is_integer():  # also False for inf / nan
                raise ValueError(f"not an integer: {val!r}") from None
            return int(as_float)
    number = float(val)
    if not math.isfinite(number):
        raise ValueError(f"not a finite number: {val!r}")
    return number


def _coerce_numeric_constraint(prop_schema: dict, prop_name: str = "<unknown>") -> dict:
    """Fix JSON Schema numeric constraints that are strings instead of numbers.

    DeepSeek requires minLength/maxLength/minimum/maximum/etc. to be actual
    numbers, not strings. Schemas generated by some MCP servers export them
    as strings (e.g. ``"minLength": "1"`` instead of ``"minLength": 1``).

    Args:
        prop_schema: The schema dict to inspect. It is not modified.
        prop_name: Name or path of the schema, used only in the warning
            that is logged when a constraint has to be dropped.

    Returns:
        A shallow copy of *prop_schema* in which every string-valued numeric
        constraint is replaced by a number. Constraints that cannot be
        parsed are removed and a warning is logged. Values that are ``None``
        or already ``int``/``float`` are left as they are.
    """
    result = dict(prop_schema)
    for field, type_fn in _NUMERIC_CONSTRAINT_TYPES:
        val = result.get(field)
        if val is None or isinstance(val, (int, float)):
            continue
        try:
            result[field] = _parse_numeric(val, type_fn)
        except (ValueError, TypeError):
            del result[field]
            LOG.warning(
                "Dropping unparseable numeric constraint %s=%r on field %s "
                "(expected %s, got %r)",
                field, val, prop_name, _JSON_TYPE_NAMES[type_fn], type(val).__name__,
            )
    return result


def _coerce_enum_values(enum_values, declared_type):
    """Return *enum_values* converted to match *declared_type*.

    Members that cannot be converted are kept unchanged so no enum entry is
    ever lost.
    """
    converted = []
    for v in enum_values:
        try:
            if declared_type == "integer":
                converted.append(int(v))
            elif declared_type == "number":
                converted.append(float(v))
            elif declared_type == "boolean":
                if isinstance(v, str):
                    converted.append(v.lower() in ("true", "1", "yes"))
                else:
                    converted.append(bool(v))
        except (ValueError, TypeError):
            converted.append(v)
    return converted


def _normalize_schema_node(schema, path):
    """Normalize one schema level and recurse into its sub-schemas.

    *path* identifies the level (``<root>``, ``prop``, ``prop.child``,
    ``prop[]`` for items, ``prop.*`` for additionalProperties) and is only
    used in log messages.
    """
    if not isinstance(schema, dict):
        return {}

    # Coercion returns a fresh shallow copy, so the caller's dict is never
    # written to at this level.
    result = _coerce_numeric_constraint(schema, path)

    for bad_key in _UNSUPPORTED_SCHEMA_KEYS:
        result.pop(bad_key, None)

    # Fix enum members whose Python type does not match the declared type.
    declared_type = result.get("type")
    enum_values = result.get("enum")
    if (
        enum_values
        and isinstance(enum_values, (list, tuple))
        and declared_type in _COERCIBLE_ENUM_TYPES
    ):
        result["enum"] = _coerce_enum_values(enum_values, declared_type)

    # Build a NEW properties mapping instead of assigning into the existing
    # one, which is still shared with the caller's schema.
    properties = result.get("properties")
    if isinstance(properties, dict):
        prefix = "" if path == "<root>" else f"{path}."
        result["properties"] = {
            name: (
                _normalize_schema_node(sub, f"{prefix}{name}")
                if isinstance(sub, dict)
                else sub  # non-dict entries are passed through untouched
            )
            for name, sub in properties.items()
        }

    items = result.get("items")
    if isinstance(items, dict):
        result["items"] = _normalize_schema_node(items, f"{path}[]")

    additional = result.get("additionalProperties")
    if isinstance(additional, dict):
        result["additionalProperties"] = _normalize_schema_node(additional, f"{path}.*")

    return result


def normalize_tool_schema(schema: Any) -> dict[str, Any]:
    """Strip DeepSeek-incompatible JSON Schema constructs from tool parameters.

    Applied at every schema level (the root, each entry of ``properties``,
    ``items`` and ``additionalProperties``):

    - removes anyOf / allOf / oneOf (DeepSeek rejects compound schemas)
    - removes $schema / const / patternProperties / definitions / $ref
    - coerces numeric constraints given as strings (minLength etc.) to
      numbers, dropping the ones that cannot be parsed
    - converts enum members to the declared type when that type is
      integer, number or boolean

    Args:
        schema: The tool parameter schema. It is never mutated.

    Returns:
        A normalized copy of *schema*, or ``{}`` if *schema* is not a dict.
    """
    return _normalize_schema_node(schema, "<root>")
def normalize_tool(tool: Any) -> dict[str, Any]: def normalize_tool(tool: Any) -> dict[str, Any]:
if not isinstance(tool, dict): if not isinstance(tool, dict):
return { return {
@ -194,7 +292,11 @@ def normalize_tool(tool: Any) -> dict[str, Any]:
normalized["type"] = normalized.get("type") or "function" normalized["type"] = normalized.get("type") or "function"
function = normalized.get("function") function = normalized.get("function")
if isinstance(function, dict): if isinstance(function, dict):
normalized["function"] = function # Apply schema normalization to fix DeepSeek-incompatible constructs
params = function.get("parameters", {})
if isinstance(params, dict):
normalized["function"] = dict(function)
normalized["function"]["parameters"] = normalize_tool_schema(params)
return normalized return normalized
@ -771,6 +873,11 @@ def prepare_upstream_request(
if "tools" in prepared and isinstance(prepared["tools"], list): if "tools" in prepared and isinstance(prepared["tools"], list):
prepared["tools"] = [normalize_tool(tool) for tool in prepared["tools"]] prepared["tools"] = [normalize_tool(tool) for tool in prepared["tools"]]
# DEBUG: log the mcp_gitnexus_context schema after normalization
for t in prepared["tools"]:
fname = t.get("function", {}).get("name", "")
if "gitnexus_context" in fname:
LOG.debug(f"[NORMALIZED] {fname} params: {t.get('function', {}).get('parameters', {})}")
elif isinstance(payload.get("functions"), list): elif isinstance(payload.get("functions"), list):
prepared["tools"] = [ prepared["tools"] = [
legacy_function_to_tool(function) for function in payload["functions"] legacy_function_to_tool(function) for function in payload["functions"]