From 9f73b599ca283b66f98a4fba151fe8cc93d98320 Mon Sep 17 00:00:00 2001 From: Isaac Miller Date: Fri, 26 Sep 2025 14:15:57 -0400 Subject: [PATCH 1/6] Add failing test --- tests/adapters/test_json_adapter.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tests/adapters/test_json_adapter.py b/tests/adapters/test_json_adapter.py index 2acd4e63c3..2ea407c484 100644 --- a/tests/adapters/test_json_adapter.py +++ b/tests/adapters/test_json_adapter.py @@ -239,6 +239,35 @@ def test_json_adapter_parse_raise_error_on_mismatch_fields(): ) +# Issue (#8759) +def test_json_adapter_parse_handles_braces_inside_string_values(): + class CodeIssue(pydantic.BaseModel): + issue_type: str + severity_level: str + problem_code_snippet: str + + class CodeReview(dspy.Signature): + reasoning: str = dspy.OutputField(desc="Short chain-of-thought analysis") + issue_list: list[CodeIssue] = dspy.OutputField(desc="Detected issues") + + adapter = dspy.JSONAdapter() + completion = ( + "Here is the review output you asked for:\n\n" + "{\n" + " \"reasoning\": \"Inspecting the conditional reveals an unmatched brace.\",\n" + " \"issue_list\": [\n" + " {\n" + " \"issue_type\": \"style\",\n" + " \"severity_level\": \"fatal\",\n" + " \"problem_code_snippet\": \"if (user) {\"\n" + " }\n" + " ]\n" + "}\n" + ) + + adapter.parse(CodeReview, completion) + + def test_json_adapter_formats_image(): # Test basic image formatting image = dspy.Image(url="https://example.com/image.jpg") From d645dced736893986ab3bf2c122ee1484b7401fb Mon Sep 17 00:00:00 2001 From: Isaac Miller Date: Fri, 26 Sep 2025 14:17:24 -0400 Subject: [PATCH 2/6] ruff --- tests/adapters/test_json_adapter.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/adapters/test_json_adapter.py b/tests/adapters/test_json_adapter.py index 2ea407c484..d899872df7 100644 --- a/tests/adapters/test_json_adapter.py +++ b/tests/adapters/test_json_adapter.py @@ -254,12 +254,12 @@ class CodeReview(dspy.Signature): completion = ( "Here is the review output you asked for:\n\n" "{\n" - " \"reasoning\": \"Inspecting the conditional reveals an unmatched brace.\",\n" - " \"issue_list\": [\n" + ' "reasoning": "Inspecting the conditional reveals an unmatched brace.",\n' + ' "issue_list": [\n' " {\n" - " \"issue_type\": \"style\",\n" - " \"severity_level\": \"fatal\",\n" - " \"problem_code_snippet\": \"if (user) {\"\n" + ' "issue_type": "style",\n' + ' "severity_level": "fatal",\n' + ' "problem_code_snippet": "if (user) {"\n' " }\n" " ]\n" "}\n" From b4a62b87b17a938b017d373ea6918e98dd1bca1e Mon Sep 17 00:00:00 2001 From: Isaac Miller Date: Fri, 26 Sep 2025 14:59:00 -0400 Subject: [PATCH 3/6] Change regex parsing into state machine to find first complete json span --- dspy/adapters/json_adapter.py | 47 ++++++++++++++++++++++++++--- tests/adapters/test_json_adapter.py | 9 +++++- 2 files changed, 50 insertions(+), 6 deletions(-) diff --git a/dspy/adapters/json_adapter.py b/dspy/adapters/json_adapter.py index ae534d778f..842cb89ddb 100644 --- a/dspy/adapters/json_adapter.py +++ b/dspy/adapters/json_adapter.py @@ -5,7 +5,6 @@ import json_repair import litellm import pydantic -import regex from pydantic.fields import FieldInfo from dspy.adapters.chat_adapter import ChatAdapter, FieldInfoWithName @@ -151,10 +150,9 @@ def format_assistant_message_content( return self.format_field_with_value(fields_with_values, role="assistant") def parse(self, signature: type[Signature], completion: str) -> dict[str, Any]: - pattern = r"\{(?:[^{}]|(?R))*\}" - match = regex.search(pattern, completion, regex.DOTALL) - if match: - completion = match.group(0) + extracted_object = _extract_first_json_object(completion) + if extracted_object: + completion = extracted_object fields = json_repair.loads(completion) if not isinstance(fields, dict): @@ -290,3 +288,42 @@ def enforce_required(schema_part: dict): pydantic_model.model_json_schema = lambda *args, **kwargs: schema return pydantic_model + +def _extract_first_json_object(text: str) -> str | None: + """Return the first balanced JSON object found in text or None if absent.""" + + in_string = False + escape = False + depth = 0 + start_idx: int | None = None + seen_lbrace = False + + for idx, char in enumerate(text): + if seen_lbrace and in_string: + if escape: + escape = False + elif char == "\\": + escape = True + elif char == '"': + in_string = False + continue + + if seen_lbrace and char == '"': + in_string = True + continue + + if char == '{': + if depth == 0: + start_idx = idx + seen_lbrace = True + depth += 1 + continue + + if char == '}': + if depth == 0 or start_idx is None: + continue + depth -= 1 + if depth == 0: + return text[start_idx : idx + 1] + + return None \ No newline at end of file diff --git a/tests/adapters/test_json_adapter.py b/tests/adapters/test_json_adapter.py index d899872df7..bd5e39ff1f 100644 --- a/tests/adapters/test_json_adapter.py +++ b/tests/adapters/test_json_adapter.py @@ -265,7 +265,14 @@ class CodeReview(dspy.Signature): "}\n" ) - adapter.parse(CodeReview, completion) + result = adapter.parse(CodeReview, completion) + + assert result["reasoning"] == "Inspecting the conditional reveals an unmatched brace." + assert len(result["issue_list"]) == 1 + issue = result["issue_list"][0] + assert issue.issue_type == "style" + assert issue.severity_level == "fatal" + assert issue.problem_code_snippet == "if (user) {" def test_json_adapter_formats_image(): From 0a135d566067af0c381958d70176de866051a9a3 Mon Sep 17 00:00:00 2001 From: Isaac Miller Date: Fri, 26 Sep 2025 15:08:29 -0400 Subject: [PATCH 4/6] ruff --- dspy/adapters/json_adapter.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dspy/adapters/json_adapter.py b/dspy/adapters/json_adapter.py index 842cb89ddb..2b9a579d84 100644 --- a/dspy/adapters/json_adapter.py +++ b/dspy/adapters/json_adapter.py @@ -312,18 +312,18 @@ def _extract_first_json_object(text: str) -> str | None: in_string = True continue - if char == '{': + if char == "{": if depth == 0: start_idx = idx seen_lbrace = True depth += 1 continue - if char == '}': + if char == "}": if depth == 0 or start_idx is None: continue depth -= 1 if depth == 0: return text[start_idx : idx + 1] - return None \ No newline at end of file + return None From 9ad247e0c1d9a690f0f6f19717ccc67b407212ce Mon Sep 17 00:00:00 2001 From: Isaac Miller Date: Fri, 26 Sep 2025 15:17:41 -0400 Subject: [PATCH 5/6] Remove issue tracker comment --- tests/adapters/test_json_adapter.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/adapters/test_json_adapter.py b/tests/adapters/test_json_adapter.py index bd5e39ff1f..46214071ea 100644 --- a/tests/adapters/test_json_adapter.py +++ b/tests/adapters/test_json_adapter.py @@ -239,7 +239,6 @@ def test_json_adapter_parse_raise_error_on_mismatch_fields(): ) -# Issue (#8759) def test_json_adapter_parse_handles_braces_inside_string_values(): class CodeIssue(pydantic.BaseModel): issue_type: str From f981c77fc9af69df38ee55230e78d68ab5e3fdfe Mon Sep 17 00:00:00 2001 From: Isaac Miller Date: Tue, 4 Nov 2025 14:45:15 -0500 Subject: [PATCH 6/6] move extractor to utils and add tests --- dspy/adapters/json_adapter.py | 40 +------------------------- dspy/adapters/utils.py | 39 ++++++++++++++++++++++++++ tests/adapters/test_adapter_utils.py | 42 +++++++++++++++++++++++++++- 3 files changed, 81 insertions(+), 40 deletions(-) diff --git a/dspy/adapters/json_adapter.py b/dspy/adapters/json_adapter.py index 2b9a579d84..302b4fd2f1 100644 --- a/dspy/adapters/json_adapter.py +++ b/dspy/adapters/json_adapter.py @@ -10,6 +10,7 @@ from dspy.adapters.chat_adapter import ChatAdapter, FieldInfoWithName from dspy.adapters.types.tool import ToolCalls from dspy.adapters.utils import ( + _extract_first_json_object, format_field_value, get_annotation_name, parse_value, @@ -288,42 +289,3 @@ def enforce_required(schema_part: dict): pydantic_model.model_json_schema = lambda *args, **kwargs: schema return pydantic_model - -def _extract_first_json_object(text: str) -> str | None: - """Return the first balanced JSON object found in text or None if absent.""" - - in_string = False - escape = False - depth = 0 - start_idx: int | None = None - seen_lbrace = False - - for idx, char in enumerate(text): - if seen_lbrace and in_string: - if escape: - escape = False - elif char == "\\": - escape = True - elif char == '"': - in_string = False - continue - - if seen_lbrace and char == '"': - in_string = True - continue - - if char == "{": - if depth == 0: - start_idx = idx - seen_lbrace = True - depth += 1 - continue - - if char == "}": - if depth == 0 or start_idx is None: - continue - depth -= 1 - if depth == 0: - return text[start_idx : idx + 1] - - return None diff --git a/dspy/adapters/utils.py b/dspy/adapters/utils.py index f38a77ee8a..63740933e9 100644 --- a/dspy/adapters/utils.py +++ b/dspy/adapters/utils.py @@ -280,3 +280,42 @@ def _quoted_string_for_literal_type_annotation(s: str) -> str: else: # Neither => enclose in single quotes return f"'{s}'" + +def _extract_first_json_object(text: str) -> str | None: + """Return the first balanced JSON object found in text or None if absent.""" + + in_string = False + escape = False + depth = 0 + start_idx: int | None = None + seen_lbrace = False + + for idx, char in enumerate(text): + if seen_lbrace and in_string: + if escape: + escape = False + elif char == "\\": + escape = True + elif char == '"': + in_string = False + continue + + if seen_lbrace and char == '"': + in_string = True + continue + + if char == "{": + if depth == 0: + start_idx = idx + seen_lbrace = True + depth += 1 + continue + + if char == "}": + if depth == 0 or start_idx is None: + continue + depth -= 1 + if depth == 0: + return text[start_idx : idx + 1] + + return None diff --git a/tests/adapters/test_adapter_utils.py b/tests/adapters/test_adapter_utils.py index d39ddcf8cb..cefcf65070 100644 --- a/tests/adapters/test_adapter_utils.py +++ b/tests/adapters/test_adapter_utils.py @@ -5,7 +5,7 @@ import pytest from pydantic import BaseModel -from dspy.adapters.utils import parse_value +from dspy.adapters.utils import _extract_first_json_object, parse_value class Profile(BaseModel): @@ -105,3 +105,43 @@ def test_parse_value_json_repair(): malformed = "not json or literal" with pytest.raises(Exception): parse_value(malformed, dict) + + +@pytest.mark.parametrize( + "text,expected", + [ + # JSON at the start of text + ('{"name": "John", "age": 30} and some trailing text', '{"name": "John", "age": 30}'), + # JSON in the middle of text + ('Here is your result: {"status": "success", "data": [1, 2, 3]} done', '{"status": "success", "data": [1, 2, 3]}'), + # JSON at the end of text + ('The answer is {"result": 42}', '{"result": 42}'), + # Nested JSON objects + ('Response: {"outer": {"inner": {"deep": "value"}}, "count": 5}', '{"outer": {"inner": {"deep": "value"}}, "count": 5}'), + # JSON with braces inside string values + ('{"message": "Use {placeholders} like {this}", "valid": true}', '{"message": "Use {placeholders} like {this}", "valid": true}'), + # JSON with escaped quotes in strings + ('{"quote": "She said \\"hello\\" to me"}', '{"quote": "She said \\"hello\\" to me"}'), + # No JSON present + ("This is just plain text without any JSON", None), + # Empty JSON object + ("Here is an empty object: {}", "{}"), + # Unbalanced braces (no valid JSON) + ("This has { an opening but no closing", None), + # Multiple JSON objects - should extract only the first + ('{"first": 1} and then {"second": 2}', '{"first": 1}'), + # JSON with newlines + ("""Here is the result: + { + "name": "Alice", + "scores": [95, 87, 92] + } + End of message""", """{ + "name": "Alice", + "scores": [95, 87, 92] + }"""), + ], +) +def test_extract_first_json_object(text, expected): + result = _extract_first_json_object(text) + assert result == expected