Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions dspy/adapters/json_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@
import json_repair
import litellm
import pydantic
import regex
from pydantic.fields import FieldInfo

from dspy.adapters.chat_adapter import ChatAdapter, FieldInfoWithName
from dspy.adapters.types.tool import ToolCalls
from dspy.adapters.utils import (
_extract_first_json_object,
format_field_value,
get_annotation_name,
parse_value,
Expand Down Expand Up @@ -151,10 +151,9 @@ def format_assistant_message_content(
return self.format_field_with_value(fields_with_values, role="assistant")

def parse(self, signature: type[Signature], completion: str) -> dict[str, Any]:
pattern = r"\{(?:[^{}]|(?R))*\}"
match = regex.search(pattern, completion, regex.DOTALL)
if match:
completion = match.group(0)
extracted_object = _extract_first_json_object(completion)
if extracted_object:
completion = extracted_object
fields = json_repair.loads(completion)

if not isinstance(fields, dict):
Expand Down
39 changes: 39 additions & 0 deletions dspy/adapters/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,3 +280,42 @@ def _quoted_string_for_literal_type_annotation(s: str) -> str:
else:
# Neither => enclose in single quotes
return f"'{s}'"

def _extract_first_json_object(text: str) -> str | None:
"""Return the first balanced JSON object found in text or None if absent."""

in_string = False
escape = False
depth = 0
start_idx: int | None = None
seen_lbrace = False

for idx, char in enumerate(text):
if seen_lbrace and in_string:
if escape:
escape = False
elif char == "\\":
escape = True
elif char == '"':
in_string = False
continue

if seen_lbrace and char == '"':
in_string = True
continue

if char == "{":
if depth == 0:
start_idx = idx
seen_lbrace = True
depth += 1
continue

if char == "}":
if depth == 0 or start_idx is None:
continue
depth -= 1
if depth == 0:
return text[start_idx : idx + 1]

return None
42 changes: 41 additions & 1 deletion tests/adapters/test_adapter_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import pytest
from pydantic import BaseModel

from dspy.adapters.utils import parse_value
from dspy.adapters.utils import _extract_first_json_object, parse_value


class Profile(BaseModel):
Expand Down Expand Up @@ -105,3 +105,43 @@ def test_parse_value_json_repair():
malformed = "not json or literal"
with pytest.raises(Exception):
parse_value(malformed, dict)


@pytest.mark.parametrize(
    ("text", "expected"),
    [
        # Object is the very first thing in the text.
        (
            '{"name": "John", "age": 30} and some trailing text',
            '{"name": "John", "age": 30}',
        ),
        # Object is surrounded by prose on both sides.
        (
            'Here is your result: {"status": "success", "data": [1, 2, 3]} done',
            '{"status": "success", "data": [1, 2, 3]}',
        ),
        # Object is the last thing in the text.
        (
            'The answer is {"result": 42}',
            '{"result": 42}',
        ),
        # Objects nested several levels deep.
        (
            'Response: {"outer": {"inner": {"deep": "value"}}, "count": 5}',
            '{"outer": {"inner": {"deep": "value"}}, "count": 5}',
        ),
        # Braces that appear inside string values must not affect balancing.
        (
            '{"message": "Use {placeholders} like {this}", "valid": true}',
            '{"message": "Use {placeholders} like {this}", "valid": true}',
        ),
        # Escaped quotes must not terminate the string early.
        (
            '{"quote": "She said \\"hello\\" to me"}',
            '{"quote": "She said \\"hello\\" to me"}',
        ),
        # Text without any object at all.
        (
            "This is just plain text without any JSON",
            None,
        ),
        # The empty object is still an object.
        (
            "Here is an empty object: {}",
            "{}",
        ),
        # An opening brace that is never closed yields nothing.
        (
            "This has { an opening but no closing",
            None,
        ),
        # With several objects, only the first one is returned.
        (
            '{"first": 1} and then {"second": 2}',
            '{"first": 1}',
        ),
        # Objects may span multiple lines.
        (
            """Here is the result:
{
"name": "Alice",
"scores": [95, 87, 92]
}
End of message""",
            """{
"name": "Alice",
"scores": [95, 87, 92]
}""",
        ),
    ],
)
def test_extract_first_json_object(text, expected):
    """Each input must yield exactly the expected object substring, or None."""
    assert _extract_first_json_object(text) == expected
35 changes: 35 additions & 0 deletions tests/adapters/test_json_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,41 @@ def test_json_adapter_parse_raise_error_on_mismatch_fields():
)


def test_json_adapter_parse_handles_braces_inside_string_values():
    """A lone `{` inside a string value must not derail JSONAdapter.parse."""

    class CodeIssue(pydantic.BaseModel):
        issue_type: str
        severity_level: str
        problem_code_snippet: str

    class CodeReview(dspy.Signature):
        reasoning: str = dspy.OutputField(desc="Short chain-of-thought analysis")
        issue_list: list[CodeIssue] = dspy.OutputField(desc="Detected issues")

    # Prose preamble followed by a JSON payload; the code snippet field holds
    # an unmatched opening brace inside its string value.
    raw_completion = (
        "Here is the review output you asked for:\n\n"
        "{\n"
        ' "reasoning": "Inspecting the conditional reveals an unmatched brace.",\n'
        ' "issue_list": [\n'
        " {\n"
        ' "issue_type": "style",\n'
        ' "severity_level": "fatal",\n'
        ' "problem_code_snippet": "if (user) {"\n'
        " }\n"
        " ]\n"
        "}\n"
    )

    parsed = dspy.JSONAdapter().parse(CodeReview, raw_completion)

    assert parsed["reasoning"] == "Inspecting the conditional reveals an unmatched brace."

    issues = parsed["issue_list"]
    assert len(issues) == 1

    only_issue = issues[0]
    assert only_issue.issue_type == "style"
    assert only_issue.severity_level == "fatal"
    assert only_issue.problem_code_snippet == "if (user) {"


def test_json_adapter_formats_image():
# Test basic image formatting
image = dspy.Image(url="https://example.com/image.jpg")
Expand Down