Skip to content

Commit cf45f2a

Browse files
igerberclaude
andcommitted
Address AI review findings: content-first parsing, response helper, tests
P1: Relax status check — extract content first via _extract_response_text(), only fail when no usable content is found (not on status mismatch alone). P2: Extract response parsing into dedicated helper with unit tests covering multiple payload shapes (missing status, null status, SDK output_text, multiple output blocks). Add reasoning-model timeout hint to stderr. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent a2e8883 commit cf45f2a

2 files changed

Lines changed: 126 additions & 23 deletions

File tree

.claude/scripts/openai_review.py

Lines changed: 37 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1111,6 +1111,25 @@ def estimate_tokens(text: str) -> int:
11111111
return len(text) // 4
11121112

11131113

1114+
def _extract_response_text(result: dict) -> str:
1115+
"""Extract review text from a Responses API JSON payload.
1116+
1117+
Tries the top-level ``output_text`` convenience field first (populated by
1118+
the Python SDK but typically null in raw HTTP responses), then walks
1119+
``output[].content[]`` items. Returns an empty string when no text is
1120+
found so the caller can decide how to handle it.
1121+
"""
1122+
text = result.get("output_text") or ""
1123+
if text:
1124+
return text
1125+
for item in result.get("output", []):
1126+
if item.get("type") == "message":
1127+
for block in item.get("content", []):
1128+
if block.get("type") == "output_text":
1129+
text += block.get("text", "")
1130+
return text
1131+
1132+
11141133
def call_openai(
11151134
prompt: str,
11161135
model: str,
@@ -1188,31 +1207,23 @@ def call_openai(
11881207
print(f"Error: Network error — {e.reason}", file=sys.stderr)
11891208
sys.exit(1)
11901209

1191-
status = result.get("status")
1192-
if status != "completed":
1210+
content = _extract_response_text(result)
1211+
1212+
if not content.strip():
1213+
# No usable content — report the best diagnostic we have.
1214+
status = result.get("status", "<missing>")
11931215
detail = result.get("incomplete_details") or result.get("error") or ""
1194-
print(
1195-
f"Error: OpenAI response status is '{status}' (expected 'completed').",
1196-
file=sys.stderr,
1197-
)
1216+
if status not in ("completed", "<missing>"):
1217+
print(
1218+
f"Error: OpenAI response status is '{status}' with no review content.",
1219+
file=sys.stderr,
1220+
)
1221+
else:
1222+
print("Error: Empty review content from OpenAI API.", file=sys.stderr)
11981223
if detail:
11991224
print(f"Detail: {detail}", file=sys.stderr)
12001225
sys.exit(1)
12011226

1202-
# Extract text from output items (output_text is null in raw HTTP responses;
1203-
# the convenience property only exists in the Python SDK).
1204-
content = result.get("output_text") or ""
1205-
if not content:
1206-
for item in result.get("output", []):
1207-
if item.get("type") == "message":
1208-
for block in item.get("content", []):
1209-
if block.get("type") == "output_text":
1210-
content += block.get("text", "")
1211-
1212-
if not content.strip():
1213-
print("Error: Empty review content from OpenAI API.", file=sys.stderr)
1214-
sys.exit(1)
1215-
12161227
usage = result.get("usage", {})
12171228
return (content, usage)
12181229

@@ -1585,6 +1596,12 @@ def main() -> None:
15851596
sys.exit(0)
15861597

15871598
# Call OpenAI API
1599+
if _is_reasoning_model(args.model) and args.timeout == DEFAULT_TIMEOUT:
1600+
print(
1601+
f"Note: {args.model} is a reasoning model. Consider --timeout 900 "
1602+
"for large reviews.",
1603+
file=sys.stderr,
1604+
)
15881605
print(f"Sending review to {args.model}...", file=sys.stderr)
15891606
print(f"Estimated input tokens: ~{est_tokens:,}", file=sys.stderr)
15901607
if cost_str:

tests/test_openai_review.py

Lines changed: 89 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1572,6 +1572,37 @@ def test_pro_snapshot_matches_pro(self, review_mod):
15721572
assert snapshot == base
15731573

15741574

1575+
class TestExtractResponseText:
1576+
def test_prefers_output_text_field(self, review_mod):
1577+
result = {"output_text": "Direct text.", "output": []}
1578+
assert review_mod._extract_response_text(result) == "Direct text."
1579+
1580+
def test_walks_output_items_when_output_text_null(self, review_mod):
1581+
result = {
1582+
"output_text": None,
1583+
"output": [{"type": "message", "content": [
1584+
{"type": "output_text", "text": "Walked text."},
1585+
]}],
1586+
}
1587+
assert review_mod._extract_response_text(result) == "Walked text."
1588+
1589+
def test_concatenates_multiple_blocks(self, review_mod):
1590+
result = {
1591+
"output_text": None,
1592+
"output": [{"type": "message", "content": [
1593+
{"type": "output_text", "text": "A"},
1594+
{"type": "output_text", "text": "B"},
1595+
]}],
1596+
}
1597+
assert review_mod._extract_response_text(result) == "AB"
1598+
1599+
def test_empty_when_no_output(self, review_mod):
1600+
assert review_mod._extract_response_text({"output_text": None, "output": []}) == ""
1601+
1602+
def test_empty_when_missing_keys(self, review_mod):
1603+
assert review_mod._extract_response_text({}) == ""
1604+
1605+
15751606
class TestResponsesAPIConstants:
15761607
def test_endpoint_is_responses(self, review_mod):
15771608
assert "responses" in review_mod.ENDPOINT
@@ -1652,8 +1683,63 @@ def test_timeout_passed_through(self, review_mod, mock_urlopen):
16521683
review_mod.call_openai("test", "gpt-5.4", "fake-key", timeout=900)
16531684
assert mock_urlopen["timeout"] == 900
16541685

1655-
def test_incomplete_status_exits(self, review_mod, mock_urlopen):
1656-
"""Non-completed status should cause sys.exit."""
1686+
def test_missing_status_with_valid_output_succeeds(self, review_mod, mock_urlopen):
1687+
"""Valid content should be accepted even when status field is absent."""
1688+
mock_urlopen["response_data"] = {
1689+
"output_text": None,
1690+
"output": [{
1691+
"type": "message",
1692+
"content": [{"type": "output_text", "text": "Good review."}],
1693+
}],
1694+
"usage": {"input_tokens": 10, "output_tokens": 5},
1695+
}
1696+
content, _ = review_mod.call_openai("test", "gpt-5.4", "fake-key")
1697+
assert content == "Good review."
1698+
1699+
def test_status_none_with_valid_output_succeeds(self, review_mod, mock_urlopen):
1700+
"""status=None should not prevent content extraction."""
1701+
mock_urlopen["response_data"] = {
1702+
"status": None,
1703+
"output_text": None,
1704+
"output": [{
1705+
"type": "message",
1706+
"content": [{"type": "output_text", "text": "Good review."}],
1707+
}],
1708+
"usage": {"input_tokens": 10, "output_tokens": 5},
1709+
}
1710+
content, _ = review_mod.call_openai("test", "gpt-5.4", "fake-key")
1711+
assert content == "Good review."
1712+
1713+
def test_output_text_convenience_field_used(self, review_mod, mock_urlopen):
1714+
"""When output_text is populated (SDK-style), use it directly."""
1715+
mock_urlopen["response_data"] = {
1716+
"status": "completed",
1717+
"output_text": "SDK-provided text.",
1718+
"output": [],
1719+
"usage": {"input_tokens": 10, "output_tokens": 5},
1720+
}
1721+
content, _ = review_mod.call_openai("test", "gpt-5.4", "fake-key")
1722+
assert content == "SDK-provided text."
1723+
1724+
def test_multiple_output_text_blocks_concatenated(self, review_mod, mock_urlopen):
1725+
"""Multiple output_text blocks should be concatenated in order."""
1726+
mock_urlopen["response_data"] = {
1727+
"status": "completed",
1728+
"output_text": None,
1729+
"output": [{
1730+
"type": "message",
1731+
"content": [
1732+
{"type": "output_text", "text": "Part 1. "},
1733+
{"type": "output_text", "text": "Part 2."},
1734+
],
1735+
}],
1736+
"usage": {"input_tokens": 10, "output_tokens": 5},
1737+
}
1738+
content, _ = review_mod.call_openai("test", "gpt-5.4", "fake-key")
1739+
assert content == "Part 1. Part 2."
1740+
1741+
def test_failed_status_no_content_exits(self, review_mod, mock_urlopen):
1742+
"""Failed status with no usable content should exit."""
16571743
mock_urlopen["response_data"] = {
16581744
"status": "failed",
16591745
"output_text": None,
@@ -1664,7 +1750,7 @@ def test_incomplete_status_exits(self, review_mod, mock_urlopen):
16641750
review_mod.call_openai("test", "gpt-5.4", "fake-key")
16651751

16661752
def test_empty_output_exits(self, review_mod, mock_urlopen):
1667-
"""Empty output items should cause sys.exit."""
1753+
"""Empty output items with completed status should exit."""
16681754
mock_urlopen["response_data"] = {
16691755
"status": "completed",
16701756
"output_text": None,

0 commit comments

Comments
 (0)