Tencent-Hunyuan · ppolariss · May 15, 2026
diff --git a/Hunyuan-OCR-master/utils.py b/Hunyuan-OCR-master/utils.py
@@ -6,6 +6,22 @@
 import numpy as np
 from PIL import Image, ImageDraw, ImageFont
 
+# Matches one bbox "(x1,y1),(x2,y2)": digits only, no sign, no whitespace.
+_SPOTTING_COORD_PATTERN = re.compile(r'\((\d+),(\d+)\),\((\d+),(\d+)\)')
+
+
+def _iter_spotting_items(response: str):
+    """Yield (text, ((x1, y1), (x2, y2)), match) for each bbox in response."""
+    cursor = 0  # index just past the previously matched bbox
+    for bbox in _SPOTTING_COORD_PATTERN.finditer(response):
+        # An item's text is whatever sits between the previous bbox and this
+        # one, so parentheses inside the text never confuse the parser.
+        label = response[cursor:bbox.start()].strip()
+        x1, y1, x2, y2 = bbox.groups()  # digit strings; callers convert them
+        yield label, ((x1, y1), (x2, y2)), bbox
+        cursor = bbox.end()
+
+
 def clean_repeated_substrings(text):
     """Clean repeated substrings in text"""
     n = len(text)
@@ -69,18 +85,12 @@ def denormalize_coordinates(coord: Tuple[float, float], image_width: int, image_
 def process_spotting_response(response: str, image_width: int, image_height: int) -> str:
     """Process spotting task response and denormalize coordinates"""
     try:
-        # Find all text and coordinate pairs using regex
-        pattern = r'([^()]+)(\(\d+,\d+\),\(\d+,\d+\))'
-        matches = re.finditer(pattern, response)
-
-        new_response = response
-        for match in matches:
-            text = match.group(1).strip()
-            coords = match.group(2)
-
+        new_response_parts = []
+        last_end = 0
+        for _text, coord_matches, match in _iter_spotting_items(response):
+            new_response_parts.append(response[last_end:match.start()])
+
             # Parse the two coordinate points 
-            coord_pattern = r'\((\d+),(\d+)\)'
-            coord_matches = re.findall(coord_pattern, coords)
             if len(coord_matches) == 2:
                 start_coord = (float(coord_matches[0][0]), float(coord_matches[0][1]))
                 end_coord = (float(coord_matches[1][0]), float(coord_matches[1][1]))
@@ -91,11 +101,13 @@ def process_spotting_response(response: str, image_width: int, image_height: int
 
                 # Build new coordinate string
                 new_coords = f"({denorm_start[0]},{denorm_start[1]}),({denorm_end[0]},{denorm_end[1]})"
-
-                # Replace coordinates in original response
-                new_response = new_response.replace(coords, new_coords)
-
-        return new_response
+                new_response_parts.append(new_coords)
+            else:
+                new_response_parts.append(match.group(0))
+            last_end = match.end()
+
+        new_response_parts.append(response[last_end:])
+        return ''.join(new_response_parts)
 
     except Exception as e:
         print(f"Error processing response: {str(e)}")
@@ -116,19 +128,9 @@ def draw_text_detection_boxes(image: Image, response: str) -> Image:
     except IOError:
         font = ImageFont.load_default()
 
-    # Extract text and coordinates using regex
-    pattern = r'([^()]+)(\(\d+,\d+\),\(\d+,\d+\))'
-    matches = re.finditer(pattern, response)
-
-    for match in matches:
+    # Extract text and coordinates using bbox delimiters, so text may contain parentheses.
+    for text, coord_matches, _match in _iter_spotting_items(response):
         try:
-            text = match.group(1).strip()
-            coords = match.group(2)
-
-            # Parse coordinates
-            coord_pattern = r'\((\d+),(\d+)\)'
-            coord_matches = re.findall(coord_pattern, coords)
-
             if len(coord_matches) == 2:
                 x1, y1 = int(coord_matches[0][0]), int(coord_matches[0][1])
                 x2, y2 = int(coord_matches[1][0]), int(coord_matches[1][1])

diff --git a/tests/test_utils.py b/tests/test_utils.py
@@ -0,0 +1,45 @@
+import importlib.util
+import unittest
+from pathlib import Path
+
+
+REPO_ROOT = Path(__file__).resolve().parents[1]  # tests/ -> repository root
+UTILS_PATH = REPO_ROOT / "Hunyuan-OCR-master" / "utils.py"
+# The hyphenated directory name can't appear in an import statement; load by path.
+spec = importlib.util.spec_from_file_location("hyocr_utils", UTILS_PATH)
+utils = importlib.util.module_from_spec(spec)
+spec.loader.exec_module(utils)
+
+
+class SpottingResponseParserTest(unittest.TestCase):
+    """Regression tests: spotting text may itself contain parentheses."""
+
+    # Two parenthesized text spans, each followed by an "(x1,y1),(x2,y2)" box.
+    RESPONSE = (
+        "ATLANTA (AP) - Human(43,500),(325,512)"
+        "they (detectives)(371,523),(653,534)"
+    )
+
+    def test_spotting_text_can_contain_parentheses(self):
+        items = list(utils._iter_spotting_items(self.RESPONSE))
+        texts = [text for text, _coords, _match in items]
+        self.assertEqual(texts, ["ATLANTA (AP) - Human", "they (detectives)"])
+        # Coordinates are yielded as digit strings, one pair per corner.
+        boxes = [coords for _text, coords, _match in items]
+        self.assertEqual(boxes[0], (("43", "500"), ("325", "512")))
+        self.assertEqual(boxes[1], (("371", "523"), ("653", "534")))
+
+    def test_process_spotting_response_preserves_parenthesized_text(self):
+        processed = utils.process_spotting_response(
+            self.RESPONSE, image_width=2000, image_height=1000
+        )
+        # Exact match (not assertIn) so dropped or duplicated text is caught too.
+        self.assertEqual(
+            processed,
+            "ATLANTA (AP) - Human(86,500),(650,512)"
+            "they (detectives)(742,523),(1306,534)",
+        )
+
+
+if __name__ == "__main__":
+    unittest.main()  # run this module's TestCase classes when executed directly