PaddleOCR+OpenCV detection visuals messed up #17450
4833R11Y45 asked this question in Q&A
from dataclasses import dataclass
from typing import List, Tuple

import cv2
import numpy as np
from paddleocr import PaddleOCR

@dataclass
class Detection:
    """Represents a single OCR detection as a RECTANGLE (x_min, y_min, x_max, y_max)."""
    text: str
    bbox: Tuple[int, int, int, int]  # axis-aligned rectangle!
    confidence: float
    tile_offset: Tuple[int, int]
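    # Not shown above: the two helpers that visualize_detections() below relies on.
    # Roughly this, assuming tile_offset is the tile's top-left (x, y) in the full floorplan:
    def get_global_bbox(self) -> Tuple[int, int, int, int]:
        # Shift the tile-local bbox by the tile offset into full-image coordinates.
        ox, oy = self.tile_offset
        x0, y0, x1, y1 = self.bbox
        return (x0 + ox, y0 + oy, x1 + ox, y1 + oy)

    def get_global_center(self) -> Tuple[float, float]:
        # Center of the global bbox, used for drawing the match lines.
        x0, y0, x1, y1 = self.get_global_bbox()
        return ((x0 + x1) / 2.0, (y0 + y1) / 2.0)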
def run_paddleocr_on_tile(
    ocr_engine: PaddleOCR,
    tile: np.ndarray,
    tile_offset: Tuple[int, int],
    debug: bool = False,
    debug_all: bool = False
) -> List[Detection]:
    """
    Run PaddleOCR 3.3.2 on a tile. Save all output as (x_min, y_min, x_max, y_max) rectangles.
    """
    results = list(ocr_engine.predict(tile))
    detections = []
    if not results:
        if debug:
            print(" [DEBUG] No results returned from PaddleOCR")
        return []

    result_obj = results[0]
    res_dict = None
    if hasattr(result_obj, 'json'):
        json_dict = result_obj.json
        res_dict = json_dict.get('res', {}) if isinstance(json_dict, dict) else {}
    elif hasattr(result_obj, 'res'):
        res_dict = result_obj.res

    if not (isinstance(res_dict, dict) and 'dt_polys' in res_dict):
        if debug:
            print(" [DEBUG] No dt_polys found")
        return []

    dt_polys = res_dict.get('dt_polys', [])
    rec_texts = res_dict.get('rec_texts', [])
    rec_scores = res_dict.get('rec_scores', [])

    for i, poly in enumerate(dt_polys):
        text = rec_texts[i] if i < len(rec_texts) else ""
        conf = rec_scores[i] if i < len(rec_scores) else 1.0
        if not text.strip():
            continue
        # Always use the axis-aligned bounding rectangle of the detection polygon
        points = np.array(poly, dtype=np.float32).reshape((-1, 2))
        x_min, y_min = np.min(points, axis=0)
        x_max, y_max = np.max(points, axis=0)
        bbox = (int(x_min), int(y_min), int(x_max), int(y_max))
        detections.append(
            Detection(text=text, bbox=bbox, confidence=float(conf), tile_offset=tile_offset)
        )
    return detections
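For context, the tiling loop that drives this function looks roughly like the sketch below; the name run_tiled_ocr and the 1024-pixel tile size with 128-pixel overlap are simplified placeholders, not my exact setup:

def run_tiled_ocr(ocr_engine: PaddleOCR,
                  floorplan: np.ndarray,
                  tile_size: int = 1024,
                  overlap: int = 128) -> List[Detection]:
    """Slice the floorplan into overlapping tiles and collect detections from every tile."""
    h, w = floorplan.shape[:2]
    step = tile_size - overlap
    detections: List[Detection] = []
    for y in range(0, h, step):
        for x in range(0, w, step):
            tile = floorplan[y:y + tile_size, x:x + tile_size]
            detections.extend(run_paddleocr_on_tile(ocr_engine, tile, tile_offset=(x, y)))
    return detections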
def visualize_detections(floorplan: np.ndarray,
ceiling_detections: List[Detection],
height_detections: List[Detection],
matches: List[CeilingMatch],
output_path: str):
vis_img = floorplan.copy()
for det in ceiling_detections:
x0, y0, x1, y1 = det.get_global_bbox()
cv2.rectangle(vis_img, (x0, y0), (x1, y1), (0, 255, 0), 2)
cv2.putText(vis_img, det.text, (x0, y0 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
for det in height_detections:
x0, y0, x1, y1 = det.get_global_bbox()
cv2.rectangle(vis_img, (x0, y0), (x1, y1), (255, 0, 0), 2)
cv2.putText(vis_img, det.text, (x0, y0 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 2)
for match in matches:
cxy = match.ceiling_detection.get_global_center()
hxy = match.height_detection.get_global_center()
cv2.line(vis_img, (int(cxy[0]), int(cxy[1])), (int(hxy[0]), int(hxy[1])), (0, 255, 255), 2)
cv2.imwrite(output_path, cv2.cvtColor(vis_img, cv2.COLOR_RGB2BGR))
print(f" Saved visualization to {output_path}")
I am using PaddleOCR 3.2.2. I would be really thankful if anyone could help.