canonical · p-gentili · Jan 23, 2026 · Nov 11, 2025 · Nov 11, 2025 · Oct 28, 2025
diff --git a/tests/keyword_suite/video_text_test.robot b/tests/keyword_suite/video_text_test.robot
@@ -78,7 +78,7 @@ Test Keyword Find Text
     ${length}=              Get Length              ${matched_text}
     Should Be True          ${length} > 1
     Should Be Equal As Strings                      ${matched_text[0]['text']}                      AB123cd
-    Should Be Equal As Numbers                      ${matched_text[0]['confidence']}                100.0
+    Should Be Equal As Numbers                      ${matched_text[0]['similarity']}                100.0
 
     ${matched_text}=        Find Text               regex:${REGEX}          region=${REGEX_REGION}
     ${count}=               Set Variable            0

diff --git a/yarf/rf_libraries/libraries/ocr/rapidocr.py b/yarf/rf_libraries/libraries/ocr/rapidocr.py
@@ -10,6 +10,7 @@
 import rapidfuzz
 from PIL import Image
 from rapidocr import RapidOCR
+from robot.api import logger
 
 from yarf.rf_libraries.libraries.geometry.quad import Quad
 from yarf.vendor.RPA.core.geometry import Region
@@ -24,7 +25,7 @@ class OCRResult:
     Attributes:
         position: Quadrilateral region of the match.
         text: Text found in the match.
-        confidence: Confidence of the match
+        confidence: Estimated probability that the recognized text is correct.
     """
 
     position: Quad
@@ -44,12 +45,18 @@ class RapidOCRReader:
     This class is a singleton to avoid loading the model multiple times.
 
     Attributes:
-        DEFAULT_CONFIDENCE: Default confidence for text detection.
-        DEFAULT_COINCIDENCE: Default coincidence for text similarities.
+        DEFAULT_SIMILARITY_THRESHOLD: Minimum similarity percentage (0-100) for
+          text matching. If the similarity between the found text and the
+          target text is below this threshold, the match is discarded.
+        DEFAULT_CONFIDENCE_THRESHOLD: Minimum confidence percentage (0-100) for
+          text matching. If the confidence of the found text is below this
+          threshold, the match is discarded.
+        SIMILARITY_LOG_THRESHOLD: Minimum similarity to log rejected matches.
     """
 
-    DEFAULT_CONFIDENCE: float = 0.7
-    DEFAULT_COINCIDENCE: float = 80.0
+    DEFAULT_SIMILARITY_THRESHOLD: float = 80.0
+    DEFAULT_CONFIDENCE_THRESHOLD: float = 70.0
+    SIMILARITY_LOG_THRESHOLD: float = 80.0
 
     def __new__(cls) -> "RapidOCRReader":
         if not hasattr(cls, "instance"):
@@ -82,8 +89,8 @@ def find(
         self,
         image: Image.Image | Path,
         text: str,
-        confidence: float = DEFAULT_CONFIDENCE,
-        coincidence: float = DEFAULT_COINCIDENCE,
+        similarity_threshold: float = DEFAULT_SIMILARITY_THRESHOLD,
+        confidence_threshold: float = DEFAULT_CONFIDENCE_THRESHOLD,
         region: Region | None = None,
         partial: bool = True,
     ) -> list[dict]:
@@ -94,8 +101,12 @@ def find(
         Args:
             image: Path to image or Image object.
             text: Text to find in image.
-            confidence: Minimum confidence for text detection.
-            coincidence: Minimum coincidence for text similarities.
+            similarity_threshold: Minimum similarity percentage (0-100) for
+              text matching. If the similarity between the found text and the
+              target text is below this threshold, the match is discarded.
+            confidence_threshold: Minimum confidence percentage (0-100) for
+              text matching. If the confidence of the found text is below this
+              threshold, the match is discarded.
             region: Limit the region of the screen where to look.
             partial: Use partial matching.
 
@@ -123,9 +134,12 @@ def find(
                 ocr_output.boxes, ocr_output.txts, ocr_output.scores
             )
         ]
+        # Multiply the item confidence by 100 to convert it to a percentage
+        for item in result:
+            item.confidence *= 100
 
         matches = self.get_matches(
-            result, text, confidence, coincidence, partial
+            result, text, similarity_threshold, confidence_threshold, partial
         )
 
         if region is not None:
@@ -138,8 +152,8 @@ def get_matches(
         self,
         result: list[OCRResult],
         match_text: str,
-        confidence: float,
-        coincidence: float,
+        similarity_threshold: float,
+        confidence_threshold: float,
         partial: bool,
     ) -> list[dict]:
         """
@@ -148,8 +162,12 @@ def get_matches(
         Args:
             result: List with the OCR results.
             match_text: Text to match.
-            confidence: Minimum confidence for text detection.
-            coincidence: Minimum coincidence for text similarities.
+            similarity_threshold: Minimum similarity percentage (0-100) for
+              text matching. If the similarity between the found text and the
+              target text is below this threshold, the match is discarded.
+            confidence_threshold: Minimum confidence percentage (0-100) for
+              text matching. If the confidence of the found text is below this
+              threshold, the match is discarded.
             partial: Use partial matching.
 
         Returns:
@@ -187,17 +205,27 @@ def directional_ratio(q: str, text: str) -> float:
 
         matches = []
         for item in result:
-            ratio = (
+            similarity = (
                 directional_ratio(match_text, item.text)
                 if partial
                 else rapidfuzz.fuzz.ratio(item.text, match_text)
             )
-            if ratio >= coincidence and item.confidence >= confidence:
+            if (
+                similarity >= similarity_threshold
+                and item.confidence >= confidence_threshold
+            ):
                 matches.append(
                     {
                         "text": item.text,
                         "region": item.position.to_region(),
-                        "confidence": ratio,  # Using the ratio like tesseract
+                        "similarity": similarity,
+                        "confidence": item.confidence,
                     }
                 )
-        return sorted(matches, key=lambda x: x["confidence"], reverse=True)
+            elif similarity >= self.SIMILARITY_LOG_THRESHOLD:
+                logger.debug(
+                    f"Rejected match for text '{match_text}' "
+                    f"with similarity {similarity} "
+                    f"and confidence {item.confidence}: '{item.text}'"
+                )
-            elif similarity >= self.SIMILARITY_LOG_THRESHOLD:
-                logger.debug(
-                    f"Rejected match for text '{match_text}' "
-                    f"with similarity {similarity} "
-                    f"and confidence {item.confidence}: '{item.text}'"
-                )
+            elif (
+                similarity >= self.SIMILARITY_LOG_THRESHOLD
+                and item.confidence >= self.CONFIDENCE_LOG_THRESHOLD
+            ):
+                logger.debug(
+                    f"Rejected match for text '{match_text}' "
+                    f"with similarity {similarity} "
+                    f"and confidence {item.confidence}: '{item.text}'"
+                )
-            elif similarity >= self.SIMILARITY_LOG_THRESHOLD:
-                logger.debug(
-                    f"Rejected match for text '{match_text}' "
-                    f"with similarity {similarity} "
-                    f"and confidence {item.confidence}: '{item.text}'"
-                )
+            elif (
+                similarity >= self.SIMILARITY_LOG_THRESHOLD
+                and item.confidence >= self.CONFIDENCE_LOG_THRESHOLD
+            ):
+                logger.debug(
+                    f"Rejected match for text '{match_text}' "
+                    f"with similarity {similarity} "
+                    f"and confidence {item.confidence}: '{item.text}'"
+                )
+        return sorted(matches, key=lambda x: x["similarity"], reverse=True)
diff --git a/yarf/rf_libraries/libraries/ocr/tests/test_rapidocr.py b/yarf/rf_libraries/libraries/ocr/tests/test_rapidocr.py
@@ -29,6 +29,7 @@ def mock_to_image():
 @pytest.fixture(autouse=True)
 def mock_reader():
     with patch("yarf.rf_libraries.libraries.ocr.rapidocr.RapidOCR") as p:
+        p.SIMILARITY_LOG_THRESHOLD = RapidOCRReader.SIMILARITY_LOG_THRESHOLD
         yield p
 
 
@@ -41,7 +42,7 @@ def test_read(self, mock_reader):
         mock_reader.reader.return_value = MockRapidOCROutput(
             boxes=np.array([[[0, 0], [0, 0], [0, 0], [0, 0]]]),
             txts=("Hello", "World"),
-            scores=(0.9, 0.8),
+            scores=(90, 80),
         )
         result = RapidOCRReader.read(mock_reader, None)
 
@@ -55,18 +56,24 @@ def test_find(self, mock_reader):
         mock_reader.reader.return_value = MockRapidOCROutput(
             boxes=np.array([[[0, 0], [0, 0], [0, 0], [0, 0]]]),
             txts=("Hello",),
-            scores=(0.9,),
+            scores=(90,),
         )
         mock_reader.get_matches.return_value = [
-            {"text": "Hello", "region": Region(0, 0, 1, 1), "confidence": 100}
+            {
+                "text": "Hello",
+                "region": Region(0, 0, 1, 1),
+                "confidence": 90,
+                "similarity": 100,
+            }
         ]
         result = RapidOCRReader.find(mock_reader, None, "Hello")
 
         assert result == [
             {
                 "text": "Hello",
                 "region": Region(0, 0, 1, 1),
-                "confidence": 100,
+                "confidence": 90,
+                "similarity": 100,
             }
         ]
 
@@ -87,10 +94,15 @@ def test_find_in_region(self, mock_to_image, mock_reader):
         mock_reader.reader.return_value = MockRapidOCROutput(
             boxes=np.array([[[0, 0], [1, 0], [1, 1], [0, 1]]]),
             txts=("Hello World",),
-            scores=(0.9,),
+            scores=(90,),
         )
         mock_reader.get_matches.return_value = [
-            {"text": "Hello", "region": Region(0, 0, 1, 1), "confidence": 100}
+            {
+                "text": "Hello",
+                "region": Region(0, 0, 1, 1),
+                "confidence": 90,
+                "similarity": 100,
+            }
         ]
         result = RapidOCRReader.find(
             mock_reader, None, "Hello", region=Region(0, 0, 1, 1)
@@ -100,50 +112,53 @@ def test_find_in_region(self, mock_to_image, mock_reader):
             {
                 "text": "Hello",
                 "region": Region(0, 0, 1, 1),
-                "confidence": 100,
+                "confidence": 90,
+                "similarity": 100,
             }
         ]
 
     def test_get_matches(self, mock_reader):
         items = [
             OCRResult(
-                Quad([[0, 0], [1, 0], [1, 1], [0, 1]]), "Hello World", 0.9
+                Quad([[0, 0], [1, 0], [1, 1], [0, 1]]), "Hello World", 90
             ),
         ]
         result = RapidOCRReader.get_matches(
-            mock_reader, items, "Hello World", 0.8, 80, False
+            mock_reader, items, "Hello World", 80, 80, False
         )
 
         assert result == [
             {
                 "text": "Hello World",
                 "region": Region(0, 0, 1, 1),
-                "confidence": 100,
+                "confidence": 90,
+                "similarity": 100,
             }
         ]
 
     def test_get_matches_partial(self, mock_reader):
         items = [
-            OCRResult([[0, 0], [1, 0], [1, 1], [0, 1]], "Hello World", 0.9),
+            OCRResult([[0, 0], [1, 0], [1, 1], [0, 1]], "Hello World", 90),
         ]
         result = RapidOCRReader.get_matches(
-            mock_reader, items, "Hello", 0.8, 80, True
+            mock_reader, items, "Hello", 80, 80, True
         )
 
         assert result == [
             {
                 "text": "Hello World",
                 "region": Region(0, 0, 1, 1),
-                "confidence": 100,
+                "confidence": 90,
+                "similarity": 100,
             }
         ]
 
     def test_get_matches_no_matches(self, mock_reader):
         items = [
-            OCRResult([[0, 0], [1, 0], [1, 1], [0, 1]], "Hello World", 0.9),
+            OCRResult([[0, 0], [1, 0], [1, 1], [0, 1]], "Hello World", 90),
         ]
         result = RapidOCRReader.get_matches(
-            mock_reader, items, "Hello", 0.8, 90, False
+            mock_reader, items, "Hello", 80, 90, False
         )
 
         assert result == []
@@ -158,18 +173,19 @@ def test_get_matches_no_matches(self, mock_reader):
     def test_substring_match(self, mock_reader, input_text, result_text):
         "Substrings match 100% to longer results"
         items = [
-            OCRResult([[0, 0], [1, 0], [1, 1], [0, 1]], "Trash", 0.9),
-            OCRResult([[0, 0], [1, 0], [1, 1], [0, 1]], "Move to Trash", 0.9),
-            OCRResult([[0, 0], [1, 0], [1, 1], [0, 1]], "Move to ...", 0.9),
+            OCRResult([[0, 0], [1, 0], [1, 1], [0, 1]], "Trash", 90),
+            OCRResult([[0, 0], [1, 0], [1, 1], [0, 1]], "Move to Trash", 90),
+            OCRResult([[0, 0], [1, 0], [1, 1], [0, 1]], "Move to ...", 90),
         ]
         result = RapidOCRReader.get_matches(
-            mock_reader, items, input_text, 0.8, 80, True
+            mock_reader, items, input_text, 80, 80, True
         )
         for text in result_text:
             assert {
                 "text": text,
                 "region": Region(0, 0, 1, 1),
-                "confidence": 100,
+                "confidence": 90,
+                "similarity": 100,
             } in result
 
     @pytest.mark.parametrize(
@@ -184,37 +200,38 @@ def test_asimetric_match(self, mock_reader, input_text, result_text):
         - "Move to Trash"  does not match     "Trash"        .
         """
         items = [
-            OCRResult([[0, 0], [1, 0], [1, 1], [0, 1]], "Trash", 0.9),
-            OCRResult([[0, 0], [1, 0], [1, 1], [0, 1]], "Move to Trash", 0.9),
-            OCRResult([[0, 0], [1, 0], [1, 1], [0, 1]], "Move to ...", 0.9),
+            OCRResult([[0, 0], [1, 0], [1, 1], [0, 1]], "Trash", 90),
+            OCRResult([[0, 0], [1, 0], [1, 1], [0, 1]], "Move to Trash", 90),
+            OCRResult([[0, 0], [1, 0], [1, 1], [0, 1]], "Move to ...", 90),
         ]
         result = RapidOCRReader.get_matches(
-            mock_reader, items, input_text, 0.8, 90, True
+            mock_reader, items, input_text, 90, 80, True
         )
         assert len(result) == 1
         assert result == [
             {
                 "text": result_text,
                 "region": Region(0, 0, 1, 1),
-                "confidence": 100,
+                "similarity": 100,
+                "confidence": 90,
             }
         ]
 
     def test_asimetric_long_match(self, mock_reader):
         items = [
             OCRResult(
-                [[0, 0], [1, 0], [1, 1], [0, 1]], "Trash a set of files", 0.9
+                [[0, 0], [1, 0], [1, 1], [0, 1]], "Trash a set of files", 90
             ),
-            OCRResult([[0, 0], [1, 0], [1, 1], [0, 1]], "Move to Trash", 0.9),
-            OCRResult([[0, 0], [1, 0], [1, 1], [0, 1]], "!", 0.9),
+            OCRResult([[0, 0], [1, 0], [1, 1], [0, 1]], "Move to Trash", 90),
+            OCRResult([[0, 0], [1, 0], [1, 1], [0, 1]], "!", 90),
             OCRResult(
                 [[0, 0], [1, 0], [1, 1], [0, 1]],
                 "Move to Downloads",
-                0.9,
+                90,
             ),
         ]
         result = RapidOCRReader.get_matches(
-            mock_reader, items, "Move to Trash!", 0.8, 80, True
+            mock_reader, items, "Move to Trash!", 80, 80, True
         )
 
         assert result[0]["text"] == "Move to Trash"