-
Notifications
You must be signed in to change notification settings - Fork 126
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #715 from roboflow/features/tests_for_google_vision_ocr
Add tests for Google Vision OCR
- Loading branch information
Showing
11 changed files
with
300 additions
and
59 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
89 changes: 89 additions & 0 deletions
89
tests/inference/hosted_platform_tests/workflows_examples/test_workflow_with_google_ocr.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
import numpy as np | ||
import pytest | ||
|
||
from inference_sdk import InferenceHTTPClient | ||
from tests.inference.hosted_platform_tests.conftest import ( | ||
GOOGLE_VISION_API_KEY, | ||
ROBOFLOW_API_KEY, | ||
) | ||
|
||
# Workflow definition under test: Google Vision OCR in text-detection mode,
# followed by two visualisation steps (bounding boxes, then labels on top).
GOOGLE_VISION_OCR_WORKFLOW = {
    "version": "1.0",
    # Runtime inputs: the image to analyse and the Google Vision API key.
    "inputs": [
        {"type": "WorkflowImage", "name": "image"},
        {"type": "WorkflowParameter", "name": "api_key"},
    ],
    "steps": [
        # Step 1: run Google Vision OCR against the input image.
        {
            "type": "roboflow_core/google_vision_ocr@v1",
            "name": "google_vision_ocr",
            "image": "$inputs.image",
            "ocr_type": "text_detection",
            "api_key": "$inputs.api_key",
        },
        # Step 2: draw bounding boxes around the detected text regions.
        {
            "type": "roboflow_core/bounding_box_visualization@v1",
            "name": "bounding_box_visualization",
            "predictions": "$steps.google_vision_ocr.predictions",
            "image": "$inputs.image",
        },
        # Step 3: overlay the recognised text labels on the boxed image.
        {
            "type": "roboflow_core/label_visualization@v1",
            "name": "label_visualization",
            "predictions": "$steps.google_vision_ocr.predictions",
            "image": "$steps.bounding_box_visualization.image",
        },
    ],
    # Outputs exposed to callers: the raw extracted text, the structured
    # detections and the final annotated image.
    "outputs": [
        {
            "type": "JsonField",
            "name": "extracted_text",
            "selector": "$steps.google_vision_ocr.text",
        },
        {
            "type": "JsonField",
            "name": "text_detections",
            "selector": "$steps.google_vision_ocr.predictions",
        },
        {
            "type": "JsonField",
            "name": "text_visualised",
            "selector": "$steps.label_visualization.image",
        },
    ],
}
|
||
|
||
# Fix: the skip reason previously said "No OpenAI API key provided", but the
# condition gates on the Google Vision key — corrected to avoid misleading
# skip reports in CI.
@pytest.mark.skipif(
    GOOGLE_VISION_API_KEY is None, reason="Google Vision API key not provided"
)
@pytest.mark.flaky(retries=4, delay=1)
def test_workflow_with_google_api_ocr(
    object_detection_service_url: str,
    license_plate_image: str,
) -> None:
    """Run the Google Vision OCR workflow against the hosted platform.

    Sends a license-plate image through the workflow and verifies that all
    declared outputs are delivered, some text is extracted, and the expected
    number of text regions is detected.
    """
    # given
    client = InferenceHTTPClient(
        api_url=object_detection_service_url,
        api_key=ROBOFLOW_API_KEY,
    )

    # when
    result = client.run_workflow(
        specification=GOOGLE_VISION_OCR_WORKFLOW,
        images={
            "image": license_plate_image,
        },
        parameters={
            "api_key": GOOGLE_VISION_API_KEY,
        },
    )

    # then
    assert len(result) == 1, "Single image given, expected single output"
    assert set(result[0].keys()) == {
        "extracted_text",
        "text_visualised",
        "text_detections",
    }, "Expected all outputs to be delivered"
    assert len(result[0]["extracted_text"]) > 0, "Expected text to be extracted"
    assert (
        len(result[0]["text_detections"]) == 4
    ), "Expected 4 text regions to be detected"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
155 changes: 155 additions & 0 deletions
155
tests/workflows/integration_tests/execution/test_workflow_with_google_vision_ocr.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,155 @@ | ||
import os | ||
|
||
import numpy as np | ||
import pytest | ||
|
||
from inference.core.env import WORKFLOWS_MAX_CONCURRENT_STEPS | ||
from inference.core.managers.base import ModelManager | ||
from inference.core.workflows.core_steps.common.entities import StepExecutionMode | ||
from inference.core.workflows.execution_engine.core import ExecutionEngine | ||
from tests.workflows.integration_tests.execution.workflows_gallery_collector.decorators import ( | ||
add_to_workflows_gallery, | ||
) | ||
|
||
GOOGLE_VISION_API_KEY = os.getenv("WORKFLOWS_TEST_GOOGLE_VISION_API_KEY") | ||
|
||
# Workflow under test: Google Vision OCR (text detection) whose predictions
# feed a bounding-box visualisation and then a label visualisation.
GOOGLE_VISION_OCR_WORKFLOW = {
    "version": "1.0",
    # The image to process and the Google Vision API key, both runtime inputs.
    "inputs": [
        {"type": "WorkflowImage", "name": "image"},
        {"type": "WorkflowParameter", "name": "api_key"},
    ],
    "steps": [
        # OCR step — produces both plain text and structured predictions.
        {
            "type": "roboflow_core/google_vision_ocr@v1",
            "name": "google_vision_ocr",
            "image": "$inputs.image",
            "ocr_type": "text_detection",
            "api_key": "$inputs.api_key",
        },
        # Draw boxes around each detected text region on the original image.
        {
            "type": "roboflow_core/bounding_box_visualization@v1",
            "name": "bounding_box_visualization",
            "predictions": "$steps.google_vision_ocr.predictions",
            "image": "$inputs.image",
        },
        # Add the recognised text as labels on the boxed image.
        {
            "type": "roboflow_core/label_visualization@v1",
            "name": "label_visualization",
            "predictions": "$steps.google_vision_ocr.predictions",
            "image": "$steps.bounding_box_visualization.image",
        },
    ],
    # Declared outputs: raw text, structured detections, annotated image.
    "outputs": [
        {
            "type": "JsonField",
            "name": "extracted_text",
            "selector": "$steps.google_vision_ocr.text",
        },
        {
            "type": "JsonField",
            "name": "text_detections",
            "selector": "$steps.google_vision_ocr.predictions",
        },
        {
            "type": "JsonField",
            "name": "text_visualised",
            "selector": "$steps.label_visualization.image",
        },
    ],
}
|
||
|
||
@add_to_workflows_gallery(
    category="Workflows for OCR",
    use_case_title="Google Vision OCR",
    use_case_description="""
In this example, Google Vision OCR is used to extract text from input image.
Additionally, example presents how to combine structured output of Google API
with visualisation blocks.
    """,
    workflow_definition=GOOGLE_VISION_OCR_WORKFLOW,
    workflow_name_in_app="google-vision-ocr",
)
@pytest.mark.skipif(
    condition=GOOGLE_VISION_API_KEY is None, reason="Google API key not provided"
)
def test_workflow_with_google_ocr_when_text_should_be_detected(
    model_manager: ModelManager,
    license_plate_image: np.ndarray,
) -> None:
    """OCR on a license-plate image yields the reference text and detections."""
    # given
    init_params = {
        "workflows_core.model_manager": model_manager,
        "workflows_core.step_execution_mode": StepExecutionMode.LOCAL,
    }
    engine = ExecutionEngine.init(
        workflow_definition=GOOGLE_VISION_OCR_WORKFLOW,
        init_parameters=init_params,
        max_concurrent_steps=WORKFLOWS_MAX_CONCURRENT_STEPS,
    )

    # when
    result = engine.run(
        runtime_parameters={
            "image": [license_plate_image],
            "api_key": GOOGLE_VISION_API_KEY,
        }
    )

    # then
    assert len(result) == 1, "Single image given, expected single output"
    delivered_outputs = set(result[0].keys())
    assert delivered_outputs == {
        "extracted_text",
        "text_visualised",
        "text_detections",
    }, "Expected all outputs to be delivered"
    assert (
        result[0]["extracted_text"] == "2398027\n2398023\nKn\n239+8072"
    ), "Extracted text should match reference"
    visualisation_unchanged = np.allclose(
        license_plate_image, result[0]["text_visualised"].numpy_image
    )
    assert (
        not visualisation_unchanged
    ), "Expected that visualisation will change the output image"
    assert (
        len(result[0]["text_detections"]) == 4
    ), "Expected 4 text regions to be detected"
|
||
|
||
@pytest.mark.skipif(
    condition=GOOGLE_VISION_API_KEY is None, reason="Google API key not provided"
)
def test_workflow_with_google_ocr_when_no_text_should_be_detected(
    model_manager: ModelManager,
    dogs_image: np.ndarray,
) -> None:
    """OCR on an image without text yields empty outputs and an unchanged image."""
    # given
    workflow_init_parameters = {
        "workflows_core.model_manager": model_manager,
        "workflows_core.step_execution_mode": StepExecutionMode.LOCAL,
    }
    execution_engine = ExecutionEngine.init(
        workflow_definition=GOOGLE_VISION_OCR_WORKFLOW,
        init_parameters=workflow_init_parameters,
        max_concurrent_steps=WORKFLOWS_MAX_CONCURRENT_STEPS,
    )

    # when
    result = execution_engine.run(
        runtime_parameters={
            "image": [dogs_image],
            "api_key": GOOGLE_VISION_API_KEY,
        }
    )

    # then
    assert len(result) == 1, "Single image given, expected single output"
    assert set(result[0].keys()) == {
        "extracted_text",
        "text_visualised",
        "text_detections",
    }, "Expected all outputs to be delivered"
    # Fix: this was the only assertion in the module without a failure
    # message — added one for consistency with the sibling tests.
    assert result[0]["extracted_text"] == "", "Expected no text to be extracted"
    assert np.allclose(
        dogs_image, result[0]["text_visualised"].numpy_image
    ), "Expected that visualisation will not change the output image"
    assert len(result[0]["text_detections"]) == 0, "Expected 0 text regions detected"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.