
Commit

Merge pull request #715 from roboflow/features/tests_for_google_vision_ocr

Add tests for Google Vision OCR
PawelPeczek-Roboflow authored Oct 3, 2024
2 parents 9b65794 + ad8ae8c commit 74b52a9
Showing 11 changed files with 300 additions and 59 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/hosted_inference_e2e_test_production.yml
@@ -28,4 +28,4 @@ jobs:
python -m pip install -r requirements/requirements.test.unit.txt -r requirements/requirements.test.integration.txt -r requirements/requirements.sdk.http.txt
- name: 📝 E2E test of HOSTED INFERENCE at 🚨 PRODUCTION 🚨 🔥🔥🔥🔥
run:
-          SKIP_WARMUP=${{ github.event.inputs.skip_warmup }} HOSTED_PLATFORM_TESTS_API_KEY=${{ secrets.LOAD_TEST_PRODUCTION_API_KEY }} HOSTED_PLATFORM_TESTS_PROJECT=roboflow-platform OPENAI_KEY=${{ secrets.OPEN_AI_API_KEY }} GOOGLE_API_KEY=${{ secrets.GEMINI_API_KEY }} ANTHROPIC_API_KEY=${{ secrets.ANTHROPIC_API_KEY }} pytest tests/inference/hosted_platform_tests/
+          SKIP_WARMUP=${{ github.event.inputs.skip_warmup }} HOSTED_PLATFORM_TESTS_API_KEY=${{ secrets.LOAD_TEST_PRODUCTION_API_KEY }} HOSTED_PLATFORM_TESTS_PROJECT=roboflow-platform OPENAI_KEY=${{ secrets.OPEN_AI_API_KEY }} GOOGLE_API_KEY=${{ secrets.GEMINI_API_KEY }} ANTHROPIC_API_KEY=${{ secrets.ANTHROPIC_API_KEY }} GOOGLE_VISION_API_KEY=${{ secrets.GOOGLE_VISION_API_KEY }} pytest tests/inference/hosted_platform_tests/
2 changes: 1 addition & 1 deletion .github/workflows/hosted_inference_e2e_test_staging.yml
@@ -28,4 +28,4 @@ jobs:
python -m pip install -r requirements/requirements.test.unit.txt -r requirements/requirements.test.integration.txt -r requirements/requirements.sdk.http.txt
- name: 📝 E2E test of HOSTED INFERENCE at 😎 STAGING 😎 🔥🔥🔥🔥
run:
-          SKIP_WARMUP=${{ github.event.inputs.skip_warmup }} HOSTED_PLATFORM_TESTS_API_KEY=${{ secrets.LOAD_TEST_STAGING_API_KEY }} HOSTED_PLATFORM_TESTS_PROJECT=roboflow-staging OPENAI_KEY=${{ secrets.OPEN_AI_API_KEY }} GOOGLE_API_KEY=${{ secrets.GEMINI_API_KEY }} ANTHROPIC_API_KEY=${{ secrets.ANTHROPIC_API_KEY }} pytest tests/inference/hosted_platform_tests/
+          SKIP_WARMUP=${{ github.event.inputs.skip_warmup }} HOSTED_PLATFORM_TESTS_API_KEY=${{ secrets.LOAD_TEST_STAGING_API_KEY }} HOSTED_PLATFORM_TESTS_PROJECT=roboflow-staging OPENAI_KEY=${{ secrets.OPEN_AI_API_KEY }} GOOGLE_API_KEY=${{ secrets.GEMINI_API_KEY }} ANTHROPIC_API_KEY=${{ secrets.ANTHROPIC_API_KEY }} GOOGLE_VISION_API_KEY=${{ secrets.GOOGLE_VISION_API_KEY }} pytest tests/inference/hosted_platform_tests/
@@ -66,11 +66,11 @@ class BlockManifest(WorkflowBlockManifest):
json_schema_extra={
"values_metadata": {
"text_detection": {
"name": "Text Detection",
"name": "Any Scene Text Detection",
"description": "Detects and extracts text from any image, including photographs that contain blocks of text.",
},
"ocr_text_detection": {
"name": "OCR Text Detection",
"name": "Document Text Detection",
"description": "Optimized for dense text documents, such as scanned pages or photographs of printed text.",
},
},
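For context on the renamed ocr_type options above: Google's Cloud Vision API exposes two OCR feature types, TEXT_DETECTION (sparse text in arbitrary scenes) and DOCUMENT_TEXT_DETECTION (dense printed documents). Below is a minimal sketch of the raw images:annotate call, assuming the block maps its ocr_type values onto those feature types; the mapping and the helper function are illustrative, not taken from the block's implementation.

import base64

import requests

# Assumed mapping from the manifest's ocr_type values to Vision feature types.
FEATURE_BY_OCR_TYPE = {
    "text_detection": "TEXT_DETECTION",  # "Any Scene Text Detection"
    "ocr_text_detection": "DOCUMENT_TEXT_DETECTION",  # "Document Text Detection"
}


def run_google_vision_ocr(image_path: str, api_key: str, ocr_type: str = "text_detection") -> dict:
    # Vision accepts inline images as base64-encoded bytes.
    with open(image_path, "rb") as f:
        content = base64.b64encode(f.read()).decode("utf-8")
    payload = {
        "requests": [
            {
                "image": {"content": content},
                "features": [{"type": FEATURE_BY_OCR_TYPE[ocr_type]}],
            }
        ]
    }
    response = requests.post(
        "https://vision.googleapis.com/v1/images:annotate",
        params={"key": api_key},  # API-key auth, matching the workflow's api_key input
        json=payload,
        timeout=60,
    )
    response.raise_for_status()
    return response.json()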
1 change: 1 addition & 0 deletions tests/inference/hosted_platform_tests/conftest.py
@@ -89,6 +89,7 @@ class PlatformEnvironment(Enum):
OPENAI_KEY = os.getenv("OPENAI_KEY")
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")
+GOOGLE_VISION_API_KEY = os.getenv("GOOGLE_VISION_API_KEY")


@pytest.fixture(scope="session")
@@ -0,0 +1,89 @@
import numpy as np
import pytest

from inference_sdk import InferenceHTTPClient
from tests.inference.hosted_platform_tests.conftest import (
GOOGLE_VISION_API_KEY,
ROBOFLOW_API_KEY,
)

GOOGLE_VISION_OCR_WORKFLOW = {
"version": "1.0",
"inputs": [
{"type": "WorkflowImage", "name": "image"},
{"type": "WorkflowParameter", "name": "api_key"},
],
"steps": [
{
"type": "roboflow_core/google_vision_ocr@v1",
"name": "google_vision_ocr",
"image": "$inputs.image",
"ocr_type": "text_detection",
"api_key": "$inputs.api_key",
},
{
"type": "roboflow_core/bounding_box_visualization@v1",
"name": "bounding_box_visualization",
"predictions": "$steps.google_vision_ocr.predictions",
"image": "$inputs.image",
},
{
"type": "roboflow_core/label_visualization@v1",
"name": "label_visualization",
"predictions": "$steps.google_vision_ocr.predictions",
"image": "$steps.bounding_box_visualization.image",
},
],
"outputs": [
{
"type": "JsonField",
"name": "extracted_text",
"selector": "$steps.google_vision_ocr.text",
},
{
"type": "JsonField",
"name": "text_detections",
"selector": "$steps.google_vision_ocr.predictions",
},
{
"type": "JsonField",
"name": "text_visualised",
"selector": "$steps.label_visualization.image",
},
],
}


@pytest.mark.skipif(GOOGLE_VISION_API_KEY is None, reason="No Google Vision API key provided")
@pytest.mark.flaky(retries=4, delay=1)
def test_workflow_with_google_api_ocr(
object_detection_service_url: str,
license_plate_image: str,
) -> None:
client = InferenceHTTPClient(
api_url=object_detection_service_url,
api_key=ROBOFLOW_API_KEY,
)

# when
result = client.run_workflow(
specification=GOOGLE_VISION_OCR_WORKFLOW,
images={
"image": license_plate_image,
},
parameters={
"api_key": GOOGLE_VISION_API_KEY,
},
)

# then
assert len(result) == 1, "Single image given, expected single output"
assert set(result[0].keys()) == {
"extracted_text",
"text_visualised",
"text_detections",
}, "Expected all outputs to be delivered"
assert len(result[0]["extracted_text"]) > 0, "Expected text to be extracted"
assert (
len(result[0]["text_detections"]) == 4
), "Expected 4 text regions to be detected"
@@ -81,8 +81,8 @@


@add_to_workflows_gallery(
category="Workflows with multiple models",
use_case_title="Workflow detection models and OCR",
category="Workflows for OCR",
use_case_title="Workflow with DocTR model",
use_case_description="""
    This example showcases a fairly sophisticated workflow usage scenario that assumes the following:
@@ -0,0 +1,155 @@
import os

import numpy as np
import pytest

from inference.core.env import WORKFLOWS_MAX_CONCURRENT_STEPS
from inference.core.managers.base import ModelManager
from inference.core.workflows.core_steps.common.entities import StepExecutionMode
from inference.core.workflows.execution_engine.core import ExecutionEngine
from tests.workflows.integration_tests.execution.workflows_gallery_collector.decorators import (
add_to_workflows_gallery,
)

GOOGLE_VISION_API_KEY = os.getenv("WORKFLOWS_TEST_GOOGLE_VISION_API_KEY")

GOOGLE_VISION_OCR_WORKFLOW = {
"version": "1.0",
"inputs": [
{"type": "WorkflowImage", "name": "image"},
{"type": "WorkflowParameter", "name": "api_key"},
],
"steps": [
{
"type": "roboflow_core/google_vision_ocr@v1",
"name": "google_vision_ocr",
"image": "$inputs.image",
"ocr_type": "text_detection",
"api_key": "$inputs.api_key",
},
{
"type": "roboflow_core/bounding_box_visualization@v1",
"name": "bounding_box_visualization",
"predictions": "$steps.google_vision_ocr.predictions",
"image": "$inputs.image",
},
{
"type": "roboflow_core/label_visualization@v1",
"name": "label_visualization",
"predictions": "$steps.google_vision_ocr.predictions",
"image": "$steps.bounding_box_visualization.image",
},
],
"outputs": [
{
"type": "JsonField",
"name": "extracted_text",
"selector": "$steps.google_vision_ocr.text",
},
{
"type": "JsonField",
"name": "text_detections",
"selector": "$steps.google_vision_ocr.predictions",
},
{
"type": "JsonField",
"name": "text_visualised",
"selector": "$steps.label_visualization.image",
},
],
}


@add_to_workflows_gallery(
category="Workflows for OCR",
use_case_title="Google Vision OCR",
use_case_description="""
    In this example, Google Vision OCR is used to extract text from the input image.
    Additionally, the example presents how to combine the structured output of the
    Google API with visualisation blocks.
""",
workflow_definition=GOOGLE_VISION_OCR_WORKFLOW,
workflow_name_in_app="google-vision-ocr",
)
@pytest.mark.skipif(
condition=GOOGLE_VISION_API_KEY is None, reason="Google API key not provided"
)
def test_workflow_with_google_ocr_when_text_should_be_detected(
model_manager: ModelManager,
license_plate_image: np.ndarray,
) -> None:
# given
workflow_init_parameters = {
"workflows_core.model_manager": model_manager,
"workflows_core.step_execution_mode": StepExecutionMode.LOCAL,
}
execution_engine = ExecutionEngine.init(
workflow_definition=GOOGLE_VISION_OCR_WORKFLOW,
init_parameters=workflow_init_parameters,
max_concurrent_steps=WORKFLOWS_MAX_CONCURRENT_STEPS,
)

# when
result = execution_engine.run(
runtime_parameters={
"image": [license_plate_image],
"api_key": GOOGLE_VISION_API_KEY,
}
)

# then
assert len(result) == 1, "Single image given, expected single output"
assert set(result[0].keys()) == {
"extracted_text",
"text_visualised",
"text_detections",
}, "Expected all outputs to be delivered"
assert (
result[0]["extracted_text"] == "2398027\n2398023\nKn\n239+8072"
), "Extracted text should match reference"
assert not np.allclose(
license_plate_image, result[0]["text_visualised"].numpy_image
), "Expected that visualisation will change the output image"
assert (
len(result[0]["text_detections"]) == 4
), "Expected 4 text regions to be detected"


@pytest.mark.skipif(
condition=GOOGLE_VISION_API_KEY is None, reason="Google API key not provided"
)
def test_workflow_with_google_ocr_when_no_text_should_be_detected(
model_manager: ModelManager,
dogs_image: np.ndarray,
) -> None:
# given
workflow_init_parameters = {
"workflows_core.model_manager": model_manager,
"workflows_core.step_execution_mode": StepExecutionMode.LOCAL,
}
execution_engine = ExecutionEngine.init(
workflow_definition=GOOGLE_VISION_OCR_WORKFLOW,
init_parameters=workflow_init_parameters,
max_concurrent_steps=WORKFLOWS_MAX_CONCURRENT_STEPS,
)

# when
result = execution_engine.run(
runtime_parameters={
"image": [dogs_image],
"api_key": GOOGLE_VISION_API_KEY,
}
)

# then
assert len(result) == 1, "Single image given, expected single output"
assert set(result[0].keys()) == {
"extracted_text",
"text_visualised",
"text_detections",
}, "Expected all outputs to be delivered"
assert result[0]["extracted_text"] == ""
assert np.allclose(
dogs_image, result[0]["text_visualised"].numpy_image
), "Expected that visualisation will not change the output image"
assert len(result[0]["text_detections"]) == 0, "Expected 0 text regions detected"
@@ -3,7 +3,9 @@
from inference.core.env import WORKFLOWS_MAX_CONCURRENT_STEPS
from inference.core.managers.base import ModelManager
from inference.core.workflows.core_steps.common.entities import StepExecutionMode
-from inference.core.workflows.core_steps.transformations.stitch_images.v1 import OUTPUT_KEY
+from inference.core.workflows.core_steps.transformations.stitch_images.v1 import (
+    OUTPUT_KEY,
+)
from inference.core.workflows.execution_engine.core import ExecutionEngine
from tests.workflows.integration_tests.execution.workflows_gallery_collector.decorators import (
add_to_workflows_gallery,
@@ -14,7 +16,10 @@
"inputs": [
{"type": "InferenceImage", "name": "image1"},
{"type": "InferenceImage", "name": "image2"},
{"type": "InferenceParameter", "name": "count_of_best_matches_per_query_descriptor"},
{
"type": "InferenceParameter",
"name": "count_of_best_matches_per_query_descriptor",
},
{"type": "InferenceParameter", "name": "max_allowed_reprojection_error"},
],
"steps": [
@@ -77,6 +82,8 @@ def test_workflow_with_classical_pattern_matching(
assert set(result[0].keys()) == {
"stitched_image",
}, "Expected all declared outputs to be delivered"
-    assert (
-        result[0]["stitched_image"].numpy_image.shape == (2918, 2034, 3)
+    assert result[0]["stitched_image"].numpy_image.shape == (
+        2918,
+        2034,
+        3,
    ), "Expected result image shape must match (2918, 2034, 3)"