Skip to content

Commit

Permalink
Merge pull request #718 from roboflow/fix/issue_with_crop_plus_openai…
Browse files Browse the repository at this point in the history
…_block

Fix the problem with VLMs on batch inference
  • Loading branch information
PawelPeczek-Roboflow authored Oct 4, 2024
2 parents 74b52a9 + 8714e1d commit bce522e
Show file tree
Hide file tree
Showing 6 changed files with 27 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -306,13 +306,13 @@ def run_claude_prompting(
base64_image = base64.b64encode(
encode_image_to_jpeg_bytes(loaded_image)
).decode("ascii")
prompt = PROMPT_BUILDERS[task_type](
generated_prompt = PROMPT_BUILDERS[task_type](
base64_image=base64_image,
prompt=prompt,
output_structure=output_structure,
classes=classes,
)
prompts.append(prompt)
prompts.append(generated_prompt)
return execute_claude_requests(
api_key=api_key,
prompts=prompts,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -303,15 +303,15 @@ def run_gemini_prompting(
base64_image = base64.b64encode(
encode_image_to_jpeg_bytes(loaded_image)
).decode("ascii")
prompt = PROMPT_BUILDERS[task_type](
generated_prompt = PROMPT_BUILDERS[task_type](
base64_image=base64_image,
prompt=prompt,
output_structure=output_structure,
classes=classes,
temperature=temperature,
max_tokens=max_tokens,
)
gemini_prompts.append(prompt)
gemini_prompts.append(generated_prompt)
return execute_gemini_requests(
google_api_key=google_api_key,
gemini_prompts=gemini_prompts,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -301,14 +301,14 @@ def run_gpt_4v_llm_prompting(
base64_image = base64.b64encode(
encode_image_to_jpeg_bytes(loaded_image)
).decode("ascii")
prompt = PROMPT_BUILDERS[task_type](
generated_prompt = PROMPT_BUILDERS[task_type](
base64_image=base64_image,
prompt=prompt,
output_structure=output_structure,
classes=classes,
gpt_image_detail=gpt_image_detail,
)
gpt4_prompts.append(prompt)
gpt4_prompts.append(generated_prompt)
return execute_gpt_4v_requests(
openai_api_key=openai_api_key,
gpt4_prompts=gpt4_prompts,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
def test_workflow_with_unconstrained_prompt(
model_manager: ModelManager,
dogs_image: np.ndarray,
license_plate_image: np.ndarray,
) -> None:
# given
workflow_init_parameters = {
Expand All @@ -74,18 +75,22 @@ def test_workflow_with_unconstrained_prompt(
# when
result = execution_engine.run(
runtime_parameters={
"image": [dogs_image],
"image": [dogs_image, license_plate_image],
"api_key": ANTHROPIC_API_KEY,
"prompt": "What is the topic of the image?",
}
)

# then
assert len(result) == 1, "Single image given, expected single output"
assert len(result) == 2, "Two images given, expected two outputs"
assert set(result[0].keys()) == {"result"}, "Expected all outputs to be delivered"
assert set(result[1].keys()) == {"result"}, "Expected all outputs to be delivered"
assert (
isinstance(result[0]["result"], str) and len(result[0]["result"]) > 0
), "Expected non-empty string generated"
assert (
isinstance(result[1]["result"], str) and len(result[1]["result"]) > 0
), "Expected non-empty string generated"


OCR_WORKFLOW = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
def test_workflow_with_unconstrained_prompt(
model_manager: ModelManager,
dogs_image: np.ndarray,
license_plate_image: np.ndarray,
) -> None:
# given
workflow_init_parameters = {
Expand All @@ -74,18 +75,22 @@ def test_workflow_with_unconstrained_prompt(
# when
result = execution_engine.run(
runtime_parameters={
"image": [dogs_image],
"image": [dogs_image, license_plate_image],
"api_key": GOOGLE_API_KEY,
"prompt": "What is the topic of the image?",
}
)

# then
assert len(result) == 1, "Single image given, expected single output"
assert len(result) == 2, "Two images given, expected two outputs"
assert set(result[0].keys()) == {"result"}, "Expected all outputs to be delivered"
assert set(result[1].keys()) == {"result"}, "Expected all outputs to be delivered"
assert (
isinstance(result[0]["result"], str) and len(result[0]["result"]) > 0
), "Expected non-empty string generated"
assert (
isinstance(result[1]["result"], str) and len(result[1]["result"]) > 0
), "Expected non-empty string generated"


OCR_WORKFLOW = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
def test_workflow_with_unconstrained_prompt(
model_manager: ModelManager,
dogs_image: np.ndarray,
license_plate_image: np.ndarray,
) -> None:
# given
workflow_init_parameters = {
Expand All @@ -75,18 +76,22 @@ def test_workflow_with_unconstrained_prompt(
# when
result = execution_engine.run(
runtime_parameters={
"image": [dogs_image],
"image": [dogs_image, license_plate_image],
"api_key": OPEN_AI_API_KEY,
"prompt": "What is the topic of the image?",
}
)

# then
assert len(result) == 1, "Single image given, expected single output"
assert len(result) == 2, "Two images given, expected two outputs"
assert set(result[0].keys()) == {"result"}, "Expected all outputs to be delivered"
assert set(result[1].keys()) == {"result"}, "Expected all outputs to be delivered"
assert (
isinstance(result[0]["result"], str) and len(result[0]["result"]) > 0
), "Expected non-empty string generated"
assert (
isinstance(result[1]["result"], str) and len(result[1]["result"]) > 0
), "Expected non-empty string generated"


OCR_WORKFLOW = {
Expand Down

0 comments on commit bce522e

Please sign in to comment.