Commit

Merge branch 'main' into feature/pass-measured-fps-when-handling-live-stream
grzegorz-roboflow authored Nov 14, 2024
2 parents 1d0e796 + 1f235f3 commit fc11d3e
Showing 62 changed files with 4,445 additions and 91 deletions.
26 changes: 14 additions & 12 deletions docker/dockerfiles/Dockerfile.onnx.jetson.5.1.1.stream_manager
@@ -6,6 +6,8 @@ ENV LANG en_US.UTF-8
RUN apt-get update -y && apt-get install -y \
lshw \
git \
+    python3.9 \
+    python3.9-dev \
python3-pip \
python3-matplotlib \
gfortran \
@@ -24,31 +26,31 @@ COPY requirements/requirements.clip.txt \
requirements/_requirements.txt \
./

-RUN pip3 install --ignore-installed PyYAML && rm -rf ~/.cache/pip
+RUN python3.9 -m pip install --ignore-installed PyYAML && rm -rf ~/.cache/pip

# We needed to take the statically compiled library from the last known stable build and host it ourselves.
# That was due to faulty builds starting 26.06.2024, probably caused by the release of a new version
# of pybind11, which gets pulled automatically during the build of the zxing_cpp library, making
# cmake fail
-RUN wget https://storage.googleapis.com/roboflow-tests-assets/zxing_cpp_library_compiled_for_inference_v0.12.1_python_3.8.tar.gz \
-    && tar -xvzf zxing_cpp_library_compiled_for_inference_v0.12.1_python_3.8.tar.gz \
-    && mv zxing_cpp-2.2.0.dist-info /usr/local/lib/python3.8/dist-packages/zxing_cpp-2.2.0.dist-info \
-    && mv zxingcpp.cpython-38-aarch64-linux-gnu.so /usr/local/lib/python3.8/dist-packages/ \
-    && rm zxing_cpp_library_compiled_for_inference_v0.12.1_python_3.8.tar.gz
+RUN wget https://storage.googleapis.com/roboflow-tests-assets/zxing_cpp_library_compiled_for_inference_v0.12.1.tar.gz \
+    && tar -xvzf zxing_cpp_library_compiled_for_inference_v0.12.1.tar.gz \
+    && mv zxing_cpp-2.2.0.dist-info /usr/local/lib/python3.9/dist-packages/zxing_cpp-2.2.0.dist-info \
+    && mv zxingcpp.cpython-39-aarch64-linux-gnu.so /usr/local/lib/python3.9/dist-packages/ \
+    && rm zxing_cpp_library_compiled_for_inference_v0.12.1.tar.gz

-RUN pip3 install --upgrade pip && pip3 install \
+RUN python3.9 -m pip install --upgrade pip && python3.9 -m pip install \
-r _requirements.txt \
-r requirements.clip.txt \
-r requirements.http.txt \
"setuptools<=75.5.0" \
--upgrade \
&& rm -rf ~/.cache/pip

-RUN pip3 uninstall --yes onnxruntime
-RUN wget https://nvidia.box.com/shared/static/iizg3ggrtdkqawkmebbfixo7sce6j365.whl -O onnxruntime_gpu-1.16.0-cp38-cp38-linux_aarch64.whl
-RUN pip3 install onnxruntime_gpu-1.16.0-cp38-cp38-linux_aarch64.whl "opencv-python-headless<4.3" \
+RUN python3.9 -m pip uninstall --yes onnxruntime
+RUN wget https://nvidia.box.com/shared/static/67zek28z497hs9aev7xg2c1wngdeyv4h.whl -O onnxruntime_gpu-1.16.0-cp39-cp39-linux_aarch64.whl
+RUN python3.9 -m pip install onnxruntime_gpu-1.16.0-cp39-cp39-linux_aarch64.whl "opencv-python-headless>4" \
     && rm -rf ~/.cache/pip \
-    && rm onnxruntime_gpu-1.16.0-cp38-cp38-linux_aarch64.whl
+    && rm onnxruntime_gpu-1.16.0-cp39-cp39-linux_aarch64.whl

WORKDIR /app/
COPY inference inference
@@ -66,4 +68,4 @@ ENV WORKFLOWS_STEP_EXECUTION_MODE=local
ENV WORKFLOWS_MAX_CONCURRENT_STEPS=1
ENV SUPERVISON_DEPRECATION_WARNING=0

-ENTRYPOINT ["python3", "-m", "inference.enterprise.stream_management.manager.app"]
+ENTRYPOINT ["python3.9", "-m", "inference.enterprise.stream_management.manager.app"]
267 changes: 267 additions & 0 deletions inference/core/workflows/core_steps/formatters/vlm_as_classifier/v2.py
@@ -0,0 +1,267 @@
import json
import logging
import re
from typing import Dict, List, Literal, Optional, Tuple, Type, Union
from uuid import uuid4

from pydantic import ConfigDict, Field

from inference.core.workflows.execution_engine.entities.base import (
OutputDefinition,
WorkflowImageData,
)
from inference.core.workflows.execution_engine.entities.types import (
BOOLEAN_KIND,
CLASSIFICATION_PREDICTION_KIND,
IMAGE_KIND,
INFERENCE_ID_KIND,
LANGUAGE_MODEL_OUTPUT_KIND,
LIST_OF_VALUES_KIND,
Selector,
)
from inference.core.workflows.prototypes.block import (
BlockResult,
WorkflowBlock,
WorkflowBlockManifest,
)

JSON_MARKDOWN_BLOCK_PATTERN = re.compile(r"```json([\s\S]*?)```", flags=re.IGNORECASE)

LONG_DESCRIPTION = """
The block expects string input that would be produced by blocks exposing Large Language Models (LLMs) and
Visual Language Models (VLMs). The input is parsed into a classification prediction and returned as the block output.
Accepted formats:
- valid JSON strings
- JSON documents wrapped with Markdown tags (very common for GPT responses)
Example:
```
{"my": "json"}
```
**Details regarding block behavior:**
- `error_status` is set to `True` whenever parsing cannot be completed
- in case of multiple Markdown blocks with raw JSON content, only the first will be parsed
"""

SHORT_DESCRIPTION = "Parses a raw string into a classification prediction."


class BlockManifest(WorkflowBlockManifest):
model_config = ConfigDict(
json_schema_extra={
"name": "VLM as Classifier",
"version": "v2",
"short_description": SHORT_DESCRIPTION,
"long_description": LONG_DESCRIPTION,
"license": "Apache-2.0",
"block_type": "formatter",
}
)
type: Literal["roboflow_core/vlm_as_classifier@v2"]
image: Selector(kind=[IMAGE_KIND]) = Field(
description="The image which was the base to generate VLM prediction",
examples=["$inputs.image", "$steps.cropping.crops"],
)
vlm_output: Selector(kind=[LANGUAGE_MODEL_OUTPUT_KIND]) = Field(
title="VLM Output",
description="The string with raw classification prediction to parse.",
examples=[["$steps.lmm.output"]],
)
classes: Union[
        Selector(kind=[LIST_OF_VALUES_KIND]),
List[str],
] = Field(
description="List of all classes used by the model, required to "
"generate mapping between class name and class id.",
examples=[["$steps.lmm.classes", "$inputs.classes", ["class_a", "class_b"]]],
)

@classmethod
def describe_outputs(cls) -> List[OutputDefinition]:
return [
OutputDefinition(name="error_status", kind=[BOOLEAN_KIND]),
OutputDefinition(name="predictions", kind=[CLASSIFICATION_PREDICTION_KIND]),
OutputDefinition(name="inference_id", kind=[INFERENCE_ID_KIND]),
]

@classmethod
def get_execution_engine_compatibility(cls) -> Optional[str]:
return ">=1.3.0,<2.0.0"


class VLMAsClassifierBlockV2(WorkflowBlock):

@classmethod
def get_manifest(cls) -> Type[WorkflowBlockManifest]:
return BlockManifest

def run(
self,
image: WorkflowImageData,
vlm_output: str,
classes: List[str],
) -> BlockResult:
inference_id = f"{uuid4()}"
error_status, parsed_data = string2json(
raw_json=vlm_output,
)
if error_status:
return {
"error_status": True,
"predictions": None,
"inference_id": inference_id,
}
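        # Dispatch on the parsed keys: `class_name` + `confidence` marks a multi-class result,
        # while a `predicted_classes` list marks a multi-label result (see the parsers below).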
if "class_name" in parsed_data and "confidence" in parsed_data:
return parse_multi_class_classification_results(
image=image,
results=parsed_data,
classes=classes,
inference_id=inference_id,
)
if "predicted_classes" in parsed_data:
return parse_multi_label_classification_results(
image=image,
results=parsed_data,
classes=classes,
inference_id=inference_id,
)
return {
"error_status": True,
"predictions": None,
"inference_id": inference_id,
}


def string2json(
raw_json: str,
) -> Tuple[bool, dict]:
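    # Prefer the first Markdown-fenced JSON block when one is present; otherwise try to parse the
    # whole string. Returns (error_status, parsed_dict), with an empty dict when parsing fails.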
json_blocks_found = JSON_MARKDOWN_BLOCK_PATTERN.findall(raw_json)
if len(json_blocks_found) == 0:
return try_parse_json(raw_json)
first_block = json_blocks_found[0]
return try_parse_json(first_block)


def try_parse_json(content: str) -> Tuple[bool, dict]:
try:
return False, json.loads(content)
except Exception as error:
logging.warning(
f"Could not parse JSON to dict in `roboflow_core/vlm_as_classifier@v1` block. "
f"Error type: {error.__class__.__name__}. Details: {error}"
)
return True, {}


def parse_multi_class_classification_results(
image: WorkflowImageData,
results: dict,
classes: List[str],
inference_id: str,
) -> dict:
try:
class2id_mapping = create_classes_index(classes=classes)
height, width = image.numpy_image.shape[:2]
top_class = results["class_name"]
confidences = {top_class: scale_confidence(results["confidence"])}
predictions = []
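        # If the predicted top class is not on the provided class list, it is still reported with class_id -1.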
if top_class not in class2id_mapping:
predictions.append(
{
"class": top_class,
"class_id": -1,
"confidence": confidences.get(top_class, 0.0),
}
)
for class_name, class_id in class2id_mapping.items():
predictions.append(
{
"class": class_name,
"class_id": class_id,
"confidence": confidences.get(class_name, 0.0),
}
)
parsed_prediction = {
"image": {"width": width, "height": height},
"predictions": predictions,
"top": top_class,
"confidence": confidences[top_class],
"inference_id": inference_id,
"parent_id": image.parent_metadata.parent_id,
}
return {
"error_status": False,
"predictions": parsed_prediction,
"inference_id": inference_id,
}
except Exception as error:
logging.warning(
f"Could not parse multi-class classification results in `roboflow_core/vlm_as_classifier@v1` block. "
f"Error type: {error.__class__.__name__}. Details: {error}"
)
return {"error_status": True, "predictions": None, "inference_id": inference_id}


def parse_multi_label_classification_results(
image: WorkflowImageData,
results: dict,
classes: List[str],
inference_id: str,
) -> dict:
try:
class2id_mapping = create_classes_index(classes=classes)
height, width = image.numpy_image.shape[:2]
predicted_classes_confidences = {}
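        # Classes unknown to the provided list are mapped to class_id -1; duplicate entries keep their highest confidence.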
for prediction in results["predicted_classes"]:
if prediction["class"] not in class2id_mapping:
class2id_mapping[prediction["class"]] = -1
if prediction["class"] in predicted_classes_confidences:
old_confidence = predicted_classes_confidences[prediction["class"]]
new_confidence = scale_confidence(value=prediction["confidence"])
predicted_classes_confidences[prediction["class"]] = max(
old_confidence, new_confidence
)
else:
predicted_classes_confidences[prediction["class"]] = scale_confidence(
value=prediction["confidence"]
)
predictions = {
class_name: {
"confidence": predicted_classes_confidences.get(class_name, 0.0),
"class_id": class_id,
}
for class_name, class_id in class2id_mapping.items()
}
parsed_prediction = {
"image": {"width": width, "height": height},
"predictions": predictions,
"predicted_classes": list(predicted_classes_confidences.keys()),
"inference_id": inference_id,
"parent_id": image.parent_metadata.parent_id,
}
return {
"error_status": False,
"predictions": parsed_prediction,
"inference_id": inference_id,
}
except Exception as error:
logging.warning(
f"Could not parse multi-label classification results in `roboflow_core/vlm_as_classifier@v1` block. "
f"Error type: {error.__class__.__name__}. Details: {error}"
)
return {"error_status": True, "predictions": None, "inference_id": inference_id}


def create_classes_index(classes: List[str]) -> Dict[str, int]:
return {class_name: idx for idx, class_name in enumerate(classes)}


def scale_confidence(value: float) -> float:
return min(max(float(value), 0.0), 1.0)