From 8f5dd02472d24a7c84f730d8122fa3ae8b726954 Mon Sep 17 00:00:00 2001 From: Peter Robicheaux Date: Tue, 12 Nov 2024 01:04:40 +0000 Subject: [PATCH 1/6] Florence fts working --- .../core/workflows/core_steps/common/vlms.py | 9 ++++++ .../models/foundation/florence2/v1.py | 30 +++++++++++-------- inference/models/transformers/transformers.py | 4 +-- 3 files changed, 28 insertions(+), 15 deletions(-) diff --git a/inference/core/workflows/core_steps/common/vlms.py b/inference/core/workflows/core_steps/common/vlms.py index c85727337..4252100b9 100644 --- a/inference/core/workflows/core_steps/common/vlms.py +++ b/inference/core/workflows/core_steps/common/vlms.py @@ -80,3 +80,12 @@ "description": "Model returns a JSON response with the specified fields", }, } + + +FLORENCE_TASKS_METADATA = { + "unstructured": { + "name": "Unstructured Prompt", + "description": "Use free-form prompt to generate a response. Useful with finetuned models.", + }, + **VLM_TASKS_METADATA, +} diff --git a/inference/core/workflows/core_steps/models/foundation/florence2/v1.py b/inference/core/workflows/core_steps/models/foundation/florence2/v1.py index 930977a6a..72959a894 100644 --- a/inference/core/workflows/core_steps/models/foundation/florence2/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/florence2/v1.py @@ -8,7 +8,7 @@ from inference.core.entities.requests.inference import LMMInferenceRequest from inference.core.managers.base import ModelManager from inference.core.workflows.core_steps.common.entities import StepExecutionMode -from inference.core.workflows.core_steps.common.vlms import VLM_TASKS_METADATA +from inference.core.workflows.core_steps.common.vlms import FLORENCE_TASKS_METADATA from inference.core.workflows.execution_engine.entities.base import ( Batch, OutputDefinition, @@ -21,6 +21,7 @@ LANGUAGE_MODEL_OUTPUT_KIND, LIST_OF_VALUES_KIND, OBJECT_DETECTION_PREDICTION_KIND, + ROBOFLOW_MODEL_ID_KIND, STRING_KIND, ImageInputField, StepOutputImageSelector, @@ -77,12 +78,13 @@ }, {"task_type": "detection-grounded-ocr", "florence_task": ""}, {"task_type": "region-proposal", "florence_task": ""}, + {"task_type": "unstructured", "florence_task": ""} ] TASK_TYPE_TO_FLORENCE_TASK = { task["task_type"]: task["florence_task"] for task in SUPPORTED_TASK_TYPES_LIST } RELEVANT_TASKS_METADATA = { - k: v for k, v in VLM_TASKS_METADATA.items() if k in TASK_TYPE_TO_FLORENCE_TASK + k: v for k, v in FLORENCE_TASKS_METADATA.items() if k in TASK_TYPE_TO_FLORENCE_TASK } RELEVANT_TASKS_DOCS_DESCRIPTION = "\n\n".join( f"* **{v['name']}** (`{k}`) - {v['description']}" @@ -127,6 +129,7 @@ TASKS_REQUIRING_PROMPT = { "phrase-grounded-object-detection", "phrase-grounded-instance-segmentation", + "unstructured", } TASKS_REQUIRING_CLASSES = { "open-vocabulary-object-detection", @@ -164,13 +167,11 @@ class BlockManifest(WorkflowBlockManifest): ) type: Literal["roboflow_core/florence_2@v1"] images: Union[WorkflowImageSelector, StepOutputImageSelector] = ImageInputField - model_version: Union[ - WorkflowParameterSelector(kind=[STRING_KIND]), - Literal["florence-2-base", "florence-2-large"], - ] = Field( + model_id: Union[WorkflowParameterSelector(kind=[ROBOFLOW_MODEL_ID_KIND]), str] = Field( default="florence-2-base", description="Model to be used", examples=["florence-2-base"], + json_schema_extra={"always_visible": True}, ) task_type: TaskType = Field( default="open-vocabulary-object-detection", @@ -317,7 +318,7 @@ def get_manifest(cls) -> Type[WorkflowBlockManifest]: def run( self, images: Batch[WorkflowImageData], - model_version: str, + model_id: str, task_type: TaskType, prompt: Optional[str], classes: Optional[List[str]], @@ -330,7 +331,7 @@ def run( return self.run_locally( images=images, task_type=task_type, - model_version=model_version, + model_id=model_id, prompt=prompt, classes=classes, grounding_detection=grounding_detection, @@ -348,7 +349,7 @@ def run( def run_locally( self, images: Batch[WorkflowImageData], - model_version: str, + model_id: str, task_type: TaskType, prompt: Optional[str], classes: Optional[List[str]], @@ -374,7 +375,7 @@ def run_locally( grounding_selection_mode=grounding_selection_mode, ) self._model_manager.add_model( - model_id=model_version, + model_id=model_id, api_key=self._api_key, ) predictions = [] @@ -387,15 +388,18 @@ def run_locally( continue request = LMMInferenceRequest( api_key=self._api_key, - model_id=model_version, + model_id=model_id, image=image, source="workflow-execution", prompt=task_type + (single_prompt or ""), ) prediction = self._model_manager.infer_from_request_sync( - model_id=model_version, request=request + model_id=model_id, request=request ) - prediction_data = prediction.response[task_type] + if task_type == "": + prediction_data = prediction.response[list(prediction.response.keys())[0]] + else: + prediction_data = prediction.response[task_type] if task_type in TASKS_TO_EXTRACT_LABELS_AS_CLASSES: classes = prediction_data.get("labels", []) predictions.append( diff --git a/inference/models/transformers/transformers.py b/inference/models/transformers/transformers.py index 8a1ed382b..3d19ecb36 100644 --- a/inference/models/transformers/transformers.py +++ b/inference/models/transformers/transformers.py @@ -126,10 +126,12 @@ def predict(self, image_in: Image.Image, prompt="", history=None, **kwargs): max_new_tokens=1000, do_sample=False, early_stopping=False, + no_repeat_ngram_size=0, ) generation = generation[0] if self.generation_includes_input: generation = generation[input_len:] + decoded = self.processor.decode( generation, skip_special_tokens=self.skip_special_tokens ) @@ -151,7 +153,6 @@ def get_infer_bucket_file_list(self) -> list: "config.json", "special_tokens_map.json", "generation_config.json", - "model.safetensors.index.json", "tokenizer.json", re.compile(r"model-\d{5}-of-\d{5}\.safetensors"), "preprocessor_config.json", @@ -286,7 +287,6 @@ def get_infer_bucket_file_list(self) -> list: "adapter_config.json", "special_tokens_map.json", "tokenizer.json", - "tokenizer.model", "adapter_model.safetensors", "preprocessor_config.json", "tokenizer_config.json", From c3a3dd8b43ea56f6d6f7e0f755d1bc1bf0ba7097 Mon Sep 17 00:00:00 2001 From: Peter Robicheaux Date: Tue, 12 Nov 2024 19:09:23 +0000 Subject: [PATCH 2/6] Inference tweaks --- inference/core/workflows/core_steps/loader.py | 4 ++ .../models/foundation/florence2/v1.py | 63 ++++++++++--------- 2 files changed, 37 insertions(+), 30 deletions(-) diff --git a/inference/core/workflows/core_steps/loader.py b/inference/core/workflows/core_steps/loader.py index efc67723f..476f2d53b 100644 --- a/inference/core/workflows/core_steps/loader.py +++ b/inference/core/workflows/core_steps/loader.py @@ -121,6 +121,9 @@ from inference.core.workflows.core_steps.models.foundation.florence2.v1 import ( Florence2BlockV1, ) +from inference.core.workflows.core_steps.models.foundation.florence2.v2 import ( + Florence2BlockV2, +) from inference.core.workflows.core_steps.models.foundation.google_gemini.v1 import ( GoogleGeminiBlockV1, ) @@ -387,6 +390,7 @@ def load_blocks() -> List[Type[WorkflowBlock]]: DotVisualizationBlockV1, EllipseVisualizationBlockV1, Florence2BlockV1, + Florence2BlockV2, GoogleGeminiBlockV1, GoogleVisionOCRBlockV1, HaloVisualizationBlockV1, diff --git a/inference/core/workflows/core_steps/models/foundation/florence2/v1.py b/inference/core/workflows/core_steps/models/foundation/florence2/v1.py index 72959a894..ae2b9b18d 100644 --- a/inference/core/workflows/core_steps/models/foundation/florence2/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/florence2/v1.py @@ -21,7 +21,6 @@ LANGUAGE_MODEL_OUTPUT_KIND, LIST_OF_VALUES_KIND, OBJECT_DETECTION_PREDICTION_KIND, - ROBOFLOW_MODEL_ID_KIND, STRING_KIND, ImageInputField, StepOutputImageSelector, @@ -78,7 +77,7 @@ }, {"task_type": "detection-grounded-ocr", "florence_task": ""}, {"task_type": "region-proposal", "florence_task": ""}, - {"task_type": "unstructured", "florence_task": ""} + {"task_type": "unstructured", "florence_task": ""}, ] TASK_TYPE_TO_FLORENCE_TASK = { task["task_type"]: task["florence_task"] for task in SUPPORTED_TASK_TYPES_LIST @@ -150,29 +149,8 @@ } -class BlockManifest(WorkflowBlockManifest): - model_config = ConfigDict( - json_schema_extra={ - "name": "Florence-2 Model", - "version": "v1", - "short_description": "Run Florence-2 on an image", - "long_description": LONG_DESCRIPTION, - "license": "Apache-2.0", - "block_type": "model", - "search_keywords": ["Florence", "Florence-2", "Microsoft"], - "is_vlm_block": True, - "task_type_property": "task_type", - }, - protected_namespaces=(), - ) - type: Literal["roboflow_core/florence_2@v1"] +class BaseManifest(WorkflowBlockManifest): images: Union[WorkflowImageSelector, StepOutputImageSelector] = ImageInputField - model_id: Union[WorkflowParameterSelector(kind=[ROBOFLOW_MODEL_ID_KIND]), str] = Field( - default="florence-2-base", - description="Model to be used", - examples=["florence-2-base"], - json_schema_extra={"always_visible": True}, - ) task_type: TaskType = Field( default="open-vocabulary-object-detection", description="Task type to be performed by model. " @@ -294,6 +272,31 @@ def describe_outputs(cls) -> List[OutputDefinition]: def get_execution_engine_compatibility(cls) -> Optional[str]: return ">=1.0.0,<2.0.0" +class BlockManifest(BaseManifest): + type: Literal["roboflow_core/florence_2@v1"] + model_version: Union[ + WorkflowParameterSelector(kind=[STRING_KIND]), + Literal["florence-2-base", "florence-2-large"], + ] = Field( + default="florence-2-base", + description="Model to be used", + examples=["florence-2-base"], + ) + + model_config = ConfigDict( + json_schema_extra={ + "name": "Florence-2 Model", + "version": "v1", + "short_description": "Run Florence-2 on an image", + "long_description": LONG_DESCRIPTION, + "license": "Apache-2.0", + "block_type": "model", + "search_keywords": ["Florence", "Florence-2", "Microsoft"], + "is_vlm_block": True, + "task_type_property": "task_type", + }, + protected_namespaces=(), + ) class Florence2BlockV1(WorkflowBlock): @@ -318,7 +321,7 @@ def get_manifest(cls) -> Type[WorkflowBlockManifest]: def run( self, images: Batch[WorkflowImageData], - model_id: str, + model_version: str, task_type: TaskType, prompt: Optional[str], classes: Optional[List[str]], @@ -331,7 +334,7 @@ def run( return self.run_locally( images=images, task_type=task_type, - model_id=model_id, + model_version=model_version, prompt=prompt, classes=classes, grounding_detection=grounding_detection, @@ -349,7 +352,7 @@ def run( def run_locally( self, images: Batch[WorkflowImageData], - model_id: str, + model_version: str, task_type: TaskType, prompt: Optional[str], classes: Optional[List[str]], @@ -375,7 +378,7 @@ def run_locally( grounding_selection_mode=grounding_selection_mode, ) self._model_manager.add_model( - model_id=model_id, + model_id=model_version, api_key=self._api_key, ) predictions = [] @@ -388,13 +391,13 @@ def run_locally( continue request = LMMInferenceRequest( api_key=self._api_key, - model_id=model_id, + model_id=model_version, image=image, source="workflow-execution", prompt=task_type + (single_prompt or ""), ) prediction = self._model_manager.infer_from_request_sync( - model_id=model_id, request=request + model_id=model_version, request=request ) if task_type == "": prediction_data = prediction.response[list(prediction.response.keys())[0]] From ab24790c8c173e4dc056dc1c3ead03d582571d52 Mon Sep 17 00:00:00 2001 From: Peter Robicheaux Date: Tue, 12 Nov 2024 19:09:56 +0000 Subject: [PATCH 3/6] Add in replacement block --- .../models/foundation/florence2/v2.py | 78 +++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 inference/core/workflows/core_steps/models/foundation/florence2/v2.py diff --git a/inference/core/workflows/core_steps/models/foundation/florence2/v2.py b/inference/core/workflows/core_steps/models/foundation/florence2/v2.py new file mode 100644 index 000000000..e1a9dc946 --- /dev/null +++ b/inference/core/workflows/core_steps/models/foundation/florence2/v2.py @@ -0,0 +1,78 @@ +from inference.core.workflows.core_steps.models.foundation.florence2.v1 import ( + BaseManifest, + Florence2BlockV1, + TaskType, + GroundingSelectionMode, + LONG_DESCRIPTION, +) +from typing import Type, Union, Optional, List, Literal + +from pydantic import ConfigDict, Field +import supervision as sv + +from inference.core.workflows.execution_engine.entities.types import ( + WorkflowParameterSelector, +) + +from inference.core.workflows.execution_engine.entities.types import ( + ROBOFLOW_MODEL_ID_KIND, +) +from inference.core.workflows.prototypes.block import BlockResult, WorkflowBlockManifest +from inference.core.workflows.execution_engine.entities.base import ( + Batch, + WorkflowImageData, +) + + +class V2BlockManifest(BaseManifest): + type: Literal["roboflow_core/florence_2@v2"] + model_id: Union[WorkflowParameterSelector(kind=[ROBOFLOW_MODEL_ID_KIND]), str] = ( + Field( + default="florence-2-base", + description="Model to be used", + examples=["florence-2-base"], + json_schema_extra={"always_visible": True}, + ) + ) + model_config = ConfigDict( + json_schema_extra={ + "name": "Florence-2 Model", + "version": "v2", + "short_description": "Run Florence-2 on an image", + "long_description": LONG_DESCRIPTION, + "license": "Apache-2.0", + "block_type": "model", + "search_keywords": ["Florence", "Florence-2", "Microsoft"], + "is_vlm_block": True, + "task_type_property": "task_type", + }, + protected_namespaces=(), + ) + + +class Florence2BlockV2(Florence2BlockV1): + @classmethod + def get_manifest(cls) -> Type[WorkflowBlockManifest]: + return V2BlockManifest + + def run( + self, + images: Batch[WorkflowImageData], + model_id: str, + task_type: TaskType, + prompt: Optional[str], + classes: Optional[List[str]], + grounding_detection: Optional[ + Union[Batch[sv.Detections], List[int], List[float]] + ], + grounding_selection_mode: GroundingSelectionMode, + ) -> BlockResult: + return super().run( + images, + model_id, + task_type, + prompt, + classes, + grounding_detection, + grounding_selection_mode, + ) From fcbf977c82a3ae7ef306db352cc5c3b1bcd67505 Mon Sep 17 00:00:00 2001 From: Peter Robicheaux Date: Tue, 12 Nov 2024 19:10:10 +0000 Subject: [PATCH 4/6] Style --- .../models/foundation/florence2/v1.py | 6 ++++- .../models/foundation/florence2/v2.py | 25 ++++++++----------- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/inference/core/workflows/core_steps/models/foundation/florence2/v1.py b/inference/core/workflows/core_steps/models/foundation/florence2/v1.py index ae2b9b18d..71fbd15ad 100644 --- a/inference/core/workflows/core_steps/models/foundation/florence2/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/florence2/v1.py @@ -272,6 +272,7 @@ def describe_outputs(cls) -> List[OutputDefinition]: def get_execution_engine_compatibility(cls) -> Optional[str]: return ">=1.0.0,<2.0.0" + class BlockManifest(BaseManifest): type: Literal["roboflow_core/florence_2@v1"] model_version: Union[ @@ -298,6 +299,7 @@ class BlockManifest(BaseManifest): protected_namespaces=(), ) + class Florence2BlockV1(WorkflowBlock): def __init__( @@ -400,7 +402,9 @@ def run_locally( model_id=model_version, request=request ) if task_type == "": - prediction_data = prediction.response[list(prediction.response.keys())[0]] + prediction_data = prediction.response[ + list(prediction.response.keys())[0] + ] else: prediction_data = prediction.response[task_type] if task_type in TASKS_TO_EXTRACT_LABELS_AS_CLASSES: diff --git a/inference/core/workflows/core_steps/models/foundation/florence2/v2.py b/inference/core/workflows/core_steps/models/foundation/florence2/v2.py index e1a9dc946..0ff2a0867 100644 --- a/inference/core/workflows/core_steps/models/foundation/florence2/v2.py +++ b/inference/core/workflows/core_steps/models/foundation/florence2/v2.py @@ -1,27 +1,24 @@ +from typing import List, Literal, Optional, Type, Union + +import supervision as sv +from pydantic import ConfigDict, Field + from inference.core.workflows.core_steps.models.foundation.florence2.v1 import ( + LONG_DESCRIPTION, BaseManifest, Florence2BlockV1, - TaskType, GroundingSelectionMode, - LONG_DESCRIPTION, + TaskType, ) -from typing import Type, Union, Optional, List, Literal - -from pydantic import ConfigDict, Field -import supervision as sv - -from inference.core.workflows.execution_engine.entities.types import ( - WorkflowParameterSelector, +from inference.core.workflows.execution_engine.entities.base import ( + Batch, + WorkflowImageData, ) - from inference.core.workflows.execution_engine.entities.types import ( ROBOFLOW_MODEL_ID_KIND, + WorkflowParameterSelector, ) from inference.core.workflows.prototypes.block import BlockResult, WorkflowBlockManifest -from inference.core.workflows.execution_engine.entities.base import ( - Batch, - WorkflowImageData, -) class V2BlockManifest(BaseManifest): From 917d78443e1dcba8dff7ef38fd36025999ae2d2a Mon Sep 17 00:00:00 2001 From: Peter Robicheaux Date: Wed, 13 Nov 2024 22:36:06 +0000 Subject: [PATCH 5/6] Address pr comments --- .../core/workflows/core_steps/common/vlms.py | 9 -------- .../models/foundation/florence2/v1.py | 23 +++++++++++++++---- .../models/foundation/florence2/v2.py | 14 +++++------ 3 files changed, 26 insertions(+), 20 deletions(-) diff --git a/inference/core/workflows/core_steps/common/vlms.py b/inference/core/workflows/core_steps/common/vlms.py index 4252100b9..c85727337 100644 --- a/inference/core/workflows/core_steps/common/vlms.py +++ b/inference/core/workflows/core_steps/common/vlms.py @@ -80,12 +80,3 @@ "description": "Model returns a JSON response with the specified fields", }, } - - -FLORENCE_TASKS_METADATA = { - "unstructured": { - "name": "Unstructured Prompt", - "description": "Use free-form prompt to generate a response. Useful with finetuned models.", - }, - **VLM_TASKS_METADATA, -} diff --git a/inference/core/workflows/core_steps/models/foundation/florence2/v1.py b/inference/core/workflows/core_steps/models/foundation/florence2/v1.py index 71fbd15ad..ff64e866b 100644 --- a/inference/core/workflows/core_steps/models/foundation/florence2/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/florence2/v1.py @@ -8,7 +8,7 @@ from inference.core.entities.requests.inference import LMMInferenceRequest from inference.core.managers.base import ModelManager from inference.core.workflows.core_steps.common.entities import StepExecutionMode -from inference.core.workflows.core_steps.common.vlms import FLORENCE_TASKS_METADATA +from inference.core.workflows.core_steps.common.vlms import VLM_TASKS_METADATA from inference.core.workflows.execution_engine.entities.base import ( Batch, OutputDefinition, @@ -37,6 +37,14 @@ T = TypeVar("T") K = TypeVar("K") +FLORENCE_TASKS_METADATA = { + "custom": { + "name": "Custom Prompt", + "description": "Use free-form prompt to generate a response. Useful with finetuned models.", + }, + **VLM_TASKS_METADATA, +} + DETECTIONS_CLASS_NAME_FIELD = "class_name" DETECTION_ID_FIELD = "detection_id" @@ -77,7 +85,7 @@ }, {"task_type": "detection-grounded-ocr", "florence_task": ""}, {"task_type": "region-proposal", "florence_task": ""}, - {"task_type": "unstructured", "florence_task": ""}, + {"task_type": "custom", "florence_task": None}, ] TASK_TYPE_TO_FLORENCE_TASK = { task["task_type"]: task["florence_task"] for task in SUPPORTED_TASK_TYPES_LIST @@ -364,6 +372,8 @@ def run_locally( grounding_selection_mode: GroundingSelectionMode, ) -> BlockResult: requires_detection_grounding = task_type in TASKS_REQUIRING_DETECTION_GROUNDING + + is_not_florence_task = task_type == "custom" task_type = TASK_TYPE_TO_FLORENCE_TASK[task_type] inference_images = [ i.to_inference_format(numpy_preferred=False) for i in images @@ -391,17 +401,22 @@ def run_locally( {"raw_output": None, "parsed_output": None, "classes": None} ) continue + if is_not_florence_task: + prompt = single_prompt or "" + else: + prompt = task_type + (single_prompt or "") + request = LMMInferenceRequest( api_key=self._api_key, model_id=model_version, image=image, source="workflow-execution", - prompt=task_type + (single_prompt or ""), + prompt=prompt, ) prediction = self._model_manager.infer_from_request_sync( model_id=model_version, request=request ) - if task_type == "": + if is_not_florence_task: prediction_data = prediction.response[ list(prediction.response.keys())[0] ] diff --git a/inference/core/workflows/core_steps/models/foundation/florence2/v2.py b/inference/core/workflows/core_steps/models/foundation/florence2/v2.py index 0ff2a0867..28a11e248 100644 --- a/inference/core/workflows/core_steps/models/foundation/florence2/v2.py +++ b/inference/core/workflows/core_steps/models/foundation/florence2/v2.py @@ -65,11 +65,11 @@ def run( grounding_selection_mode: GroundingSelectionMode, ) -> BlockResult: return super().run( - images, - model_id, - task_type, - prompt, - classes, - grounding_detection, - grounding_selection_mode, + images=images, + model_version=model_id, + task_type=task_type, + prompt=prompt, + classes=classes, + grounding_detection=grounding_detection, + grounding_selection_mode=grounding_selection_mode, ) From d6bfbe0a460eebe795620e5a54e0a03aed1e2a12 Mon Sep 17 00:00:00 2001 From: Peter Robicheaux Date: Wed, 13 Nov 2024 23:38:26 +0000 Subject: [PATCH 6/6] Bugfix --- .../core/workflows/core_steps/models/foundation/florence2/v1.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inference/core/workflows/core_steps/models/foundation/florence2/v1.py b/inference/core/workflows/core_steps/models/foundation/florence2/v1.py index f461eb2d0..b42f50456 100644 --- a/inference/core/workflows/core_steps/models/foundation/florence2/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/florence2/v1.py @@ -134,7 +134,7 @@ TASKS_REQUIRING_PROMPT = { "phrase-grounded-object-detection", "phrase-grounded-instance-segmentation", - "unstructured", + "custom", } TASKS_REQUIRING_CLASSES = { "open-vocabulary-object-detection",