Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions keras_hub/api/layers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,9 @@
from keras_hub.src.models.vit.vit_image_converter import (
ViTImageConverter as ViTImageConverter,
)
from keras_hub.src.models.vit_det.vit_det_image_converter import (
ViTDetImageConverter as ViTDetImageConverter,
)
from keras_hub.src.models.whisper.whisper_audio_converter import (
WhisperAudioConverter as WhisperAudioConverter,
)
Expand Down
5 changes: 0 additions & 5 deletions keras_hub/src/models/vit_det/vit_det_backbone.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import keras
from keras import ops

from keras_hub.src.api_export import keras_hub_export
from keras_hub.src.models.backbone import Backbone
Expand Down Expand Up @@ -105,10 +104,6 @@ def __init__(
)
img_size = img_input.shape[-3]
x = img_input
# VITDet scales inputs based on the standard ImageNet mean/stddev.
x = (x - ops.array([0.485, 0.456, 0.406], dtype=x.dtype)) / (
ops.array([0.229, 0.224, 0.225], dtype=x.dtype)
)
x = ViTDetPatchingAndEmbedding(
kernel_size=(patch_size, patch_size),
strides=(patch_size, patch_size),
Expand Down
40 changes: 40 additions & 0 deletions keras_hub/src/models/vit_det/vit_det_image_converter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from keras_hub.src.api_export import keras_hub_export
from keras_hub.src.layers.preprocessing.image_converter import ImageConverter
from keras_hub.src.models.vit_det.vit_det_backbone import ViTDetBackbone


@keras_hub_export("keras_hub.layers.ViTDetImageConverter")
class ViTDetImageConverter(ImageConverter):
    """Image converter for ViTDet models.

    This layer rescales input images from `[0, 255]` to `[0, 1]` and then
    applies ImageNet normalization (mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225]) for ViTDet models.

    Args:
        image_size: int or tuple of (height, width). The output size of the
            image. Defaults to `(1024, 1024)`.

    Example:
    ```python
    import numpy as np

    converter = keras_hub.layers.ViTDetImageConverter(
        image_size=(1024, 1024)
    )
    # Simulate a uint8 RGB image; the converter resizes and normalizes.
    converter(np.random.randint(0, 256, size=(1, 512, 512, 3)))
    ```
    """

    backbone_cls = ViTDetBackbone

    def __init__(
        self,
        image_size=(1024, 1024),
        **kwargs,
    ):
        # ImageNet channel statistics; the base ImageConverter expects the
        # variance (std squared) rather than the std itself.
        mean = [0.485, 0.456, 0.406]
        std = [0.229, 0.224, 0.225]
        variance = [x**2 for x in std]
        super().__init__(
            image_size=image_size,
            scale=1.0 / 255.0,  # Scale uint8 pixel values to [0, 1].
            mean=mean,
            variance=variance,
            **kwargs,
        )
Loading