diff --git a/keras_hub/src/models/albert/albert_text_classifier_test.py b/keras_hub/src/models/albert/albert_text_classifier_test.py index 3d6413ff99..d9ab9c70d0 100644 --- a/keras_hub/src/models/albert/albert_text_classifier_test.py +++ b/keras_hub/src/models/albert/albert_text_classifier_test.py @@ -61,6 +61,14 @@ def test_saved_model(self): input_data=self.input_data, ) + @pytest.mark.large + def test_litert_export(self): + self.run_litert_export_test( + cls=AlbertTextClassifier, + init_kwargs=self.init_kwargs, + input_data=self.input_data, + ) + @pytest.mark.extra_large def test_all_presets(self): for preset in AlbertTextClassifier.presets: diff --git a/keras_hub/src/models/bart/bart_seq_2_seq_lm_test.py b/keras_hub/src/models/bart/bart_seq_2_seq_lm_test.py index f525908b67..983b71610f 100644 --- a/keras_hub/src/models/bart/bart_seq_2_seq_lm_test.py +++ b/keras_hub/src/models/bart/bart_seq_2_seq_lm_test.py @@ -149,6 +149,14 @@ def test_saved_model(self): input_data=self.input_data, ) + @pytest.mark.large + def test_litert_export(self): + self.run_litert_export_test( + cls=BartSeq2SeqLM, + init_kwargs=self.init_kwargs, + input_data=self.input_data, + ) + @pytest.mark.extra_large def test_all_presets(self): for preset in BartSeq2SeqLM.presets: diff --git a/keras_hub/src/models/basnet/basnet_test.py b/keras_hub/src/models/basnet/basnet_test.py index b5bbe405e2..7af901ffd8 100644 --- a/keras_hub/src/models/basnet/basnet_test.py +++ b/keras_hub/src/models/basnet/basnet_test.py @@ -3,6 +3,9 @@ from keras_hub.src.models.basnet.basnet import BASNetImageSegmenter from keras_hub.src.models.basnet.basnet_backbone import BASNetBackbone +from keras_hub.src.models.basnet.basnet_image_converter import ( + BASNetImageConverter, +) from keras_hub.src.models.basnet.basnet_preprocessor import BASNetPreprocessor from keras_hub.src.models.resnet.resnet_backbone import ResNetBackbone from keras_hub.src.tests.test_case import TestCase @@ -26,7 +29,9 @@ def setUp(self): 
image_encoder=self.image_encoder, num_classes=1, ) - self.preprocessor = BASNetPreprocessor() + self.preprocessor = BASNetPreprocessor( + image_converter=BASNetImageConverter(height=64, width=64) + ) self.init_kwargs = { "backbone": self.backbone, "preprocessor": self.preprocessor, @@ -49,6 +54,14 @@ def test_saved_model(self): input_data=self.images, ) + @pytest.mark.large + def test_litert_export(self): + self.run_litert_export_test( + cls=BASNetImageSegmenter, + init_kwargs=self.init_kwargs, + input_data=self.images, + ) + def test_end_to_end_model_predict(self): model = BASNetImageSegmenter(**self.init_kwargs) output = model.predict(self.images) diff --git a/keras_hub/src/models/bert/bert_text_classifier_test.py b/keras_hub/src/models/bert/bert_text_classifier_test.py index 606be7c839..2aacfa53d6 100644 --- a/keras_hub/src/models/bert/bert_text_classifier_test.py +++ b/keras_hub/src/models/bert/bert_text_classifier_test.py @@ -53,6 +53,14 @@ def test_saved_model(self): input_data=self.input_data, ) + @pytest.mark.large + def test_litert_export(self): + self.run_litert_export_test( + cls=BertTextClassifier, + init_kwargs=self.init_kwargs, + input_data=self.input_data, + ) + @pytest.mark.extra_large def test_smallest_preset(self): self.run_preset_test( diff --git a/keras_hub/src/models/bloom/bloom_causal_lm_test.py b/keras_hub/src/models/bloom/bloom_causal_lm_test.py index ada3d8eeb1..c6fc6de3e9 100644 --- a/keras_hub/src/models/bloom/bloom_causal_lm_test.py +++ b/keras_hub/src/models/bloom/bloom_causal_lm_test.py @@ -164,6 +164,14 @@ def test_saved_model(self): input_data=self.input_data, ) + @pytest.mark.large + def test_litert_export(self): + self.run_litert_export_test( + cls=BloomCausalLM, + init_kwargs=self.init_kwargs, + input_data=self.input_data, + ) + @pytest.mark.extra_large def test_all_presets(self): for preset in BloomCausalLM.presets: diff --git a/keras_hub/src/models/clip/clip_backbone_test.py b/keras_hub/src/models/clip/clip_backbone_test.py 
index d24b8cc553..8e0d224699 100644 --- a/keras_hub/src/models/clip/clip_backbone_test.py +++ b/keras_hub/src/models/clip/clip_backbone_test.py @@ -44,6 +44,16 @@ def test_saved_model(self): input_data=self.input_data, ) + @pytest.mark.large + def test_litert_export(self): + self.run_litert_export_test( + cls=CLIPBackbone, + init_kwargs=self.init_kwargs, + input_data=self.input_data, + comparison_mode="statistical", + output_thresholds={"*": {"max": 1e-4, "mean": 1e-5}}, + ) + @pytest.mark.extra_large def test_all_presets(self): for preset in CLIPBackbone.presets: diff --git a/keras_hub/src/models/cspnet/cspnet_image_classifier_test.py b/keras_hub/src/models/cspnet/cspnet_image_classifier_test.py index 6a5ee517e1..016d6ad478 100644 --- a/keras_hub/src/models/cspnet/cspnet_image_classifier_test.py +++ b/keras_hub/src/models/cspnet/cspnet_image_classifier_test.py @@ -76,3 +76,11 @@ def test_saved_model(self): init_kwargs=self.init_kwargs, input_data=self.images, ) + + @pytest.mark.large + def test_litert_export(self): + self.run_litert_export_test( + cls=CSPNetImageClassifier, + init_kwargs=self.init_kwargs, + input_data=self.images, + ) diff --git a/keras_hub/src/models/d_fine/d_fine_object_detector_test.py b/keras_hub/src/models/d_fine/d_fine_object_detector_test.py index 3b3bfe14c0..414701cd0b 100644 --- a/keras_hub/src/models/d_fine/d_fine_object_detector_test.py +++ b/keras_hub/src/models/d_fine/d_fine_object_detector_test.py @@ -152,3 +152,30 @@ def test_saved_model(self): init_kwargs=init_kwargs, input_data=self.images, ) + + @pytest.mark.large + def test_litert_export(self): + backbone = DFineBackbone(**self.base_backbone_kwargs) + init_kwargs = { + "backbone": backbone, + "num_classes": 4, + "bounding_box_format": self.bounding_box_format, + "preprocessor": self.preprocessor, + } + + # D-Fine ObjectDetector only takes images as input + input_data = self.images + + self.run_litert_export_test( + cls=DFineObjectDetector, + init_kwargs=init_kwargs, + 
input_data=input_data, + comparison_mode="statistical", + output_thresholds={ + "intermediate_predicted_corners": {"max": 5.0, "mean": 0.05}, + "intermediate_logits": {"max": 5.0, "mean": 0.1}, + "enc_topk_logits": {"max": 5.0, "mean": 0.03}, + "logits": {"max": 2.0, "mean": 0.03}, + "*": {"max": 1.0, "mean": 0.03}, + }, + ) diff --git a/keras_hub/src/models/deberta_v3/deberta_v3_text_classifier_test.py b/keras_hub/src/models/deberta_v3/deberta_v3_text_classifier_test.py index 11f3d139ee..3f443ae366 100644 --- a/keras_hub/src/models/deberta_v3/deberta_v3_text_classifier_test.py +++ b/keras_hub/src/models/deberta_v3/deberta_v3_text_classifier_test.py @@ -64,6 +64,14 @@ def test_saved_model(self): input_data=self.input_data, ) + @pytest.mark.large + def test_litert_export(self): + self.run_litert_export_test( + cls=DebertaV3TextClassifier, + init_kwargs=self.init_kwargs, + input_data=self.input_data, + ) + @pytest.mark.extra_large def test_all_presets(self): for preset in DebertaV3TextClassifier.presets: diff --git a/keras_hub/src/models/deeplab_v3/deeplab_v3_segmenter_test.py b/keras_hub/src/models/deeplab_v3/deeplab_v3_segmenter_test.py index 065bed3caa..0062f7b671 100644 --- a/keras_hub/src/models/deeplab_v3/deeplab_v3_segmenter_test.py +++ b/keras_hub/src/models/deeplab_v3/deeplab_v3_segmenter_test.py @@ -70,3 +70,15 @@ def test_saved_model(self): init_kwargs=self.init_kwargs, input_data=self.images, ) + + @pytest.mark.large + def test_litert_export(self): + self.run_litert_export_test( + cls=DeepLabV3ImageSegmenter, + init_kwargs=self.init_kwargs, + input_data=self.images, + comparison_mode="statistical", + output_thresholds={ + "*": {"max": 0.6, "mean": 0.3}, + }, + ) diff --git a/keras_hub/src/models/deit/deit_image_classifier_test.py b/keras_hub/src/models/deit/deit_image_classifier_test.py index d64a956cdc..5c784ccf19 100644 --- a/keras_hub/src/models/deit/deit_image_classifier_test.py +++ b/keras_hub/src/models/deit/deit_image_classifier_test.py @@ -55,3 
+55,11 @@ def test_saved_model(self): init_kwargs=self.init_kwargs, input_data=self.images, ) + + @pytest.mark.large + def test_litert_export(self): + self.run_litert_export_test( + cls=DeiTImageClassifier, + init_kwargs=self.init_kwargs, + input_data=self.images, + ) diff --git a/keras_hub/src/models/densenet/densenet_image_classifier_test.py b/keras_hub/src/models/densenet/densenet_image_classifier_test.py index 481005ba7e..18d622d79c 100644 --- a/keras_hub/src/models/densenet/densenet_image_classifier_test.py +++ b/keras_hub/src/models/densenet/densenet_image_classifier_test.py @@ -61,6 +61,14 @@ def test_saved_model(self): input_data=self.images, ) + @pytest.mark.large + def test_litert_export(self): + self.run_litert_export_test( + cls=DenseNetImageClassifier, + init_kwargs=self.init_kwargs, + input_data=self.images, + ) + @pytest.mark.extra_large def test_all_presets(self): for preset in DenseNetImageClassifier.presets: diff --git a/keras_hub/src/models/depth_anything/depth_anything_depth_estimator_test.py b/keras_hub/src/models/depth_anything/depth_anything_depth_estimator_test.py index 6277078488..5fedad8131 100644 --- a/keras_hub/src/models/depth_anything/depth_anything_depth_estimator_test.py +++ b/keras_hub/src/models/depth_anything/depth_anything_depth_estimator_test.py @@ -85,6 +85,16 @@ def test_saved_model(self): input_data=self.images, ) + @pytest.mark.large + def test_litert_export(self): + self.run_litert_export_test( + cls=DepthAnythingDepthEstimator, + init_kwargs=self.init_kwargs, + input_data=self.images, + comparison_mode="statistical", + output_thresholds={"depths": {"max": 2e-4, "mean": 1e-5}}, + ) + @pytest.mark.extra_large def test_all_presets(self): images = np.ones((2, 518, 518, 3), dtype="float32") diff --git a/keras_hub/src/models/dinov2/dinov2_backbone_test.py b/keras_hub/src/models/dinov2/dinov2_backbone_test.py index ca4edcafc0..a14626a024 100644 --- a/keras_hub/src/models/dinov2/dinov2_backbone_test.py +++ 
b/keras_hub/src/models/dinov2/dinov2_backbone_test.py @@ -49,6 +49,16 @@ def test_saved_model(self): input_data=self.input_data, ) + @pytest.mark.large + def test_litert_export(self): + self.run_litert_export_test( + cls=DINOV2Backbone, + init_kwargs=self.init_kwargs, + input_data=self.input_data, + comparison_mode="statistical", + output_thresholds={"*": {"max": 1e-4, "mean": 1e-5}}, + ) + @pytest.mark.large def test_position_embedding_interpolation(self): model = DINOV2Backbone(**self.init_kwargs) @@ -144,6 +154,16 @@ def test_saved_model(self): input_data=self.input_data, ) + @pytest.mark.large + def test_litert_export(self): + self.run_litert_export_test( + cls=DINOV2Backbone, + init_kwargs=self.init_kwargs, + input_data=self.input_data, + comparison_mode="statistical", + output_thresholds={"*": {"max": 1e-4, "mean": 1e-5}}, + ) + @pytest.mark.kaggle_key_required @pytest.mark.extra_large def test_smallest_preset(self): diff --git a/keras_hub/src/models/dinov3/dinov3_backbone_test.py b/keras_hub/src/models/dinov3/dinov3_backbone_test.py index b8fdd9a0c6..c899e932ce 100644 --- a/keras_hub/src/models/dinov3/dinov3_backbone_test.py +++ b/keras_hub/src/models/dinov3/dinov3_backbone_test.py @@ -52,6 +52,14 @@ def test_saved_model(self): input_data=self.input_data, ) + @pytest.mark.large + def test_litert_export(self): + self.run_litert_export_test( + cls=DINOV3Backbone, + init_kwargs=self.init_kwargs, + input_data=self.input_data, + ) + @pytest.mark.large def test_position_embedding_interpolation(self): model = DINOV3Backbone(**self.init_kwargs) diff --git a/keras_hub/src/models/distil_bert/distil_bert_text_classifier_test.py b/keras_hub/src/models/distil_bert/distil_bert_text_classifier_test.py index 71fdfc52b4..db57d21d0e 100644 --- a/keras_hub/src/models/distil_bert/distil_bert_text_classifier_test.py +++ b/keras_hub/src/models/distil_bert/distil_bert_text_classifier_test.py @@ -59,6 +59,14 @@ def test_saved_model(self): input_data=self.input_data, ) + 
@pytest.mark.large + def test_litert_export(self): + self.run_litert_export_test( + cls=DistilBertTextClassifier, + init_kwargs=self.init_kwargs, + input_data=self.input_data, + ) + @pytest.mark.extra_large def test_all_presets(self): for preset in DistilBertTextClassifier.presets: diff --git a/keras_hub/src/models/efficientnet/efficientnet_image_classifier_test.py b/keras_hub/src/models/efficientnet/efficientnet_image_classifier_test.py index d2b5717b68..18f13e5505 100644 --- a/keras_hub/src/models/efficientnet/efficientnet_image_classifier_test.py +++ b/keras_hub/src/models/efficientnet/efficientnet_image_classifier_test.py @@ -7,6 +7,12 @@ from keras_hub.src.models.efficientnet.efficientnet_image_classifier import ( EfficientNetImageClassifier, ) +from keras_hub.src.models.efficientnet.efficientnet_image_classifier_preprocessor import ( # noqa: E501 + EfficientNetImageClassifierPreprocessor, +) +from keras_hub.src.models.efficientnet.efficientnet_image_converter import ( + EfficientNetImageConverter, +) from keras_hub.src.tests.test_case import TestCase @@ -38,6 +44,9 @@ def setUp(self): self.init_kwargs = { "backbone": backbone, "num_classes": 1000, + "preprocessor": EfficientNetImageClassifierPreprocessor( + image_converter=EfficientNetImageConverter(image_size=(16, 16)) + ), } self.train_data = (self.images, self.labels) @@ -82,3 +91,11 @@ def test_all_presets(self): input_data=self.images, expected_output_shape=(2, 2), ) + + @pytest.mark.large + def test_litert_export(self): + self.run_litert_export_test( + cls=EfficientNetImageClassifier, + init_kwargs=self.init_kwargs, + input_data=self.images, + ) diff --git a/keras_hub/src/models/electra/electra_backbone_test.py b/keras_hub/src/models/electra/electra_backbone_test.py index de6416bd7e..a0240c5ed0 100644 --- a/keras_hub/src/models/electra/electra_backbone_test.py +++ b/keras_hub/src/models/electra/electra_backbone_test.py @@ -41,6 +41,14 @@ def test_saved_model(self): input_data=self.input_data, ) + 
@pytest.mark.large + def test_litert_export(self): + self.run_litert_export_test( + cls=ElectraBackbone, + init_kwargs=self.init_kwargs, + input_data=self.input_data, + ) + @pytest.mark.extra_large def test_smallest_preset(self): self.run_preset_test( diff --git a/keras_hub/src/models/esm/esm_classifier_test.py b/keras_hub/src/models/esm/esm_classifier_test.py index 8eeec2b40d..58103a448e 100644 --- a/keras_hub/src/models/esm/esm_classifier_test.py +++ b/keras_hub/src/models/esm/esm_classifier_test.py @@ -51,3 +51,11 @@ def test_saved_model(self): init_kwargs=self.init_kwargs, input_data=self.input_data, ) + + @pytest.mark.large + def test_litert_export(self): + self.run_litert_export_test( + cls=ESMProteinClassifier, + init_kwargs=self.init_kwargs, + input_data=self.input_data, + ) diff --git a/keras_hub/src/models/f_net/f_net_text_classifier_test.py b/keras_hub/src/models/f_net/f_net_text_classifier_test.py index 4658e795f6..ceaae7944c 100644 --- a/keras_hub/src/models/f_net/f_net_text_classifier_test.py +++ b/keras_hub/src/models/f_net/f_net_text_classifier_test.py @@ -57,6 +57,20 @@ def test_saved_model(self): input_data=self.input_data, ) + @pytest.mark.large + def test_litert_export(self): + # F-Net does NOT use padding_mask - it only uses token_ids and + # segment_ids. Don't add padding_mask to input_data. 
+ self.run_litert_export_test( + cls=FNetTextClassifier, + init_kwargs=self.init_kwargs, + input_data=self.input_data, + comparison_mode="statistical", + output_thresholds={ + "*": {"max": 0.01, "mean": 0.005}, + }, + ) + @pytest.mark.extra_large def test_all_presets(self): for preset in FNetTextClassifier.presets: diff --git a/keras_hub/src/models/falcon/falcon_causal_lm_test.py b/keras_hub/src/models/falcon/falcon_causal_lm_test.py index 393f8a8e97..c8b699b818 100644 --- a/keras_hub/src/models/falcon/falcon_causal_lm_test.py +++ b/keras_hub/src/models/falcon/falcon_causal_lm_test.py @@ -164,6 +164,14 @@ def test_saved_model(self): input_data=self.input_data, ) + @pytest.mark.large + def test_litert_export(self): + self.run_litert_export_test( + cls=FalconCausalLM, + init_kwargs=self.init_kwargs, + input_data=self.input_data, + ) + @pytest.mark.extra_large def test_all_presets(self): for preset in FalconCausalLM.presets: diff --git a/keras_hub/src/models/flux/flux_backbone_test.py b/keras_hub/src/models/flux/flux_backbone_test.py index 5a15e3b7f0..322bfd81a3 100644 --- a/keras_hub/src/models/flux/flux_backbone_test.py +++ b/keras_hub/src/models/flux/flux_backbone_test.py @@ -71,3 +71,13 @@ def test_saved_model(self): init_kwargs=self.init_kwargs, input_data=self.input_data, ) + + @pytest.mark.large + def test_litert_export(self): + self.run_litert_export_test( + cls=FluxBackbone, + init_kwargs=self.init_kwargs, + input_data=self.input_data, + comparison_mode="statistical", + output_thresholds={"*": {"max": 1e-4, "mean": 1e-5}}, + ) diff --git a/keras_hub/src/models/gemma/gemma_causal_lm_test.py b/keras_hub/src/models/gemma/gemma_causal_lm_test.py index 0581d42ffe..98c696c82f 100644 --- a/keras_hub/src/models/gemma/gemma_causal_lm_test.py +++ b/keras_hub/src/models/gemma/gemma_causal_lm_test.py @@ -201,6 +201,32 @@ def test_saved_model(self): input_data=self.input_data, ) + @pytest.mark.large + def test_litert_export(self): + """Test LiteRT export for GemmaCausalLM 
with small test model.""" + model = GemmaCausalLM(**self.init_kwargs) + + # Convert boolean padding_mask to int32 for LiteRT compatibility + input_data = self.input_data.copy() + if "padding_mask" in input_data: + input_data["padding_mask"] = ops.cast( + input_data["padding_mask"], "int32" + ) + + expected_output_shape = ( + 2, + 8, + self.preprocessor.tokenizer.vocabulary_size(), + ) + + self.run_litert_export_test( + model=model, + input_data=input_data, + expected_output_shape=expected_output_shape, + comparison_mode="statistical", + output_thresholds={"*": {"max": 1e-3, "mean": 1e-5}}, + ) + @pytest.mark.kaggle_key_required @pytest.mark.extra_large def test_all_presets(self): diff --git a/keras_hub/src/models/gemma3/gemma3_causal_lm_test.py b/keras_hub/src/models/gemma3/gemma3_causal_lm_test.py index ad37403752..0054cdb6b9 100644 --- a/keras_hub/src/models/gemma3/gemma3_causal_lm_test.py +++ b/keras_hub/src/models/gemma3/gemma3_causal_lm_test.py @@ -226,6 +226,63 @@ def test_saved_model(self, modality_type): input_data=input_data, ) + @pytest.mark.large + def test_litert_export(self): + """Test LiteRT export for Gemma3CausalLM with small test model.""" + # Use the small text-only model for fast testing + model = Gemma3CausalLM(**self.text_init_kwargs) + + # Test with text input data + input_data = self.text_input_data.copy() + # Convert boolean padding_mask to int32 for LiteRT compatibility + if "padding_mask" in input_data: + input_data["padding_mask"] = ops.cast( + input_data["padding_mask"], "int32" + ) + + expected_output_shape = ( + 2, + 20, + self.text_preprocessor.tokenizer.vocabulary_size(), + ) + + self.run_litert_export_test( + model=model, + input_data=input_data, + expected_output_shape=expected_output_shape, + comparison_mode="statistical", + output_thresholds={"*": {"max": 1e-2, "mean": 1e-4}}, + ) + + @pytest.mark.large + def test_litert_export_multimodal(self): + """Test LiteRT export for multimodal Gemma3CausalLM with small test + model.""" + # 
Use the small multimodal model for testing + model = Gemma3CausalLM(**self.init_kwargs) + + # Test with multimodal input data + input_data = self.input_data.copy() + # Convert boolean padding_mask to int32 for LiteRT compatibility + if "padding_mask" in input_data: + input_data["padding_mask"] = ops.cast( + input_data["padding_mask"], "int32" + ) + + expected_output_shape = ( + 2, + 20, + self.preprocessor.tokenizer.vocabulary_size(), + ) + + self.run_litert_export_test( + model=model, + input_data=input_data, + expected_output_shape=expected_output_shape, + comparison_mode="statistical", + output_thresholds={"*": {"max": 1e-2, "mean": 1e-4}}, + ) + @pytest.mark.kaggle_key_required @pytest.mark.extra_large def test_all_presets(self): diff --git a/keras_hub/src/models/gpt2/gpt2_causal_lm_test.py b/keras_hub/src/models/gpt2/gpt2_causal_lm_test.py index ec3d829047..231ba1f8ee 100644 --- a/keras_hub/src/models/gpt2/gpt2_causal_lm_test.py +++ b/keras_hub/src/models/gpt2/gpt2_causal_lm_test.py @@ -107,6 +107,32 @@ def test_saved_model(self): input_data=self.input_data, ) + @pytest.mark.large + def test_litert_export(self): + """Test LiteRT export for GPT2CausalLM with small test model.""" + model = GPT2CausalLM(**self.init_kwargs) + + # Convert boolean padding_mask to int32 for LiteRT compatibility + input_data = self.input_data.copy() + if "padding_mask" in input_data: + input_data["padding_mask"] = ops.cast( + input_data["padding_mask"], "int32" + ) + + expected_output_shape = ( + 2, + 8, + self.preprocessor.tokenizer.vocabulary_size(), + ) + + self.run_litert_export_test( + model=model, + input_data=input_data, + expected_output_shape=expected_output_shape, + comparison_mode="statistical", + output_thresholds={"*": {"max": 1e-3, "mean": 1e-5}}, + ) + @pytest.mark.extra_large def test_all_presets(self): for preset in GPT2CausalLM.presets: diff --git a/keras_hub/src/models/gpt_neo_x/gpt_neo_x_causal_lm_test.py b/keras_hub/src/models/gpt_neo_x/gpt_neo_x_causal_lm_test.py 
index f66c748b9e..4ba327886a 100644 --- a/keras_hub/src/models/gpt_neo_x/gpt_neo_x_causal_lm_test.py +++ b/keras_hub/src/models/gpt_neo_x/gpt_neo_x_causal_lm_test.py @@ -105,3 +105,15 @@ def test_saved_model(self): init_kwargs=self.init_kwargs, input_data=self.input_data, ) + + @pytest.mark.large + def test_litert_export(self): + self.run_litert_export_test( + cls=GPTNeoXCausalLM, + init_kwargs=self.init_kwargs, + input_data=self.input_data, + comparison_mode="statistical", + output_thresholds={ + "*": {"max": 1e-3, "mean": 1e-4}, + }, # More lenient thresholds for numerical differences + ) diff --git a/keras_hub/src/models/hgnetv2/hgnetv2_image_classifier_test.py b/keras_hub/src/models/hgnetv2/hgnetv2_image_classifier_test.py index f294a23b72..8eb16b3cad 100644 --- a/keras_hub/src/models/hgnetv2/hgnetv2_image_classifier_test.py +++ b/keras_hub/src/models/hgnetv2/hgnetv2_image_classifier_test.py @@ -89,3 +89,11 @@ def test_saved_model(self): init_kwargs=self.init_kwargs, input_data=self.images, ) + + @pytest.mark.large + def test_litert_export(self): + self.run_litert_export_test( + cls=HGNetV2ImageClassifier, + init_kwargs=self.init_kwargs, + input_data=self.images, + ) diff --git a/keras_hub/src/models/llama/llama_causal_lm_test.py b/keras_hub/src/models/llama/llama_causal_lm_test.py index 1ff5a3a987..681ae1da83 100644 --- a/keras_hub/src/models/llama/llama_causal_lm_test.py +++ b/keras_hub/src/models/llama/llama_causal_lm_test.py @@ -106,6 +106,14 @@ def test_saved_model(self): input_data=self.input_data, ) + @pytest.mark.large + def test_litert_export(self): + self.run_litert_export_test( + cls=LlamaCausalLM, + init_kwargs=self.init_kwargs, + input_data=self.input_data, + ) + @pytest.mark.extra_large def test_all_presets(self): for preset in LlamaCausalLM.presets: diff --git a/keras_hub/src/models/llama3/llama3_causal_lm_test.py b/keras_hub/src/models/llama3/llama3_causal_lm_test.py index a054b8ae14..6c6386d740 100644 --- a/keras_hub/src/models/llama3/llama3_causal_lm_test.py +++ 
b/keras_hub/src/models/llama3/llama3_causal_lm_test.py @@ -114,6 +114,32 @@ def test_saved_model(self): input_data=self.input_data, ) + @pytest.mark.large + def test_litert_export(self): + """Test LiteRT export for Llama3CausalLM with small test model.""" + model = Llama3CausalLM(**self.init_kwargs) + + # Convert boolean padding_mask to int32 for LiteRT compatibility + input_data = self.input_data.copy() + if "padding_mask" in input_data: + input_data["padding_mask"] = ops.cast( + input_data["padding_mask"], "int32" + ) + + expected_output_shape = ( + 2, + 7, + self.preprocessor.tokenizer.vocabulary_size(), + ) + + self.run_litert_export_test( + model=model, + input_data=input_data, + expected_output_shape=expected_output_shape, + comparison_mode="statistical", + output_thresholds={"*": {"max": 1e-3, "mean": 1e-5}}, + ) + @pytest.mark.extra_large def test_all_presets(self): for preset in Llama3CausalLM.presets: diff --git a/keras_hub/src/models/mistral/mistral_causal_lm_test.py b/keras_hub/src/models/mistral/mistral_causal_lm_test.py index c682c2866d..f5d9c82ff5 100644 --- a/keras_hub/src/models/mistral/mistral_causal_lm_test.py +++ b/keras_hub/src/models/mistral/mistral_causal_lm_test.py @@ -106,6 +106,32 @@ def test_saved_model(self): input_data=self.input_data, ) + @pytest.mark.large + def test_litert_export(self): + """Test LiteRT export for MistralCausalLM with small test model.""" + model = MistralCausalLM(**self.init_kwargs) + + # Convert boolean padding_mask to int32 for LiteRT compatibility + input_data = self.input_data.copy() + if "padding_mask" in input_data: + input_data["padding_mask"] = ops.cast( + input_data["padding_mask"], "int32" + ) + + expected_output_shape = ( + 2, + 8, + self.preprocessor.tokenizer.vocabulary_size(), + ) + + self.run_litert_export_test( + model=model, + input_data=input_data, + expected_output_shape=expected_output_shape, + comparison_mode="statistical", + output_thresholds={"*": {"max": 1e-3, "mean": 1e-5}}, + ) + 
@pytest.mark.extra_large def test_all_presets(self): for preset in MistralCausalLM.presets: diff --git a/keras_hub/src/models/mit/mit_image_classifier_test.py b/keras_hub/src/models/mit/mit_image_classifier_test.py index c63a456311..4203ccda42 100644 --- a/keras_hub/src/models/mit/mit_image_classifier_test.py +++ b/keras_hub/src/models/mit/mit_image_classifier_test.py @@ -50,3 +50,11 @@ def test_saved_model(self): init_kwargs=self.init_kwargs, input_data=self.images, ) + + @pytest.mark.large + def test_litert_export(self): + self.run_litert_export_test( + cls=MiTImageClassifier, + init_kwargs=self.init_kwargs, + input_data=self.images, + ) diff --git a/keras_hub/src/models/mixtral/mixtral_causal_lm_test.py b/keras_hub/src/models/mixtral/mixtral_causal_lm_test.py index a711a06b0e..6417c068a2 100644 --- a/keras_hub/src/models/mixtral/mixtral_causal_lm_test.py +++ b/keras_hub/src/models/mixtral/mixtral_causal_lm_test.py @@ -107,6 +107,14 @@ def test_saved_model(self): input_data=self.input_data, ) + @pytest.mark.large + def test_litert_export(self): + self.run_litert_export_test( + cls=MixtralCausalLM, + init_kwargs=self.init_kwargs, + input_data=self.input_data, + ) + @pytest.mark.extra_large def test_all_presets(self): for preset in MixtralCausalLM.presets: diff --git a/keras_hub/src/models/mobilenet/mobilenet_image_classifier_test.py b/keras_hub/src/models/mobilenet/mobilenet_image_classifier_test.py index c996122fa5..27e41bcff9 100644 --- a/keras_hub/src/models/mobilenet/mobilenet_image_classifier_test.py +++ b/keras_hub/src/models/mobilenet/mobilenet_image_classifier_test.py @@ -101,3 +101,11 @@ def test_saved_model(self): init_kwargs=self.init_kwargs, input_data=self.images, ) + + @pytest.mark.large + def test_litert_export(self): + self.run_litert_export_test( + cls=MobileNetImageClassifier, + init_kwargs=self.init_kwargs, + input_data=self.images, + ) diff --git a/keras_hub/src/models/mobilenetv5/mobilenetv5_image_classifier_test.py 
b/keras_hub/src/models/mobilenetv5/mobilenetv5_image_classifier_test.py index 219cb6f285..2229295159 100644 --- a/keras_hub/src/models/mobilenetv5/mobilenetv5_image_classifier_test.py +++ b/keras_hub/src/models/mobilenetv5/mobilenetv5_image_classifier_test.py @@ -74,3 +74,13 @@ def test_saved_model(self): init_kwargs=self.init_kwargs, input_data=self.images, ) + + @pytest.mark.large + def test_litert_export(self): + self.run_litert_export_test( + cls=MobileNetV5ImageClassifier, + init_kwargs=self.init_kwargs, + input_data=self.images, + comparison_mode="statistical", + output_thresholds={"*": {"max": 1e-4, "mean": 1e-5}}, + ) diff --git a/keras_hub/src/models/moonshine/moonshine_audio_to_text_test.py b/keras_hub/src/models/moonshine/moonshine_audio_to_text_test.py index 5d0a7dbe7a..8b1d9bc8c7 100644 --- a/keras_hub/src/models/moonshine/moonshine_audio_to_text_test.py +++ b/keras_hub/src/models/moonshine/moonshine_audio_to_text_test.py @@ -145,6 +145,14 @@ def test_saved_model(self): input_data=self.input_data, ) + @pytest.mark.large + def test_litert_export(self): + self.run_litert_export_test( + cls=MoonshineAudioToText, + init_kwargs=self.init_kwargs, + input_data=self.input_data, + ) + @pytest.mark.extra_large def test_all_presets(self): for preset in MoonshineAudioToText.presets: diff --git a/keras_hub/src/models/opt/opt_causal_lm_test.py b/keras_hub/src/models/opt/opt_causal_lm_test.py index 138c5a5180..6a9aa12262 100644 --- a/keras_hub/src/models/opt/opt_causal_lm_test.py +++ b/keras_hub/src/models/opt/opt_causal_lm_test.py @@ -105,6 +105,14 @@ def test_saved_model(self): input_data=self.input_data, ) + @pytest.mark.large + def test_litert_export(self): + self.run_litert_export_test( + cls=OPTCausalLM, + init_kwargs=self.init_kwargs, + input_data=self.input_data, + ) + @pytest.mark.extra_large def test_all_presets(self): for preset in OPTCausalLM.presets: diff --git a/keras_hub/src/models/pali_gemma/pali_gemma_causal_lm_test.py 
b/keras_hub/src/models/pali_gemma/pali_gemma_causal_lm_test.py index 1f53cdef04..108b150b50 100644 --- a/keras_hub/src/models/pali_gemma/pali_gemma_causal_lm_test.py +++ b/keras_hub/src/models/pali_gemma/pali_gemma_causal_lm_test.py @@ -106,6 +106,35 @@ def test_saved_model(self): input_data=input_data, ) + @pytest.mark.large + def test_litert_export(self): + input_data = { + "token_ids": np.random.randint( + 0, + self.vocabulary_size, + size=(self.batch_size, self.text_sequence_length), + dtype="int32", + ), + "images": np.ones( + (self.batch_size, self.image_size, self.image_size, 3) + ), + "padding_mask": np.ones( + (self.batch_size, self.text_sequence_length), + dtype="int32", + ), + "response_mask": np.zeros( + (self.batch_size, self.text_sequence_length), + dtype="int32", + ), + } + self.run_litert_export_test( + cls=PaliGemmaCausalLM, + init_kwargs=self.init_kwargs, + input_data=input_data, + comparison_mode="statistical", + output_thresholds={"*": {"max": 2e-6, "mean": 1e-6}}, + ) + def test_pali_gemma_causal_model(self): preprocessed, _, _ = self.preprocessor( { diff --git a/keras_hub/src/models/parseq/parseq_causal_lm_test.py b/keras_hub/src/models/parseq/parseq_causal_lm_test.py index 177c596521..cc23a37666 100644 --- a/keras_hub/src/models/parseq/parseq_causal_lm_test.py +++ b/keras_hub/src/models/parseq/parseq_causal_lm_test.py @@ -101,3 +101,30 @@ def test_causal_lm_basics(self): train_data=self.train_data, expected_output_shape=expected_shape_full, ) + + @pytest.mark.large + def test_litert_export(self): + # Create input data for export test + input_data = { + "images": np.random.randn( + self.batch_size, + self.image_height, + self.image_width, + self.num_channels, + ), + "token_ids": np.random.randint( + 0, + self.vocabulary_size, + (self.batch_size, self.max_label_length), + ), + "padding_mask": np.ones( + (self.batch_size, self.max_label_length), dtype="int32" + ), + } + self.run_litert_export_test( + cls=PARSeqCausalLM, + 
init_kwargs=self.init_kwargs, + input_data=input_data, + comparison_mode="statistical", + output_thresholds={"*": {"max": 1e-3, "mean": 1e-4}}, + ) diff --git a/keras_hub/src/models/phi3/phi3_causal_lm_test.py b/keras_hub/src/models/phi3/phi3_causal_lm_test.py index 564d9d4785..0b579d5d27 100644 --- a/keras_hub/src/models/phi3/phi3_causal_lm_test.py +++ b/keras_hub/src/models/phi3/phi3_causal_lm_test.py @@ -107,6 +107,32 @@ def test_saved_model(self): input_data=self.input_data, ) + @pytest.mark.large + def test_litert_export(self): + """Test LiteRT export for Phi3CausalLM with small test model.""" + model = Phi3CausalLM(**self.init_kwargs) + + # Convert boolean padding_mask to int32 for LiteRT compatibility + input_data = self.input_data.copy() + if "padding_mask" in input_data: + input_data["padding_mask"] = ops.cast( + input_data["padding_mask"], "int32" + ) + + expected_output_shape = ( + 2, + 12, + self.preprocessor.tokenizer.vocabulary_size(), + ) + + self.run_litert_export_test( + model=model, + input_data=input_data, + expected_output_shape=expected_output_shape, + comparison_mode="statistical", + output_thresholds={"*": {"max": 1e-3, "mean": 1e-5}}, + ) + @pytest.mark.extra_large def test_all_presets(self): for preset in Phi3CausalLM.presets: diff --git a/keras_hub/src/models/qwen/qwen_causal_lm_test.py b/keras_hub/src/models/qwen/qwen_causal_lm_test.py index b1a715646e..081461e94f 100644 --- a/keras_hub/src/models/qwen/qwen_causal_lm_test.py +++ b/keras_hub/src/models/qwen/qwen_causal_lm_test.py @@ -113,6 +113,14 @@ def test_saved_model(self): input_data=self.input_data, ) + @pytest.mark.large + def test_litert_export(self): + self.run_litert_export_test( + cls=QwenCausalLM, + init_kwargs=self.init_kwargs, + input_data=self.input_data, + ) + @pytest.mark.extra_large def test_all_presets(self): for preset in QwenCausalLM.presets: diff --git a/keras_hub/src/models/qwen3/qwen3_causal_lm_test.py b/keras_hub/src/models/qwen3/qwen3_causal_lm_test.py index 
5e0456b521..7903967e07 100644 --- a/keras_hub/src/models/qwen3/qwen3_causal_lm_test.py +++ b/keras_hub/src/models/qwen3/qwen3_causal_lm_test.py @@ -114,6 +114,32 @@ def test_saved_model(self): input_data=self.input_data, ) + @pytest.mark.large + def test_litert_export(self): + """Test LiteRT export for Qwen3CausalLM with small test model.""" + model = Qwen3CausalLM(**self.init_kwargs) + + # Convert boolean padding_mask to int32 for LiteRT compatibility + input_data = self.input_data.copy() + if "padding_mask" in input_data: + input_data["padding_mask"] = ops.cast( + input_data["padding_mask"], "int32" + ) + + expected_output_shape = ( + 2, + 7, + self.preprocessor.tokenizer.vocabulary_size(), + ) + + self.run_litert_export_test( + model=model, + input_data=input_data, + expected_output_shape=expected_output_shape, + comparison_mode="statistical", + output_thresholds={"*": {"max": 1e-3, "mean": 1e-5}}, + ) + @pytest.mark.extra_large def test_all_presets(self): for preset in Qwen3CausalLM.presets: diff --git a/keras_hub/src/models/qwen3_moe/qwen3_moe_causal_lm_test.py b/keras_hub/src/models/qwen3_moe/qwen3_moe_causal_lm_test.py index d342c1e165..f57279a69f 100644 --- a/keras_hub/src/models/qwen3_moe/qwen3_moe_causal_lm_test.py +++ b/keras_hub/src/models/qwen3_moe/qwen3_moe_causal_lm_test.py @@ -120,6 +120,14 @@ def test_saved_model(self): input_data=self.input_data, ) + @pytest.mark.large + def test_litert_export(self): + self.run_litert_export_test( + cls=Qwen3MoeCausalLM, + init_kwargs=self.init_kwargs, + input_data=self.input_data, + ) + @pytest.mark.extra_large def test_all_presets(self): for preset in Qwen3MoeCausalLM.presets: diff --git a/keras_hub/src/models/qwen_moe/qwen_moe_causal_lm_test.py b/keras_hub/src/models/qwen_moe/qwen_moe_causal_lm_test.py index ad1b8c3113..9be89a4add 100644 --- a/keras_hub/src/models/qwen_moe/qwen_moe_causal_lm_test.py +++ b/keras_hub/src/models/qwen_moe/qwen_moe_causal_lm_test.py @@ -139,6 +139,14 @@ def test_saved_model(self): 
input_data=self.input_data, ) + @pytest.mark.large + def test_litert_export(self): + self.run_litert_export_test( + cls=QwenMoeCausalLM, + init_kwargs=self.init_kwargs, + input_data=self.input_data, + ) + @pytest.mark.extra_large def test_all_presets(self): for preset in QwenMoeCausalLM.presets: diff --git a/keras_hub/src/models/resnet/resnet_image_classifier_test.py b/keras_hub/src/models/resnet/resnet_image_classifier_test.py index 9bc5897fee..d4dc77a97a 100644 --- a/keras_hub/src/models/resnet/resnet_image_classifier_test.py +++ b/keras_hub/src/models/resnet/resnet_image_classifier_test.py @@ -65,6 +65,21 @@ def test_saved_model(self): input_data=self.images, ) + @pytest.mark.large + def test_litert_export(self): + """Test LiteRT export for ResNetImageClassifier with small test + model.""" + model = ResNetImageClassifier(**self.init_kwargs) + expected_output_shape = (2, 2) # 2 images, 2 classes + + self.run_litert_export_test( + model=model, + input_data=self.images, + expected_output_shape=expected_output_shape, + comparison_mode="statistical", + output_thresholds={"*": {"max": 5e-5, "mean": 1e-5}}, + ) + @pytest.mark.extra_large def test_all_presets(self): for preset in ResNetImageClassifier.presets: diff --git a/keras_hub/src/models/retinanet/retinanet_object_detector_test.py b/keras_hub/src/models/retinanet/retinanet_object_detector_test.py index 5e01c802a5..b304fe3ada 100644 --- a/keras_hub/src/models/retinanet/retinanet_object_detector_test.py +++ b/keras_hub/src/models/retinanet/retinanet_object_detector_test.py @@ -60,7 +60,7 @@ def setUp(self): ) image_converter = RetinaNetImageConverter( - bounding_box_format="yxyx", scale=1 / 255.0, image_size=(800, 800) + bounding_box_format="yxyx", scale=1 / 255.0, image_size=(512, 512) ) preprocessor = RetinaNetObjectDetectorPreprocessor( @@ -108,3 +108,19 @@ def test_saved_model(self): init_kwargs=self.init_kwargs, input_data=self.images, ) + + @pytest.mark.large + def test_litert_export(self): + input_data = 
self.images + + self.run_litert_export_test( + cls=RetinaNetObjectDetector, + init_kwargs=self.init_kwargs, + input_data=input_data, + comparison_mode="statistical", + output_thresholds={ + "enc_topk_logits": {"max": 5.0, "mean": 0.05}, + "logits": {"max": 2.0, "mean": 0.05}, + "*": {"max": 1.5, "mean": 0.05}, + }, + ) diff --git a/keras_hub/src/models/roberta/roberta_text_classifier_test.py b/keras_hub/src/models/roberta/roberta_text_classifier_test.py index c5534a0dc4..adc3daa3ba 100644 --- a/keras_hub/src/models/roberta/roberta_text_classifier_test.py +++ b/keras_hub/src/models/roberta/roberta_text_classifier_test.py @@ -59,6 +59,14 @@ def test_saved_model(self): input_data=self.input_data, ) + @pytest.mark.large + def test_litert_export(self): + self.run_litert_export_test( + cls=RobertaTextClassifier, + init_kwargs=self.init_kwargs, + input_data=self.input_data, + ) + @pytest.mark.extra_large def test_all_presets(self): for preset in RobertaTextClassifier.presets: diff --git a/keras_hub/src/models/roformer_v2/roformer_v2_text_classifier_test.py b/keras_hub/src/models/roformer_v2/roformer_v2_text_classifier_test.py index b24395c574..22a038c538 100644 --- a/keras_hub/src/models/roformer_v2/roformer_v2_text_classifier_test.py +++ b/keras_hub/src/models/roformer_v2/roformer_v2_text_classifier_test.py @@ -1,3 +1,5 @@ +import pytest + from keras_hub.src.models.roformer_v2 import ( roformer_v2_text_classifier_preprocessor as r, ) @@ -50,3 +52,30 @@ def test_classifier_basics(self): train_data=self.train_data, expected_output_shape=(2, 2), ) + + @pytest.mark.large + def test_saved_model(self): + self.run_model_saving_test( + cls=RoformerV2TextClassifier, + init_kwargs=self.init_kwargs, + input_data=self.input_data, + ) + + @pytest.mark.large + def test_litert_export(self): + self.run_litert_export_test( + cls=RoformerV2TextClassifier, + init_kwargs=self.init_kwargs, + input_data=self.input_data, + ) + + @pytest.mark.extra_large + def test_all_presets(self): + for 
preset in RoformerV2TextClassifier.presets: + self.run_preset_test( + cls=RoformerV2TextClassifier, + preset=preset, + init_kwargs={"num_classes": 2}, + input_data=self.input_data, + expected_output_shape=(2, 2), + ) diff --git a/keras_hub/src/models/sam/sam_image_segmenter_test.py b/keras_hub/src/models/sam/sam_image_segmenter_test.py index 278b6a8749..63a248d6f4 100644 --- a/keras_hub/src/models/sam/sam_image_segmenter_test.py +++ b/keras_hub/src/models/sam/sam_image_segmenter_test.py @@ -22,6 +22,11 @@ def setUp(self): (self.batch_size, self.image_size, self.image_size, 3), dtype="float32", ) + # Use more realistic SAM configuration for export testing + # Real SAM uses 64x64 embeddings for 1024x1024 images + # Scale down proportionally: 128/1024 = 1/8, + # so embeddings should be 64/8 = 8 + # But keep it simple for testing self.image_encoder = ViTDetBackbone( hidden_size=16, num_layers=16, @@ -70,9 +75,10 @@ def setUp(self): "points": np.ones((self.batch_size, 1, 2), dtype="float32"), "labels": np.ones((self.batch_size, 1), dtype="float32"), "boxes": np.ones((self.batch_size, 1, 2, 2), dtype="float32"), - "masks": np.zeros( - (self.batch_size, 0, self.image_size, self.image_size, 1) - ), + # For TFLite export, use 1 mask filled with + # zeros (interpreted as "no mask") + # Use the expected mask size of 4 * image_embedding_size = 32 + "masks": np.zeros((self.batch_size, 1, 32, 32, 1), dtype="float32"), } self.labels = { "masks": np.ones((self.batch_size, 2), dtype="float32"), @@ -124,3 +130,16 @@ def test_all_presets(self): "iou_pred": [2], }, ) + + @pytest.mark.large + def test_litert_export(self): + self.run_litert_export_test( + cls=SAMImageSegmenter, + init_kwargs=self.init_kwargs, + input_data=self.inputs, + comparison_mode="statistical", + output_thresholds={ + "masks": {"max": 1e-3, "mean": 1e-4}, + "iou_pred": {"max": 1e-3, "mean": 1e-4}, + }, + ) diff --git a/keras_hub/src/models/sam/sam_prompt_encoder.py b/keras_hub/src/models/sam/sam_prompt_encoder.py 
index 12b77f4a7d..883903415c 100644 --- a/keras_hub/src/models/sam/sam_prompt_encoder.py +++ b/keras_hub/src/models/sam/sam_prompt_encoder.py @@ -292,7 +292,7 @@ def _maybe_input_mask_embed(): ) dense_embeddings = ops.cond( - ops.equal(ops.size(masks), 0), + ops.equal(ops.shape(masks)[1], 0), _no_mask_embed, _maybe_input_mask_embed, ) diff --git a/keras_hub/src/models/segformer/segformer_image_segmenter_tests.py b/keras_hub/src/models/segformer/segformer_image_segmenter_tests.py index 136351e386..8227399b57 100644 --- a/keras_hub/src/models/segformer/segformer_image_segmenter_tests.py +++ b/keras_hub/src/models/segformer/segformer_image_segmenter_tests.py @@ -72,3 +72,13 @@ def test_saved_model(self): init_kwargs={**self.init_kwargs}, input_data=self.input_data, ) + + @pytest.mark.large + def test_litert_export(self): + self.run_litert_export_test( + cls=SegFormerImageSegmenter, + init_kwargs={**self.init_kwargs}, + input_data=self.input_data, + comparison_mode="statistical", + output_thresholds={"*": {"max": 10.0, "mean": 2.0}}, + ) diff --git a/keras_hub/src/models/siglip/siglip_backbone_test.py b/keras_hub/src/models/siglip/siglip_backbone_test.py index ef3ddd5016..43f191c1f4 100644 --- a/keras_hub/src/models/siglip/siglip_backbone_test.py +++ b/keras_hub/src/models/siglip/siglip_backbone_test.py @@ -45,6 +45,14 @@ def test_saved_model(self): input_data=self.input_data, ) + @pytest.mark.large + def test_litert_export(self): + self.run_litert_export_test( + cls=SigLIPBackbone, + init_kwargs=self.init_kwargs, + input_data=self.input_data, + ) + @pytest.mark.kaggle_key_required @pytest.mark.extra_large def test_smallest_preset(self): @@ -105,6 +113,14 @@ def test_saved_model(self): input_data=self.input_data, ) + @pytest.mark.large + def test_litert_export(self): + self.run_litert_export_test( + cls=SigLIPBackbone, + init_kwargs=self.init_kwargs, + input_data=self.input_data, + ) + @pytest.mark.kaggle_key_required @pytest.mark.extra_large def 
test_smallest_preset(self): diff --git a/keras_hub/src/models/smollm3/smollm3_causal_lm_test.py b/keras_hub/src/models/smollm3/smollm3_causal_lm_test.py index cbf9b3f88e..8ec458fe21 100644 --- a/keras_hub/src/models/smollm3/smollm3_causal_lm_test.py +++ b/keras_hub/src/models/smollm3/smollm3_causal_lm_test.py @@ -122,6 +122,14 @@ def test_saved_model(self): input_data=self.input_data, ) + @pytest.mark.large + def test_litert_export(self): + self.run_litert_export_test( + cls=SmolLM3CausalLM, + init_kwargs=self.init_kwargs, + input_data=self.input_data, + ) + @pytest.mark.extra_large def test_all_presets(self): for preset in SmolLM3CausalLM.presets: diff --git a/keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_text_to_image_test.py b/keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_text_to_image_test.py index 10ba8c5149..0317bcaf79 100644 --- a/keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_text_to_image_test.py +++ b/keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_text_to_image_test.py @@ -196,3 +196,13 @@ def test_saved_model(self): init_kwargs=self.init_kwargs, input_data=self.input_data, ) + + @pytest.mark.large + def test_litert_export(self): + self.run_litert_export_test( + cls=StableDiffusion3TextToImage, + init_kwargs=self.init_kwargs, + input_data=self.input_data, + allow_custom_ops=True, # Allow custom ops like GatherV2, Erfc + enable_select_tf_ops=True, # Enable TensorFlow Select ops + ) diff --git a/keras_hub/src/models/t5/t5_backbone_test.py b/keras_hub/src/models/t5/t5_backbone_test.py index 8a67dcd865..33091e3136 100644 --- a/keras_hub/src/models/t5/t5_backbone_test.py +++ b/keras_hub/src/models/t5/t5_backbone_test.py @@ -40,6 +40,14 @@ def test_saved_model(self): input_data=self.input_data, ) + @pytest.mark.large + def test_litert_export(self): + self.run_litert_export_test( + cls=T5Backbone, + init_kwargs=self.init_kwargs, + input_data=self.input_data, + ) + @pytest.mark.extra_large def 
test_smallest_preset(self): self.run_preset_test( diff --git a/keras_hub/src/models/t5gemma/t5gemma_seq_2_seq_lm_test.py b/keras_hub/src/models/t5gemma/t5gemma_seq_2_seq_lm_test.py index 0a4cb0ef4e..fe258524ad 100644 --- a/keras_hub/src/models/t5gemma/t5gemma_seq_2_seq_lm_test.py +++ b/keras_hub/src/models/t5gemma/t5gemma_seq_2_seq_lm_test.py @@ -156,6 +156,14 @@ def test_saved_model(self): input_data=self.input_data, ) + @pytest.mark.large + def test_litert_export(self): + self.run_litert_export_test( + cls=T5GemmaSeq2SeqLM, + init_kwargs=self.init_kwargs, + input_data=self.input_data, + ) + @pytest.mark.extra_large def test_all_presets(self): for preset in T5GemmaSeq2SeqLM.presets: diff --git a/keras_hub/src/models/vae/vae_backbone_test.py b/keras_hub/src/models/vae/vae_backbone_test.py index f5bd6f27a8..fcf349257c 100644 --- a/keras_hub/src/models/vae/vae_backbone_test.py +++ b/keras_hub/src/models/vae/vae_backbone_test.py @@ -33,3 +33,13 @@ def test_saved_model(self): init_kwargs=self.init_kwargs, input_data=self.input_data, ) + + @pytest.mark.large + def test_litert_export(self): + self.run_litert_export_test( + cls=VAEBackbone, + init_kwargs=self.init_kwargs, + input_data=self.input_data, + comparison_mode="statistical", + output_thresholds={"*": {"max": 2e-3, "mean": 2e-4}}, + ) diff --git a/keras_hub/src/models/vgg/vgg_image_classifier_test.py b/keras_hub/src/models/vgg/vgg_image_classifier_test.py index 16c3fa4453..1f694dbd89 100644 --- a/keras_hub/src/models/vgg/vgg_image_classifier_test.py +++ b/keras_hub/src/models/vgg/vgg_image_classifier_test.py @@ -52,6 +52,14 @@ def test_saved_model(self): input_data=self.images, ) + @pytest.mark.large + def test_litert_export(self): + self.run_litert_export_test( + cls=VGGImageClassifier, + init_kwargs=self.init_kwargs, + input_data=self.images, + ) + @pytest.mark.extra_large def test_all_presets(self): # we need at least 32x32 image resolution here to satisfy the presets' diff --git 
a/keras_hub/src/models/vit/vit_image_classifier_test.py b/keras_hub/src/models/vit/vit_image_classifier_test.py index 1734642bd6..8dfd7a34e2 100644 --- a/keras_hub/src/models/vit/vit_image_classifier_test.py +++ b/keras_hub/src/models/vit/vit_image_classifier_test.py @@ -55,3 +55,11 @@ def test_saved_model(self): init_kwargs=self.init_kwargs, input_data=self.images, ) + + @pytest.mark.large + def test_litert_export(self): + self.run_litert_export_test( + cls=ViTImageClassifier, + init_kwargs=self.init_kwargs, + input_data=self.images, + ) diff --git a/keras_hub/src/models/vit_det/vit_det_backbone_test.py b/keras_hub/src/models/vit_det/vit_det_backbone_test.py index c8e80c0de0..f55a68fc14 100644 --- a/keras_hub/src/models/vit_det/vit_det_backbone_test.py +++ b/keras_hub/src/models/vit_det/vit_det_backbone_test.py @@ -37,3 +37,11 @@ def test_saved_model(self): init_kwargs=self.init_kwargs, input_data=self.input_data, ) + + @pytest.mark.large + def test_litert_export(self): + self.run_litert_export_test( + cls=ViTDetBackbone, + init_kwargs=self.init_kwargs, + input_data=self.input_data, + ) diff --git a/keras_hub/src/models/whisper/whisper_backbone_test.py b/keras_hub/src/models/whisper/whisper_backbone_test.py index 19c129db00..197c62a0ba 100644 --- a/keras_hub/src/models/whisper/whisper_backbone_test.py +++ b/keras_hub/src/models/whisper/whisper_backbone_test.py @@ -60,6 +60,14 @@ def test_saved_model(self): input_data=self.input_data, ) + @pytest.mark.large + def test_litert_export(self): + self.run_litert_export_test( + cls=WhisperBackbone, + init_kwargs=self.init_kwargs, + input_data=self.input_data, + ) + @pytest.mark.extra_large def test_smallest_preset(self): self.run_preset_test( diff --git a/keras_hub/src/models/xception/xception_image_classifier_test.py b/keras_hub/src/models/xception/xception_image_classifier_test.py index c042ecf2d7..a20308fb8a 100644 --- a/keras_hub/src/models/xception/xception_image_classifier_test.py +++ 
b/keras_hub/src/models/xception/xception_image_classifier_test.py @@ -74,6 +74,14 @@ def test_saved_model(self): input_data=self.images, ) + @pytest.mark.large + def test_litert_export(self): + self.run_litert_export_test( + cls=XceptionImageClassifier, + init_kwargs=self.init_kwargs, + input_data=self.images, + ) + @pytest.mark.extra_large def test_all_presets(self): for preset in XceptionImageClassifier.presets: diff --git a/keras_hub/src/models/xlm_roberta/xlm_roberta_text_classifier_test.py b/keras_hub/src/models/xlm_roberta/xlm_roberta_text_classifier_test.py index 386d807917..d56f144f0e 100644 --- a/keras_hub/src/models/xlm_roberta/xlm_roberta_text_classifier_test.py +++ b/keras_hub/src/models/xlm_roberta/xlm_roberta_text_classifier_test.py @@ -64,6 +64,14 @@ def test_saved_model(self): input_data=self.input_data, ) + @pytest.mark.large + def test_litert_export(self): + self.run_litert_export_test( + cls=XLMRobertaTextClassifier, + init_kwargs=self.init_kwargs, + input_data=self.input_data, + ) + @pytest.mark.extra_large def test_all_presets(self): for preset in XLMRobertaTextClassifier.presets: diff --git a/keras_hub/src/models/xlnet/xlnet_backbone_test.py b/keras_hub/src/models/xlnet/xlnet_backbone_test.py index a24ebc38b0..ce4ffad13e 100644 --- a/keras_hub/src/models/xlnet/xlnet_backbone_test.py +++ b/keras_hub/src/models/xlnet/xlnet_backbone_test.py @@ -35,3 +35,11 @@ def test_saved_model(self): init_kwargs=self.init_kwargs, input_data=self.input_data, ) + + @pytest.mark.large + def test_litert_export(self): + self.run_litert_export_test( + cls=XLNetBackbone, + init_kwargs=self.init_kwargs, + input_data=self.input_data, + ) diff --git a/keras_hub/src/tests/test_case.py b/keras_hub/src/tests/test_case.py index 633f32cd5b..0e40486196 100644 --- a/keras_hub/src/tests/test_case.py +++ b/keras_hub/src/tests/test_case.py @@ -1,7 +1,9 @@ +import gc import json import os import pathlib import re +import tempfile import keras import numpy as np @@ -433,6 +435,387 
@@ def run_model_saving_test( restored_output = restored_model(input_data) self.assertAllClose(model_output, restored_output, atol=atol, rtol=rtol) + def _verify_litert_outputs( + self, + keras_output, + litert_output, + sig_outputs, + expected_output_shape=None, + verify_numerics=True, + comparison_mode="strict", + output_thresholds=None, + ): + """Verify LiteRT outputs against expected shape and Keras outputs. + + Args: + keras_output: Keras model output (can be None if not verifying + numerics) + litert_output: LiteRT interpreter output + sig_outputs: Output names from SignatureDef + expected_output_shape: Expected output shape (optional) + verify_numerics: Whether to verify numerical correctness + comparison_mode: "strict" or "statistical" + output_thresholds: Thresholds for statistical comparison + """ + # Handle single output case: if Keras has single output but LiteRT + # returns dict + if ( + not isinstance(keras_output, dict) + and isinstance(litert_output, dict) + and len(litert_output) == 1 + ): + litert_output = list(litert_output.values())[0] + + # Verify output shape if specified + if expected_output_shape is not None: + self.assertEqual(litert_output.shape, expected_output_shape) + + # Verify numerical correctness if requested + if verify_numerics: + self._verify_litert_numerics( + keras_output, + litert_output, + sig_outputs, + output_thresholds, + comparison_mode, + ) + + def _verify_litert_numerics( + self, + keras_output, + litert_output, + sig_outputs, + output_thresholds, + comparison_mode, + ): + """Verify numerical accuracy between Keras and LiteRT outputs. + + This method compares outputs using the SignatureDef output names to + match Keras outputs with LiteRT outputs properly. 
+ + Args: + keras_output: Keras model output (tensor or dict) + litert_output: LiteRT interpreter output (tensor or dict) + sig_outputs: List of output names from SignatureDef + output_thresholds: Dict of thresholds for comparison + comparison_mode: "strict" or "statistical" + """ + if isinstance(keras_output, dict) and isinstance(litert_output, dict): + # Both outputs are dicts - compare using SignatureDef output names + for output_name in sig_outputs: + if output_name not in keras_output: + self.fail( + f"SignatureDef output '{output_name}' not found in " + f"Keras outputs.\n" + f"Keras keys: {list(keras_output.keys())}" + ) + if output_name not in litert_output: + self.fail( + f"SignatureDef output '{output_name}' not found in " + f"LiteRT outputs.\n" + f"LiteRT keys: {list(litert_output.keys())}" + ) + + keras_val_np = ops.convert_to_numpy(keras_output[output_name]) + litert_val = litert_output[output_name] + output_threshold = output_thresholds.get( + output_name, + output_thresholds.get("*", {"max": 10.0, "mean": 0.1}), + ) + self._compare_outputs( + keras_val_np, + litert_val, + comparison_mode, + output_name, + output_threshold["max"], + output_threshold["mean"], + ) + elif not isinstance(keras_output, dict) and not isinstance( + litert_output, dict + ): + # Both outputs are single tensors - direct comparison + keras_output_np = ops.convert_to_numpy(keras_output) + output_threshold = output_thresholds.get( + "*", {"max": 1e-2, "mean": 1e-3} + ) + self._compare_outputs( + keras_output_np, + litert_output, + comparison_mode, + key=None, + max_threshold=output_threshold["max"], + mean_threshold=output_threshold["mean"], + ) + else: + keras_type = type(keras_output).__name__ + litert_type = type(litert_output).__name__ + self.fail( + f"Output structure mismatch: Keras returns " + f"{keras_type}, LiteRT returns {litert_type}" + ) + + def run_litert_export_test( + self, + cls=None, + init_kwargs=None, + input_data=None, + expected_output_shape=None, + model=None, 
+ verify_numerics=True, + # No LiteRT output in model saving test; remove undefined return + output_thresholds=None, + **export_kwargs, + ): + """Export model to LiteRT format and verify outputs. + + Args: + cls: Model class to test (optional if model is provided) + init_kwargs: Initialization arguments for the model (optional + if model is provided) + input_data: Input data to test with (dict or tensor) + expected_output_shape: Expected output shape from LiteRT inference + model: Pre-created model instance (optional, if provided cls and + init_kwargs are ignored) + verify_numerics: Whether to verify numerical correctness + between Keras and LiteRT outputs. Set to False for preset + models with load_weights=False where outputs are random. + comparison_mode: "strict" (default) or "statistical". + - "strict": All elements must be within default tolerances + (1e-6) + - "statistical": Check mean/max absolute differences against + provided thresholds + output_thresholds: Dict mapping output names to threshold dicts + with "max" and "mean" keys. Use "*" as wildcard for defaults. + Example: {"output1": {"max": 1e-4, "mean": 1e-5}, + "*": {"max": 1e-3, "mean": 1e-4}} + **export_kwargs: Additional keyword arguments to pass to + model.export(), such as allow_custom_ops=True or + enable_select_tf_ops=True. 
+ """ + # Extract comparison_mode from export_kwargs if provided + comparison_mode = export_kwargs.pop("comparison_mode", "strict") + if keras.backend.backend() != "tensorflow": + self.skipTest("LiteRT export only supports TensorFlow backend") + + try: + from ai_edge_litert.interpreter import Interpreter + except ImportError: + import tensorflow as tf + + Interpreter = tf.lite.Interpreter + + if output_thresholds is None: + output_thresholds = {"*": {"max": 10.0, "mean": 0.1}} + + if model is None: + if cls is None or init_kwargs is None: + raise ValueError( + "Either 'model' or 'cls' and 'init_kwargs' must be provided" + ) + model = cls(**init_kwargs) + _ = model(input_data) + + interpreter = None + try: + with tempfile.TemporaryDirectory() as temp_dir: + export_path = os.path.join(temp_dir, "model.tflite") + + # Step 1: Export model and get Keras output + model.export(export_path, format="litert", **export_kwargs) + self.assertTrue(os.path.exists(export_path)) + self.assertGreater(os.path.getsize(export_path), 0) + + keras_output = model(input_data) if verify_numerics else None + + # Step 2: Load interpreter and verify SignatureDef + interpreter = Interpreter(model_path=export_path) + signature_defs = interpreter.get_signature_list() + self.assertIn( + "serving_default", + signature_defs, + "Missing serving_default signature", + ) + + serving_sig = signature_defs["serving_default"] + sig_inputs = serving_sig.get("inputs", []) + sig_outputs = serving_sig.get("outputs", []) + + self.assertGreater( + len(sig_inputs), + 0, + "Should have at least one input in SignatureDef", + ) + self.assertGreater( + len(sig_outputs), + 0, + "Should have at least one output in SignatureDef", + ) + + # Verify input signature + if isinstance(input_data, dict): + expected_inputs = set(input_data.keys()) + actual_inputs = set(sig_inputs) + # Check that all expected inputs are in the signature + # (allow signature to have additional optional inputs) + missing_inputs = expected_inputs - 
actual_inputs + if missing_inputs: + self.fail( + f"Missing inputs in SignatureDef: " + f"{sorted(missing_inputs)}. " + f"Expected: {sorted(expected_inputs)}, " + f"SignatureDef has: {sorted(actual_inputs)}" + ) + else: + # For numpy arrays, just verify we have exactly one input + # (since we're passing a single tensor) + if len(sig_inputs) != 1: + self.fail( + "Expected 1 input for numpy array input_data, " + f"but SignatureDef has {len(sig_inputs)}: " + f"{sig_inputs}" + ) + + # Verify output signature + if verify_numerics and isinstance(keras_output, dict): + expected_outputs = set(keras_output.keys()) + actual_outputs = set(sig_outputs) + if expected_outputs != actual_outputs: + self.fail( + f"Output name mismatch: Expected " + f"{sorted(expected_outputs)}, " + f"but SignatureDef has {sorted(actual_outputs)}" + ) + + # Step 3: Run LiteRT inference + os.remove(export_path) + # Simple inference implementation + runner = interpreter.get_signature_runner("serving_default") + + # Convert input data dtypes to match TFLite expectations + def convert_for_tflite(x): + """Convert tensor/array to TFLite-compatible dtypes.""" + if hasattr(x, "dtype"): + if isinstance(x, np.ndarray): + if x.dtype == bool: + return x.astype(np.int32) + elif x.dtype == np.float64: + return x.astype(np.float32) + elif x.dtype == np.int64: + return x.astype(np.int32) + else: # TensorFlow tensor + if x.dtype == tf.bool: + return ops.cast(x, "int32").numpy() + elif x.dtype == tf.float64: + return ops.cast(x, "float32").numpy() + elif x.dtype == tf.int64: + return ops.cast(x, "int32").numpy() + else: + return x.numpy() if hasattr(x, "numpy") else x + elif hasattr(x, "numpy"): + return x.numpy() + return x + + if isinstance(input_data, dict): + converted_input_data = tree.map_structure( + convert_for_tflite, input_data + ) + litert_output = runner(**converted_input_data) + else: + # For single tensor inputs, get the input name + sig_inputs = serving_sig.get("inputs", []) + input_name = sig_inputs[ 
+ 0 + ] # We verified len(sig_inputs) == 1 above + converted_input = convert_for_tflite(input_data) + litert_output = runner(**{input_name: converted_input}) + + # Step 4: Verify outputs + self._verify_litert_outputs( + keras_output, + litert_output, + sig_outputs, + expected_output_shape=expected_output_shape, + verify_numerics=verify_numerics, + comparison_mode=comparison_mode, + output_thresholds=output_thresholds, + ) + finally: + if interpreter is not None: + del interpreter + if model is not None and cls is not None: + del model + gc.collect() + + def _compare_outputs( + self, + keras_val, + litert_val, + comparison_mode, + key=None, + max_threshold=10.0, + mean_threshold=0.1, + ): + """Compare Keras and LiteRT outputs using specified comparison mode. + + Args: + keras_val: Keras model output (numpy array) + litert_val: LiteRT model output (numpy array) + comparison_mode: "strict" or "statistical" + key: Output key name for error messages (optional) + max_threshold: Maximum absolute difference threshold for statistical + mode + mean_threshold: Mean absolute difference threshold for statistical + mode + """ + key_msg = f" for output key '{key}'" if key else "" + + # Check if shapes are compatible for comparison + self.assertEqual( + keras_val.shape, + litert_val.shape, + f"Shape mismatch{key_msg}: Keras shape " + f"{keras_val.shape}, LiteRT shape {litert_val.shape}. 
" + "Numerical comparison cannot proceed due to incompatible shapes.", + ) + + if comparison_mode == "strict": + # Original strict element-wise comparison with default tolerances + self.assertAllClose( + keras_val, + litert_val, + atol=1e-6, + rtol=1e-6, + msg=f"Mismatch{key_msg}", + ) + elif comparison_mode == "statistical": + # Statistical comparison + + # Calculate element-wise absolute differences + abs_diff = np.abs(keras_val - litert_val) + + # Element-wise statistics + mean_abs_diff = np.mean(abs_diff) + max_abs_diff = np.max(abs_diff) + + # Assert reasonable bounds on statistical differences + self.assertLessEqual( + mean_abs_diff, + mean_threshold, + f"Mean absolute difference too high: {mean_abs_diff:.6e}" + f"{key_msg} (threshold: {mean_threshold})", + ) + self.assertLessEqual( + max_abs_diff, + max_threshold, + f"Max absolute difference too high: {max_abs_diff:.6e}" + f"{key_msg} (threshold: {max_threshold})", + ) + else: + raise ValueError( + f"Unknown comparison_mode: {comparison_mode}. Must be " + "'strict' or 'statistical'" + ) + def run_backbone_test( self, cls, diff --git a/requirements-common.txt b/requirements-common.txt index a258d1cd85..fd81373b22 100644 --- a/requirements-common.txt +++ b/requirements-common.txt @@ -20,3 +20,4 @@ safetensors pillow openvino transformers +ai-edge-litert \ No newline at end of file