diff --git a/keras_hub/src/models/albert/albert_text_classifier_test.py b/keras_hub/src/models/albert/albert_text_classifier_test.py
index 3d6413ff99..d9ab9c70d0 100644
--- a/keras_hub/src/models/albert/albert_text_classifier_test.py
+++ b/keras_hub/src/models/albert/albert_text_classifier_test.py
@@ -61,6 +61,14 @@ def test_saved_model(self):
             input_data=self.input_data,
         )
 
+    @pytest.mark.large
+    def test_litert_export(self):
+        self.run_litert_export_test(
+            cls=AlbertTextClassifier,
+            init_kwargs=self.init_kwargs,
+            input_data=self.input_data,
+        )
+
     @pytest.mark.extra_large
     def test_all_presets(self):
         for preset in AlbertTextClassifier.presets:
diff --git a/keras_hub/src/models/bart/bart_seq_2_seq_lm_test.py b/keras_hub/src/models/bart/bart_seq_2_seq_lm_test.py
index f525908b67..983b71610f 100644
--- a/keras_hub/src/models/bart/bart_seq_2_seq_lm_test.py
+++ b/keras_hub/src/models/bart/bart_seq_2_seq_lm_test.py
@@ -149,6 +149,14 @@ def test_saved_model(self):
             input_data=self.input_data,
         )
 
+    @pytest.mark.large
+    def test_litert_export(self):
+        self.run_litert_export_test(
+            cls=BartSeq2SeqLM,
+            init_kwargs=self.init_kwargs,
+            input_data=self.input_data,
+        )
+
     @pytest.mark.extra_large
     def test_all_presets(self):
         for preset in BartSeq2SeqLM.presets:
diff --git a/keras_hub/src/models/basnet/basnet_test.py b/keras_hub/src/models/basnet/basnet_test.py
index b5bbe405e2..7af901ffd8 100644
--- a/keras_hub/src/models/basnet/basnet_test.py
+++ b/keras_hub/src/models/basnet/basnet_test.py
@@ -3,6 +3,9 @@
 
 from keras_hub.src.models.basnet.basnet import BASNetImageSegmenter
 from keras_hub.src.models.basnet.basnet_backbone import BASNetBackbone
+from keras_hub.src.models.basnet.basnet_image_converter import (
+    BASNetImageConverter,
+)
 from keras_hub.src.models.basnet.basnet_preprocessor import BASNetPreprocessor
 from keras_hub.src.models.resnet.resnet_backbone import ResNetBackbone
 from keras_hub.src.tests.test_case import TestCase
@@ -26,7 +29,9 @@ def setUp(self):
             image_encoder=self.image_encoder,
             num_classes=1,
         )
-        self.preprocessor = BASNetPreprocessor()
+        self.preprocessor = BASNetPreprocessor(
+            image_converter=BASNetImageConverter(height=64, width=64)
+        )
         self.init_kwargs = {
             "backbone": self.backbone,
             "preprocessor": self.preprocessor,
@@ -49,6 +54,14 @@ def test_saved_model(self):
             input_data=self.images,
         )
 
+    @pytest.mark.large
+    def test_litert_export(self):
+        self.run_litert_export_test(
+            cls=BASNetImageSegmenter,
+            init_kwargs=self.init_kwargs,
+            input_data=self.images,
+        )
+
     def test_end_to_end_model_predict(self):
         model = BASNetImageSegmenter(**self.init_kwargs)
         output = model.predict(self.images)
diff --git a/keras_hub/src/models/bert/bert_text_classifier_test.py b/keras_hub/src/models/bert/bert_text_classifier_test.py
index 606be7c839..2aacfa53d6 100644
--- a/keras_hub/src/models/bert/bert_text_classifier_test.py
+++ b/keras_hub/src/models/bert/bert_text_classifier_test.py
@@ -53,6 +53,14 @@ def test_saved_model(self):
             input_data=self.input_data,
         )
 
+    @pytest.mark.large
+    def test_litert_export(self):
+        self.run_litert_export_test(
+            cls=BertTextClassifier,
+            init_kwargs=self.init_kwargs,
+            input_data=self.input_data,
+        )
+
     @pytest.mark.extra_large
     def test_smallest_preset(self):
         self.run_preset_test(
diff --git a/keras_hub/src/models/bloom/bloom_causal_lm_test.py b/keras_hub/src/models/bloom/bloom_causal_lm_test.py
index ada3d8eeb1..c6fc6de3e9 100644
--- a/keras_hub/src/models/bloom/bloom_causal_lm_test.py
+++ b/keras_hub/src/models/bloom/bloom_causal_lm_test.py
@@ -164,6 +164,14 @@ def test_saved_model(self):
             input_data=self.input_data,
         )
 
+    @pytest.mark.large
+    def test_litert_export(self):
+        self.run_litert_export_test(
+            cls=BloomCausalLM,
+            init_kwargs=self.init_kwargs,
+            input_data=self.input_data,
+        )
+
     @pytest.mark.extra_large
     def test_all_presets(self):
         for preset in BloomCausalLM.presets:
diff --git a/keras_hub/src/models/clip/clip_backbone_test.py b/keras_hub/src/models/clip/clip_backbone_test.py
index d24b8cc553..8e0d224699 100644
--- a/keras_hub/src/models/clip/clip_backbone_test.py
+++ b/keras_hub/src/models/clip/clip_backbone_test.py
@@ -44,6 +44,16 @@ def test_saved_model(self):
             input_data=self.input_data,
         )
 
+    @pytest.mark.large
+    def test_litert_export(self):
+        self.run_litert_export_test(
+            cls=CLIPBackbone,
+            init_kwargs=self.init_kwargs,
+            input_data=self.input_data,
+            comparison_mode="statistical",
+            output_thresholds={"*": {"max": 1e-4, "mean": 1e-5}},
+        )
+
     @pytest.mark.extra_large
     def test_all_presets(self):
         for preset in CLIPBackbone.presets:
diff --git a/keras_hub/src/models/cspnet/cspnet_image_classifier_test.py b/keras_hub/src/models/cspnet/cspnet_image_classifier_test.py
index 6a5ee517e1..016d6ad478 100644
--- a/keras_hub/src/models/cspnet/cspnet_image_classifier_test.py
+++ b/keras_hub/src/models/cspnet/cspnet_image_classifier_test.py
@@ -76,3 +76,11 @@ def test_saved_model(self):
             init_kwargs=self.init_kwargs,
             input_data=self.images,
         )
+
+    @pytest.mark.large
+    def test_litert_export(self):
+        self.run_litert_export_test(
+            cls=CSPNetImageClassifier,
+            init_kwargs=self.init_kwargs,
+            input_data=self.images,
+        )
diff --git a/keras_hub/src/models/d_fine/d_fine_object_detector_test.py b/keras_hub/src/models/d_fine/d_fine_object_detector_test.py
index 3b3bfe14c0..414701cd0b 100644
--- a/keras_hub/src/models/d_fine/d_fine_object_detector_test.py
+++ b/keras_hub/src/models/d_fine/d_fine_object_detector_test.py
@@ -152,3 +152,30 @@ def test_saved_model(self):
             init_kwargs=init_kwargs,
             input_data=self.images,
         )
+
+    @pytest.mark.large
+    def test_litert_export(self):
+        backbone = DFineBackbone(**self.base_backbone_kwargs)
+        init_kwargs = {
+            "backbone": backbone,
+            "num_classes": 4,
+            "bounding_box_format": self.bounding_box_format,
+            "preprocessor": self.preprocessor,
+        }
+
+        # D-Fine ObjectDetector only takes images as input
+        input_data = self.images
+
+        self.run_litert_export_test(
+            cls=DFineObjectDetector,
+            init_kwargs=init_kwargs,
+            input_data=input_data,
+            comparison_mode="statistical",
+            output_thresholds={
+                "intermediate_predicted_corners": {"max": 5.0, "mean": 0.05},
+                "intermediate_logits": {"max": 5.0, "mean": 0.1},
+                "enc_topk_logits": {"max": 5.0, "mean": 0.03},
+                "logits": {"max": 2.0, "mean": 0.03},
+                "*": {"max": 1.0, "mean": 0.03},
+            },
+        )
diff --git a/keras_hub/src/models/deberta_v3/deberta_v3_text_classifier_test.py b/keras_hub/src/models/deberta_v3/deberta_v3_text_classifier_test.py
index 11f3d139ee..3f443ae366 100644
--- a/keras_hub/src/models/deberta_v3/deberta_v3_text_classifier_test.py
+++ b/keras_hub/src/models/deberta_v3/deberta_v3_text_classifier_test.py
@@ -64,6 +64,14 @@ def test_saved_model(self):
             input_data=self.input_data,
         )
 
+    @pytest.mark.large
+    def test_litert_export(self):
+        self.run_litert_export_test(
+            cls=DebertaV3TextClassifier,
+            init_kwargs=self.init_kwargs,
+            input_data=self.input_data,
+        )
+
     @pytest.mark.extra_large
     def test_all_presets(self):
         for preset in DebertaV3TextClassifier.presets:
diff --git a/keras_hub/src/models/deeplab_v3/deeplab_v3_segmenter_test.py b/keras_hub/src/models/deeplab_v3/deeplab_v3_segmenter_test.py
index 065bed3caa..0062f7b671 100644
--- a/keras_hub/src/models/deeplab_v3/deeplab_v3_segmenter_test.py
+++ b/keras_hub/src/models/deeplab_v3/deeplab_v3_segmenter_test.py
@@ -70,3 +70,15 @@ def test_saved_model(self):
             init_kwargs=self.init_kwargs,
             input_data=self.images,
         )
+
+    @pytest.mark.large
+    def test_litert_export(self):
+        self.run_litert_export_test(
+            cls=DeepLabV3ImageSegmenter,
+            init_kwargs=self.init_kwargs,
+            input_data=self.images,
+            comparison_mode="statistical",
+            output_thresholds={
+                "*": {"max": 0.6, "mean": 0.3},
+            },
+        )
diff --git a/keras_hub/src/models/deit/deit_image_classifier_test.py b/keras_hub/src/models/deit/deit_image_classifier_test.py
index d64a956cdc..5c784ccf19 100644
--- a/keras_hub/src/models/deit/deit_image_classifier_test.py
+++ b/keras_hub/src/models/deit/deit_image_classifier_test.py
@@ -55,3 +55,11 @@ def test_saved_model(self):
             init_kwargs=self.init_kwargs,
             input_data=self.images,
         )
+
+    @pytest.mark.large
+    def test_litert_export(self):
+        self.run_litert_export_test(
+            cls=DeiTImageClassifier,
+            init_kwargs=self.init_kwargs,
+            input_data=self.images,
+        )
diff --git a/keras_hub/src/models/densenet/densenet_image_classifier_test.py b/keras_hub/src/models/densenet/densenet_image_classifier_test.py
index 481005ba7e..18d622d79c 100644
--- a/keras_hub/src/models/densenet/densenet_image_classifier_test.py
+++ b/keras_hub/src/models/densenet/densenet_image_classifier_test.py
@@ -61,6 +61,14 @@ def test_saved_model(self):
             input_data=self.images,
         )
 
+    @pytest.mark.large
+    def test_litert_export(self):
+        self.run_litert_export_test(
+            cls=DenseNetImageClassifier,
+            init_kwargs=self.init_kwargs,
+            input_data=self.images,
+        )
+
     @pytest.mark.extra_large
     def test_all_presets(self):
         for preset in DenseNetImageClassifier.presets:
diff --git a/keras_hub/src/models/depth_anything/depth_anything_depth_estimator_test.py b/keras_hub/src/models/depth_anything/depth_anything_depth_estimator_test.py
index 6277078488..5fedad8131 100644
--- a/keras_hub/src/models/depth_anything/depth_anything_depth_estimator_test.py
+++ b/keras_hub/src/models/depth_anything/depth_anything_depth_estimator_test.py
@@ -85,6 +85,16 @@ def test_saved_model(self):
             input_data=self.images,
         )
 
+    @pytest.mark.large
+    def test_litert_export(self):
+        self.run_litert_export_test(
+            cls=DepthAnythingDepthEstimator,
+            init_kwargs=self.init_kwargs,
+            input_data=self.images,
+            comparison_mode="statistical",
+            output_thresholds={"depths": {"max": 2e-4, "mean": 1e-5}},
+        )
+
     @pytest.mark.extra_large
     def test_all_presets(self):
         images = np.ones((2, 518, 518, 3), dtype="float32")
diff --git a/keras_hub/src/models/dinov2/dinov2_backbone_test.py b/keras_hub/src/models/dinov2/dinov2_backbone_test.py
index ca4edcafc0..a14626a024 100644
--- a/keras_hub/src/models/dinov2/dinov2_backbone_test.py
+++ b/keras_hub/src/models/dinov2/dinov2_backbone_test.py
@@ -49,6 +49,16 @@ def test_saved_model(self):
             input_data=self.input_data,
         )
 
+    @pytest.mark.large
+    def test_litert_export(self):
+        self.run_litert_export_test(
+            cls=DINOV2Backbone,
+            init_kwargs=self.init_kwargs,
+            input_data=self.input_data,
+            comparison_mode="statistical",
+            output_thresholds={"*": {"max": 1e-4, "mean": 1e-5}},
+        )
+
     @pytest.mark.large
     def test_position_embedding_interpolation(self):
         model = DINOV2Backbone(**self.init_kwargs)
@@ -144,6 +154,16 @@ def test_saved_model(self):
             input_data=self.input_data,
         )
 
+    @pytest.mark.large
+    def test_litert_export(self):
+        self.run_litert_export_test(
+            cls=DINOV2Backbone,
+            init_kwargs=self.init_kwargs,
+            input_data=self.input_data,
+            comparison_mode="statistical",
+            output_thresholds={"*": {"max": 1e-4, "mean": 1e-5}},
+        )
+
     @pytest.mark.kaggle_key_required
     @pytest.mark.extra_large
     def test_smallest_preset(self):
diff --git a/keras_hub/src/models/dinov3/dinov3_backbone_test.py b/keras_hub/src/models/dinov3/dinov3_backbone_test.py
index b8fdd9a0c6..c899e932ce 100644
--- a/keras_hub/src/models/dinov3/dinov3_backbone_test.py
+++ b/keras_hub/src/models/dinov3/dinov3_backbone_test.py
@@ -52,6 +52,14 @@ def test_saved_model(self):
             input_data=self.input_data,
         )
 
+    @pytest.mark.large
+    def test_litert_export(self):
+        self.run_litert_export_test(
+            cls=DINOV3Backbone,
+            init_kwargs=self.init_kwargs,
+            input_data=self.input_data,
+        )
+
     @pytest.mark.large
     def test_position_embedding_interpolation(self):
         model = DINOV3Backbone(**self.init_kwargs)
diff --git a/keras_hub/src/models/distil_bert/distil_bert_text_classifier_test.py b/keras_hub/src/models/distil_bert/distil_bert_text_classifier_test.py
index 71fdfc52b4..db57d21d0e 100644
--- a/keras_hub/src/models/distil_bert/distil_bert_text_classifier_test.py
+++ b/keras_hub/src/models/distil_bert/distil_bert_text_classifier_test.py
@@ -59,6 +59,14 @@ def test_saved_model(self):
             input_data=self.input_data,
         )
 
+    @pytest.mark.large
+    def test_litert_export(self):
+        self.run_litert_export_test(
+            cls=DistilBertTextClassifier,
+            init_kwargs=self.init_kwargs,
+            input_data=self.input_data,
+        )
+
     @pytest.mark.extra_large
     def test_all_presets(self):
         for preset in DistilBertTextClassifier.presets:
diff --git a/keras_hub/src/models/efficientnet/efficientnet_image_classifier_test.py b/keras_hub/src/models/efficientnet/efficientnet_image_classifier_test.py
index d2b5717b68..18f13e5505 100644
--- a/keras_hub/src/models/efficientnet/efficientnet_image_classifier_test.py
+++ b/keras_hub/src/models/efficientnet/efficientnet_image_classifier_test.py
@@ -7,6 +7,12 @@
 from keras_hub.src.models.efficientnet.efficientnet_image_classifier import (
     EfficientNetImageClassifier,
 )
+from keras_hub.src.models.efficientnet.efficientnet_image_classifier_preprocessor import (  # noqa: E501
+    EfficientNetImageClassifierPreprocessor,
+)
+from keras_hub.src.models.efficientnet.efficientnet_image_converter import (
+    EfficientNetImageConverter,
+)
 from keras_hub.src.tests.test_case import TestCase
 
 
@@ -38,6 +44,9 @@ def setUp(self):
         self.init_kwargs = {
             "backbone": backbone,
             "num_classes": 1000,
+            "preprocessor": EfficientNetImageClassifierPreprocessor(
+                image_converter=EfficientNetImageConverter(image_size=(16, 16))
+            ),
         }
         self.train_data = (self.images, self.labels)
 
@@ -82,3 +91,11 @@ def test_all_presets(self):
                 input_data=self.images,
                 expected_output_shape=(2, 2),
             )
+
+    @pytest.mark.large
+    def test_litert_export(self):
+        self.run_litert_export_test(
+            cls=EfficientNetImageClassifier,
+            init_kwargs=self.init_kwargs,
+            input_data=self.images,
+        )
diff --git a/keras_hub/src/models/electra/electra_backbone_test.py b/keras_hub/src/models/electra/electra_backbone_test.py
index de6416bd7e..a0240c5ed0 100644
--- a/keras_hub/src/models/electra/electra_backbone_test.py
+++ b/keras_hub/src/models/electra/electra_backbone_test.py
@@ -41,6 +41,14 @@ def test_saved_model(self):
             input_data=self.input_data,
         )
 
+    @pytest.mark.large
+    def test_litert_export(self):
+        self.run_litert_export_test(
+            cls=ElectraBackbone,
+            init_kwargs=self.init_kwargs,
+            input_data=self.input_data,
+        )
+
     @pytest.mark.extra_large
     def test_smallest_preset(self):
         self.run_preset_test(
diff --git a/keras_hub/src/models/esm/esm_classifier_test.py b/keras_hub/src/models/esm/esm_classifier_test.py
index 8eeec2b40d..58103a448e 100644
--- a/keras_hub/src/models/esm/esm_classifier_test.py
+++ b/keras_hub/src/models/esm/esm_classifier_test.py
@@ -51,3 +51,11 @@ def test_saved_model(self):
             init_kwargs=self.init_kwargs,
             input_data=self.input_data,
         )
+
+    @pytest.mark.large
+    def test_litert_export(self):
+        self.run_litert_export_test(
+            cls=ESMProteinClassifier,
+            init_kwargs=self.init_kwargs,
+            input_data=self.input_data,
+        )
diff --git a/keras_hub/src/models/f_net/f_net_text_classifier_test.py b/keras_hub/src/models/f_net/f_net_text_classifier_test.py
index 4658e795f6..ceaae7944c 100644
--- a/keras_hub/src/models/f_net/f_net_text_classifier_test.py
+++ b/keras_hub/src/models/f_net/f_net_text_classifier_test.py
@@ -57,6 +57,20 @@ def test_saved_model(self):
             input_data=self.input_data,
         )
 
+    @pytest.mark.large
+    def test_litert_export(self):
+        # F-Net does NOT use padding_mask - it only uses token_ids and
+        # segment_ids. Don't add padding_mask to input_data.
+        self.run_litert_export_test(
+            cls=FNetTextClassifier,
+            init_kwargs=self.init_kwargs,
+            input_data=self.input_data,
+            comparison_mode="statistical",
+            output_thresholds={
+                "*": {"max": 0.01, "mean": 0.005},
+            },
+        )
+
     @pytest.mark.extra_large
     def test_all_presets(self):
         for preset in FNetTextClassifier.presets:
diff --git a/keras_hub/src/models/falcon/falcon_causal_lm_test.py b/keras_hub/src/models/falcon/falcon_causal_lm_test.py
index 393f8a8e97..c8b699b818 100644
--- a/keras_hub/src/models/falcon/falcon_causal_lm_test.py
+++ b/keras_hub/src/models/falcon/falcon_causal_lm_test.py
@@ -164,6 +164,14 @@ def test_saved_model(self):
             input_data=self.input_data,
         )
 
+    @pytest.mark.large
+    def test_litert_export(self):
+        self.run_litert_export_test(
+            cls=FalconCausalLM,
+            init_kwargs=self.init_kwargs,
+            input_data=self.input_data,
+        )
+
     @pytest.mark.extra_large
     def test_all_presets(self):
         for preset in FalconCausalLM.presets:
diff --git a/keras_hub/src/models/flux/flux_backbone_test.py b/keras_hub/src/models/flux/flux_backbone_test.py
index 5a15e3b7f0..322bfd81a3 100644
--- a/keras_hub/src/models/flux/flux_backbone_test.py
+++ b/keras_hub/src/models/flux/flux_backbone_test.py
@@ -71,3 +71,13 @@ def test_saved_model(self):
             init_kwargs=self.init_kwargs,
             input_data=self.input_data,
         )
+
+    @pytest.mark.large
+    def test_litert_export(self):
+        self.run_litert_export_test(
+            cls=FluxBackbone,
+            init_kwargs=self.init_kwargs,
+            input_data=self.input_data,
+            comparison_mode="statistical",
+            output_thresholds={"*": {"max": 1e-4, "mean": 1e-5}},
+        )
diff --git a/keras_hub/src/models/gemma/gemma_causal_lm_test.py b/keras_hub/src/models/gemma/gemma_causal_lm_test.py
index 0581d42ffe..98c696c82f 100644
--- a/keras_hub/src/models/gemma/gemma_causal_lm_test.py
+++ b/keras_hub/src/models/gemma/gemma_causal_lm_test.py
@@ -201,6 +201,32 @@ def test_saved_model(self):
             input_data=self.input_data,
         )
 
+    @pytest.mark.large
+    def test_litert_export(self):
+        """Test LiteRT export for GemmaCausalLM with small test model."""
+        model = GemmaCausalLM(**self.init_kwargs)
+
+        # Copy the fixture; cast its bool padding_mask to int32 for LiteRT.
+        input_data = self.input_data.copy()
+        if "padding_mask" in input_data:
+            input_data["padding_mask"] = ops.cast(
+                input_data["padding_mask"], "int32"
+            )
+
+        expected_output_shape = (
+            2,  # presumably the batch size; confirm against self.input_data
+            8,  # presumably the sequence length
+            self.preprocessor.tokenizer.vocabulary_size(),
+        )
+
+        self.run_litert_export_test(
+            model=model,
+            input_data=input_data,
+            expected_output_shape=expected_output_shape,
+            comparison_mode="statistical",
+            output_thresholds={"*": {"max": 1e-3, "mean": 1e-5}},
+        )
+
     @pytest.mark.kaggle_key_required
     @pytest.mark.extra_large
     def test_all_presets(self):
diff --git a/keras_hub/src/models/gemma3/gemma3_causal_lm_test.py b/keras_hub/src/models/gemma3/gemma3_causal_lm_test.py
index ad37403752..0054cdb6b9 100644
--- a/keras_hub/src/models/gemma3/gemma3_causal_lm_test.py
+++ b/keras_hub/src/models/gemma3/gemma3_causal_lm_test.py
@@ -226,6 +226,63 @@ def test_saved_model(self, modality_type):
             input_data=input_data,
         )
 
+    @pytest.mark.large
+    def test_litert_export(self):
+        """Test LiteRT export for Gemma3CausalLM with small test model."""
+        # Use the small text-only model for fast testing
+        model = Gemma3CausalLM(**self.text_init_kwargs)
+
+        # Copy the text fixture so the cast below leaves it untouched
+        input_data = self.text_input_data.copy()
+        # Convert boolean padding_mask to int32 for LiteRT compatibility
+        if "padding_mask" in input_data:
+            input_data["padding_mask"] = ops.cast(
+                input_data["padding_mask"], "int32"
+            )
+
+        expected_output_shape = (
+            2,  # presumably the batch size; confirm against the fixture
+            20,  # presumably the sequence length
+            self.text_preprocessor.tokenizer.vocabulary_size(),
+        )
+
+        self.run_litert_export_test(
+            model=model,
+            input_data=input_data,
+            expected_output_shape=expected_output_shape,
+            comparison_mode="statistical",
+            output_thresholds={"*": {"max": 1e-2, "mean": 1e-4}},
+        )
+
+    @pytest.mark.large
+    def test_litert_export_multimodal(self):
+        """Test LiteRT export for multimodal Gemma3CausalLM with small test
+        model."""
+        # Use the small multimodal model for testing
+        model = Gemma3CausalLM(**self.init_kwargs)
+
+        # Copy the multimodal fixture so the cast below leaves it untouched
+        input_data = self.input_data.copy()
+        # Convert boolean padding_mask to int32 for LiteRT compatibility
+        if "padding_mask" in input_data:
+            input_data["padding_mask"] = ops.cast(
+                input_data["padding_mask"], "int32"
+            )
+
+        expected_output_shape = (
+            2,  # presumably the batch size; confirm against self.input_data
+            20,  # presumably the sequence length
+            self.preprocessor.tokenizer.vocabulary_size(),
+        )
+
+        self.run_litert_export_test(
+            model=model,
+            input_data=input_data,
+            expected_output_shape=expected_output_shape,
+            comparison_mode="statistical",
+            output_thresholds={"*": {"max": 1e-2, "mean": 1e-4}},
+        )
+
     @pytest.mark.kaggle_key_required
     @pytest.mark.extra_large
     def test_all_presets(self):
diff --git a/keras_hub/src/models/gpt2/gpt2_causal_lm_test.py b/keras_hub/src/models/gpt2/gpt2_causal_lm_test.py
index ec3d829047..231ba1f8ee 100644
--- a/keras_hub/src/models/gpt2/gpt2_causal_lm_test.py
+++ b/keras_hub/src/models/gpt2/gpt2_causal_lm_test.py
@@ -107,6 +107,32 @@ def test_saved_model(self):
             input_data=self.input_data,
         )
 
+    @pytest.mark.large
+    def test_litert_export(self):
+        """Test LiteRT export for GPT2CausalLM with small test model."""
+        model = GPT2CausalLM(**self.init_kwargs)
+
+        # Copy the fixture; cast its bool padding_mask to int32 for LiteRT.
+        input_data = self.input_data.copy()
+        if "padding_mask" in input_data:
+            input_data["padding_mask"] = ops.cast(
+                input_data["padding_mask"], "int32"
+            )
+
+        expected_output_shape = (
+            2,  # presumably the batch size; confirm against self.input_data
+            8,  # presumably the sequence length
+            self.preprocessor.tokenizer.vocabulary_size(),
+        )
+
+        self.run_litert_export_test(
+            model=model,
+            input_data=input_data,
+            expected_output_shape=expected_output_shape,
+            comparison_mode="statistical",
+            output_thresholds={"*": {"max": 1e-3, "mean": 1e-5}},
+        )
+
     @pytest.mark.extra_large
     def test_all_presets(self):
         for preset in GPT2CausalLM.presets:
diff --git a/keras_hub/src/models/gpt_neo_x/gpt_neo_x_causal_lm_test.py b/keras_hub/src/models/gpt_neo_x/gpt_neo_x_causal_lm_test.py
index f66c748b9e..4ba327886a 100644
--- a/keras_hub/src/models/gpt_neo_x/gpt_neo_x_causal_lm_test.py
+++ b/keras_hub/src/models/gpt_neo_x/gpt_neo_x_causal_lm_test.py
@@ -105,3 +105,15 @@ def test_saved_model(self):
             init_kwargs=self.init_kwargs,
             input_data=self.input_data,
         )
+
+    @pytest.mark.large
+    def test_litert_export(self):
+        self.run_litert_export_test(
+            cls=GPTNeoXCausalLM,
+            init_kwargs=self.init_kwargs,
+            input_data=self.input_data,
+            # More lenient thresholds for numerical differences, using the
+            # per-output {"*": {...}} layout with statistical comparison.
+            comparison_mode="statistical",
+            output_thresholds={"*": {"max": 1e-3, "mean": 1e-4}},
+        )
diff --git a/keras_hub/src/models/hgnetv2/hgnetv2_image_classifier_test.py b/keras_hub/src/models/hgnetv2/hgnetv2_image_classifier_test.py
index f294a23b72..8eb16b3cad 100644
--- a/keras_hub/src/models/hgnetv2/hgnetv2_image_classifier_test.py
+++ b/keras_hub/src/models/hgnetv2/hgnetv2_image_classifier_test.py
@@ -89,3 +89,11 @@ def test_saved_model(self):
             init_kwargs=self.init_kwargs,
             input_data=self.images,
         )
+
+    @pytest.mark.large
+    def test_litert_export(self):
+        self.run_litert_export_test(
+            cls=HGNetV2ImageClassifier,
+            init_kwargs=self.init_kwargs,
+            input_data=self.images,
+        )
diff --git a/keras_hub/src/models/llama/llama_causal_lm_test.py b/keras_hub/src/models/llama/llama_causal_lm_test.py
index 1ff5a3a987..681ae1da83 100644
--- a/keras_hub/src/models/llama/llama_causal_lm_test.py
+++ b/keras_hub/src/models/llama/llama_causal_lm_test.py
@@ -106,6 +106,14 @@ def test_saved_model(self):
             input_data=self.input_data,
         )
 
+    @pytest.mark.large
+    def test_litert_export(self):
+        self.run_litert_export_test(
+            cls=LlamaCausalLM,
+            init_kwargs=self.init_kwargs,
+            input_data=self.input_data,
+        )
+
     @pytest.mark.extra_large
     def test_all_presets(self):
         for preset in LlamaCausalLM.presets:
diff --git a/keras_hub/src/models/llama3/llama3_causal_lm_test.py b/keras_hub/src/models/llama3/llama3_causal_lm_test.py
index a054b8ae14..6c6386d740 100644
--- a/keras_hub/src/models/llama3/llama3_causal_lm_test.py
+++ b/keras_hub/src/models/llama3/llama3_causal_lm_test.py
@@ -114,6 +114,32 @@ def test_saved_model(self):
             input_data=self.input_data,
         )
 
+    @pytest.mark.large
+    def test_litert_export(self):
+        """Test LiteRT export for Llama3CausalLM with small test model."""
+        model = Llama3CausalLM(**self.init_kwargs)
+
+        # Copy the fixture; cast its bool padding_mask to int32 for LiteRT.
+        input_data = self.input_data.copy()
+        if "padding_mask" in input_data:
+            input_data["padding_mask"] = ops.cast(
+                input_data["padding_mask"], "int32"
+            )
+
+        expected_output_shape = (
+            2,  # presumably the batch size; confirm against self.input_data
+            7,  # presumably the sequence length
+            self.preprocessor.tokenizer.vocabulary_size(),
+        )
+
+        self.run_litert_export_test(
+            model=model,
+            input_data=input_data,
+            expected_output_shape=expected_output_shape,
+            comparison_mode="statistical",
+            output_thresholds={"*": {"max": 1e-3, "mean": 1e-5}},
+        )
+
     @pytest.mark.extra_large
     def test_all_presets(self):
         for preset in Llama3CausalLM.presets:
diff --git a/keras_hub/src/models/mistral/mistral_causal_lm_test.py b/keras_hub/src/models/mistral/mistral_causal_lm_test.py
index c682c2866d..f5d9c82ff5 100644
--- a/keras_hub/src/models/mistral/mistral_causal_lm_test.py
+++ b/keras_hub/src/models/mistral/mistral_causal_lm_test.py
@@ -106,6 +106,32 @@ def test_saved_model(self):
             input_data=self.input_data,
         )
 
+    @pytest.mark.large
+    def test_litert_export(self):
+        """Test LiteRT export for MistralCausalLM with small test model."""
+        model = MistralCausalLM(**self.init_kwargs)
+
+        # Copy the fixture; cast its bool padding_mask to int32 for LiteRT.
+        input_data = self.input_data.copy()
+        if "padding_mask" in input_data:
+            input_data["padding_mask"] = ops.cast(
+                input_data["padding_mask"], "int32"
+            )
+
+        expected_output_shape = (
+            2,  # presumably the batch size; confirm against self.input_data
+            8,  # presumably the sequence length
+            self.preprocessor.tokenizer.vocabulary_size(),
+        )
+
+        self.run_litert_export_test(
+            model=model,
+            input_data=input_data,
+            expected_output_shape=expected_output_shape,
+            comparison_mode="statistical",
+            output_thresholds={"*": {"max": 1e-3, "mean": 1e-5}},
+        )
+
     @pytest.mark.extra_large
     def test_all_presets(self):
         for preset in MistralCausalLM.presets:
diff --git a/keras_hub/src/models/mit/mit_image_classifier_test.py b/keras_hub/src/models/mit/mit_image_classifier_test.py
index c63a456311..4203ccda42 100644
--- a/keras_hub/src/models/mit/mit_image_classifier_test.py
+++ b/keras_hub/src/models/mit/mit_image_classifier_test.py
@@ -50,3 +50,11 @@ def test_saved_model(self):
             init_kwargs=self.init_kwargs,
             input_data=self.images,
         )
+
+    @pytest.mark.large
+    def test_litert_export(self):
+        self.run_litert_export_test(
+            cls=MiTImageClassifier,
+            init_kwargs=self.init_kwargs,
+            input_data=self.images,
+        )
diff --git a/keras_hub/src/models/mixtral/mixtral_causal_lm_test.py b/keras_hub/src/models/mixtral/mixtral_causal_lm_test.py
index a711a06b0e..6417c068a2 100644
--- a/keras_hub/src/models/mixtral/mixtral_causal_lm_test.py
+++ b/keras_hub/src/models/mixtral/mixtral_causal_lm_test.py
@@ -107,6 +107,14 @@ def test_saved_model(self):
             input_data=self.input_data,
         )
 
+    @pytest.mark.large
+    def test_litert_export(self):
+        self.run_litert_export_test(
+            cls=MixtralCausalLM,
+            init_kwargs=self.init_kwargs,
+            input_data=self.input_data,
+        )
+
     @pytest.mark.extra_large
     def test_all_presets(self):
         for preset in MixtralCausalLM.presets:
diff --git a/keras_hub/src/models/mobilenet/mobilenet_image_classifier_test.py b/keras_hub/src/models/mobilenet/mobilenet_image_classifier_test.py
index c996122fa5..27e41bcff9 100644
--- a/keras_hub/src/models/mobilenet/mobilenet_image_classifier_test.py
+++ b/keras_hub/src/models/mobilenet/mobilenet_image_classifier_test.py
@@ -101,3 +101,11 @@ def test_saved_model(self):
             init_kwargs=self.init_kwargs,
             input_data=self.images,
         )
+
+    @pytest.mark.large
+    def test_litert_export(self):
+        self.run_litert_export_test(
+            cls=MobileNetImageClassifier,
+            init_kwargs=self.init_kwargs,
+            input_data=self.images,
+        )
diff --git a/keras_hub/src/models/mobilenetv5/mobilenetv5_image_classifier_test.py b/keras_hub/src/models/mobilenetv5/mobilenetv5_image_classifier_test.py
index 219cb6f285..2229295159 100644
--- a/keras_hub/src/models/mobilenetv5/mobilenetv5_image_classifier_test.py
+++ b/keras_hub/src/models/mobilenetv5/mobilenetv5_image_classifier_test.py
@@ -74,3 +74,13 @@ def test_saved_model(self):
             init_kwargs=self.init_kwargs,
             input_data=self.images,
         )
+
+    @pytest.mark.large
+    def test_litert_export(self):
+        self.run_litert_export_test(
+            cls=MobileNetV5ImageClassifier,
+            init_kwargs=self.init_kwargs,
+            input_data=self.images,
+            comparison_mode="statistical",
+            output_thresholds={"*": {"max": 1e-4, "mean": 1e-5}},
+        )
diff --git a/keras_hub/src/models/moonshine/moonshine_audio_to_text_test.py b/keras_hub/src/models/moonshine/moonshine_audio_to_text_test.py
index 5d0a7dbe7a..8b1d9bc8c7 100644
--- a/keras_hub/src/models/moonshine/moonshine_audio_to_text_test.py
+++ b/keras_hub/src/models/moonshine/moonshine_audio_to_text_test.py
@@ -145,6 +145,14 @@ def test_saved_model(self):
             input_data=self.input_data,
         )
 
+    @pytest.mark.large
+    def test_litert_export(self):
+        self.run_litert_export_test(
+            cls=MoonshineAudioToText,
+            init_kwargs=self.init_kwargs,
+            input_data=self.input_data,
+        )
+
     @pytest.mark.extra_large
     def test_all_presets(self):
         for preset in MoonshineAudioToText.presets:
diff --git a/keras_hub/src/models/opt/opt_causal_lm_test.py b/keras_hub/src/models/opt/opt_causal_lm_test.py
index 138c5a5180..6a9aa12262 100644
--- a/keras_hub/src/models/opt/opt_causal_lm_test.py
+++ b/keras_hub/src/models/opt/opt_causal_lm_test.py
@@ -105,6 +105,14 @@ def test_saved_model(self):
             input_data=self.input_data,
         )
 
+    @pytest.mark.large
+    def test_litert_export(self):
+        self.run_litert_export_test(
+            cls=OPTCausalLM,
+            init_kwargs=self.init_kwargs,
+            input_data=self.input_data,
+        )
+
     @pytest.mark.extra_large
     def test_all_presets(self):
         for preset in OPTCausalLM.presets:
diff --git a/keras_hub/src/models/pali_gemma/pali_gemma_causal_lm_test.py b/keras_hub/src/models/pali_gemma/pali_gemma_causal_lm_test.py
index 1f53cdef04..108b150b50 100644
--- a/keras_hub/src/models/pali_gemma/pali_gemma_causal_lm_test.py
+++ b/keras_hub/src/models/pali_gemma/pali_gemma_causal_lm_test.py
@@ -106,6 +106,35 @@ def test_saved_model(self):
             input_data=input_data,
         )
 
+    @pytest.mark.large
+    def test_litert_export(self):
+        input_data = {
+            "token_ids": np.random.randint(
+                0,
+                self.vocabulary_size,
+                size=(self.batch_size, self.text_sequence_length),
+                dtype="int32",
+            ),
+            "images": np.ones(
+                (self.batch_size, self.image_size, self.image_size, 3)
+            ),
+            "padding_mask": np.ones(
+                (self.batch_size, self.text_sequence_length),
+                dtype="int32",
+            ),
+            "response_mask": np.zeros(
+                (self.batch_size, self.text_sequence_length),
+                dtype="int32",
+            ),
+        }
+        self.run_litert_export_test(
+            cls=PaliGemmaCausalLM,
+            init_kwargs=self.init_kwargs,
+            input_data=input_data,
+            comparison_mode="statistical",
+            output_thresholds={"*": {"max": 2e-6, "mean": 1e-6}},
+        )
+
     def test_pali_gemma_causal_model(self):
         preprocessed, _, _ = self.preprocessor(
             {
diff --git a/keras_hub/src/models/parseq/parseq_causal_lm_test.py b/keras_hub/src/models/parseq/parseq_causal_lm_test.py
index 177c596521..cc23a37666 100644
--- a/keras_hub/src/models/parseq/parseq_causal_lm_test.py
+++ b/keras_hub/src/models/parseq/parseq_causal_lm_test.py
@@ -101,3 +101,30 @@ def test_causal_lm_basics(self):
             train_data=self.train_data,
             expected_output_shape=expected_shape_full,
         )
+
+    @pytest.mark.large
+    def test_litert_export(self):
+        # Create input data for export test
+        input_data = {
+            "images": np.random.randn(
+                self.batch_size,
+                self.image_height,
+                self.image_width,
+                self.num_channels,
+            ),
+            "token_ids": np.random.randint(
+                0,
+                self.vocabulary_size,
+                (self.batch_size, self.max_label_length),
+            ),
+            "padding_mask": np.ones(
+                (self.batch_size, self.max_label_length), dtype="int32"
+            ),
+        }
+        self.run_litert_export_test(
+            cls=PARSeqCausalLM,
+            init_kwargs=self.init_kwargs,
+            input_data=input_data,
+            comparison_mode="statistical",
+            output_thresholds={"*": {"max": 1e-3, "mean": 1e-4}},
+        )
diff --git a/keras_hub/src/models/phi3/phi3_causal_lm_test.py b/keras_hub/src/models/phi3/phi3_causal_lm_test.py
index 564d9d4785..0b579d5d27 100644
--- a/keras_hub/src/models/phi3/phi3_causal_lm_test.py
+++ b/keras_hub/src/models/phi3/phi3_causal_lm_test.py
@@ -107,6 +107,32 @@ def test_saved_model(self):
             input_data=self.input_data,
         )
 
+    @pytest.mark.large
+    def test_litert_export(self):
+        """Test LiteRT export for Phi3CausalLM with a small test model."""
+        model = Phi3CausalLM(**self.init_kwargs)
+
+        # Convert boolean padding_mask to int32 for LiteRT compatibility
+        input_data = self.input_data.copy()
+        if "padding_mask" in input_data:
+            input_data["padding_mask"] = ops.cast(
+                input_data["padding_mask"], "int32"
+            )
+
+        expected_output_shape = (
+            2,
+            12,
+            self.preprocessor.tokenizer.vocabulary_size(),
+        )
+
+        self.run_litert_export_test(
+            model=model,
+            input_data=input_data,
+            expected_output_shape=expected_output_shape,
+            comparison_mode="statistical",
+            output_thresholds={"*": {"max": 1e-3, "mean": 1e-5}},
+        )
+
     @pytest.mark.extra_large
     def test_all_presets(self):
         for preset in Phi3CausalLM.presets:
diff --git a/keras_hub/src/models/qwen/qwen_causal_lm_test.py b/keras_hub/src/models/qwen/qwen_causal_lm_test.py
index b1a715646e..081461e94f 100644
--- a/keras_hub/src/models/qwen/qwen_causal_lm_test.py
+++ b/keras_hub/src/models/qwen/qwen_causal_lm_test.py
@@ -113,6 +113,14 @@ def test_saved_model(self):
             input_data=self.input_data,
         )
 
+    @pytest.mark.large
+    def test_litert_export(self):
+        self.run_litert_export_test(
+            cls=QwenCausalLM,
+            init_kwargs=self.init_kwargs,
+            input_data=self.input_data,
+        )
+
     @pytest.mark.extra_large
     def test_all_presets(self):
         for preset in QwenCausalLM.presets:
diff --git a/keras_hub/src/models/qwen3/qwen3_causal_lm_test.py b/keras_hub/src/models/qwen3/qwen3_causal_lm_test.py
index 5e0456b521..7903967e07 100644
--- a/keras_hub/src/models/qwen3/qwen3_causal_lm_test.py
+++ b/keras_hub/src/models/qwen3/qwen3_causal_lm_test.py
@@ -114,6 +114,32 @@ def test_saved_model(self):
             input_data=self.input_data,
         )
 
+    @pytest.mark.large
+    def test_litert_export(self):
+        """Test LiteRT export for Qwen3CausalLM with a small test model."""
+        model = Qwen3CausalLM(**self.init_kwargs)
+
+        # Convert boolean padding_mask to int32 for LiteRT compatibility
+        input_data = self.input_data.copy()
+        if "padding_mask" in input_data:
+            input_data["padding_mask"] = ops.cast(
+                input_data["padding_mask"], "int32"
+            )
+
+        expected_output_shape = (
+            2,
+            7,
+            self.preprocessor.tokenizer.vocabulary_size(),
+        )
+
+        self.run_litert_export_test(
+            model=model,
+            input_data=input_data,
+            expected_output_shape=expected_output_shape,
+            comparison_mode="statistical",
+            output_thresholds={"*": {"max": 1e-3, "mean": 1e-5}},
+        )
+
     @pytest.mark.extra_large
     def test_all_presets(self):
         for preset in Qwen3CausalLM.presets:
diff --git a/keras_hub/src/models/qwen3_moe/qwen3_moe_causal_lm_test.py b/keras_hub/src/models/qwen3_moe/qwen3_moe_causal_lm_test.py
index d342c1e165..f57279a69f 100644
--- a/keras_hub/src/models/qwen3_moe/qwen3_moe_causal_lm_test.py
+++ b/keras_hub/src/models/qwen3_moe/qwen3_moe_causal_lm_test.py
@@ -120,6 +120,14 @@ def test_saved_model(self):
             input_data=self.input_data,
         )
 
+    @pytest.mark.large
+    def test_litert_export(self):
+        self.run_litert_export_test(
+            cls=Qwen3MoeCausalLM,
+            init_kwargs=self.init_kwargs,
+            input_data=self.input_data,
+        )
+
     @pytest.mark.extra_large
     def test_all_presets(self):
         for preset in Qwen3MoeCausalLM.presets:
diff --git a/keras_hub/src/models/qwen_moe/qwen_moe_causal_lm_test.py b/keras_hub/src/models/qwen_moe/qwen_moe_causal_lm_test.py
index ad1b8c3113..9be89a4add 100644
--- a/keras_hub/src/models/qwen_moe/qwen_moe_causal_lm_test.py
+++ b/keras_hub/src/models/qwen_moe/qwen_moe_causal_lm_test.py
@@ -139,6 +139,14 @@ def test_saved_model(self):
             input_data=self.input_data,
         )
 
+    @pytest.mark.large
+    def test_litert_export(self):
+        self.run_litert_export_test(
+            cls=QwenMoeCausalLM,
+            init_kwargs=self.init_kwargs,
+            input_data=self.input_data,
+        )
+
     @pytest.mark.extra_large
     def test_all_presets(self):
         for preset in QwenMoeCausalLM.presets:
diff --git a/keras_hub/src/models/resnet/resnet_image_classifier_test.py b/keras_hub/src/models/resnet/resnet_image_classifier_test.py
index 9bc5897fee..d4dc77a97a 100644
--- a/keras_hub/src/models/resnet/resnet_image_classifier_test.py
+++ b/keras_hub/src/models/resnet/resnet_image_classifier_test.py
@@ -65,6 +65,21 @@ def test_saved_model(self):
             input_data=self.images,
         )
 
+    @pytest.mark.large
+    def test_litert_export(self):
+        """Test LiteRT export for ResNetImageClassifier with a small
+        test model."""
+        model = ResNetImageClassifier(**self.init_kwargs)
+        expected_output_shape = (2, 2)  # 2 images, 2 classes
+
+        self.run_litert_export_test(
+            model=model,
+            input_data=self.images,
+            expected_output_shape=expected_output_shape,
+            comparison_mode="statistical",
+            output_thresholds={"*": {"max": 5e-5, "mean": 1e-5}},
+        )
+
     @pytest.mark.extra_large
     def test_all_presets(self):
         for preset in ResNetImageClassifier.presets:
diff --git a/keras_hub/src/models/retinanet/retinanet_object_detector_test.py b/keras_hub/src/models/retinanet/retinanet_object_detector_test.py
index 5e01c802a5..b304fe3ada 100644
--- a/keras_hub/src/models/retinanet/retinanet_object_detector_test.py
+++ b/keras_hub/src/models/retinanet/retinanet_object_detector_test.py
@@ -60,7 +60,7 @@ def setUp(self):
         )
 
         image_converter = RetinaNetImageConverter(
-            bounding_box_format="yxyx", scale=1 / 255.0, image_size=(800, 800)
+            bounding_box_format="yxyx", scale=1 / 255.0, image_size=(512, 512)
         )
 
         preprocessor = RetinaNetObjectDetectorPreprocessor(
@@ -108,3 +108,19 @@ def test_saved_model(self):
             init_kwargs=self.init_kwargs,
             input_data=self.images,
         )
+
+    @pytest.mark.large
+    def test_litert_export(self):
+        input_data = self.images
+
+        self.run_litert_export_test(
+            cls=RetinaNetObjectDetector,
+            init_kwargs=self.init_kwargs,
+            input_data=input_data,
+            comparison_mode="statistical",
+            output_thresholds={
+                "enc_topk_logits": {"max": 5.0, "mean": 0.05},
+                "logits": {"max": 2.0, "mean": 0.05},
+                "*": {"max": 1.5, "mean": 0.05},
+            },
+        )
diff --git a/keras_hub/src/models/roberta/roberta_text_classifier_test.py b/keras_hub/src/models/roberta/roberta_text_classifier_test.py
index c5534a0dc4..adc3daa3ba 100644
--- a/keras_hub/src/models/roberta/roberta_text_classifier_test.py
+++ b/keras_hub/src/models/roberta/roberta_text_classifier_test.py
@@ -59,6 +59,14 @@ def test_saved_model(self):
             input_data=self.input_data,
         )
 
+    @pytest.mark.large
+    def test_litert_export(self):
+        self.run_litert_export_test(
+            cls=RobertaTextClassifier,
+            init_kwargs=self.init_kwargs,
+            input_data=self.input_data,
+        )
+
     @pytest.mark.extra_large
     def test_all_presets(self):
         for preset in RobertaTextClassifier.presets:
diff --git a/keras_hub/src/models/roformer_v2/roformer_v2_text_classifier_test.py b/keras_hub/src/models/roformer_v2/roformer_v2_text_classifier_test.py
index b24395c574..22a038c538 100644
--- a/keras_hub/src/models/roformer_v2/roformer_v2_text_classifier_test.py
+++ b/keras_hub/src/models/roformer_v2/roformer_v2_text_classifier_test.py
@@ -1,3 +1,5 @@
+import pytest
+
 from keras_hub.src.models.roformer_v2 import (
     roformer_v2_text_classifier_preprocessor as r,
 )
@@ -50,3 +52,30 @@ def test_classifier_basics(self):
             train_data=self.train_data,
             expected_output_shape=(2, 2),
         )
+
+    @pytest.mark.large
+    def test_saved_model(self):
+        self.run_model_saving_test(
+            cls=RoformerV2TextClassifier,
+            init_kwargs=self.init_kwargs,
+            input_data=self.input_data,
+        )
+
+    @pytest.mark.large
+    def test_litert_export(self):
+        self.run_litert_export_test(
+            cls=RoformerV2TextClassifier,
+            init_kwargs=self.init_kwargs,
+            input_data=self.input_data,
+        )
+
+    @pytest.mark.extra_large
+    def test_all_presets(self):
+        for preset in RoformerV2TextClassifier.presets:
+            self.run_preset_test(
+                cls=RoformerV2TextClassifier,
+                preset=preset,
+                init_kwargs={"num_classes": 2},
+                input_data=self.input_data,
+                expected_output_shape=(2, 2),
+            )
diff --git a/keras_hub/src/models/sam/sam_image_segmenter_test.py b/keras_hub/src/models/sam/sam_image_segmenter_test.py
index 278b6a8749..63a248d6f4 100644
--- a/keras_hub/src/models/sam/sam_image_segmenter_test.py
+++ b/keras_hub/src/models/sam/sam_image_segmenter_test.py
@@ -22,6 +22,11 @@ def setUp(self):
             (self.batch_size, self.image_size, self.image_size, 3),
             dtype="float32",
         )
+        # Use a more realistic SAM configuration for export testing.
+        # Real SAM uses 64x64 image embeddings for 1024x1024 images.
+        # Scaling down proportionally (128 / 1024 = 1/8), the embeddings
+        # should be 64 / 8 = 8 on each side, but the encoder itself is
+        # kept simple for testing.
         self.image_encoder = ViTDetBackbone(
             hidden_size=16,
             num_layers=16,
@@ -70,9 +75,10 @@ def setUp(self):
             "points": np.ones((self.batch_size, 1, 2), dtype="float32"),
             "labels": np.ones((self.batch_size, 1), dtype="float32"),
             "boxes": np.ones((self.batch_size, 1, 2, 2), dtype="float32"),
-            "masks": np.zeros(
-                (self.batch_size, 0, self.image_size, self.image_size, 1)
-            ),
+            # For LiteRT (TFLite) export, pass one all-zero mask, which is
+            # interpreted as "no mask", instead of an empty mask axis.
+            # Mask side length is 4 * image_embedding_size = 32.
+            "masks": np.zeros((self.batch_size, 1, 32, 32, 1), dtype="float32"),
         }
         self.labels = {
             "masks": np.ones((self.batch_size, 2), dtype="float32"),
@@ -124,3 +130,16 @@ def test_all_presets(self):
                     "iou_pred": [2],
                 },
             )
+
+    @pytest.mark.large
+    def test_litert_export(self):
+        self.run_litert_export_test(
+            cls=SAMImageSegmenter,
+            init_kwargs=self.init_kwargs,
+            input_data=self.inputs,
+            comparison_mode="statistical",
+            output_thresholds={
+                "masks": {"max": 1e-3, "mean": 1e-4},
+                "iou_pred": {"max": 1e-3, "mean": 1e-4},
+            },
+        )
diff --git a/keras_hub/src/models/sam/sam_prompt_encoder.py b/keras_hub/src/models/sam/sam_prompt_encoder.py
index 12b77f4a7d..883903415c 100644
--- a/keras_hub/src/models/sam/sam_prompt_encoder.py
+++ b/keras_hub/src/models/sam/sam_prompt_encoder.py
@@ -292,7 +292,7 @@ def _maybe_input_mask_embed():
             )
 
         dense_embeddings = ops.cond(
-            ops.equal(ops.size(masks), 0),
+            ops.equal(ops.shape(masks)[1], 0),
             _no_mask_embed,
             _maybe_input_mask_embed,
         )
diff --git a/keras_hub/src/models/segformer/segformer_image_segmenter_tests.py b/keras_hub/src/models/segformer/segformer_image_segmenter_tests.py
index 136351e386..8227399b57 100644
--- a/keras_hub/src/models/segformer/segformer_image_segmenter_tests.py
+++ b/keras_hub/src/models/segformer/segformer_image_segmenter_tests.py
@@ -72,3 +72,13 @@ def test_saved_model(self):
             init_kwargs={**self.init_kwargs},
             input_data=self.input_data,
         )
+
+    @pytest.mark.large
+    def test_litert_export(self):
+        self.run_litert_export_test(
+            cls=SegFormerImageSegmenter,
+            init_kwargs={**self.init_kwargs},
+            input_data=self.input_data,
+            comparison_mode="statistical",
+            output_thresholds={"*": {"max": 10.0, "mean": 2.0}},
+        )
diff --git a/keras_hub/src/models/siglip/siglip_backbone_test.py b/keras_hub/src/models/siglip/siglip_backbone_test.py
index ef3ddd5016..43f191c1f4 100644
--- a/keras_hub/src/models/siglip/siglip_backbone_test.py
+++ b/keras_hub/src/models/siglip/siglip_backbone_test.py
@@ -45,6 +45,14 @@ def test_saved_model(self):
             input_data=self.input_data,
         )
 
+    @pytest.mark.large
+    def test_litert_export(self):
+        self.run_litert_export_test(
+            cls=SigLIPBackbone,
+            init_kwargs=self.init_kwargs,
+            input_data=self.input_data,
+        )
+
     @pytest.mark.kaggle_key_required
     @pytest.mark.extra_large
     def test_smallest_preset(self):
@@ -105,6 +113,14 @@ def test_saved_model(self):
             input_data=self.input_data,
         )
 
+    @pytest.mark.large
+    def test_litert_export(self):
+        self.run_litert_export_test(
+            cls=SigLIPBackbone,
+            init_kwargs=self.init_kwargs,
+            input_data=self.input_data,
+        )
+
     @pytest.mark.kaggle_key_required
     @pytest.mark.extra_large
     def test_smallest_preset(self):
diff --git a/keras_hub/src/models/smollm3/smollm3_causal_lm_test.py b/keras_hub/src/models/smollm3/smollm3_causal_lm_test.py
index cbf9b3f88e..8ec458fe21 100644
--- a/keras_hub/src/models/smollm3/smollm3_causal_lm_test.py
+++ b/keras_hub/src/models/smollm3/smollm3_causal_lm_test.py
@@ -122,6 +122,14 @@ def test_saved_model(self):
             input_data=self.input_data,
         )
 
+    @pytest.mark.large
+    def test_litert_export(self):
+        self.run_litert_export_test(
+            cls=SmolLM3CausalLM,
+            init_kwargs=self.init_kwargs,
+            input_data=self.input_data,
+        )
+
     @pytest.mark.extra_large
     def test_all_presets(self):
         for preset in SmolLM3CausalLM.presets:
diff --git a/keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_text_to_image_test.py b/keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_text_to_image_test.py
index 10ba8c5149..0317bcaf79 100644
--- a/keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_text_to_image_test.py
+++ b/keras_hub/src/models/stable_diffusion_3/stable_diffusion_3_text_to_image_test.py
@@ -196,3 +196,13 @@ def test_saved_model(self):
             init_kwargs=self.init_kwargs,
             input_data=self.input_data,
         )
+
+    @pytest.mark.large
+    def test_litert_export(self):
+        self.run_litert_export_test(
+            cls=StableDiffusion3TextToImage,
+            init_kwargs=self.init_kwargs,
+            input_data=self.input_data,
+            allow_custom_ops=True,  # Allow custom ops like GatherV2, Erfc
+            enable_select_tf_ops=True,  # Enable TensorFlow Select ops
+        )
diff --git a/keras_hub/src/models/t5/t5_backbone_test.py b/keras_hub/src/models/t5/t5_backbone_test.py
index 8a67dcd865..33091e3136 100644
--- a/keras_hub/src/models/t5/t5_backbone_test.py
+++ b/keras_hub/src/models/t5/t5_backbone_test.py
@@ -40,6 +40,14 @@ def test_saved_model(self):
             input_data=self.input_data,
         )
 
+    @pytest.mark.large
+    def test_litert_export(self):
+        self.run_litert_export_test(
+            cls=T5Backbone,
+            init_kwargs=self.init_kwargs,
+            input_data=self.input_data,
+        )
+
     @pytest.mark.extra_large
     def test_smallest_preset(self):
         self.run_preset_test(
diff --git a/keras_hub/src/models/t5gemma/t5gemma_seq_2_seq_lm_test.py b/keras_hub/src/models/t5gemma/t5gemma_seq_2_seq_lm_test.py
index 0a4cb0ef4e..fe258524ad 100644
--- a/keras_hub/src/models/t5gemma/t5gemma_seq_2_seq_lm_test.py
+++ b/keras_hub/src/models/t5gemma/t5gemma_seq_2_seq_lm_test.py
@@ -156,6 +156,14 @@ def test_saved_model(self):
             input_data=self.input_data,
         )
 
+    @pytest.mark.large
+    def test_litert_export(self):
+        self.run_litert_export_test(
+            cls=T5GemmaSeq2SeqLM,
+            init_kwargs=self.init_kwargs,
+            input_data=self.input_data,
+        )
+
     @pytest.mark.extra_large
     def test_all_presets(self):
         for preset in T5GemmaSeq2SeqLM.presets:
diff --git a/keras_hub/src/models/vae/vae_backbone_test.py b/keras_hub/src/models/vae/vae_backbone_test.py
index f5bd6f27a8..fcf349257c 100644
--- a/keras_hub/src/models/vae/vae_backbone_test.py
+++ b/keras_hub/src/models/vae/vae_backbone_test.py
@@ -33,3 +33,13 @@ def test_saved_model(self):
             init_kwargs=self.init_kwargs,
             input_data=self.input_data,
         )
+
+    @pytest.mark.large
+    def test_litert_export(self):
+        self.run_litert_export_test(
+            cls=VAEBackbone,
+            init_kwargs=self.init_kwargs,
+            input_data=self.input_data,
+            comparison_mode="statistical",
+            output_thresholds={"*": {"max": 2e-3, "mean": 2e-4}},
+        )
diff --git a/keras_hub/src/models/vgg/vgg_image_classifier_test.py b/keras_hub/src/models/vgg/vgg_image_classifier_test.py
index 16c3fa4453..1f694dbd89 100644
--- a/keras_hub/src/models/vgg/vgg_image_classifier_test.py
+++ b/keras_hub/src/models/vgg/vgg_image_classifier_test.py
@@ -52,6 +52,14 @@ def test_saved_model(self):
             input_data=self.images,
         )
 
+    @pytest.mark.large
+    def test_litert_export(self):
+        self.run_litert_export_test(
+            cls=VGGImageClassifier,
+            init_kwargs=self.init_kwargs,
+            input_data=self.images,
+        )
+
     @pytest.mark.extra_large
     def test_all_presets(self):
         # we need at least 32x32 image resolution here to satisfy the presets'
diff --git a/keras_hub/src/models/vit/vit_image_classifier_test.py b/keras_hub/src/models/vit/vit_image_classifier_test.py
index 1734642bd6..8dfd7a34e2 100644
--- a/keras_hub/src/models/vit/vit_image_classifier_test.py
+++ b/keras_hub/src/models/vit/vit_image_classifier_test.py
@@ -55,3 +55,11 @@ def test_saved_model(self):
             init_kwargs=self.init_kwargs,
             input_data=self.images,
         )
+
+    @pytest.mark.large
+    def test_litert_export(self):
+        self.run_litert_export_test(
+            cls=ViTImageClassifier,
+            init_kwargs=self.init_kwargs,
+            input_data=self.images,
+        )
diff --git a/keras_hub/src/models/vit_det/vit_det_backbone_test.py b/keras_hub/src/models/vit_det/vit_det_backbone_test.py
index c8e80c0de0..f55a68fc14 100644
--- a/keras_hub/src/models/vit_det/vit_det_backbone_test.py
+++ b/keras_hub/src/models/vit_det/vit_det_backbone_test.py
@@ -37,3 +37,11 @@ def test_saved_model(self):
             init_kwargs=self.init_kwargs,
             input_data=self.input_data,
         )
+
+    @pytest.mark.large
+    def test_litert_export(self):
+        self.run_litert_export_test(
+            cls=ViTDetBackbone,
+            init_kwargs=self.init_kwargs,
+            input_data=self.input_data,
+        )
diff --git a/keras_hub/src/models/whisper/whisper_backbone_test.py b/keras_hub/src/models/whisper/whisper_backbone_test.py
index 19c129db00..197c62a0ba 100644
--- a/keras_hub/src/models/whisper/whisper_backbone_test.py
+++ b/keras_hub/src/models/whisper/whisper_backbone_test.py
@@ -60,6 +60,14 @@ def test_saved_model(self):
             input_data=self.input_data,
         )
 
+    @pytest.mark.large
+    def test_litert_export(self):
+        self.run_litert_export_test(
+            cls=WhisperBackbone,
+            init_kwargs=self.init_kwargs,
+            input_data=self.input_data,
+        )
+
     @pytest.mark.extra_large
     def test_smallest_preset(self):
         self.run_preset_test(
diff --git a/keras_hub/src/models/xception/xception_image_classifier_test.py b/keras_hub/src/models/xception/xception_image_classifier_test.py
index c042ecf2d7..a20308fb8a 100644
--- a/keras_hub/src/models/xception/xception_image_classifier_test.py
+++ b/keras_hub/src/models/xception/xception_image_classifier_test.py
@@ -74,6 +74,14 @@ def test_saved_model(self):
             input_data=self.images,
         )
 
+    @pytest.mark.large
+    def test_litert_export(self):
+        self.run_litert_export_test(
+            cls=XceptionImageClassifier,
+            init_kwargs=self.init_kwargs,
+            input_data=self.images,
+        )
+
     @pytest.mark.extra_large
     def test_all_presets(self):
         for preset in XceptionImageClassifier.presets:
diff --git a/keras_hub/src/models/xlm_roberta/xlm_roberta_text_classifier_test.py b/keras_hub/src/models/xlm_roberta/xlm_roberta_text_classifier_test.py
index 386d807917..d56f144f0e 100644
--- a/keras_hub/src/models/xlm_roberta/xlm_roberta_text_classifier_test.py
+++ b/keras_hub/src/models/xlm_roberta/xlm_roberta_text_classifier_test.py
@@ -64,6 +64,14 @@ def test_saved_model(self):
             input_data=self.input_data,
         )
 
+    @pytest.mark.large
+    def test_litert_export(self):
+        self.run_litert_export_test(
+            cls=XLMRobertaTextClassifier,
+            init_kwargs=self.init_kwargs,
+            input_data=self.input_data,
+        )
+
     @pytest.mark.extra_large
     def test_all_presets(self):
         for preset in XLMRobertaTextClassifier.presets:
diff --git a/keras_hub/src/models/xlnet/xlnet_backbone_test.py b/keras_hub/src/models/xlnet/xlnet_backbone_test.py
index a24ebc38b0..ce4ffad13e 100644
--- a/keras_hub/src/models/xlnet/xlnet_backbone_test.py
+++ b/keras_hub/src/models/xlnet/xlnet_backbone_test.py
@@ -35,3 +35,11 @@ def test_saved_model(self):
             init_kwargs=self.init_kwargs,
             input_data=self.input_data,
         )
+
+    @pytest.mark.large
+    def test_litert_export(self):
+        self.run_litert_export_test(
+            cls=XLNetBackbone,
+            init_kwargs=self.init_kwargs,
+            input_data=self.input_data,
+        )
diff --git a/keras_hub/src/tests/test_case.py b/keras_hub/src/tests/test_case.py
index 633f32cd5b..0e40486196 100644
--- a/keras_hub/src/tests/test_case.py
+++ b/keras_hub/src/tests/test_case.py
@@ -1,7 +1,9 @@
+import gc
 import json
 import os
 import pathlib
 import re
+import tempfile
 
 import keras
 import numpy as np
@@ -433,6 +435,387 @@ def run_model_saving_test(
         restored_output = restored_model(input_data)
         self.assertAllClose(model_output, restored_output, atol=atol, rtol=rtol)
 
+    def _verify_litert_outputs(
+        self,
+        keras_output,
+        litert_output,
+        sig_outputs,
+        expected_output_shape=None,
+        verify_numerics=True,
+        comparison_mode="strict",
+        output_thresholds=None,
+    ):
+        """Check the LiteRT output shape and, optionally, its numerics.
+
+        Args:
+            keras_output: Keras model output (tensor or dict); may be None
+                when `verify_numerics` is False.
+            litert_output: LiteRT interpreter output (array or dict).
+            sig_outputs: Output names from the SignatureDef.
+            expected_output_shape: Expected output shape (optional).
+            verify_numerics: Whether to compare against `keras_output`.
+            comparison_mode: "strict" or "statistical".
+            output_thresholds: Thresholds for statistical comparison; None
+                falls back to the defaults of `_verify_litert_numerics`.
+        """
+        # Keras gave a single (non-dict) output but LiteRT wrapped its one
+        # output in a dict: unwrap it so both sides have the same structure.
+        if (
+            not isinstance(keras_output, dict)
+            and isinstance(litert_output, dict)
+            and len(litert_output) == 1
+        ):
+            litert_output = next(iter(litert_output.values()))
+
+        if expected_output_shape is not None:
+            self.assertEqual(litert_output.shape, expected_output_shape)
+
+        if verify_numerics:
+            self._verify_litert_numerics(
+                keras_output,
+                litert_output,
+                sig_outputs,
+                # Passing None through would crash on `.get()` downstream.
+                {} if output_thresholds is None else output_thresholds,
+                comparison_mode,
+            )
+
+    def _verify_litert_numerics(
+        self,
+        keras_output,
+        litert_output,
+        sig_outputs,
+        output_thresholds,
+        comparison_mode,
+    ):
+        """Compare Keras and LiteRT outputs numerically.
+
+        Dict outputs are paired by SignatureDef output name and single tensors
+        are compared directly; mixing the two structures fails the test.
+
+        Args:
+            keras_output: Keras model output (tensor or dict).
+            litert_output: LiteRT interpreter output (tensor or dict).
+            sig_outputs: List of output names from the SignatureDef.
+            output_thresholds: Dict mapping an output name, or "*" as a
+                wildcard, to a dict with "max" and "mean" thresholds.
+            comparison_mode: "strict" or "statistical".
+        """
+        keras_is_dict = isinstance(keras_output, dict)
+        litert_is_dict = isinstance(litert_output, dict)
+
+        if keras_is_dict and litert_is_dict:
+            # Named outputs: every SignatureDef name must exist on both sides.
+            for output_name in sig_outputs:
+                if output_name not in keras_output:
+                    self.fail(
+                        f"SignatureDef output '{output_name}' not found in "
+                        f"Keras outputs.\n"
+                        f"Keras keys: {list(keras_output.keys())}"
+                    )
+                if output_name not in litert_output:
+                    self.fail(
+                        f"SignatureDef output '{output_name}' not found in "
+                        f"LiteRT outputs.\n"
+                        f"LiteRT keys: {list(litert_output.keys())}"
+                    )
+                keras_np = ops.convert_to_numpy(keras_output[output_name])
+                litert_np = litert_output[output_name]
+                # A per-output entry wins over "*", which wins over defaults.
+                wildcard = output_thresholds.get(
+                    "*", {"max": 10.0, "mean": 0.1}
+                )
+                limits = output_thresholds.get(output_name, wildcard)
+                self._compare_outputs(
+                    keras_np,
+                    litert_np,
+                    comparison_mode,
+                    output_name,
+                    limits["max"],
+                    limits["mean"],
+                )
+        elif not (keras_is_dict or litert_is_dict):
+            # Two bare tensors: compare directly using the "*" thresholds.
+            keras_np = ops.convert_to_numpy(keras_output)
+            limits = output_thresholds.get("*", {"max": 1e-2, "mean": 1e-3})
+            self._compare_outputs(
+                keras_np,
+                litert_output,
+                comparison_mode,
+                key=None,
+                max_threshold=limits["max"],
+                mean_threshold=limits["mean"],
+            )
+        else:
+            keras_type = type(keras_output).__name__
+            litert_type = type(litert_output).__name__
+            self.fail(
+                f"Output structure mismatch: Keras returns "
+                f"{keras_type}, LiteRT returns {litert_type}"
+            )
+
+    def run_litert_export_test(
+        self,
+        cls=None,
+        init_kwargs=None,
+        input_data=None,
+        expected_output_shape=None,
+        model=None,
+        verify_numerics=True,
+        # `comparison_mode` is read from **export_kwargs; see the docstring.
+        output_thresholds=None,
+        **export_kwargs,
+    ):
+        """Export model to LiteRT format and verify outputs.
+
+        The model is exported to a temporary `.tflite` file, loaded into an
+        interpreter and run through its `serving_default` signature. The test
+        is skipped on backends other than TensorFlow.
+
+        Args:
+            cls: Model class to test (optional if model is provided)
+            init_kwargs: Initialization arguments for the model (optional
+                if model is provided)
+            input_data: Input data to test with (dict or tensor)
+            expected_output_shape: Expected output shape from LiteRT inference
+            model: Pre-created model instance (optional, if provided cls and
+                init_kwargs are ignored)
+            verify_numerics: Whether to verify numerical correctness
+                between Keras and LiteRT outputs. Set to False for preset
+                models with load_weights=False where outputs are random.
+            comparison_mode: "strict" (default) or "statistical". Given as a
+                keyword; it is popped from `export_kwargs` before exporting.
+                - "strict": All elements must be within default tolerances
+                    (1e-6)
+                - "statistical": Check mean/max absolute differences against
+                    provided thresholds
+            output_thresholds: Dict mapping output names to threshold dicts
+                with "max" and "mean" keys. Use "*" as wildcard for defaults.
+                Example: {"output1": {"max": 1e-4, "mean": 1e-5},
+                         "*": {"max": 1e-3, "mean": 1e-4}}
+            **export_kwargs: Additional keyword arguments to pass to
+                model.export(), such as allow_custom_ops=True or
+                enable_select_tf_ops=True.
+
+        Raises:
+            ValueError: If `model` is None and `cls` or `init_kwargs` is
+                missing.
+        """
+        # Extract comparison_mode from export_kwargs if provided
+        comparison_mode = export_kwargs.pop("comparison_mode", "strict")
+        if keras.backend.backend() != "tensorflow":
+            self.skipTest("LiteRT export only supports TensorFlow backend")
+
+        try:
+            from ai_edge_litert.interpreter import Interpreter
+        except ImportError:
+            import tensorflow as tf
+
+            Interpreter = tf.lite.Interpreter
+
+        if output_thresholds is None:
+            output_thresholds = {"*": {"max": 10.0, "mean": 0.1}}
+
+        if model is None:
+            if cls is None or init_kwargs is None:
+                raise ValueError(
+                    "Either 'model' or 'cls' and 'init_kwargs' must be provided"
+                )
+            model = cls(**init_kwargs)
+            _ = model(input_data)
+
+        # Input dtype remapping needed to match TFLite expectations.
+        tflite_dtypes = {
+            np.dtype(bool): np.int32,
+            np.dtype(np.float64): np.float32,
+            np.dtype(np.int64): np.int32,
+        }
+
+        def convert_for_tflite(x):
+            """Convert tensor/array to a TFLite-compatible NumPy array."""
+            # NumPy-only on purpose: `tf` is bound in this method only when
+            # the `ai_edge_litert` import fails, so a closure over it would
+            # raise NameError whenever that package is installed.
+            if not isinstance(x, np.ndarray):
+                if not hasattr(x, "numpy"):
+                    return x
+                x = x.numpy()
+            target = tflite_dtypes.get(getattr(x, "dtype", None))
+            return x if target is None else x.astype(target)
+
+        interpreter = None
+        try:
+            with tempfile.TemporaryDirectory() as temp_dir:
+                export_path = os.path.join(temp_dir, "model.tflite")
+
+                # Step 1: Export model and get Keras output
+                model.export(export_path, format="litert", **export_kwargs)
+                self.assertTrue(os.path.exists(export_path))
+                self.assertGreater(os.path.getsize(export_path), 0)
+
+                keras_output = model(input_data) if verify_numerics else None
+
+                # Step 2: Load interpreter and verify SignatureDef
+                interpreter = Interpreter(model_path=export_path)
+                signature_defs = interpreter.get_signature_list()
+                self.assertIn(
+                    "serving_default",
+                    signature_defs,
+                    "Missing serving_default signature",
+                )
+
+                serving_sig = signature_defs["serving_default"]
+                sig_inputs = serving_sig.get("inputs", [])
+                sig_outputs = serving_sig.get("outputs", [])
+
+                self.assertGreater(
+                    len(sig_inputs),
+                    0,
+                    "Should have at least one input in SignatureDef",
+                )
+                self.assertGreater(
+                    len(sig_outputs),
+                    0,
+                    "Should have at least one output in SignatureDef",
+                )
+
+                # Verify input signature
+                if isinstance(input_data, dict):
+                    expected_inputs = set(input_data.keys())
+                    actual_inputs = set(sig_inputs)
+                    # Check that all expected inputs are in the signature
+                    # (allow signature to have additional optional inputs)
+                    missing_inputs = expected_inputs - actual_inputs
+                    if missing_inputs:
+                        self.fail(
+                            f"Missing inputs in SignatureDef: "
+                            f"{sorted(missing_inputs)}. "
+                            f"Expected: {sorted(expected_inputs)}, "
+                            f"SignatureDef has: {sorted(actual_inputs)}"
+                        )
+                elif len(sig_inputs) != 1:
+                    # A single tensor input needs exactly one signature input.
+                    self.fail(
+                        "Expected 1 input for numpy array input_data, "
+                        f"but SignatureDef has {len(sig_inputs)}: "
+                        f"{sig_inputs}"
+                    )
+
+                # Verify output signature
+                if verify_numerics and isinstance(keras_output, dict):
+                    expected_outputs = set(keras_output.keys())
+                    actual_outputs = set(sig_outputs)
+                    if expected_outputs != actual_outputs:
+                        self.fail(
+                            f"Output name mismatch: Expected "
+                            f"{sorted(expected_outputs)}, "
+                            f"but SignatureDef has {sorted(actual_outputs)}"
+                        )
+
+                # Step 3: Run LiteRT inference
+                # NOTE(review): the file is deleted while the interpreter is
+                # still in use, relying on the model being loaded already.
+                os.remove(export_path)
+                runner = interpreter.get_signature_runner("serving_default")
+
+                if isinstance(input_data, dict):
+                    converted_input_data = tree.map_structure(
+                        convert_for_tflite, input_data
+                    )
+                    litert_output = runner(**converted_input_data)
+                else:
+                    # Exactly one signature input was verified above.
+                    litert_output = runner(
+                        **{sig_inputs[0]: convert_for_tflite(input_data)}
+                    )
+
+                # Step 4: Verify outputs
+                self._verify_litert_outputs(
+                    keras_output,
+                    litert_output,
+                    sig_outputs,
+                    expected_output_shape=expected_output_shape,
+                    verify_numerics=verify_numerics,
+                    comparison_mode=comparison_mode,
+                    output_thresholds=output_thresholds,
+                )
+        finally:
+            if interpreter is not None:
+                del interpreter
+            if model is not None and cls is not None:
+                del model
+            gc.collect()
+
+    def _compare_outputs(
+        self,
+        keras_val,
+        litert_val,
+        comparison_mode,
+        key=None,
+        max_threshold=10.0,
+        mean_threshold=0.1,
+    ):
+        """Assert that a Keras output and its LiteRT counterpart agree.
+
+        Shapes must match exactly. In "strict" mode values are compared
+        element-wise with atol=rtol=1e-6; in "statistical" mode only the
+        mean and max absolute differences are bounded.
+
+        Args:
+            keras_val: Keras model output (numpy array)
+            litert_val: LiteRT model output (numpy array)
+            comparison_mode: "strict" or "statistical"
+            key: Output key name used in failure messages (optional)
+            max_threshold: Max absolute difference bound (statistical mode)
+            mean_threshold: Mean absolute difference bound (statistical mode)
+
+        Raises:
+            ValueError: If `comparison_mode` is not a known mode.
+        """
+        key_msg = f" for output key '{key}'" if key else ""
+
+        # Differences are only meaningful between same-shaped arrays.
+        self.assertEqual(
+            keras_val.shape,
+            litert_val.shape,
+            f"Shape mismatch{key_msg}: Keras shape "
+            f"{keras_val.shape}, LiteRT shape {litert_val.shape}. "
+            "Numerical comparison cannot proceed due to incompatible shapes.",
+        )
+
+        if comparison_mode not in ("strict", "statistical"):
+            raise ValueError(
+                f"Unknown comparison_mode: {comparison_mode}. Must be "
+                "'strict' or 'statistical'"
+            )
+
+        if comparison_mode == "strict":
+            self.assertAllClose(
+                keras_val,
+                litert_val,
+                atol=1e-6,
+                rtol=1e-6,
+                msg=f"Mismatch{key_msg}",
+            )
+            return
+
+        # Statistical mode: bound the mean, then the max, absolute error.
+        abs_diff = np.abs(keras_val - litert_val)
+        mean_abs_diff = np.mean(abs_diff)
+        max_abs_diff = np.max(abs_diff)
+        self.assertLessEqual(
+            mean_abs_diff,
+            mean_threshold,
+            f"Mean absolute difference too high: {mean_abs_diff:.6e}"
+            f"{key_msg} (threshold: {mean_threshold})",
+        )
+        self.assertLessEqual(
+            max_abs_diff,
+            max_threshold,
+            f"Max absolute difference too high: {max_abs_diff:.6e}"
+            f"{key_msg} (threshold: {max_threshold})",
+        )
+
     def run_backbone_test(
         self,
         cls,
diff --git a/requirements-common.txt b/requirements-common.txt
index a258d1cd85..fd81373b22 100644
--- a/requirements-common.txt
+++ b/requirements-common.txt
@@ -20,3 +20,4 @@ safetensors
 pillow
 openvino
 transformers
+ai-edge-litert
\ No newline at end of file