diff --git a/src/megatron/bridge/models/__init__.py b/src/megatron/bridge/models/__init__.py index 4338bbbc4e..eb2925ebb0 100644 --- a/src/megatron/bridge/models/__init__.py +++ b/src/megatron/bridge/models/__init__.py @@ -72,6 +72,14 @@ Qwen25ModelProvider72B, Qwen25ModelProvider500M, ) +from megatron.bridge.models.starcoder import ( + Starcoder2ModelProvider, + Starcoder2ModelProvider3B, + Starcoder2ModelProvider7B, + Starcoder2ModelProvider15B, + StarcoderModelProvider, + StarcoderModelProvider15B, +) from megatron.bridge.models.t5_provider import T5ModelProvider @@ -130,4 +138,10 @@ "Qwen3MoEModelProvider", "Qwen3MoEModelProvider30B_A3B", "Qwen3MoEModelProvider235B_A22B", + "Starcoder2ModelProvider", + "Starcoder2ModelProvider3B", + "Starcoder2ModelProvider7B", + "Starcoder2ModelProvider15B", + "StarcoderModelProvider", + "StarcoderModelProvider15B", ] diff --git a/src/megatron/bridge/models/starcoder/__init__.py b/src/megatron/bridge/models/starcoder/__init__.py new file mode 100644 index 0000000000..145f753be6 --- /dev/null +++ b/src/megatron/bridge/models/starcoder/__init__.py @@ -0,0 +1,34 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from megatron.bridge.models.starcoder.starcoder2_provider import ( + Starcoder2ModelProvider, + Starcoder2ModelProvider3B, + Starcoder2ModelProvider7B, + Starcoder2ModelProvider15B, +) +from megatron.bridge.models.starcoder.starcoder_provider import ( + StarcoderModelProvider, + StarcoderModelProvider15B, +) + + +__all__ = [ + "StarcoderModelProvider", + "StarcoderModelProvider15B", + "Starcoder2ModelProvider", + "Starcoder2ModelProvider3B", + "Starcoder2ModelProvider7B", + "Starcoder2ModelProvider15B", +] diff --git a/src/megatron/bridge/models/starcoder/starcoder2_provider.py b/src/megatron/bridge/models/starcoder/starcoder2_provider.py new file mode 100644 index 0000000000..d04df26623 --- /dev/null +++ b/src/megatron/bridge/models/starcoder/starcoder2_provider.py @@ -0,0 +1,91 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from dataclasses import dataclass +from typing import Callable, List, Optional + +import torch.nn.functional as F + +from megatron.bridge.models.gpt_provider import GPTModelProvider + + +@dataclass +class Starcoder2ModelProvider(GPTModelProvider): + """ + Model Provider class for Starcoder2, inheriting from GPTModelProvider. 
+ """ + + # configs that are common across model sizes + normalization: str = "LayerNorm" + activation_func: Callable = F.gelu + add_bias_linear: bool = True + seq_length: int = 16384 + position_embedding_type: str = "rope" + rotary_percent: float = 1.0 + hidden_dropout: float = 0.0 + attention_dropout: float = 0.0 + init_method_std: float = 0.01 + share_embeddings_and_output_weights: bool = False + kv_channels: int = None + num_query_groups: int = None + window_size: Optional[List[int]] = None + attention_softmax_in_fp32: bool = True + bias_activation_fusion: bool = True + bias_dropout_fusion: bool = True + layernorm_epsilon: float = 1e-5 + + +@dataclass +class Starcoder2ModelProvider3B(Starcoder2ModelProvider): + """ + Model Provider for the Starcoder2 3B, inheriting from Starcoder2ModelProvider. + """ + + num_layers: int = 30 + hidden_size: int = 3072 + ffn_hidden_size: int = 12288 + num_query_groups: int = 2 + num_attention_heads: int = 24 + init_method_std: float = 0.018042 + rotary_base: float = 999999.4420358813 + + +@dataclass +class Starcoder2ModelProvider7B(Starcoder2ModelProvider): + """ + Model Provider for the Starcoder2 7B, inheriting from Starcoder2ModelProvider. + """ + + num_layers: int = 32 + hidden_size: int = 4608 + ffn_hidden_size: int = 18432 + num_query_groups: int = 4 + num_attention_heads: int = 36 + init_method_std: float = 0.018042 + rotary_base: float = 1_000_000 + + +@dataclass +class Starcoder2ModelProvider15B(Starcoder2ModelProvider): + """ + Model Provider for the Starcoder2 15B, inheriting from Starcoder2ModelProvider. 
+ """ + + num_layers: int = 40 + hidden_size: int = 6144 + ffn_hidden_size: int = 24576 + num_query_groups: int = 4 + num_attention_heads: int = 48 + init_method_std: float = 0.01275 + rotary_base: float = 100_000 diff --git a/src/megatron/bridge/models/starcoder/starcoder_provider.py b/src/megatron/bridge/models/starcoder/starcoder_provider.py new file mode 100644 index 0000000000..064c56d282 --- /dev/null +++ b/src/megatron/bridge/models/starcoder/starcoder_provider.py @@ -0,0 +1,57 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from dataclasses import dataclass +from typing import Callable + +import torch.nn.functional as F + +from megatron.bridge.models.gpt_provider import GPTModelProvider + + +@dataclass +class StarcoderModelProvider(GPTModelProvider): + """ + Model Provider class for Starcoder, inheriting from GPTModelProvider. 
+ """ + + # configs that are common across model sizes + normalization: str = "LayerNorm" + activation_func: Callable = F.gelu + add_bias_linear: bool = True + seq_length: int = 8192 + position_embedding_type: str = "learned_absolute" + hidden_dropout: float = 0.2 + attention_dropout: float = 0.2 + init_method_std: float = 0.01 + layernorm_epsilon: float = 1e-5 + share_embeddings_and_output_weights: bool = False + kv_channels: int = None + num_query_groups: int = 1 + attention_softmax_in_fp32: bool = True + bias_activation_fusion: bool = True + bias_dropout_fusion: bool = True + + +@dataclass +class StarcoderModelProvider15B(StarcoderModelProvider): + """ + Model Provider for the Starcoder 15B, inheriting from StarcoderModelProvider. + """ + + num_layers: int = 40 + hidden_size: int = 6144 + ffn_hidden_size: int = 24576 + num_attention_heads: int = 48 + init_method_std: float = 0.02 diff --git a/tests/unit_tests/models/starcoder/__init__.py b/tests/unit_tests/models/starcoder/__init__.py new file mode 100644 index 0000000000..341a77c5bc --- /dev/null +++ b/tests/unit_tests/models/starcoder/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/tests/unit_tests/models/starcoder/test_starcoder2_provider.py b/tests/unit_tests/models/starcoder/test_starcoder2_provider.py new file mode 100644 index 0000000000..1817f34198 --- /dev/null +++ b/tests/unit_tests/models/starcoder/test_starcoder2_provider.py @@ -0,0 +1,192 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch.nn.functional as F + +from megatron.bridge.models.starcoder.starcoder2_provider import ( + Starcoder2ModelProvider, + Starcoder2ModelProvider3B, + Starcoder2ModelProvider7B, + Starcoder2ModelProvider15B, +) + + +class TestStarcoder2ModelProvider: + """Test cases for Starcoder2ModelProvider class.""" + + def test_starcoder2_model_provider_defaults(self): + """Test Starcoder2ModelProvider has correct default values.""" + provider = Starcoder2ModelProvider( + num_layers=12, + hidden_size=768, + num_attention_heads=12, + ) + + # Check required transformer config fields + assert provider.num_layers == 12 + assert provider.hidden_size == 768 + assert provider.num_attention_heads == 12 + + # Check Starcoder2-specific defaults + transformer config post init + assert provider.normalization == "LayerNorm" + assert provider.activation_func == F.gelu + assert provider.add_bias_linear is True + assert provider.seq_length == 16384 + assert provider.position_embedding_type == "rope" + assert provider.rotary_percent == 1.0 + assert provider.hidden_dropout == 0.0 + assert 
provider.attention_dropout == 0.0 + assert provider.init_method_std == 0.01 + assert provider.share_embeddings_and_output_weights is False + assert provider.kv_channels == 64 + assert provider.num_query_groups == 12 + assert provider.window_size is None + assert provider.attention_softmax_in_fp32 is True + assert provider.bias_activation_fusion is True + assert provider.bias_dropout_fusion is True + assert provider.layernorm_epsilon == 1e-5 + + +class TestStarcoder2ModelProvider3B: + """Test cases for Starcoder2ModelProvider3B class.""" + + def test_starcoder2_3b_defaults(self): + """Test Starcoder2ModelProvider3B has correct default values for 3B model.""" + provider = Starcoder2ModelProvider3B() + + # Check 3B-specific configuration + assert provider.num_layers == 30 + assert provider.hidden_size == 3072 + assert provider.ffn_hidden_size == 12288 + assert provider.num_query_groups == 2 + assert provider.num_attention_heads == 24 + assert provider.init_method_std == 0.018042 + assert provider.rotary_base == 999999.4420358813 + + # Check inherited Starcoder2 defaults + assert provider.normalization == "LayerNorm" + assert provider.activation_func == F.gelu + assert provider.add_bias_linear is True + assert provider.seq_length == 16384 + assert provider.position_embedding_type == "rope" + assert provider.rotary_percent == 1.0 + assert provider.hidden_dropout == 0.0 + assert provider.attention_dropout == 0.0 + assert provider.share_embeddings_and_output_weights is False + assert provider.window_size is None + assert provider.attention_softmax_in_fp32 is True + assert provider.bias_activation_fusion is True + assert provider.bias_dropout_fusion is True + assert provider.layernorm_epsilon == 1e-5 + + +class TestStarcoder2ModelProvider7B: + """Test cases for Starcoder2ModelProvider7B class.""" + + def test_starcoder2_7b_defaults(self): + """Test Starcoder2ModelProvider7B has correct default values for 7B model.""" + provider = Starcoder2ModelProvider7B() + + # Check 
7B-specific configuration + assert provider.num_layers == 32 + assert provider.hidden_size == 4608 + assert provider.ffn_hidden_size == 18432 + assert provider.num_query_groups == 4 + assert provider.num_attention_heads == 36 + assert provider.init_method_std == 0.018042 + assert provider.rotary_base == 1_000_000 + + # Check inherited Starcoder2 defaults + assert provider.normalization == "LayerNorm" + assert provider.activation_func == F.gelu + assert provider.add_bias_linear is True + assert provider.seq_length == 16384 + assert provider.position_embedding_type == "rope" + assert provider.rotary_percent == 1.0 + assert provider.hidden_dropout == 0.0 + assert provider.attention_dropout == 0.0 + assert provider.share_embeddings_and_output_weights is False + assert provider.kv_channels == 128 + assert provider.window_size is None + assert provider.attention_softmax_in_fp32 is True + assert provider.bias_activation_fusion is True + assert provider.bias_dropout_fusion is True + assert provider.layernorm_epsilon == 1e-5 + + +class TestStarcoder2ModelProvider15B: + """Test cases for Starcoder2ModelProvider15B class.""" + + def test_starcoder2_15b_defaults(self): + """Test Starcoder2ModelProvider15B has correct default values for 15B model.""" + provider = Starcoder2ModelProvider15B() + + # Check 15B-specific configuration + assert provider.num_layers == 40 + assert provider.hidden_size == 6144 + assert provider.ffn_hidden_size == 24576 + assert provider.num_query_groups == 4 + assert provider.num_attention_heads == 48 + assert provider.init_method_std == 0.01275 + assert provider.rotary_base == 100_000 + + # Check inherited Starcoder2 defaults + assert provider.normalization == "LayerNorm" + assert provider.activation_func == F.gelu + assert provider.add_bias_linear is True + assert provider.seq_length == 16384 + assert provider.position_embedding_type == "rope" + assert provider.rotary_percent == 1.0 + assert provider.hidden_dropout == 0.0 + assert 
provider.attention_dropout == 0.0 + assert provider.share_embeddings_and_output_weights is False + assert provider.kv_channels == 128 + assert provider.window_size is None + assert provider.attention_softmax_in_fp32 is True + assert provider.bias_activation_fusion is True + assert provider.bias_dropout_fusion is True + assert provider.layernorm_epsilon == 1e-5 + + +class TestStarcoder2ProviderInheritance: + """Test inheritance relationships between Starcoder2 providers.""" + + def test_starcoder2_models_inherit_from_base(self): + """Test Starcoder2 providers inherit from Starcoder2ModelProvider.""" + assert issubclass(Starcoder2ModelProvider3B, Starcoder2ModelProvider) + assert issubclass(Starcoder2ModelProvider7B, Starcoder2ModelProvider) + assert issubclass(Starcoder2ModelProvider15B, Starcoder2ModelProvider) + + def test_starcoder2_models_inherit_from_gpt(self): + """Test Starcoder2 providers inherit from GPTModelProvider.""" + from megatron.bridge.models.gpt_provider import GPTModelProvider + + assert issubclass(Starcoder2ModelProvider, GPTModelProvider) + assert issubclass(Starcoder2ModelProvider3B, GPTModelProvider) + assert issubclass(Starcoder2ModelProvider7B, GPTModelProvider) + assert issubclass(Starcoder2ModelProvider15B, GPTModelProvider) + + def test_provide_method_inherited(self): + """Test that provide method works correctly in inherited classes.""" + # Test with all Starcoder2 providers + providers = [ + Starcoder2ModelProvider3B(), + Starcoder2ModelProvider7B(), + Starcoder2ModelProvider15B(), + ] + + for provider in providers: + # The provide method should be inherited from GPTModelProvider + assert hasattr(provider, "provide") + assert callable(provider.provide) diff --git a/tests/unit_tests/models/starcoder/test_starcoder_provider.py b/tests/unit_tests/models/starcoder/test_starcoder_provider.py new file mode 100644 index 0000000000..ec30ef33b8 --- /dev/null +++ b/tests/unit_tests/models/starcoder/test_starcoder_provider.py @@ -0,0 +1,103 @@ +# 
Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch.nn.functional as F + +from megatron.bridge.models.starcoder.starcoder_provider import ( + StarcoderModelProvider, + StarcoderModelProvider15B, +) + + +class TestStarcoderModelProvider: + """Test cases for StarcoderModelProvider class.""" + + def test_starcoder_model_provider_defaults(self): + """Test StarcoderModelProvider has correct default values.""" + provider = StarcoderModelProvider( + num_layers=12, + hidden_size=768, + num_attention_heads=12, + ) + + # Check required transformer config fields + assert provider.num_layers == 12 + assert provider.hidden_size == 768 + assert provider.num_attention_heads == 12 + + # Check Starcoder-specific defaults + assert provider.normalization == "LayerNorm" + assert provider.activation_func == F.gelu + assert provider.add_bias_linear is True + assert provider.seq_length == 8192 + assert provider.position_embedding_type == "learned_absolute" + assert provider.hidden_dropout == 0.2 + assert provider.attention_dropout == 0.2 + assert provider.init_method_std == 0.01 + assert provider.layernorm_epsilon == 1e-5 + assert provider.share_embeddings_and_output_weights is False + assert provider.kv_channels == 64 + assert provider.num_query_groups == 1 + assert provider.attention_softmax_in_fp32 is True + assert provider.bias_activation_fusion is True + assert provider.bias_dropout_fusion is True + + +class 
TestStarcoderModelProvider15B: + """Test cases for StarcoderModelProvider15B class.""" + + def test_starcoder_model_provider_15b_defaults(self): + """Test StarcoderModelProvider15B has correct default values for 15B model.""" + provider = StarcoderModelProvider15B() + + # Check 15B-specific configuration + assert provider.num_layers == 40 + assert provider.hidden_size == 6144 + assert provider.ffn_hidden_size == 24576 + assert provider.num_attention_heads == 48 + assert provider.init_method_std == 0.02 + + # Check inherited Starcoder defaults + assert provider.normalization == "LayerNorm" + assert provider.activation_func == F.gelu + assert provider.add_bias_linear is True + assert provider.seq_length == 8192 + assert provider.position_embedding_type == "learned_absolute" + assert provider.hidden_dropout == 0.2 + assert provider.attention_dropout == 0.2 + assert provider.layernorm_epsilon == 1e-5 + assert provider.share_embeddings_and_output_weights is False + assert provider.kv_channels == 128 + assert provider.num_query_groups == 1 + assert provider.attention_softmax_in_fp32 is True + assert provider.bias_activation_fusion is True + assert provider.bias_dropout_fusion is True + + +class TestStarcoderProviderInheritance: + """Test inheritance relationships between Starcoder providers.""" + + def test_starcoder_models_inherit_from_gpt(self): + """Test Starcoder providers inherit from GPTModelProvider.""" + from megatron.bridge.models.gpt_provider import GPTModelProvider + + assert issubclass(StarcoderModelProvider, GPTModelProvider) + assert issubclass(StarcoderModelProvider15B, StarcoderModelProvider) + assert issubclass(StarcoderModelProvider15B, GPTModelProvider) + + def test_provide_method_inherited(self): + """Test that provide method works correctly in inherited classes.""" + provider = StarcoderModelProvider15B() + assert hasattr(provider, "provide") + assert callable(provider.provide)