Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions src/megatron/bridge/models/nemotronh/nemotron_h_bridge.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,15 @@ def provider_bridge(self, hf_pretrained: PreTrainedCausalLM) -> MambaModelProvid

return provider

@classmethod
def get_hf_tokenizer_kwargs(cls) -> dict:
    """Provide the HuggingFace tokenizer keyword arguments for Nemotron-H.

    Nemotron-H models ship only a fast tokenizer (tokenizer.json), which
    is why the fast implementation has to be requested explicitly.

    Returns:
        A freshly built dict holding ``use_fast=True``.
    """
    # A new dict is built on every call, so callers may mutate the result.
    tokenizer_kwargs = {"use_fast": True}
    return tokenizer_kwargs

def mapping_registry(self) -> MegatronMappingRegistry:
# Return MegatronMappingRegistry containing parameter mappings from Megatron to HF format
# First create simple 1:1 parameter mappings using a dictionary for readability
Expand Down
14 changes: 14 additions & 0 deletions tests/unit_tests/models/nemotronh/test_nemotron_h_bridge.py
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,20 @@ def test_mapping_registry_moe_hf_params(self):
)


class TestNemotronHBridgeTokenizerKwargs:
    """Checks for NemotronHBridge.get_hf_tokenizer_kwargs."""

    def test_tokenizer_kwargs_returns_dict(self):
        """The classmethod hands back a dict instance."""
        result = NemotronHBridge.get_hf_tokenizer_kwargs()
        assert isinstance(result, dict)

    def test_tokenizer_kwargs_use_fast(self):
        """The returned kwargs carry use_fast set to exactly True."""
        tokenizer_kwargs = NemotronHBridge.get_hf_tokenizer_kwargs()
        use_fast = tokenizer_kwargs.get("use_fast")
        # Identity check: a merely truthy value (e.g. 1) must not pass.
        assert use_fast is True


class TestAutoBridgeIntegration:
"""Integration tests for AutoBridge with NemotronH models."""

Expand Down
Loading