Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
c4dcc44
inference: remove direct nemo imports, add InferenceWrapperConfig for…
oyilmaz-nvidia Mar 3, 2026
4d2454c
fix: apply ruff linting fixes and complete nemo import removal
oyilmaz-nvidia Mar 3, 2026
ce646ce
inference: fix InferenceWrapperConfig and add buffer_size_gb support
oyilmaz-nvidia Mar 3, 2026
a37a149
Merge branch 'remove-direct-nemo-imports-in-inference' into fix/ruff-…
oyilmaz-nvidia Mar 3, 2026
3b99d12
Update mbridge commit
oyilmaz-nvidia Mar 5, 2026
8488f19
Merge branch 'remove-direct-nemo-imports-in-inference' into fix/ruff-…
oyilmaz-nvidia Mar 5, 2026
21862a6
Fix megatron-bridge install
chtruong814 Mar 6, 2026
c5fdd40
Set cryptography to < 47
chtruong814 Mar 6, 2026
5f91d27
fix: lazy-import InferenceWrapperConfig to fix CI collection error
oyilmaz-nvidia Mar 6, 2026
cd1db0d
fix: lazy-import InferenceWrapperConfig to fix CI collection error
oyilmaz-nvidia Mar 6, 2026
32d0e06
Use static inference context
oyilmaz-nvidia Mar 7, 2026
3b60125
Fix for the test
oyilmaz-nvidia Mar 9, 2026
830fcec
Fix merge conflicts
oyilmaz-nvidia Mar 9, 2026
8ae2106
Fix merge conflicts
oyilmaz-nvidia Mar 9, 2026
32f1726
Merge branch 'main' into fix/ruff-linting
oyilmaz-nvidia Mar 9, 2026
d8ac4f5
Remove HAVE_NEMO
oyilmaz-nvidia Mar 9, 2026
c398fee
Fix linting
oyilmaz-nvidia Mar 9, 2026
f63d2c3
Fix doc issues
oyilmaz-nvidia Mar 9, 2026
755f3d5
Fix test issue
oyilmaz-nvidia Mar 10, 2026
535bb85
Fix test error
oyilmaz-nvidia Mar 10, 2026
d7598b7
Add more tests
oyilmaz-nvidia Mar 10, 2026
3e05b5b
Fix linting issues
oyilmaz-nvidia Mar 10, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 0 additions & 11 deletions nemo_deploy/llm/inference/inference_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,6 @@
from megatron.core.transformer.module import MegatronModule
from packaging import version

from nemo_export_deploy_common.import_utils import MISSING_NEMO_MSG, UnavailableError

from .tron_utils import (
DistributedInitConfig,
RNGConfig,
Expand All @@ -63,7 +61,6 @@
HAVE_TRITON = False

from .nemo_utils import (
HAVE_NEMO,
MCoreTokenizerWrappper,
ckpt_to_context_subdir,
ckpt_to_weights_subdir,
Expand Down Expand Up @@ -186,8 +183,6 @@ def load_nemo_checkpoint_to_tron_model(model: List[MegatronModule], path: Path,
path (Path): Path to NeMo checkpoint directory
legacy_ckpt (bool): Whether to use legacy checkpoint format
"""
if not HAVE_NEMO:
raise UnavailableError(MISSING_NEMO_MSG)
weights_dir = ckpt_to_weights_subdir(path, is_saving=False)
LOGGER.info(f"Loading NeMo checkpoint from {weights_dir}")

Expand Down Expand Up @@ -309,9 +304,6 @@ def setup_model_and_tokenizer_for_inference(
Raises:
ValueError: If checkpoint_path is not a valid NeMo-2.0 checkpoint
"""
if not HAVE_NEMO:
raise UnavailableError(MISSING_NEMO_MSG)

checkpoint_path = Path(checkpoint_path)

# Load model context for config and tokenizer
Expand Down Expand Up @@ -478,9 +470,6 @@ def create_mcore_engine(
- GPTInferenceWrapper: Inference-wrapped model
- Union[MCoreTokenizerWrappper, MegatronTokenizer]: Tokenizer instance
"""
if not HAVE_NEMO and model_format == "nemo":
raise UnavailableError(MISSING_NEMO_MSG)

# Default to 1 for any parallelism dimension that's None
tensor_model_parallel_size = tensor_model_parallel_size if tensor_model_parallel_size is not None else 1
pipeline_model_parallel_size = pipeline_model_parallel_size if pipeline_model_parallel_size is not None else 1
Expand Down
Loading