
Commit 25b4aed

Merge branch 'feature/compress' into dkorzekwa/compress_tutorial
Signed-off-by: Daniel Korzekwa <[email protected]>
2 parents: 6e1d910 + 1c12fd8

File tree: 7 files changed, +127 −103 lines

examples/compress/configs/llama-3_1-8B_pruneffn_memory/pruning/ffn_pruning.yaml

Lines changed: 1 addition & 1 deletion
@@ -8,5 +8,5 @@ activation_hooks_kwargs:
   target_layer: "mlp.down_proj"
   layer_input_descriptors_path:

-intermediate_size_list: [256] # teacher_intermediate_size is 14336
+intermediate_size_list: [3072, 5888, 8704, 11520] # teacher_intermediate_size is 14336
 mlp_init_mode: "PruneByActivationsLog"

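The new example list is evenly spaced (steps of 2816) and every entry is a multiple of 256; the values coincide with taking the fractions 1/5 through 4/5 of the teacher width 14336 and rounding each up to the nearest multiple of 256. A minimal sketch that reproduces the list under that reading; the helper `candidate_ffn_widths` and the rounding rule are assumptions for illustration, not part of this commit:

```python
import math

def candidate_ffn_widths(teacher_size: int, num_candidates: int = 4, multiple: int = 256) -> list[int]:
    """Hypothetical helper: evenly spaced pruned FFN widths, rounded up to a multiple of 256."""
    step = teacher_size / (num_candidates + 1)
    return [math.ceil(step * (i + 1) / multiple) * multiple for i in range(num_candidates)]

# Reproduces the values hard-coded in this config.
assert candidate_ffn_widths(14336) == [3072, 5888, 8704, 11520]
```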
modelopt/torch/_compress/nas/plugins/compress_nas_plugin.py

Lines changed: 8 additions & 1 deletion
@@ -96,6 +96,9 @@ def convert_compress_model(model: nn.Module, config: CompressConfig) -> ConvertR

     The output of this step will be used by mnt.search() to perform the NAS search.
     """
+
+    # NativeDdpRuntime must be initialized/closed from outside of this function, so we are
+    # NOT calling runtime.cleanup() here. TODO: Not optimal - redesign it.
     runtime = NativeDdpRuntime(
         dtype=torch.bfloat16, torch_distributed_timeout=datetime.timedelta(10)
     )

@@ -199,6 +202,8 @@ def default_state_dict(self) -> SearchStateDict:
         return {}

     def run_search(self) -> None:
+        # NativeDdpRuntime must be initialized/closed from outside of this function, so we are
+        # NOT calling runtime.cleanup() here. TODO: Not optimal - redesign it.
         runtime = NativeDdpRuntime(
             dtype=torch.bfloat16, torch_distributed_timeout=datetime.timedelta(10)
         )

@@ -220,10 +225,12 @@ def run_search(self) -> None:
                 "Compress Progress 5/8: building replacement library and subblock statistics (single-gpu)"
             )
         )
-        build_library_and_stats.launch_build_library_and_stats(hydra_cfg)
+
+        build_library_and_stats.launch_build_library_and_stats(hydra_cfg)
         runtime.wait_for_everyone()

         # Calc_one_block_scores (distributed processing)
+
         print(timestamped("Compress Progress 6/8: calculating one block scores (multi-gpu)"))
         scoring.launch_scoring(hydra_cfg, runtime)

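The added comments make the lifecycle contract explicit: the distributed process group outlives any single pipeline step, so neither `convert_compress_model` nor `run_search` may call `runtime.cleanup()`; teardown belongs to whoever created the runtime (the tests below do this with a `with NativeDdpRuntime(...)` block). A minimal sketch of the caller-owned pattern the TODO points toward, using a stand-in `Runtime` class whose body is an assumption for illustration, not ModelOpt's implementation:

```python
class Runtime:
    """Stand-in for NativeDdpRuntime; the real class wraps torch.distributed setup."""

    def __enter__(self):
        print("init process group")  # done once, by the owner
        return self

    def __exit__(self, *exc):
        print("destroy process group")  # cleanup happens here, and only here


def convert_step(runtime: Runtime) -> None:
    # Uses the shared runtime; deliberately does NOT tear it down.
    print("nas convert step")


def search_step(runtime: Runtime) -> None:
    print("nas search step")


with Runtime() as rt:   # the caller owns init/cleanup
    convert_step(rt)    # both steps reuse the same process group
    search_step(rt)
```

Note that the current code still constructs a `NativeDdpRuntime` handle inside each function; the sketch shows the direction the "redesign it" TODO suggests, namely passing the caller-owned runtime in.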
tests/experimental/torch/_compress/compress_test_utils.py

Lines changed: 59 additions & 0 deletions
@@ -19,9 +19,68 @@

 import torch
 from datasets import Dataset, DatasetDict
+from puzzle_tools.hydra_utils import register_hydra_resolvers
 from transformers import AutoTokenizer, LlamaConfig, LlamaForCausalLM, PreTrainedTokenizerBase


+def setup_test_model_and_data(
+    project_root_path: Path,
+    tmp_path: Path,
+    rank: int,
+    runtime,
+) -> tuple[
+    Path,
+    Path,
+    Path,
+    Path,
+    str,
+]:
+    """
+    Setup the test model and data for the compress NAS search.
+
+    Args:
+        project_root_path (Path): the root path of the project
+        tmp_path (Path): the temporary path to use for the test
+        rank (int): the rank of the process
+        runtime: the runtime to use for the test
+
+    Returns:
+        tuple[Path, Path, Path, Path, str]:
+            the puzzle_dir, llama_checkpoint_path, dataset_path, hydra_config_dir, hydra_config_name
+    """
+
+    # Register Hydra custom resolvers (needed for config resolution)
+    register_hydra_resolvers()
+
+    # The inputs for the nas.convert() step.
+    #
+    puzzle_dir = tmp_path
+    llama_checkpoint_path = puzzle_dir / "input_model/llama"
+    dataset_path = puzzle_dir / "dummy_dataset"
+    hydra_config_dir = project_root_path / "tests/experimental/torch/_compress/resources/configs"
+    hydra_config_name = "Llama-3_1-8B"
+
+    if rank == 0:
+        # Setup puzzle_dir and dataset
+        setup_puzzle_dir(puzzle_dir)
+        save_dummy_dataset(dataset_path)
+
+        # Create a small Llama model
+        tokenizer = create_tokenizer(project_root_path)
+        create_and_save_small_llama_model(
+            llama_checkpoint_path, vocab_size=tokenizer.vocab_size, tokenizer=tokenizer
+        )
+    runtime.wait_for_everyone()
+
+    return (
+        puzzle_dir,
+        llama_checkpoint_path,
+        dataset_path,
+        hydra_config_dir,
+        hydra_config_name,
+    )
+
+
 def create_and_save_small_llama_model(
     output_path: str, vocab_size: int, tokenizer: PreTrainedTokenizerBase
 ):

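Inside the helper, only rank 0 writes shared state (the puzzle dir, the dummy dataset, the small checkpoint), and every rank then blocks on `runtime.wait_for_everyone()` before using it, so no rank can read a half-written fixture. A minimal sketch of the same single-writer idiom with plain `torch.distributed`, assuming `wait_for_everyone()` behaves like a barrier, which is an inference from its name:

```python
import os
import torch.distributed as dist

def setup_shared_fixture(rank: int, path: str) -> None:
    if rank == 0:
        # Single writer: only rank 0 touches the shared filesystem.
        os.makedirs(path, exist_ok=True)
        with open(os.path.join(path, "fixture.txt"), "w") as f:
            f.write("ready")
    # Every rank waits here until rank 0 has finished writing.
    dist.barrier()
    # From here on, all ranks can safely read the fixture.
```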
tests/experimental/torch/_compress/nas/plugins/test_nas_convert.py

Lines changed: 25 additions & 59 deletions
@@ -20,13 +20,7 @@

 import torch
 from _test_utils.torch.distributed.utils import spawn_multiprocess_job
-from experimental.torch._compress.compress_test_utils import (
-    create_and_save_small_llama_model,
-    create_tokenizer,
-    save_dummy_dataset,
-    setup_puzzle_dir,
-)
-from puzzle_tools.hydra_utils import register_hydra_resolvers
+from experimental.torch._compress.compress_test_utils import setup_test_model_and_data

 import modelopt.torch.nas as mtn
 from modelopt.torch._compress.nas.plugins.compress_nas_plugin import CompressModel

@@ -51,7 +45,30 @@ def _test_nas_convert_multiprocess_job(
     with NativeDdpRuntime(
         dtype=torch.bfloat16, torch_distributed_timeout=datetime.timedelta(10)
     ) as runtime:
-        converted_model, puzzle_dir = run_nas_convert(project_root_path, tmp_path, rank, runtime)
+        # Setup the test model and data.
+        puzzle_dir, llama_checkpoint_path, dataset_path, hydra_config_dir, hydra_config_name = (
+            setup_test_model_and_data(project_root_path, tmp_path, rank, runtime)
+        )
+
+        #
+        # Run the mnt.convert() step
+        #
+        input_model = CompressModel()
+        mtn.convert(
+            input_model,
+            mode=[
+                (
+                    "compress",
+                    {
+                        "puzzle_dir": str(puzzle_dir),
+                        "input_model_path": str(llama_checkpoint_path),
+                        "hydra_config_dir": str(hydra_config_dir),
+                        "hydra_config_name": hydra_config_name,
+                        "dataset_path": str(dataset_path),
+                    },
+                )
+            ],
+        )

         #
         # Check assertions

@@ -70,54 +87,3 @@ def _test_nas_convert_multiprocess_job(
         runtime.wait_for_everyone()

     print("PYTEST SUMMARY: test_nas_convert() test has finished successfully")
-
-
-def run_nas_convert(
-    project_root_path: Path,
-    tmp_path: Path,
-    rank: int,
-    runtime,
-):
-    # Register Hydra custom resolvers (needed for config resolution)
-    register_hydra_resolvers()
-
-    # The inputs for the nas.convert() step.
-    #
-    puzzle_dir = tmp_path
-    llama_checkpoint_path = puzzle_dir / "input_model/llama"
-    dataset_path = puzzle_dir / "dummy_dataset"
-    hydra_config_dir = project_root_path / "tests/experimental/torch/_compress/resources/configs"
-    hydra_config_name = "Llama-3_1-8B"
-
-    if rank == 0:
-        # Setup puzzle_dir and dataset
-        setup_puzzle_dir(puzzle_dir)
-        save_dummy_dataset(dataset_path)
-
-        # Create a small Llama model
-        tokenizer = create_tokenizer(project_root_path)
-        create_and_save_small_llama_model(
-            llama_checkpoint_path, vocab_size=tokenizer.vocab_size, tokenizer=tokenizer
-        )
-    runtime.wait_for_everyone()
-
-    # Run the mnt.convert() step
-    #
-    input_model = CompressModel()
-    converted_model = mtn.convert(
-        input_model,
-        mode=[
-            (
-                "compress",
-                {
-                    "puzzle_dir": str(puzzle_dir),
-                    "input_model_path": str(llama_checkpoint_path),
-                    "hydra_config_dir": str(hydra_config_dir),
-                    "hydra_config_name": hydra_config_name,
-                    "dataset_path": str(dataset_path),
-                },
-            )
-        ],
-    )
-
-    return converted_model, puzzle_dir

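Both refactored tests now spell out the same `("compress", {...})` mode entry from the helper's five return values, with the `Path` objects cast to `str` for the mode config. One obvious follow-up, not part of this commit and with a hypothetical helper name, would be to share that construction too:

```python
from pathlib import Path

def compress_mode_entry(
    puzzle_dir: Path,
    llama_checkpoint_path: Path,
    dataset_path: Path,
    hydra_config_dir: Path,
    hydra_config_name: str,
) -> tuple[str, dict]:
    # Hypothetical helper: cast Paths to str, matching what the tests pass to mtn.convert(mode=[...]).
    return (
        "compress",
        {
            "puzzle_dir": str(puzzle_dir),
            "input_model_path": str(llama_checkpoint_path),
            "hydra_config_dir": str(hydra_config_dir),
            "hydra_config_name": hydra_config_name,
            "dataset_path": str(dataset_path),
        },
    )

# Usage sketch: mtn.convert(CompressModel(), mode=[compress_mode_entry(*setup_test_model_and_data(...))])
```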
tests/experimental/torch/_compress/nas/plugins/test_nas_search.py

Lines changed: 26 additions & 2 deletions
@@ -23,9 +23,10 @@

 import torch
 from _test_utils.torch.distributed.utils import spawn_multiprocess_job
-from experimental.torch._compress.nas.plugins.test_nas_convert import run_nas_convert
+from experimental.torch._compress.compress_test_utils import setup_test_model_and_data

 import modelopt.torch.nas as mtn
+from modelopt.torch._compress.nas.plugins.compress_nas_plugin import CompressModel
 from modelopt.torch._compress.runtime import NativeDdpRuntime


@@ -43,7 +44,30 @@ def _test_nas_search_multiprocess_job(
     with NativeDdpRuntime(
         dtype=torch.bfloat16, torch_distributed_timeout=datetime.timedelta(10)
     ) as runtime:
-        converted_model, puzzle_dir = run_nas_convert(project_root_path, tmp_path, rank, runtime)
+        # Setup the test model and data.
+        puzzle_dir, llama_checkpoint_path, dataset_path, hydra_config_dir, hydra_config_name = (
+            setup_test_model_and_data(project_root_path, tmp_path, rank, runtime)
+        )
+
+        #
+        # Run the mnt.convert() step
+        #
+        input_model = CompressModel()
+        converted_model = mtn.convert(
+            input_model,
+            mode=[
+                (
+                    "compress",
+                    {
+                        "puzzle_dir": str(puzzle_dir),
+                        "input_model_path": str(llama_checkpoint_path),
+                        "hydra_config_dir": str(hydra_config_dir),
+                        "hydra_config_name": hydra_config_name,
+                        "dataset_path": str(dataset_path),
+                    },
+                )
+            ],
+        )

         #
         # Run the mnt.search() step

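Design note: this removes the test-to-test dependency (test_nas_search previously imported `run_nas_convert` from the test_nas_convert module), so both tests now rely only on the shared `compress_test_utils` helper. Note the asymmetry: the search test keeps `converted_model` because the following `mnt.search()` step operates on it, while the convert test discards the return value.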
tests/experimental/torch/_compress/resources/configs/pruning/ffn_pruning.yaml

Lines changed: 1 addition & 1 deletion
@@ -8,5 +8,5 @@ activation_hooks_kwargs:
   target_layer: "mlp.down_proj"
   layer_input_descriptors_path:

-intermediate_size_list: [3072, 5888, 8704, 11520] # teacher_intermediate_size is 14336
+intermediate_size_list: [256] # teacher_intermediate_size is 14336
 mlp_init_mode: "PruneByActivationsLog"

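This is the mirror image of the first file in the commit: the realistic four-width list moves into the tutorial example config, while the unit-test config drops to a single tiny width of 256, presumably so the multi-GPU test builds only one small pruned variant and stays fast.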
tests/experimental/torch/_compress/test_compress.py

Lines changed: 7 additions & 39 deletions
@@ -20,13 +20,7 @@

 import torch
 from _test_utils.torch.distributed.utils import spawn_multiprocess_job
-from experimental.torch._compress.compress_test_utils import (
-    create_and_save_small_llama_model,
-    create_tokenizer,
-    save_dummy_dataset,
-    setup_puzzle_dir,
-)
-from puzzle_tools.hydra_utils import register_hydra_resolvers
+from experimental.torch._compress.compress_test_utils import setup_test_model_and_data

 from modelopt.torch._compress import compress
 from modelopt.torch._compress.decilm.converters.convert_llama3_to_decilm import (

@@ -63,42 +57,16 @@ def test_compress(project_root_path: Path, tmp_path: Path):


 def _test_compress_multiprocess_job(project_root_path: Path, tmp_path: Path, rank: int, size: int):
-    register_hydra_resolvers()
-
-    #
-    # The inputs for the compress() algorihm.
-    #
-    puzzle_dir = tmp_path
-    dataset_path = puzzle_dir / "dummy_dataset"
-    hydra_config_dir = project_root_path / "tests/experimental/torch/_compress/resources/configs"
-    hydra_config_name = "Llama-3_1-8B"
-
     with NativeDdpRuntime(
         dtype=torch.bfloat16, torch_distributed_timeout=datetime.timedelta(10)
     ) as runtime:
-        #
-        # Test setup
-        #
-        if rank == 0:
-            # Setup puzzle_dir and dataset
-            setup_puzzle_dir(puzzle_dir)
-            save_dummy_dataset(dataset_path)
-
-            #
-            # Step 1: Create and save a teacher model to compress
-            # This mimics the normal pipeline where we start with a Llama model
-            #
-
-            # Create a small Llama model (not DeciLM) to match the normal conversion pipeline
-            tokenizer = create_tokenizer(project_root_path)
-            # TODO: change it to "ckpts/llama" once the conversion script is fixed
-            # Currently, the build replacement library step will fail with such a path.
-            llama_checkpoint_path = puzzle_dir / "input_model/llama"
-            create_and_save_small_llama_model(
-                llama_checkpoint_path, vocab_size=tokenizer.vocab_size, tokenizer=tokenizer
-            )
+        # Setup the test model and data.
+        puzzle_dir, llama_checkpoint_path, dataset_path, hydra_config_dir, hydra_config_name = (
+            setup_test_model_and_data(project_root_path, tmp_path, rank, runtime)
+        )

-        # Use the full conversion pipeline (matches normal usage)
+        # Convert the Llama model to DeciLM model.
+        if rank == 0:
             convert_llama3_to_decilm(
                 input_dir=llama_checkpoint_path,
                 output_dir=puzzle_dir / "ckpts/teacher",

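Design note: `convert_llama3_to_decilm` now runs under `if rank == 0:`, matching the single-writer pattern of `setup_test_model_and_data`, so the teacher checkpoint under `ckpts/teacher` is converted and written exactly once rather than by every rank; presumably a later `runtime.wait_for_everyone()` outside this hunk lets the other ranks proceed once the checkpoint exists.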