modelopt nas search() implementation for the compress algorithm (#490)

danielkorzekwa · kevalmorabia97 · web-flow · commit 1c12fd8008d6 · 2025-11-03T14:37:40.000+01:00
Signed-off-by: Daniel Korzekwa <dkorzekwa@nvidia.com>
Signed-off-by: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com>
Co-authored-by: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com>
diff --git a/modelopt/torch/_compress/nas/plugins/compress_nas_plugin.py b/modelopt/torch/_compress/nas/plugins/compress_nas_plugin.py
@@ -20,12 +20,17 @@
 import datetime
 from pathlib import Path
 
+import build_library_and_stats
+import mip_and_realize_models
 import pruning_ckpts
 import score_pruning_activations
+import scoring
 import torch
-from scripts.convert_llama3_to_decilm import convert_llama3_to_decilm
 from torch import nn
 
+from modelopt.torch._compress.decilm.converters.convert_llama3_to_decilm import (
+    convert_llama3_to_decilm,
+)
 from modelopt.torch._compress.hydra import initialize_hydra_config_for_dir
 from modelopt.torch._compress.runtime import NativeDdpRuntime
 from modelopt.torch.nas.conversion import NASModeRegistry
@@ -37,7 +42,7 @@
     ModeDescriptor,
     RestoreEntrypoint,
 )
-from modelopt.torch.opt.searcher import BaseSearcher
+from modelopt.torch.opt.searcher import BaseSearcher, SearchStateDict
 
 
 class CompressModel(nn.Module):
@@ -90,10 +95,19 @@ def convert_compress_model(model: nn.Module, config: CompressConfig) -> ConvertR
 
     The output of this step will be used by mnt.search() to perform the NAS search.
     """
+
+    # NativeDdpRuntime must be initialized/closed from outside of this function, so we are
+    # NOT calling runtime.cleanup() here. TODO: Not optimal - redesign it.
     runtime = NativeDdpRuntime(
         dtype=torch.bfloat16, torch_distributed_timeout=datetime.timedelta(10)
     )
 
+    # Required for mtn.search() to read NAS configuration
+    model.hydra_config_dir = config.hydra_config_dir
+    model.hydra_config_name = config.hydra_config_name
+    model.puzzle_dir = config.puzzle_dir
+    model.dataset_path = config.dataset_path
+
     # Load hydra config
     hydra_cfg = initialize_hydra_config_for_dir(
         config_dir=config.hydra_config_dir,
@@ -146,7 +160,8 @@ def config_class(self) -> type[ModeloptBaseConfig]:
     @property
     def search_algorithm(self) -> type[BaseSearcher]:
         """Return the associated searcher implementation."""
-        raise NotImplementedError("Compress mode does not have a search algorithm yet.")
+
+        return CompressSearcher
 
     @property
     def convert(self) -> ConvertEntrypoint:
@@ -165,3 +180,40 @@ def export_mode(self) -> str | None:
         for the compress algorithm.
         """
         return "export_nas"
+
+
+class CompressSearcher(BaseSearcher):
+    """Runs NAS search for the Compress mode."""
+
+    @property
+    def default_state_dict(self) -> SearchStateDict:
+        """Not needed for the compress mode as we are not saving any model state"""
+        return {}
+
+    def run_search(self) -> None:
+        # NativeDdpRuntime must be initialized/closed from outside of this function, so we are
+        # NOT calling runtime.cleanup() here. TODO: Not optimal - redesign it.
+        runtime = NativeDdpRuntime(
+            dtype=torch.bfloat16, torch_distributed_timeout=datetime.timedelta(10)
+        )
+
+        # Load hydra config
+        hydra_cfg = initialize_hydra_config_for_dir(
+            config_dir=self.model.hydra_config_dir,
+            config_name=self.model.hydra_config_name,
+            overrides=[
+                f"puzzle_dir={self.model.puzzle_dir}",
+                f"dataset_path={self.model.dataset_path}",
+            ],
+        )
+
+        # Build_library_and_stats (single process)
+        if runtime.global_rank == 0:
+            build_library_and_stats.launch_build_library_and_stats(hydra_cfg)
+        runtime.wait_for_everyone()
+
+        # Calc_one_block_scores (distributed processing)
+        scoring.launch_scoring(hydra_cfg, runtime)
+
+        # mip_and_realize_models (distributed processing)
+        mip_and_realize_models.launch_mip_and_realize_model(hydra_cfg, runtime)
diff --git a/tests/experimental/torch/_compress/compress_test_utils.py b/tests/experimental/torch/_compress/compress_test_utils.py
@@ -19,9 +19,68 @@
 
 import torch
 from datasets import Dataset, DatasetDict
+from puzzle_tools.hydra_utils import register_hydra_resolvers
 from transformers import AutoTokenizer, LlamaConfig, LlamaForCausalLM, PreTrainedTokenizerBase
 
 
+def setup_test_model_and_data(
+    project_root_path: Path,
+    tmp_path: Path,
+    rank: int,
+    runtime,
+) -> tuple[
+    Path,
+    Path,
+    Path,
+    Path,
+    str,
+]:
+    """
+    Setup the test model and data for the compress NAS search.
+
+    Args:
+        project_root_path (Path): the root path of the project
+        tmp_path (Path): the temporary path to use for the test
+        rank (int): the rank of the process
+        runtime: the runtime to use for the test
+
+    Returns:
+        tuple[Path, Path, Path, Path, str]:
+        the puzzle_dir, llama_checkpoint_path, dataset_path, hydra_config_dir, hydra_config_name
+    """
+
+    # Register Hydra custom resolvers (needed for config resolution)
+    register_hydra_resolvers()
+
+    # The inputs for the nas.convert() step.
+    #
+    puzzle_dir = tmp_path
+    llama_checkpoint_path = puzzle_dir / "input_model/llama"
+    dataset_path = puzzle_dir / "dummy_dataset"
+    hydra_config_dir = project_root_path / "tests/experimental/torch/_compress/resources/configs"
+    hydra_config_name = "Llama-3_1-8B"
+
+    if rank == 0:
+        # Setup puzzle_dir and dataset
+        setup_puzzle_dir(puzzle_dir)
+        save_dummy_dataset(dataset_path)
+
+        # Create a small Llama model
+        tokenizer = create_tokenizer(project_root_path)
+        create_and_save_small_llama_model(
+            llama_checkpoint_path, vocab_size=tokenizer.vocab_size, tokenizer=tokenizer
+        )
+    runtime.wait_for_everyone()
+
+    return (
+        puzzle_dir,
+        llama_checkpoint_path,
+        dataset_path,
+        hydra_config_dir,
+        hydra_config_name,
+    )
+
+
 def create_and_save_small_llama_model(
     output_path: str, vocab_size: int, tokenizer: PreTrainedTokenizerBase
 ):
diff --git a/tests/experimental/torch/_compress/nas/plugins/test_nas_convert.py b/tests/experimental/torch/_compress/nas/plugins/test_nas_convert.py
@@ -20,13 +20,7 @@
 
 import torch
 from _test_utils.torch.distributed.utils import spawn_multiprocess_job
-from experimental.torch._compress.compress_test_utils import (
-    create_and_save_small_llama_model,
-    create_tokenizer,
-    save_dummy_dataset,
-    setup_puzzle_dir,
-)
-from puzzle_tools.hydra_utils import register_hydra_resolvers
+from experimental.torch._compress.compress_test_utils import setup_test_model_and_data
 
 import modelopt.torch.nas as mtn
 from modelopt.torch._compress.nas.plugins.compress_nas_plugin import CompressModel
@@ -48,32 +42,13 @@ def test_nas_convert(project_root_path: Path, tmp_path: Path):
 def _test_nas_convert_multiprocess_job(
     project_root_path: Path, tmp_path: Path, rank: int, size: int
 ):
-    # Register Hydra custom resolvers (needed for config resolution)
-    register_hydra_resolvers()
-
-    #
-    # The inputs for the nas.convert() step.
-    #
-    puzzle_dir = tmp_path
-    llama_checkpoint_path = puzzle_dir / "ckpts/llama"
-    dataset_path = puzzle_dir / "dummy_dataset"
-    hydra_config_dir = project_root_path / "tests/experimental/torch/_compress/resources/configs"
-    hydra_config_name = "Llama-3_1-8B"
-
     with NativeDdpRuntime(
         dtype=torch.bfloat16, torch_distributed_timeout=datetime.timedelta(10)
     ) as runtime:
-        if rank == 0:
-            # Setup puzzle_dir and dataset
-            setup_puzzle_dir(puzzle_dir)
-            save_dummy_dataset(dataset_path)
-
-            # Create a small Llama model
-            tokenizer = create_tokenizer(project_root_path)
-            create_and_save_small_llama_model(
-                llama_checkpoint_path, vocab_size=tokenizer.vocab_size, tokenizer=tokenizer
-            )
-        runtime.wait_for_everyone()
+        # Setup the test model and data.
+        puzzle_dir, llama_checkpoint_path, dataset_path, hydra_config_dir, hydra_config_name = (
+            setup_test_model_and_data(project_root_path, tmp_path, rank, runtime)
+        )
 
         #
         # Run the mnt.convert() step
diff --git a/tests/experimental/torch/_compress/nas/plugins/test_nas_search.py b/tests/experimental/torch/_compress/nas/plugins/test_nas_search.py
@@ -0,0 +1,110 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# See tests/experimental/torch/_compress/test_compress.py for instructions on how to run this test
+# TODO: Remove those instructions once this test runs automatically on CI
+#
+import datetime
+from functools import partial
+from pathlib import Path
+
+import torch
+from _test_utils.torch.distributed.utils import spawn_multiprocess_job
+from experimental.torch._compress.compress_test_utils import setup_test_model_and_data
+
+import modelopt.torch.nas as mtn
+from modelopt.torch._compress.nas.plugins.compress_nas_plugin import CompressModel
+from modelopt.torch._compress.runtime import NativeDdpRuntime
+
+
+def test_nas_search(project_root_path: Path, tmp_path: Path):
+    spawn_multiprocess_job(
+        size=torch.cuda.device_count(),
+        job=partial(_test_nas_search_multiprocess_job, project_root_path, tmp_path),
+        backend="nccl",
+    )
+
+
+def _test_nas_search_multiprocess_job(
+    project_root_path: Path, tmp_path: Path, rank: int, size: int
+):
+    with NativeDdpRuntime(
+        dtype=torch.bfloat16, torch_distributed_timeout=datetime.timedelta(10)
+    ) as runtime:
+        # Setup the test model and data.
+        puzzle_dir, llama_checkpoint_path, dataset_path, hydra_config_dir, hydra_config_name = (
+            setup_test_model_and_data(project_root_path, tmp_path, rank, runtime)
+        )
+
+        #
+        # Run the mtn.convert() step
+        #
+        input_model = CompressModel()
+        converted_model = mtn.convert(
+            input_model,
+            mode=[
+                (
+                    "compress",
+                    {
+                        "puzzle_dir": str(puzzle_dir),
+                        "input_model_path": str(llama_checkpoint_path),
+                        "hydra_config_dir": str(hydra_config_dir),
+                        "hydra_config_name": hydra_config_name,
+                        "dataset_path": str(dataset_path),
+                    },
+                )
+            ],
+        )
+
+        #
+        # Run the mtn.search() step
+        #
+        mtn.search(
+            converted_model,
+            constraints={},  # this is not used as the search space is defined in the hydra config
+            dummy_input=None,  # Not used
+            config={},  # this is not used as the search space is defined in the hydra config
+        )
+
+        #
+        # Check assertions for mtn.search() step
+        #
+        if rank == 0:
+            # assertions for the build_library_and_stats step
+            assert (puzzle_dir / "replacement_library.json").is_file()
+            assert (puzzle_dir / "subblock_stats.json").is_file()
+
+            # assertions for the scoring step
+            solution_0_filepath = (
+                puzzle_dir / "single_sequence_replacement_solutions--validation/solution_0.json"
+            )
+
+            assert solution_0_filepath.exists()
+
+            # assertions for the mip_and_realize_models step
+            solution_0_ckpt_config_path = (
+                puzzle_dir
+                / "mip/puzzle_solutions/target_memory_780000MiB/solutions--checkpoints/solution_0/config.json"
+            )
+
+            assert solution_0_ckpt_config_path.exists()
+            assert (
+                puzzle_dir / "mip/puzzle_solutions/target_memory_780000MiB/solutions.json"
+            ).exists()
+
+        runtime.wait_for_everyone()
+
+    print("PYTEST SUMMARY: test_nas_search() test has finished successfully")
diff --git a/tests/experimental/torch/_compress/test_compress.py b/tests/experimental/torch/_compress/test_compress.py