NVIDIA-NeMo · mckornfield · Apr 1, 2026
@@ -127,17 +127,17 @@ cpu = [
   "peft",
   "opacus",
   "sentence-transformers",
-  "torch==2.9.1; sys_platform == 'darwin'",
-  "torch==2.9.1+cpu; sys_platform == 'linux'",
-  "torchvision==0.24.1; sys_platform == 'darwin'",
-  "torchvision==0.24.1+cpu; sys_platform == 'linux'",
-  "torchao==0.15.0",
+  "torch==2.10.0; sys_platform == 'darwin'",
+  "torch==2.10.0+cpu; sys_platform == 'linux'",
+  "torchvision==0.25.0; sys_platform == 'darwin'",
+  "torchvision==0.25.0+cpu; sys_platform == 'linux'",
+  "torchao==0.16.0",
   "transformers==4.57.3",
   "triton>=2.0.0; sys_platform=='linux'",
   "trl>=0.23.0",
-  "unsloth[cu128-torch291]==2025.12.4; sys_platform=='linux'",
-  "unsloth_zoo==2025.12.4; sys_platform=='linux'",
-  "vllm==0.15.0; sys_platform=='linux'",
+  "unsloth[cu128-torch2100]==2026.3.18; sys_platform=='linux'",
+  "unsloth_zoo==2026.3.7; sys_platform=='linux'",
+  "vllm==0.18.0; sys_platform=='linux'",
 ]
 
 cu128 = [
@@ -153,17 +153,17 @@ cu128 = [
   "opacus",
   "peft",
   "sentence-transformers",
-  "torch==2.9.1+cu128; sys_platform == 'linux'",
+  "torch==2.10.0+cu128; sys_platform == 'linux'",
   "torch-c-dlpack-ext",
-  "torchvision==0.24.1+cu128; sys_platform == 'linux'",
-  "torchao==0.15.0; sys_platform == 'linux'",
+  "torchvision==0.25.0+cu128; sys_platform == 'linux'",
+  "torchao==0.16.0; sys_platform == 'linux'",
   "transformers==4.57.3",
   "triton>=2.0.0; sys_platform == 'linux'",
   "trl>=0.23.0",
-  "unsloth[cu128-torch291]==2025.12.4; sys_platform == 'linux'",
-  "unsloth_zoo==2025.12.4; sys_platform == 'linux'",
-  "vllm==0.15.0; sys_platform == 'linux'",
-  "xformers==v0.0.33.post2; sys_platform == 'linux'",
+  "unsloth[cu128-torch2100]==2026.3.18; sys_platform == 'linux'",
+  "unsloth_zoo==2026.3.7; sys_platform == 'linux'",
+  "vllm==0.18.0; sys_platform == 'linux'",
+  "xformers==v0.0.34; sys_platform == 'linux'",
 ]
 
 # at some point, do per-subpackage dependencies
@@ -204,7 +204,7 @@ environments = [
     "sys_platform == 'linux' and platform_machine == 'aarch64'",
 ]
 
-constraint-dependencies = ["torch==2.9.1", "regex==2025.07.34", "pandas<3"]
+constraint-dependencies = ["torch==2.10.0", "regex==2025.07.34", "pandas<3"]
 
 
 

@@ -137,7 +137,6 @@ def __init__(
             prompt_template=self.model_metadata.prompt_config.template,
         )
         self.llm: vLLM | None = None
-        self.logits_processors = []
 
         # Do not generate detailed error messages in production to avoid leaking sensitive data.
         self.use_detailed_logs = kwargs.pop("use_detailed_logs", False)
@@ -191,7 +190,6 @@ def initialize(self, **kwargs) -> None:
         # vllm requires this "config" to set the backend ahead of time.
         structured_outputs_config = StructuredOutputsConfig(
             backend=self.config.generation.structured_generation_backend,
-            disable_fallback=True,
         )
         # Unsloth patches model attention forward functions with torch.compiler.disable().
         # vLLM compiles TransformersForCausalLM with fullgraph=True via @support_torch_compile.
@@ -222,7 +220,7 @@ def _build_structured_output_params(self) -> StructuredOutputsParams | None:
         if not self.config.generation.use_structured_generation:
             return None
 
-        params: dict[str, Any] = {"disable_fallback": True}
+        params: dict[str, Any] = {}
 
         if self.config.generation.structured_generation_schema_method == "regex":
             logger.info("Structured generation is enabled, using a regex to enforce the schema")
@@ -511,7 +509,6 @@ def generate(
             top_p=self.config.generation.top_p,
             top_k=FIXED_RUNTIME_GENERATE_ARGS["top_k"],
             min_p=FIXED_RUNTIME_GENERATE_ARGS["min_p"],
-            logits_processors=self.logits_processors,
             max_tokens=self.model_metadata.max_seq_length,
             skip_special_tokens=not need_special_token_outputs,
             include_stop_str_in_output=need_special_token_outputs,

@@ -85,7 +85,7 @@ def test_train_and_generate_dp(fixture_financial_transactions_dataset, fixture_s
 
 @pytest.mark.e2e
 @pytest.mark.requires_gpu
-@pytest.mark.timeout(500)
+@pytest.mark.timeout(900)
 @pytest.mark.skipif(sys.platform == "darwin", reason="Not applicable on macOS")
 def test_train_and_generate_defaults(fixture_financial_transactions_dataset, fixture_save_path):
     df = fixture_financial_transactions_dataset