diff --git a/scripts/performance/perf_plugins.py b/scripts/performance/perf_plugins.py index fcf5b2dfd2..05a6d86416 100644 --- a/scripts/performance/perf_plugins.py +++ b/scripts/performance/perf_plugins.py @@ -275,6 +275,9 @@ def _set_model_specific_environment_variables( ): executor.env_vars["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True" + if model_family_name in ["deepseek"]: + executor.env_vars["NVTE_ALLOW_NONDETERMINISTIC_ALGO"] = "0" + del_cudnn_ln = True if gpu in ["h100"]: if model_family_name == "llama" and model_recipe_name == "llama3_8b" and train_task == "pretrain":