NVIDIA-NeMo · blisc · May 28, 2025 · May 21, 2025 · May 26, 2025 · May 26, 2025
diff --git a/nemo/collections/tts/models/magpietts_preference_optimization.py b/nemo/collections/tts/models/magpietts_preference_optimization.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import copy
 import json
 import os
@@ -31,7 +44,7 @@ class MagpieTTSModelOfflinePODataGen(MagpieTTSModel):
     This class is used in 'test' mode and leverages trainer.test() for multi-GPU/multi-node inference.
     Saves the predicted audio files and logs the CER/WER metrics as individual json files for each audio.
     """
-    
+
     def __init__(self, cfg: DictConfig, trainer: 'Trainer' = None):
         super().__init__(cfg, trainer)
         if cfg.get('pref_set_language', "en") == "en":
@@ -143,8 +156,8 @@ def test_step(self, batch, batch_idx):
 
 class MagpieTTSModelOfflinePO(MagpieTTSModel):
     """
-    MagpieTTS_Model_OfflinePO is a class that extends MagpieTTS_Model to support 
-    offline preference optimization (DPO, IPO, RPO). 
+    MagpieTTSModelOfflinePO is a class that extends MagpieTTSModel to support
+    offline preference optimization (DPO, IPO, RPO).
     Set cfg.model.dpo_loss_type to 'dpo', 'ipo', or 'rpo' to use the corresponding loss.
     """
     def __init__(self, cfg: DictConfig, trainer: 'Trainer' = None):

diff --git a/scripts/magpietts/codec_extraction.py b/scripts/magpietts/codec_extraction.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import json
 import torch
 from torch.utils.data import Dataset, DataLoader

diff --git a/scripts/magpietts/eval_squimmos.py b/scripts/magpietts/eval_squimmos.py
@@ -1,4 +1,17 @@
+# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 from torchaudio.pipelines import SQUIM_OBJECTIVE, SQUIM_SUBJECTIVE
 import os
 import json
 import torch
@@ -22,7 +35,7 @@ def compute_mean_and_confidence_interval(measurements, confidence=0.95):
     std_err = stats.sem(measurements)
 
     confidence_interval = std_err * stats.t.ppf((1 + confidence) / 2, len(measurements) - 1)
-    
+
     return "{:.4f} +/- {:.4f}".format(mean, confidence_interval), mean, confidence_interval
 
 def main():
@@ -54,7 +67,7 @@ def main():
             with torch.no_grad():
                 squm_mos_score = squim_mos_model(pred_wav, gt_wav)
                 squim_score_list.append(squm_mos_score.item())
-        
+
         mean_with_ci, mean, confidence_interval = compute_mean_and_confidence_interval(squim_score_list)
         # Add to audio_dir,mean_with_ci to csv
         with open(out_file, "a") as f:

diff --git a/scripts/magpietts/evalset_config.py b/scripts/magpietts/evalset_config.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 dataset_meta_info = {
     'vctk': {
         'manifest_path' : '/home/pneekhara/2023/SimpleT5NeMo/manifests/smallvctk__phoneme__nemo_audio_21fps_8codebooks_2kcodes_v2bWithWavLM_simplet5_withcontextaudiopaths.json',
@@ -29,12 +42,12 @@
         'manifest_path' : '/datap/misc/speechllm_codecdatasets/manifests/t5_exp/dev_clean_withContextAudioPaths_withTargetCodes_evalset_mid.json',
         'audio_dir' : '/datap/misc/Datasets/LibriTTS',
         'feature_dir' : '/datap/misc/Datasets/LibriTTS',
-    },    
+    },
     'libri_dev_clean_eval_tiny': {
         'manifest_path' : '/datap/misc/speechllm_codecdatasets/manifests/t5_exp/dev_clean_withContextAudioPaths_withTargetCodes_evalset_tiny.json',
         'audio_dir' : '/datap/misc/Datasets/LibriTTS',
         'feature_dir' : '/datap/misc/Datasets/LibriTTS',
-    },     
+    },
     'libri_val': {
         'manifest_path' : '/home/pneekhara/2023/SimpleT5NeMo/manifests/libri360_val.json',
         'audio_dir' : '/datap/misc/LibriTTSfromNemo/LibriTTS',

diff --git a/scripts/magpietts/evaluate_generated_audio.py b/scripts/magpietts/evaluate_generated_audio.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import argparse
 import json
 import os

diff --git a/scripts/magpietts/infer_and_evaluate.py b/scripts/magpietts/infer_and_evaluate.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import argparse
 import copy
 import glob
@@ -411,7 +424,7 @@ def main():
             start_prior_after_n_audio_steps=args.start_prior_after_n_audio_steps,
             confidence_level=args.confidence_level,
             use_local_transformer=args.use_local_transformer,
-            maskgit_n_steps=args.maskgit_n_steps,            
+            maskgit_n_steps=args.maskgit_n_steps,
             legacy_codebooks=args.legacy_codebooks
         )
     else:

diff --git a/scripts/tts_dataset_to_lhotse/create_shars.py b/scripts/tts_dataset_to_lhotse/create_shars.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 from pathlib import Path
 import json
 import os

diff --git a/tests/collections/tts/modules/test_audio_codec_modules.py b/tests/collections/tts/modules/test_audio_codec_modules.py
@@ -240,6 +240,7 @@ def test_rvq_eval(self, num_codebooks: int):
             torch.testing.assert_close(indices_enc, indices_fw, msg=f'example {i}: indices mismatch')
             torch.testing.assert_close(dequantized_dec, dequantized_fw, msg=f'example {i}: dequantized mismatch')
 
+    @pytest.mark.pleasefixme
     @pytest.mark.unit
     @pytest.mark.parametrize('num_groups', [1, 2, 4])
     @pytest.mark.parametrize('num_codebooks', [1, 4])

diff --git a/tests/functional_tests/L2_TTS_Fast_dev_runs_Magpietts_config1.sh b/tests/functional_tests/L2_TTS_Fast_dev_runs_Magpietts_config1.sh
@@ -0,0 +1,33 @@
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+coverage run --branch -a --data-file=/workspace/.coverage --source=/workspace/nemo examples/tts/magpietts.py \
+    --config-name magpietts_dc_en \
+    +train_ds_meta.an4.manifest_path="/home/TestData/an4_dataset/an4_train.json" \
+    +train_ds_meta.an4.audio_dir="/" \
+    +train_ds_meta.an4.tokenizer_names="[english_phoneme]" \
+    +train_ds_meta.an4.feature_dir=null \
+    +val_ds_meta.an4.manifest_path="/home/TestData/an4_dataset/an4_val.json" \
+    +val_ds_meta.an4.audio_dir="/" \
+    +val_ds_meta.an4.tokenizer_names="[english_phoneme]" \
+    +val_ds_meta.an4.feature_dir=null \
+    max_epochs=1 \
+    batch_size=4 \
+    model.codecmodel_path="/home/TestData/tts/21fps_causal_codecmodel.nemo" \
+    trainer.devices="[0]" \
+    +trainer.limit_train_batches=1 \
+    +trainer.limit_val_batches=1 \
+    trainer.strategy=auto \
+    model.train_ds.dataloader_params.num_workers=0 \
+    model.validation_ds.dataloader_params.num_workers=0 \
+    ~trainer.check_val_every_n_epoch