From ac9008f55271e8bfee9f2971b1b8f3b55aaacfd5 Mon Sep 17 00:00:00 2001 From: Matt Jeanes Date: Sat, 14 Sep 2024 08:49:42 +0100 Subject: [PATCH 01/17] update to latest pytorch/lightning/whatever --- .gitignore | 4 ++ src/python/piper_train/__main__.py | 51 ++++++++++++++- src/python/piper_train/clean_cached_audio.py | 2 +- src/python/piper_train/export_onnx.py | 14 ++-- src/python/piper_train/infer_generator.py | 2 +- src/python/piper_train/norm_audio/__init__.py | 2 +- src/python/piper_train/vits/dataset.py | 4 +- src/python/piper_train/vits/lightning.py | 64 +++++++++++-------- src/python/requirements.txt | 10 +-- 9 files changed, 108 insertions(+), 45 deletions(-) diff --git a/.gitignore b/.gitignore index e382c7e81..5093eac47 100644 --- a/.gitignore +++ b/.gitignore @@ -20,3 +20,7 @@ htmlcov .venv/ lightning_logs/ + +dataset +training +last.ckpt \ No newline at end of file diff --git a/src/python/piper_train/__main__.py b/src/python/piper_train/__main__.py index ffb47d447..9cd3b8030 100644 --- a/src/python/piper_train/__main__.py +++ b/src/python/piper_train/__main__.py @@ -37,19 +37,47 @@ def main(): "--resume_from_single_speaker_checkpoint", help="For multi-speaker models only. Converts a single-speaker checkpoint to multi-speaker and resumes training", ) - Trainer.add_argparse_args(parser) VitsModel.add_model_specific_args(parser) + parser.add_argument( + "--accelerator", + type=str, + ) + parser.add_argument( + "--devices", + type=int, + ) + parser.add_argument( + "--log_every_n_steps", + type=int, + ) + parser.add_argument( + "--max_epochs", + type=int, + ) parser.add_argument( "--seed", type=int, default=1234 ) + parser.add_argument( + "--resume_from_checkpoint", + type=str, + ) + parser.add_argument( + "--precision", + type=int, + ) parser.add_argument( "--num_ckpt", type=int, default=1, help="# of ckpts saved." ) + parser.add_argument( + "--default_root_dir", + type=str, + help="Default root dir for checkpoints and logs." + ) parser.add_argument( "--save_last", type=bool, @@ -76,11 +104,28 @@ def main(): num_speakers = int(config["num_speakers"]) sample_rate = int(config["audio"]["sample_rate"]) - trainer = Trainer.from_argparse_args(args) + # List of argument names to include + allowed_args = [ + "accelerator", + "devices", + "log_every_n_steps", + "max_epochs", + "precision", + "default_root_dir", + ] + + # Filter the arguments + filtered_args = {key: value for key, value in vars(args).items() if key in allowed_args} + + # Pass the filtered arguments to Trainer + + trainer = Trainer(**filtered_args) if args.checkpoint_epochs is not None: trainer.callbacks = [ModelCheckpoint( every_n_epochs=args.checkpoint_epochs, save_top_k=args.num_ckpt, + monitor="val_loss", + mode="min", save_last=args.save_last )] _LOGGER.debug( @@ -147,7 +192,7 @@ def main(): "Successfully converted single-speaker checkpoint to multi-speaker" ) - trainer.fit(model) + trainer.fit(model, ckpt_path=args.resume_from_checkpoint) def load_state_dict(model, saved_state_dict): diff --git a/src/python/piper_train/clean_cached_audio.py b/src/python/piper_train/clean_cached_audio.py index 5e3029590..a181ba05d 100644 --- a/src/python/piper_train/clean_cached_audio.py +++ b/src/python/piper_train/clean_cached_audio.py @@ -32,7 +32,7 @@ def check_file(pt_path: Path) -> None: try: _LOGGER.debug("Checking %s", pt_path) - torch.load(str(pt_path)) + torch.load(str(pt_path), weights_only=True) except Exception: _LOGGER.error(pt_path) if args.delete: diff --git a/src/python/piper_train/export_onnx.py b/src/python/piper_train/export_onnx.py index f30957e05..6442a998b 100644 --- a/src/python/piper_train/export_onnx.py +++ b/src/python/piper_train/export_onnx.py @@ -51,7 +51,9 @@ def main() -> None: with torch.no_grad(): model_g.dec.remove_weight_norm() - # old_forward = model_g.infer + # Define the device + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + model_g.to(device) def infer_forward(text, text_lengths, scales, sid=None): noise_scale = scales[0] @@ -73,15 +75,15 @@ def infer_forward(text, text_lengths, scales, sid=None): dummy_input_length = 50 sequences = torch.randint( low=0, high=num_symbols, size=(1, dummy_input_length), dtype=torch.long - ) - sequence_lengths = torch.LongTensor([sequences.size(1)]) + ).to(device) + sequence_lengths = torch.LongTensor([sequences.size(1)]).to(device) sid: Optional[torch.LongTensor] = None if num_speakers > 1: - sid = torch.LongTensor([0]) + sid = torch.LongTensor([0]).to(device) # noise, noise_w, length - scales = torch.FloatTensor([0.667, 1.0, 0.8]) + scales = torch.FloatTensor([0.667, 1.0, 0.8]).to(device) dummy_input = (sequences, sequence_lengths, scales, sid) # Export @@ -106,4 +108,4 @@ def infer_forward(text, text_lengths, scales, sid=None): # ----------------------------------------------------------------------------- if __name__ == "__main__": - main() + main() \ No newline at end of file diff --git a/src/python/piper_train/infer_generator.py b/src/python/piper_train/infer_generator.py index fe4f348a5..057850dd0 100644 --- a/src/python/piper_train/infer_generator.py +++ b/src/python/piper_train/infer_generator.py @@ -26,7 +26,7 @@ def main(): args.output_dir = Path(args.output_dir) args.output_dir.mkdir(parents=True, exist_ok=True) - model = torch.load(args.model) + model = torch.load(args.model, weights_only=True) # Inference only model.eval() diff --git a/src/python/piper_train/norm_audio/__init__.py b/src/python/piper_train/norm_audio/__init__.py index 6637230b3..4b48b499d 100644 --- a/src/python/piper_train/norm_audio/__init__.py +++ b/src/python/piper_train/norm_audio/__init__.py @@ -77,7 +77,7 @@ def cache_norm_audio( if ignore_cache or (not audio_spec_path.exists()): if audio_norm_tensor is None: # Load pre-cached normalized audio - audio_norm_tensor = torch.load(audio_norm_path) + audio_norm_tensor = torch.load(audio_norm_path, weights_only=True) audio_spec_tensor = spectrogram_torch( y=audio_norm_tensor, diff --git a/src/python/piper_train/vits/dataset.py b/src/python/piper_train/vits/dataset.py index 258425f5c..a9b38483e 100644 --- a/src/python/piper_train/vits/dataset.py +++ b/src/python/piper_train/vits/dataset.py @@ -77,8 +77,8 @@ def __getitem__(self, idx) -> UtteranceTensors: utt = self.utterances[idx] return UtteranceTensors( phoneme_ids=LongTensor(utt.phoneme_ids), - audio_norm=torch.load(utt.audio_norm_path), - spectrogram=torch.load(utt.audio_spec_path), + audio_norm=torch.load(utt.audio_norm_path, weights_only=True), + spectrogram=torch.load(utt.audio_spec_path, weights_only=True), speaker_id=LongTensor([utt.speaker_id]) if utt.speaker_id is not None else None, diff --git a/src/python/piper_train/vits/lightning.py b/src/python/piper_train/vits/lightning.py index ca8990249..f93495ba8 100644 --- a/src/python/piper_train/vits/lightning.py +++ b/src/python/piper_train/vits/lightning.py @@ -77,6 +77,7 @@ def __init__( ): super().__init__() self.save_hyperparameters() + self.automatic_optimization = False # Disable automatic optimization if (self.hparams.num_speakers > 1) and (self.hparams.gin_channels <= 0): # Default gin_channels for multi-speaker model @@ -186,12 +187,21 @@ def test_dataloader(self): batch_size=self.hparams.batch_size, ) - def training_step(self, batch: Batch, batch_idx: int, optimizer_idx: int): - if optimizer_idx == 0: - return self.training_step_g(batch) + def training_step(self, batch: Batch, batch_idx: int): + # Manually access optimizers + opt_g, opt_d = self.optimizers() - if optimizer_idx == 1: - return self.training_step_d(batch) + # Perform generator step + loss_gen_all = self.training_step_g(batch) + opt_g.zero_grad() + self.manual_backward(loss_gen_all) + opt_g.step() + + # Perform discriminator step + loss_disc_all = self.training_step_d(batch) + opt_d.zero_grad() + self.manual_backward(loss_disc_all) + opt_d.step() def training_step_g(self, batch: Batch): x, x_lengths, y, _, spec, spec_lengths, speaker_ids = ( @@ -283,28 +293,28 @@ def validation_step(self, batch: Batch, batch_idx: int): val_loss = self.training_step_g(batch) + self.training_step_d(batch) self.log("val_loss", val_loss) print(f"Epoch: {self.current_epoch}. Steps: {self.global_step}") - # Generate audio examples - for utt_idx, test_utt in enumerate(self._test_dataset): - text = test_utt.phoneme_ids.unsqueeze(0).to(self.device) - text_lengths = torch.LongTensor([len(test_utt.phoneme_ids)]).to(self.device) - scales = [0.667, 1.0, 0.8] - sid = ( - test_utt.speaker_id.to(self.device) - if test_utt.speaker_id is not None - else None - ) - test_audio = self(text, text_lengths, scales, sid=sid).detach() - - # Scale to make louder in [-1, 1] - test_audio = test_audio * (1.0 / max(0.01, abs(test_audio.max()))) - - tag = test_utt.text or str(utt_idx) - self.logger.experiment.add_audio( - tag, - test_audio, - self.global_step, - sample_rate=self.hparams.sample_rate - ) + # # Generate audio examples + # for utt_idx, test_utt in enumerate(self._test_dataset): + # text = test_utt.phoneme_ids.unsqueeze(0).to(self.device) + # text_lengths = torch.LongTensor([len(test_utt.phoneme_ids)]).to(self.device) + # scales = [0.667, 1.0, 0.8] + # sid = ( + # test_utt.speaker_id.to(self.device) + # if test_utt.speaker_id is not None + # else None + # ) + # test_audio = self(text, text_lengths, scales, sid=sid).detach() + + # # Scale to make louder in [-1, 1] + # test_audio = test_audio * (1.0 / max(0.01, abs(test_audio.max()))) + + # tag = test_utt.text or str(utt_idx) + # self.logger.experiment.add_audio( + # tag, + # test_audio, + # self.global_step, + # sample_rate=self.hparams.sample_rate + # ) return val_loss diff --git a/src/python/requirements.txt b/src/python/requirements.txt index 010c8b3a2..ee8d0a2a9 100644 --- a/src/python/requirements.txt +++ b/src/python/requirements.txt @@ -1,10 +1,12 @@ ---extra-index-url https://download.pytorch.org/whl/cu117 +--extra-index-url https://download.pytorch.org/whl/cu121 cython>=0.29.0,<1 piper-phonemize~=1.1.0 librosa>=0.9.2,<1 numpy==1.24 onnxruntime>=1.11.0 -pytorch-lightning~=1.7.7 -torch==1.13.1+cu117 -torchmetrics==1.1.1 \ No newline at end of file +pytorch-lightning~=2.4.0 +torch==2.4.0+cu121 +torchmetrics==1.4.2 +onnx==1.16.2 +onnxruntime-gpu==1.19.2 \ No newline at end of file From 404b844ce3fa22fa91b735aa337b6548d16f5b79 Mon Sep 17 00:00:00 2001 From: Matt Jeanes Date: Sat, 14 Sep 2024 21:02:22 +0100 Subject: [PATCH 02/17] add arguments for monitor and monitor mode --- src/python/piper_train/__main__.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/python/piper_train/__main__.py b/src/python/piper_train/__main__.py index 9cd3b8030..50162eb85 100644 --- a/src/python/piper_train/__main__.py +++ b/src/python/piper_train/__main__.py @@ -84,6 +84,18 @@ def main(): default=None, help="Always save the last checkpoint." ) + parser.add_argument( + "--monitor", + type=str, + default="val_loss", + help="Metric to monitor." + ) + parser.add_argument( + "--monitor_mode", + type=str, + default="min", + help="Mode to monitor." + ) args = parser.parse_args() _LOGGER.debug(args) @@ -124,8 +136,8 @@ def main(): trainer.callbacks = [ModelCheckpoint( every_n_epochs=args.checkpoint_epochs, save_top_k=args.num_ckpt, - monitor="val_loss", - mode="min", + monitor=args.monitor, + mode=args.monitor_mode, save_last=args.save_last )] _LOGGER.debug( From 91eb9c1382f7e22e8b245393bc5d01d39257d1ec Mon Sep 17 00:00:00 2001 From: Matt Jeanes Date: Sat, 14 Sep 2024 22:18:10 +0100 Subject: [PATCH 03/17] fix parameter --- src/python/piper_train/__main__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/python/piper_train/__main__.py b/src/python/piper_train/__main__.py index 50162eb85..b885c9846 100644 --- a/src/python/piper_train/__main__.py +++ b/src/python/piper_train/__main__.py @@ -65,7 +65,7 @@ def main(): ) parser.add_argument( "--precision", - type=int, + type=str, ) parser.add_argument( "--num_ckpt", From bdd6b0c135040e90758951de01ca9ab0ede80ae8 Mon Sep 17 00:00:00 2001 From: Matt Jeanes Date: Sun, 15 Sep 2024 19:53:37 +0100 Subject: [PATCH 04/17] early stopping / proper progress bar / dynamic learning rate --- .gitignore | 2 +- src/python/piper_train/__main__.py | 27 +++++++++++++++++++----- src/python/piper_train/vits/config.py | 2 +- src/python/piper_train/vits/lightning.py | 15 ++++++++----- 4 files changed, 34 insertions(+), 12 deletions(-) diff --git a/.gitignore b/.gitignore index 5093eac47..5ee38aff4 100644 --- a/.gitignore +++ b/.gitignore @@ -23,4 +23,4 @@ lightning_logs/ dataset training -last.ckpt \ No newline at end of file +*.ckpt \ No newline at end of file diff --git a/src/python/piper_train/__main__.py b/src/python/piper_train/__main__.py index b885c9846..5f5b226ac 100644 --- a/src/python/piper_train/__main__.py +++ b/src/python/piper_train/__main__.py @@ -5,7 +5,7 @@ import torch from pytorch_lightning import Trainer -from pytorch_lightning.callbacks import ModelCheckpoint +from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping, LearningRateMonitor from .vits.lightning import VitsModel @@ -14,6 +14,7 @@ def main(): logging.basicConfig(level=logging.DEBUG) + logging.getLogger("fsspec").setLevel(logging.WARNING) parser = argparse.ArgumentParser() parser.add_argument( @@ -129,17 +130,18 @@ def main(): # Filter the arguments filtered_args = {key: value for key, value in vars(args).items() if key in allowed_args} - # Pass the filtered arguments to Trainer + # Initialize callbacks + callbacks = [] - trainer = Trainer(**filtered_args) if args.checkpoint_epochs is not None: - trainer.callbacks = [ModelCheckpoint( + checkpoint_callback = ModelCheckpoint( every_n_epochs=args.checkpoint_epochs, save_top_k=args.num_ckpt, monitor=args.monitor, mode=args.monitor_mode, save_last=args.save_last - )] + ) + callbacks.append(checkpoint_callback) _LOGGER.debug( "Checkpoints will be saved every %s epoch(s)", args.checkpoint_epochs ) @@ -147,6 +149,21 @@ def main(): "%s Checkpoints will be saved", args.num_ckpt ) + # Early stopping callback + early_stopping_callback = EarlyStopping( + monitor='val_loss', + patience=10, + verbose=True, + mode='min' + ) + callbacks.append(early_stopping_callback) + + # Learning rate monitor callback + lr_monitor_callback = LearningRateMonitor(logging_interval='epoch') + callbacks.append(lr_monitor_callback) + + trainer = Trainer(**filtered_args, callbacks=callbacks) + dict_args = vars(args) if args.quality == "x-low": dict_args["hidden_channels"] = 96 diff --git a/src/python/piper_train/vits/config.py b/src/python/piper_train/vits/config.py index e878f02cd..6de3c8616 100644 --- a/src/python/piper_train/vits/config.py +++ b/src/python/piper_train/vits/config.py @@ -116,7 +116,7 @@ class TrainingConfig: eps: float = 1e-9 # batch_size: int = 32 fp16_run: bool = False - lr_decay: float = 0.999875 + #lr_decay: float = 0.999875 # Disable fixed learning rate decay as it's handled by ReduceLROnPlateau init_lr_ratio: float = 1.0 warmup_epochs: int = 0 c_mel: int = 45 diff --git a/src/python/piper_train/vits/lightning.py b/src/python/piper_train/vits/lightning.py index f93495ba8..efa4e190e 100644 --- a/src/python/piper_train/vits/lightning.py +++ b/src/python/piper_train/vits/lightning.py @@ -292,7 +292,7 @@ def training_step_d(self, batch: Batch): def validation_step(self, batch: Batch, batch_idx: int): val_loss = self.training_step_g(batch) + self.training_step_d(batch) self.log("val_loss", val_loss) - print(f"Epoch: {self.current_epoch}. Steps: {self.global_step}") + # # Generate audio examples # for utt_idx, test_utt in enumerate(self._test_dataset): # text = test_utt.phoneme_ids.unsqueeze(0).to(self.device) @@ -316,6 +316,11 @@ def validation_step(self, batch: Batch, batch_idx: int): # sample_rate=self.hparams.sample_rate # ) + # Step the scheduler with the validation loss + scheduler_g, scheduler_d = self.lr_schedulers() + scheduler_g.step(val_loss) + scheduler_d.step(val_loss) + return val_loss def configure_optimizers(self): @@ -334,11 +339,11 @@ def configure_optimizers(self): ), ] schedulers = [ - torch.optim.lr_scheduler.ExponentialLR( - optimizers[0], gamma=self.hparams.lr_decay + torch.optim.lr_scheduler.ReduceLROnPlateau( + optimizers[0], mode='min', factor=0.1, patience=10, verbose=True ), - torch.optim.lr_scheduler.ExponentialLR( - optimizers[1], gamma=self.hparams.lr_decay + torch.optim.lr_scheduler.ReduceLROnPlateau( + optimizers[1], mode='min', factor=0.1, patience=10, verbose=True ), ] From 6455cae1cb4fbf5c8f87554241967c320464a172 Mon Sep 17 00:00:00 2001 From: Matt Jeanes Date: Sun, 15 Sep 2024 20:16:14 +0100 Subject: [PATCH 05/17] add options and adjust defaults --- src/python/piper_train/__main__.py | 8 +++++++- src/python/piper_train/vits/lightning.py | 10 ++++++++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/src/python/piper_train/__main__.py b/src/python/piper_train/__main__.py index 5f5b226ac..36252b43d 100644 --- a/src/python/piper_train/__main__.py +++ b/src/python/piper_train/__main__.py @@ -97,6 +97,12 @@ def main(): default="min", help="Mode to monitor." ) + parser.add_argument( + "--early_stop_patience", + type=int, + default=20, + help="Early stopping patience." + ) args = parser.parse_args() _LOGGER.debug(args) @@ -152,7 +158,7 @@ def main(): # Early stopping callback early_stopping_callback = EarlyStopping( monitor='val_loss', - patience=10, + patience=args.early_stop_patience, verbose=True, mode='min' ) diff --git a/src/python/piper_train/vits/lightning.py b/src/python/piper_train/vits/lightning.py index efa4e190e..cb134b824 100644 --- a/src/python/piper_train/vits/lightning.py +++ b/src/python/piper_train/vits/lightning.py @@ -63,6 +63,8 @@ def __init__( eps: float = 1e-9, batch_size: int = 1, lr_decay: float = 0.999875, + lr_reduce_patience: int = 10, + lr_reduce_factor: float = 0.5, init_lr_ratio: float = 1.0, warmup_epochs: int = 0, c_mel: int = 45, @@ -338,12 +340,13 @@ def configure_optimizers(self): eps=self.hparams.eps, ), ] + print("TESTING", self.hparams.lr_reduce_factor, self.hparams.lr_reduce_patience) schedulers = [ torch.optim.lr_scheduler.ReduceLROnPlateau( - optimizers[0], mode='min', factor=0.1, patience=10, verbose=True + optimizers[0], mode='min', factor=self.hparams.lr_reduce_factor, patience=self.hparams.lr_reduce_patience, verbose=True ), torch.optim.lr_scheduler.ReduceLROnPlateau( - optimizers[1], mode='min', factor=0.1, patience=10, verbose=True + optimizers[1], mode='min', factor=self.hparams.lr_reduce_factor, patience=self.hparams.lr_reduce_patience, verbose=True ), ] @@ -366,5 +369,8 @@ def add_model_specific_args(parent_parser): parser.add_argument("--filter-channels", type=int, default=768) parser.add_argument("--n-layers", type=int, default=6) parser.add_argument("--n-heads", type=int, default=2) + + parser.add_argument("--lr-reduce-factor", type=float, default=0.5) + parser.add_argument("--lr-reduce-patience", type=int, default=10) # return parent_parser From 08465f5d5ee04c626e5f30ed3ab11ac540ca3518 Mon Sep 17 00:00:00 2001 From: Matt Jeanes Date: Sun, 15 Sep 2024 21:33:23 +0100 Subject: [PATCH 06/17] improve logging and tensor support --- src/python/piper_train/__main__.py | 11 +++++++++++ src/python/piper_train/vits/lightning.py | 17 ++++++++++++----- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/src/python/piper_train/__main__.py b/src/python/piper_train/__main__.py index 36252b43d..0eb6a2227 100644 --- a/src/python/piper_train/__main__.py +++ b/src/python/piper_train/__main__.py @@ -112,6 +112,17 @@ def main(): torch.backends.cudnn.benchmark = True torch.manual_seed(args.seed) + + # Function to check if the GPU supports Tensor Cores + def supports_tensor_cores(): + # Assuming that Tensor Cores are supported if the compute capability is 7.0 or higher + # This is a simplification; you might need a more detailed check based on your specific requirements + return torch.cuda.get_device_capability(0)[0] >= 7 + + # Set the float32 matrix multiplication precision based on GPU support for Tensor Cores + if supports_tensor_cores(): + # Set to 'high' or 'medium' based on your preference + torch.set_float32_matmul_precision('high') config_path = args.dataset_dir / "config.json" dataset_path = args.dataset_dir / "dataset.jsonl" diff --git a/src/python/piper_train/vits/lightning.py b/src/python/piper_train/vits/lightning.py index cb134b824..9d5329da6 100644 --- a/src/python/piper_train/vits/lightning.py +++ b/src/python/piper_train/vits/lightning.py @@ -270,7 +270,11 @@ def training_step_g(self, batch: Batch): loss_gen, _losses_gen = generator_loss(y_d_hat_g) loss_gen_all = loss_gen + loss_fm + loss_mel + loss_dur + loss_kl - self.log("loss_gen_all", loss_gen_all) + loss_gen_lr = self.trainer.optimizers[0].param_groups[0]['lr'] + + self.log("gen_loss", loss_gen_all) + self.log("gen_lr", loss_gen_lr) + self.log("step", self.global_step, prog_bar=True) return loss_gen_all @@ -287,12 +291,16 @@ def training_step_d(self, batch: Batch): ) loss_disc_all = loss_disc - self.log("loss_disc_all", loss_disc_all) + loss_disc_lr = self.trainer.optimizers[1].param_groups[0]['lr'] + self.log("disc_loss", loss_disc_all) + self.log("disc_lr", loss_disc_lr) + self.log("step", self.global_step, prog_bar=True) return loss_disc_all def validation_step(self, batch: Batch, batch_idx: int): val_loss = self.training_step_g(batch) + self.training_step_d(batch) + self.log("step", self.global_step, prog_bar=True) self.log("val_loss", val_loss) # # Generate audio examples @@ -340,13 +348,12 @@ def configure_optimizers(self): eps=self.hparams.eps, ), ] - print("TESTING", self.hparams.lr_reduce_factor, self.hparams.lr_reduce_patience) schedulers = [ torch.optim.lr_scheduler.ReduceLROnPlateau( - optimizers[0], mode='min', factor=self.hparams.lr_reduce_factor, patience=self.hparams.lr_reduce_patience, verbose=True + optimizers[0], mode='min', factor=self.hparams.lr_reduce_factor, patience=self.hparams.lr_reduce_patience ), torch.optim.lr_scheduler.ReduceLROnPlateau( - optimizers[1], mode='min', factor=self.hparams.lr_reduce_factor, patience=self.hparams.lr_reduce_patience, verbose=True + optimizers[1], mode='min', factor=self.hparams.lr_reduce_factor, patience=self.hparams.lr_reduce_patience ), ] From 105450f84ed62a7a9083ddeacbe76f64c4097d4c Mon Sep 17 00:00:00 2001 From: Matt Jeanes Date: Sun, 15 Sep 2024 22:29:26 +0100 Subject: [PATCH 07/17] add optional plot with --show-plot --- src/python/piper_train/__main__.py | 2 + src/python/piper_train/vits/lightning.py | 60 ++++++++++++++++++++++++ src/python/requirements.txt | 4 +- 3 files changed, 65 insertions(+), 1 deletion(-) diff --git a/src/python/piper_train/__main__.py b/src/python/piper_train/__main__.py index 0eb6a2227..f615f6b2b 100644 --- a/src/python/piper_train/__main__.py +++ b/src/python/piper_train/__main__.py @@ -15,6 +15,8 @@ def main(): logging.basicConfig(level=logging.DEBUG) logging.getLogger("fsspec").setLevel(logging.WARNING) + logging.getLogger("matplotlib").setLevel(logging.WARNING) + logging.getLogger("PIL").setLevel(logging.WARNING) parser = argparse.ArgumentParser() parser.add_argument( diff --git a/src/python/piper_train/vits/lightning.py b/src/python/piper_train/vits/lightning.py index 9d5329da6..a6cba70f1 100644 --- a/src/python/piper_train/vits/lightning.py +++ b/src/python/piper_train/vits/lightning.py @@ -7,6 +7,8 @@ from torch import autocast from torch.nn import functional as F from torch.utils.data import DataLoader, Dataset, random_split +import matplotlib.pyplot as plt +from IPython.display import display, clear_output from .commons import slice_segments from .dataset import Batch, PiperDataset, UtteranceCollate @@ -75,6 +77,7 @@ def __init__( num_test_examples: int = 5, validation_split: float = 0.1, max_phoneme_ids: Optional[int] = None, + show_plot = False, **kwargs, ): super().__init__() @@ -121,6 +124,14 @@ def __init__( self._y = None self._y_hat = None + if self.hparams.show_plot: + # Initialize plot + self.fig, self.ax = plt.subplots() + self.gen_losses = [] + self.disc_losses = [] + self.val_losses = [] + self.epochs = [] + def _load_datasets( self, validation_split: float, @@ -205,6 +216,8 @@ def training_step(self, batch: Batch, batch_idx: int): self.manual_backward(loss_disc_all) opt_d.step() + return {"loss_gen": loss_gen_all, "loss_disc": loss_disc_all} + def training_step_g(self, batch: Batch): x, x_lengths, y, _, spec, spec_lengths, speaker_ids = ( batch.phoneme_ids, @@ -300,6 +313,7 @@ def training_step_d(self, batch: Batch): def validation_step(self, batch: Batch, batch_idx: int): val_loss = self.training_step_g(batch) + self.training_step_d(batch) + self.log("step", self.global_step, prog_bar=True) self.log("val_loss", val_loss) @@ -332,6 +346,31 @@ def validation_step(self, batch: Batch, batch_idx: int): scheduler_d.step(val_loss) return val_loss + + def on_train_epoch_end(self): + if not self.hparams.show_plot: + return + + avg_gen_loss = self.trainer.callback_metrics.get("gen_loss") + avg_disc_loss = self.trainer.callback_metrics.get("disc_loss") + + avg_gen_loss_cpu = avg_gen_loss.detach().cpu() if avg_gen_loss.is_cuda else avg_gen_loss.detach() + self.gen_losses.append(avg_gen_loss_cpu) + + avg_disc_loss_cpu = avg_disc_loss.detach().cpu() if avg_disc_loss.is_cuda else avg_disc_loss.detach() + self.disc_losses.append(avg_disc_loss_cpu) + + # Capture validation loss + val_loss = self.trainer.callback_metrics.get("val_loss") + if val_loss is not None: + val_loss_cpu = val_loss.detach().cpu() if val_loss.is_cuda else val_loss.detach() + self.val_losses.append(val_loss_cpu) + + # Update epochs for plot + self.epochs.append(self.current_epoch) + + # Update plot + self.update_plot() def configure_optimizers(self): optimizers = [ @@ -359,6 +398,25 @@ def configure_optimizers(self): return optimizers, schedulers + def update_plot(self): + if not self.hparams.show_plot: + raise ValueError("show_plot is not enabled") + self.ax.clear() + + self.ax.plot(self.epochs, self.gen_losses, label='Generator Loss') + self.ax.plot(self.epochs, self.disc_losses, label='Discriminator Loss') + self.ax.plot(self.epochs, self.val_losses, label='Validation Loss') + self.ax.set_xlabel('Epoch') + self.ax.set_ylabel('Loss') + self.ax.legend() + title = F'Training Progress - Epoch: {self.current_epoch}' + self.ax.set_title(title) + self.ax.get_figure().canvas.manager.set_window_title(title) + self.ax.grid(True) + plt.draw() + clear_output(wait=True) + plt.pause(0.01) + @staticmethod def add_model_specific_args(parent_parser): parser = parent_parser.add_argument_group("VitsModel") @@ -379,5 +437,7 @@ def add_model_specific_args(parent_parser): parser.add_argument("--lr-reduce-factor", type=float, default=0.5) parser.add_argument("--lr-reduce-patience", type=int, default=10) + + parser.add_argument("--show-plot", type=bool, default=False) # return parent_parser diff --git a/src/python/requirements.txt b/src/python/requirements.txt index ee8d0a2a9..8fdb5280f 100644 --- a/src/python/requirements.txt +++ b/src/python/requirements.txt @@ -9,4 +9,6 @@ pytorch-lightning~=2.4.0 torch==2.4.0+cu121 torchmetrics==1.4.2 onnx==1.16.2 -onnxruntime-gpu==1.19.2 \ No newline at end of file +onnxruntime-gpu==1.19.2 +matplotlib==3.9.2 +ipython==8.27.0 \ No newline at end of file From 35b245dc976937297a38a7a5230c90d635b150ff Mon Sep 17 00:00:00 2001 From: Matt Jeanes Date: Sun, 15 Sep 2024 23:00:01 +0100 Subject: [PATCH 08/17] allow saving plot to file as well as / instead of drawing it to screen --- src/python/piper_train/vits/lightning.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/src/python/piper_train/vits/lightning.py b/src/python/piper_train/vits/lightning.py index a6cba70f1..fead8a945 100644 --- a/src/python/piper_train/vits/lightning.py +++ b/src/python/piper_train/vits/lightning.py @@ -8,7 +8,6 @@ from torch.nn import functional as F from torch.utils.data import DataLoader, Dataset, random_split import matplotlib.pyplot as plt -from IPython.display import display, clear_output from .commons import slice_segments from .dataset import Batch, PiperDataset, UtteranceCollate @@ -124,7 +123,7 @@ def __init__( self._y = None self._y_hat = None - if self.hparams.show_plot: + if self.hparams.show_plot or self.hparams.plot_save_path: # Initialize plot self.fig, self.ax = plt.subplots() self.gen_losses = [] @@ -348,7 +347,7 @@ def validation_step(self, batch: Batch, batch_idx: int): return val_loss def on_train_epoch_end(self): - if not self.hparams.show_plot: + if not self.hparams.show_plot and not self.hparams.plot_save_path: return avg_gen_loss = self.trainer.callback_metrics.get("gen_loss") @@ -399,8 +398,8 @@ def configure_optimizers(self): return optimizers, schedulers def update_plot(self): - if not self.hparams.show_plot: - raise ValueError("show_plot is not enabled") + if not self.hparams.show_plot and not self.hparams.plot_save_path: + raise ValueError("show_plot or plot_save_path must be set to update plot") self.ax.clear() self.ax.plot(self.epochs, self.gen_losses, label='Generator Loss') @@ -413,9 +412,13 @@ def update_plot(self): self.ax.set_title(title) self.ax.get_figure().canvas.manager.set_window_title(title) self.ax.grid(True) - plt.draw() - clear_output(wait=True) - plt.pause(0.01) + + if self.hparams.show_plot: + plt.draw() + plt.pause(0.01) + + if self.hparams.plot_save_path: + self.ax.get_figure().savefig(self.hparams.plot_save_path) @staticmethod def add_model_specific_args(parent_parser): @@ -439,5 +442,6 @@ def add_model_specific_args(parent_parser): parser.add_argument("--lr-reduce-patience", type=int, default=10) parser.add_argument("--show-plot", type=bool, default=False) - # + parser.add_argument("--plot-save-path", type=str, default="plot.png") + return parent_parser From a58d09e76650903345be93ebe86dc2b8ed489f1b Mon Sep 17 00:00:00 2001 From: Matt Jeanes Date: Sun, 15 Sep 2024 23:04:35 +0100 Subject: [PATCH 09/17] remove requirement --- src/python/piper_train/vits/lightning.py | 2 +- src/python/requirements.txt | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/python/piper_train/vits/lightning.py b/src/python/piper_train/vits/lightning.py index fead8a945..726ffd6d0 100644 --- a/src/python/piper_train/vits/lightning.py +++ b/src/python/piper_train/vits/lightning.py @@ -442,6 +442,6 @@ def add_model_specific_args(parent_parser): parser.add_argument("--lr-reduce-patience", type=int, default=10) parser.add_argument("--show-plot", type=bool, default=False) - parser.add_argument("--plot-save-path", type=str, default="plot.png") + parser.add_argument("--plot-save-path", type=str, default=None) return parent_parser diff --git a/src/python/requirements.txt b/src/python/requirements.txt index 8fdb5280f..6bf7ffc7c 100644 --- a/src/python/requirements.txt +++ b/src/python/requirements.txt @@ -10,5 +10,4 @@ torch==2.4.0+cu121 torchmetrics==1.4.2 onnx==1.16.2 onnxruntime-gpu==1.19.2 -matplotlib==3.9.2 -ipython==8.27.0 \ No newline at end of file +matplotlib==3.9.2 \ No newline at end of file From a1585926e35dd182c36bb7abb6b95c967655f7c4 Mon Sep 17 00:00:00 2001 From: Matt Jeanes Date: Mon, 16 Sep 2024 01:12:39 +0100 Subject: [PATCH 10/17] fix setting learning rate on existing models with new override parameter and add weight decay option --- src/python/piper_train/vits/lightning.py | 34 ++++++++++++++++++------ 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/src/python/piper_train/vits/lightning.py b/src/python/piper_train/vits/lightning.py index 726ffd6d0..a67e1368d 100644 --- a/src/python/piper_train/vits/lightning.py +++ b/src/python/piper_train/vits/lightning.py @@ -60,6 +60,8 @@ def __init__( # training dataset: Optional[List[Union[str, Path]]] = None, learning_rate: float = 2e-4, + override_learning_rate: bool = False, + weight_decay: float = 1e-6, betas: Tuple[float, float] = (0.8, 0.99), eps: float = 1e-9, batch_size: int = 1, @@ -203,6 +205,16 @@ def training_step(self, batch: Batch, batch_idx: int): # Manually access optimizers opt_g, opt_d = self.optimizers() + if self.first_epoch: + if self.hparams.override_learning_rate: + _LOGGER.info("First epoch, overriding learning rate to %f", self.hparams.learning_rate) + for param_group in opt_g.param_groups: + param_group['lr'] = self.hparams.learning_rate + for param_group in opt_d.param_groups: + param_group['lr'] = self.hparams.learning_rate + self.first_epoch = False + + # Perform generator step loss_gen_all = self.training_step_g(batch) opt_g.zero_grad() @@ -215,6 +227,11 @@ def training_step(self, batch: Batch, batch_idx: int): self.manual_backward(loss_disc_all) opt_d.step() + # Log learning rates + self.log("gen_lr", opt_g.param_groups[0]['lr']) + self.log("disc_lr", opt_d.param_groups[0]['lr']) + self.log("step", self.global_step, prog_bar=True) + return {"loss_gen": loss_gen_all, "loss_disc": loss_disc_all} def training_step_g(self, batch: Batch): @@ -282,11 +299,7 @@ def training_step_g(self, batch: Batch): loss_gen, _losses_gen = generator_loss(y_d_hat_g) loss_gen_all = loss_gen + loss_fm + loss_mel + loss_dur + loss_kl - loss_gen_lr = self.trainer.optimizers[0].param_groups[0]['lr'] - self.log("gen_loss", loss_gen_all) - self.log("gen_lr", loss_gen_lr) - self.log("step", self.global_step, prog_bar=True) return loss_gen_all @@ -303,17 +316,13 @@ def training_step_d(self, batch: Batch): ) loss_disc_all = loss_disc - loss_disc_lr = self.trainer.optimizers[1].param_groups[0]['lr'] self.log("disc_loss", loss_disc_all) - self.log("disc_lr", loss_disc_lr) - self.log("step", self.global_step, prog_bar=True) return loss_disc_all def validation_step(self, batch: Batch, batch_idx: int): val_loss = self.training_step_g(batch) + self.training_step_d(batch) - self.log("step", self.global_step, prog_bar=True) self.log("val_loss", val_loss) # # Generate audio examples @@ -371,6 +380,9 @@ def on_train_epoch_end(self): # Update plot self.update_plot() + def on_train_start(self): + self.first_epoch = True + def configure_optimizers(self): optimizers = [ torch.optim.AdamW( @@ -378,12 +390,14 @@ def configure_optimizers(self): lr=self.hparams.learning_rate, betas=self.hparams.betas, eps=self.hparams.eps, + weight_decay=self.hparams.weight_decay, ), torch.optim.AdamW( self.model_d.parameters(), lr=self.hparams.learning_rate, betas=self.hparams.betas, eps=self.hparams.eps, + weight_decay=self.hparams.weight_decay, ), ] schedulers = [ @@ -444,4 +458,8 @@ def add_model_specific_args(parent_parser): parser.add_argument("--show-plot", type=bool, default=False) parser.add_argument("--plot-save-path", type=str, default=None) + parser.add_argument("--learning-rate", type=float, default=2e-4) + parser.add_argument("--weight-decay", type=float, default=1e-6) + parser.add_argument("--override-learning-rate", type=bool, default=False) + return parent_parser From 2ec1ca0f6f8efdfc019b2e4ddc2be991858b0208 Mon Sep 17 00:00:00 2001 From: Matt Jeanes Date: Mon, 16 Sep 2024 01:37:36 +0100 Subject: [PATCH 11/17] graph learning rates --- src/python/piper_train/vits/lightning.py | 35 +++++++++++++++++++----- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/src/python/piper_train/vits/lightning.py b/src/python/piper_train/vits/lightning.py index a67e1368d..35b465d35 100644 --- a/src/python/piper_train/vits/lightning.py +++ b/src/python/piper_train/vits/lightning.py @@ -132,6 +132,8 @@ def __init__( self.disc_losses = [] self.val_losses = [] self.epochs = [] + self.gen_lrs = [] + self.disc_lrs = [] def _load_datasets( self, @@ -370,9 +372,19 @@ def on_train_epoch_end(self): # Capture validation loss val_loss = self.trainer.callback_metrics.get("val_loss") - if val_loss is not None: - val_loss_cpu = val_loss.detach().cpu() if val_loss.is_cuda else val_loss.detach() - self.val_losses.append(val_loss_cpu) + val_loss_cpu = val_loss.detach().cpu() if val_loss.is_cuda else val_loss.detach() + self.val_losses.append(val_loss_cpu) + + + # Capture learning rate + gen_lr = self.trainer.callback_metrics.get("gen_lr") + disc_lr = self.trainer.callback_metrics.get("disc_lr") + + gen_lr_cpu = gen_lr.detach().cpu() if gen_lr.is_cuda else gen_lr.detach() + disc_lr_cpu = disc_lr.detach().cpu() if disc_lr.is_cuda else disc_lr.detach() + + self.gen_lrs.append(gen_lr_cpu) + self.disc_lrs.append(disc_lr_cpu) # Update epochs for plot self.epochs.append(self.current_epoch) @@ -416,12 +428,21 @@ def update_plot(self): raise ValueError("show_plot or plot_save_path must be set to update plot") self.ax.clear() - self.ax.plot(self.epochs, self.gen_losses, label='Generator Loss') - self.ax.plot(self.epochs, self.disc_losses, label='Discriminator Loss') - self.ax.plot(self.epochs, self.val_losses, label='Validation Loss') + self.ax.plot(self.epochs, self.gen_losses, label='Generator Loss', color='tab:blue') + self.ax.plot(self.epochs, self.disc_losses, label='Discriminator Loss', color='tab:orange') + self.ax.plot(self.epochs, self.val_losses, label='Validation Loss', color='tab:green') self.ax.set_xlabel('Epoch') self.ax.set_ylabel('Loss') - self.ax.legend() + self.ax.legend(loc='upper left') + + # Create a secondary y-axis for the learning rate + ax2 = self.ax.twinx() + ax2.plot(self.epochs, self.gen_lrs, label='Generator Learning Rate', color='tab:red') + ax2.plot(self.epochs, self.disc_lrs, label='Discriminator Learning Rate', color='tab:purple') + ax2.set_xlabel('Epoch') + ax2.set_ylabel('Learning Rate') + ax2.legend(loc='upper right') + title = F'Training Progress - Epoch: {self.current_epoch}' self.ax.set_title(title) self.ax.get_figure().canvas.manager.set_window_title(title) From 77c876bff16ef0519ecb421793c11b0bc74cd3b0 Mon Sep 17 00:00:00 2001 From: Matt Jeanes Date: Mon, 16 Sep 2024 02:39:18 +0100 Subject: [PATCH 12/17] hopefully fix graph --- src/python/piper_train/vits/lightning.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/python/piper_train/vits/lightning.py b/src/python/piper_train/vits/lightning.py index 35b465d35..5ed8d2e1f 100644 --- a/src/python/piper_train/vits/lightning.py +++ b/src/python/piper_train/vits/lightning.py @@ -128,6 +128,7 @@ def __init__( if self.hparams.show_plot or self.hparams.plot_save_path: # Initialize plot self.fig, self.ax = plt.subplots() + self.ax2 = None self.gen_losses = [] self.disc_losses = [] self.val_losses = [] @@ -424,8 +425,6 @@ def configure_optimizers(self): return optimizers, schedulers def update_plot(self): - if not self.hparams.show_plot and not self.hparams.plot_save_path: - raise ValueError("show_plot or plot_save_path must be set to update plot") self.ax.clear() self.ax.plot(self.epochs, self.gen_losses, label='Generator Loss', color='tab:blue') @@ -433,15 +432,18 @@ def update_plot(self): self.ax.plot(self.epochs, self.val_losses, label='Validation Loss', color='tab:green') self.ax.set_xlabel('Epoch') self.ax.set_ylabel('Loss') - self.ax.legend(loc='upper left') # Create a secondary y-axis for the learning rate - ax2 = self.ax.twinx() - ax2.plot(self.epochs, self.gen_lrs, label='Generator Learning Rate', color='tab:red') - ax2.plot(self.epochs, self.disc_lrs, label='Discriminator Learning Rate', color='tab:purple') - ax2.set_xlabel('Epoch') - ax2.set_ylabel('Learning Rate') - ax2.legend(loc='upper right') + if self.ax2 is not None: + self.ax2.clear() + self.ax2 = self.ax.twinx() + self.ax2.plot(self.epochs, self.gen_lrs, label='Generator Learning Rate', color='tab:red') + self.ax2.plot(self.epochs, self.disc_lrs, label='Discriminator Learning Rate', color='tab:purple') + self.ax2.set_xlabel('Epoch') + self.ax2.set_ylabel('Learning Rate') + + self.ax.legend(loc='upper left') + self.ax2.legend(loc='upper right') title = F'Training Progress - Epoch: {self.current_epoch}' self.ax.set_title(title) From c9720ea74ce73f91d49b95e1b59decc537389e90 Mon Sep 17 00:00:00 2001 From: Matt Jeanes Date: Mon, 16 Sep 2024 02:59:53 +0100 Subject: [PATCH 13/17] add support for gradient clipping (probably) --- src/python/piper_train/vits/lightning.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/python/piper_train/vits/lightning.py b/src/python/piper_train/vits/lightning.py index 5ed8d2e1f..981f492d4 100644 --- a/src/python/piper_train/vits/lightning.py +++ b/src/python/piper_train/vits/lightning.py @@ -222,12 +222,22 @@ def training_step(self, batch: Batch, batch_idx: int): loss_gen_all = self.training_step_g(batch) opt_g.zero_grad() self.manual_backward(loss_gen_all) + + # Gradient clipping for generator + if self.hparams.grad_clip is not None: + torch.nn.utils.clip_grad_norm_(self.model_g.parameters(), self.hparams.grad_clip) + opt_g.step() # Perform discriminator step loss_disc_all = self.training_step_d(batch) opt_d.zero_grad() self.manual_backward(loss_disc_all) + + # Gradient clipping for discriminator + if self.hparams.grad_clip is not None: + torch.nn.utils.clip_grad_norm_(self.model_d.parameters(), self.hparams.grad_clip) + opt_d.step() # Log learning rates @@ -484,5 +494,6 @@ def add_model_specific_args(parent_parser): parser.add_argument("--learning-rate", type=float, default=2e-4) parser.add_argument("--weight-decay", type=float, default=1e-6) parser.add_argument("--override-learning-rate", type=bool, default=False) + parser.add_argument("--grad-clip", type=float, default=None) return parent_parser From afbdbcc601913a056c037e13c0fd0b5ea345a985 Mon Sep 17 00:00:00 2001 From: Matt Jeanes Date: Mon, 16 Sep 2024 19:28:52 +0100 Subject: [PATCH 14/17] add option for reduceLRonplateau, set other params to defaults as before --- src/python/piper_train/__main__.py | 19 +++++++------ src/python/piper_train/vits/config.py | 2 +- src/python/piper_train/vits/lightning.py | 35 +++++++++++++++++------- 3 files changed, 36 insertions(+), 20 deletions(-) diff --git a/src/python/piper_train/__main__.py b/src/python/piper_train/__main__.py index f615f6b2b..5d5f522a2 100644 --- a/src/python/piper_train/__main__.py +++ b/src/python/piper_train/__main__.py @@ -102,7 +102,7 @@ def main(): parser.add_argument( "--early_stop_patience", type=int, - default=20, + default=0, help="Early stopping patience." ) args = parser.parse_args() @@ -168,14 +168,15 @@ def supports_tensor_cores(): "%s Checkpoints will be saved", args.num_ckpt ) - # Early stopping callback - early_stopping_callback = EarlyStopping( - monitor='val_loss', - patience=args.early_stop_patience, - verbose=True, - mode='min' - ) - callbacks.append(early_stopping_callback) + if args.early_stop_patience > 0: + # Early stopping callback + early_stopping_callback = EarlyStopping( + monitor='val_loss', + patience=args.early_stop_patience, + verbose=True, + mode='min' + ) + callbacks.append(early_stopping_callback) # Learning rate monitor callback lr_monitor_callback = LearningRateMonitor(logging_interval='epoch') diff --git a/src/python/piper_train/vits/config.py b/src/python/piper_train/vits/config.py index 6de3c8616..e878f02cd 100644 --- a/src/python/piper_train/vits/config.py +++ b/src/python/piper_train/vits/config.py @@ -116,7 +116,7 @@ class TrainingConfig: eps: float = 1e-9 # batch_size: int = 32 fp16_run: bool = False - #lr_decay: float = 0.999875 # Disable fixed learning rate decay as it's handled by ReduceLROnPlateau + lr_decay: float = 0.999875 init_lr_ratio: float = 1.0 warmup_epochs: int = 0 c_mel: int = 45 diff --git a/src/python/piper_train/vits/lightning.py b/src/python/piper_train/vits/lightning.py index 981f492d4..f9db89f46 100644 --- a/src/python/piper_train/vits/lightning.py +++ b/src/python/piper_train/vits/lightning.py @@ -61,11 +61,12 @@ def __init__( dataset: Optional[List[Union[str, Path]]] = None, learning_rate: float = 2e-4, override_learning_rate: bool = False, - weight_decay: float = 1e-6, + weight_decay: float = 1e-2, betas: Tuple[float, float] = (0.8, 0.99), eps: float = 1e-9, batch_size: int = 1, lr_decay: float = 0.999875, + lr_reduce_enabled: bool = False, lr_reduce_patience: int = 10, lr_reduce_factor: float = 0.5, init_lr_ratio: float = 1.0, @@ -423,14 +424,26 @@ def configure_optimizers(self): weight_decay=self.hparams.weight_decay, ), ] - schedulers = [ - torch.optim.lr_scheduler.ReduceLROnPlateau( - optimizers[0], mode='min', factor=self.hparams.lr_reduce_factor, patience=self.hparams.lr_reduce_patience - ), - torch.optim.lr_scheduler.ReduceLROnPlateau( - optimizers[1], mode='min', factor=self.hparams.lr_reduce_factor, patience=self.hparams.lr_reduce_patience - ), - ] + + + if self.hparams.lr_reduce_enabled: + schedulers = [ + torch.optim.lr_scheduler.ReduceLROnPlateau( + optimizers[0], mode='min', factor=self.hparams.lr_reduce_factor, patience=self.hparams.lr_reduce_patience + ), + torch.optim.lr_scheduler.ReduceLROnPlateau( + optimizers[1], mode='min', factor=self.hparams.lr_reduce_factor, patience=self.hparams.lr_reduce_patience + ), + ] + else: + schedulers = [ + torch.optim.lr_scheduler.ExponentialLR( + optimizers[0], gamma=self.hparams.lr_decay + ), + torch.optim.lr_scheduler.ExponentialLR( + optimizers[1], gamma=self.hparams.lr_decay + ) + ] return optimizers, schedulers @@ -485,6 +498,8 @@ def add_model_specific_args(parent_parser): parser.add_argument("--n-layers", type=int, default=6) parser.add_argument("--n-heads", type=int, default=2) + parser.add_argument("--lr-decay", type=float, default=0.999875) + parser.add_argument("--lr-reduce-enabled", type=bool, default=False) parser.add_argument("--lr-reduce-factor", type=float, default=0.5) parser.add_argument("--lr-reduce-patience", type=int, default=10) @@ -492,7 +507,7 @@ def add_model_specific_args(parent_parser): parser.add_argument("--plot-save-path", type=str, default=None) parser.add_argument("--learning-rate", type=float, default=2e-4) - parser.add_argument("--weight-decay", type=float, default=1e-6) + parser.add_argument("--weight-decay", type=float, default=1e-2) parser.add_argument("--override-learning-rate", type=bool, default=False) parser.add_argument("--grad-clip", type=float, default=None) From beded97b704f4c67fd8414563b5a11bf1f391ae2 Mon Sep 17 00:00:00 2001 From: Matt Jeanes Date: Mon, 16 Sep 2024 19:34:30 +0100 Subject: [PATCH 15/17] minor fix --- src/python/piper_train/vits/lightning.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/python/piper_train/vits/lightning.py b/src/python/piper_train/vits/lightning.py index f9db89f46..232732e1e 100644 --- a/src/python/piper_train/vits/lightning.py +++ b/src/python/piper_train/vits/lightning.py @@ -362,10 +362,11 @@ def validation_step(self, batch: Batch, batch_idx: int): # sample_rate=self.hparams.sample_rate # ) - # Step the scheduler with the validation loss - scheduler_g, scheduler_d = self.lr_schedulers() - scheduler_g.step(val_loss) - scheduler_d.step(val_loss) + if self.hparams.lr_reduce_enabled: + # Step the scheduler with the validation loss + scheduler_g, scheduler_d = self.lr_schedulers() + scheduler_g.step(val_loss) + scheduler_d.step(val_loss) return val_loss From 9f752ee4f50992523a653dddcf22c04c393da7fb Mon Sep 17 00:00:00 2001 From: Matt Jeanes Date: Tue, 17 Sep 2024 20:30:47 +0100 Subject: [PATCH 16/17] add random seed function --- src/python/piper_train/__main__.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/python/piper_train/__main__.py b/src/python/piper_train/__main__.py index 5d5f522a2..f00db9434 100644 --- a/src/python/piper_train/__main__.py +++ b/src/python/piper_train/__main__.py @@ -62,6 +62,11 @@ def main(): type=int, default=1234 ) + parser.add_argument( + "--random_seed", + type=bool, + default=False + ) parser.add_argument( "--resume_from_checkpoint", type=str, @@ -113,7 +118,13 @@ def main(): args.default_root_dir = args.dataset_dir torch.backends.cudnn.benchmark = True - torch.manual_seed(args.seed) + + if args.random_seed: + seed = torch.seed() + _LOGGER.debug("Using random seed: %s", seed) + else: + torch.manual_seed(args.seed) + _LOGGER.debug("Using manual seed: %s", args.seed) # Function to check if the GPU supports Tensor Cores def supports_tensor_cores(): From 5afcab05631ab5894eac1cae578e4c593f8ac051 Mon Sep 17 00:00:00 2001 From: Mateo Cedillo <54605382+rmcpantoja@users.noreply.github.com> Date: Fri, 13 Dec 2024 07:16:35 -0500 Subject: [PATCH 17/17] HiFi-GAN parametrization according to newer torch. --- src/python/piper_train/vits/models.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/python/piper_train/vits/models.py b/src/python/piper_train/vits/models.py index 68ef7ba59..18bf65d6c 100644 --- a/src/python/piper_train/vits/models.py +++ b/src/python/piper_train/vits/models.py @@ -5,8 +5,9 @@ from torch import nn from torch.nn import Conv1d, Conv2d, ConvTranspose1d from torch.nn import functional as F -from torch.nn.utils import remove_weight_norm, spectral_norm, weight_norm - +from torch.nn.utils import spectral_norm +from torch.nn.utils.parametrizations import weight_norm +from torch.nn.utils.parametrize import remove_parametrizations from . import attentions, commons, modules, monotonic_align from .commons import get_padding, init_weights @@ -370,9 +371,9 @@ def forward(self, x, g=None): def remove_weight_norm(self): print("Removing weight norm...") for l in self.ups: - remove_weight_norm(l) + remove_parametrizations(l) for l in self.resblocks: - l.remove_weight_norm() + l.remove_parametrizations() class DiscriminatorP(torch.nn.Module):