From ac9008f55271e8bfee9f2971b1b8f3b55aaacfd5 Mon Sep 17 00:00:00 2001
From: Matt Jeanes <mattjeanes23@gmail.com>
Date: Sat, 14 Sep 2024 08:49:42 +0100
Subject: [PATCH 01/17] update to latest PyTorch / Lightning and dependencies

---
 .gitignore                                    |  4 ++
 src/python/piper_train/__main__.py            | 51 ++++++++++++++-
 src/python/piper_train/clean_cached_audio.py  |  2 +-
 src/python/piper_train/export_onnx.py         | 14 ++--
 src/python/piper_train/infer_generator.py     |  2 +-
 src/python/piper_train/norm_audio/__init__.py |  2 +-
 src/python/piper_train/vits/dataset.py        |  4 +-
 src/python/piper_train/vits/lightning.py      | 64 +++++++++++--------
 src/python/requirements.txt                   | 10 +--
 9 files changed, 108 insertions(+), 45 deletions(-)

diff --git a/.gitignore b/.gitignore
index e382c7e81..5093eac47 100644
--- a/.gitignore
+++ b/.gitignore
@@ -20,3 +20,7 @@ htmlcov
 
 .venv/
 lightning_logs/
+
+dataset
+training
+last.ckpt
\ No newline at end of file
diff --git a/src/python/piper_train/__main__.py b/src/python/piper_train/__main__.py
index ffb47d447..9cd3b8030 100644
--- a/src/python/piper_train/__main__.py
+++ b/src/python/piper_train/__main__.py
@@ -37,19 +37,47 @@ def main():
         "--resume_from_single_speaker_checkpoint",
         help="For multi-speaker models only. Converts a single-speaker checkpoint to multi-speaker and resumes training",
     )
-    Trainer.add_argparse_args(parser)
     VitsModel.add_model_specific_args(parser)
+    parser.add_argument(
+        "--accelerator",
+        type=str,
+    )
+    parser.add_argument(
+        "--devices",
+        type=int,
+    )
+    parser.add_argument(
+        "--log_every_n_steps",
+        type=int,
+    )
+    parser.add_argument(
+        "--max_epochs",
+        type=int,
+    )
     parser.add_argument(
         "--seed",
         type=int,
         default=1234
     )
+    parser.add_argument(
+        "--resume_from_checkpoint",
+        type=str,
+    )
+    parser.add_argument(
+        "--precision",
+        type=int,
+    )
     parser.add_argument(
         "--num_ckpt",
         type=int,
         default=1,
         help="# of ckpts saved."
     )
+    parser.add_argument(
+        "--default_root_dir",
+        type=str,
+        help="Default root dir for checkpoints and logs."
+    )
     parser.add_argument(
         "--save_last",
         type=bool,
@@ -76,11 +104,28 @@ def main():
         num_speakers = int(config["num_speakers"])
         sample_rate = int(config["audio"]["sample_rate"])
 
-    trainer = Trainer.from_argparse_args(args)
+    # List of argument names to include
+    allowed_args = [
+        "accelerator",
+        "devices",
+        "log_every_n_steps",
+        "max_epochs",
+        "precision",
+        "default_root_dir",
+    ]
+
+    # Filter the arguments
+    filtered_args = {key: value for key, value in vars(args).items() if key in allowed_args}
+
+    # Pass the filtered arguments to Trainer
+
+    trainer = Trainer(**filtered_args)
     if args.checkpoint_epochs is not None:
         trainer.callbacks = [ModelCheckpoint(
             every_n_epochs=args.checkpoint_epochs,
             save_top_k=args.num_ckpt,
+            monitor="val_loss",
+            mode="min",
             save_last=args.save_last
         )]
         _LOGGER.debug(
@@ -147,7 +192,7 @@ def main():
             "Successfully converted single-speaker checkpoint to multi-speaker"
         )
 
-    trainer.fit(model)
+    trainer.fit(model, ckpt_path=args.resume_from_checkpoint)
 
 
 def load_state_dict(model, saved_state_dict):
diff --git a/src/python/piper_train/clean_cached_audio.py b/src/python/piper_train/clean_cached_audio.py
index 5e3029590..a181ba05d 100644
--- a/src/python/piper_train/clean_cached_audio.py
+++ b/src/python/piper_train/clean_cached_audio.py
@@ -32,7 +32,7 @@ def check_file(pt_path: Path) -> None:
 
         try:
             _LOGGER.debug("Checking %s", pt_path)
-            torch.load(str(pt_path))
+            torch.load(str(pt_path), weights_only=True)
         except Exception:
             _LOGGER.error(pt_path)
             if args.delete:
diff --git a/src/python/piper_train/export_onnx.py b/src/python/piper_train/export_onnx.py
index f30957e05..6442a998b 100644
--- a/src/python/piper_train/export_onnx.py
+++ b/src/python/piper_train/export_onnx.py
@@ -51,7 +51,9 @@ def main() -> None:
     with torch.no_grad():
         model_g.dec.remove_weight_norm()
 
-    # old_forward = model_g.infer
+    # Define the device
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    model_g.to(device)
 
     def infer_forward(text, text_lengths, scales, sid=None):
         noise_scale = scales[0]
@@ -73,15 +75,15 @@ def infer_forward(text, text_lengths, scales, sid=None):
     dummy_input_length = 50
     sequences = torch.randint(
         low=0, high=num_symbols, size=(1, dummy_input_length), dtype=torch.long
-    )
-    sequence_lengths = torch.LongTensor([sequences.size(1)])
+    ).to(device)
+    sequence_lengths = torch.LongTensor([sequences.size(1)]).to(device)
 
     sid: Optional[torch.LongTensor] = None
     if num_speakers > 1:
-        sid = torch.LongTensor([0])
+        sid = torch.LongTensor([0]).to(device)
 
     # noise, noise_w, length
-    scales = torch.FloatTensor([0.667, 1.0, 0.8])
+    scales = torch.FloatTensor([0.667, 1.0, 0.8]).to(device)
     dummy_input = (sequences, sequence_lengths, scales, sid)
 
     # Export
@@ -106,4 +108,4 @@ def infer_forward(text, text_lengths, scales, sid=None):
 # -----------------------------------------------------------------------------
 
 if __name__ == "__main__":
-    main()
+    main()
\ No newline at end of file
diff --git a/src/python/piper_train/infer_generator.py b/src/python/piper_train/infer_generator.py
index fe4f348a5..057850dd0 100644
--- a/src/python/piper_train/infer_generator.py
+++ b/src/python/piper_train/infer_generator.py
@@ -26,7 +26,7 @@ def main():
     args.output_dir = Path(args.output_dir)
     args.output_dir.mkdir(parents=True, exist_ok=True)
 
-    model = torch.load(args.model)
+    model = torch.load(args.model, weights_only=True)
 
     # Inference only
     model.eval()
diff --git a/src/python/piper_train/norm_audio/__init__.py b/src/python/piper_train/norm_audio/__init__.py
index 6637230b3..4b48b499d 100644
--- a/src/python/piper_train/norm_audio/__init__.py
+++ b/src/python/piper_train/norm_audio/__init__.py
@@ -77,7 +77,7 @@ def cache_norm_audio(
     if ignore_cache or (not audio_spec_path.exists()):
         if audio_norm_tensor is None:
             # Load pre-cached normalized audio
-            audio_norm_tensor = torch.load(audio_norm_path)
+            audio_norm_tensor = torch.load(audio_norm_path, weights_only=True)
 
         audio_spec_tensor = spectrogram_torch(
             y=audio_norm_tensor,
diff --git a/src/python/piper_train/vits/dataset.py b/src/python/piper_train/vits/dataset.py
index 258425f5c..a9b38483e 100644
--- a/src/python/piper_train/vits/dataset.py
+++ b/src/python/piper_train/vits/dataset.py
@@ -77,8 +77,8 @@ def __getitem__(self, idx) -> UtteranceTensors:
         utt = self.utterances[idx]
         return UtteranceTensors(
             phoneme_ids=LongTensor(utt.phoneme_ids),
-            audio_norm=torch.load(utt.audio_norm_path),
-            spectrogram=torch.load(utt.audio_spec_path),
+            audio_norm=torch.load(utt.audio_norm_path, weights_only=True),
+            spectrogram=torch.load(utt.audio_spec_path, weights_only=True),
             speaker_id=LongTensor([utt.speaker_id])
             if utt.speaker_id is not None
             else None,
diff --git a/src/python/piper_train/vits/lightning.py b/src/python/piper_train/vits/lightning.py
index ca8990249..f93495ba8 100644
--- a/src/python/piper_train/vits/lightning.py
+++ b/src/python/piper_train/vits/lightning.py
@@ -77,6 +77,7 @@ def __init__(
     ):
         super().__init__()
         self.save_hyperparameters()
+        self.automatic_optimization = False  # Disable automatic optimization
 
         if (self.hparams.num_speakers > 1) and (self.hparams.gin_channels <= 0):
             # Default gin_channels for multi-speaker model
@@ -186,12 +187,21 @@ def test_dataloader(self):
             batch_size=self.hparams.batch_size,
         )
 
-    def training_step(self, batch: Batch, batch_idx: int, optimizer_idx: int):
-        if optimizer_idx == 0:
-            return self.training_step_g(batch)
+    def training_step(self, batch: Batch, batch_idx: int):
+        # Manually access optimizers
+        opt_g, opt_d = self.optimizers()
 
-        if optimizer_idx == 1:
-            return self.training_step_d(batch)
+        # Perform generator step
+        loss_gen_all = self.training_step_g(batch)
+        opt_g.zero_grad()
+        self.manual_backward(loss_gen_all)
+        opt_g.step()
+
+        # Perform discriminator step
+        loss_disc_all = self.training_step_d(batch)
+        opt_d.zero_grad()
+        self.manual_backward(loss_disc_all)
+        opt_d.step()
 
     def training_step_g(self, batch: Batch):
         x, x_lengths, y, _, spec, spec_lengths, speaker_ids = (
@@ -283,28 +293,28 @@ def validation_step(self, batch: Batch, batch_idx: int):
         val_loss = self.training_step_g(batch) + self.training_step_d(batch)
         self.log("val_loss", val_loss)
         print(f"Epoch: {self.current_epoch}. Steps: {self.global_step}")
-        # Generate audio examples
-        for utt_idx, test_utt in enumerate(self._test_dataset):
-            text = test_utt.phoneme_ids.unsqueeze(0).to(self.device)
-            text_lengths = torch.LongTensor([len(test_utt.phoneme_ids)]).to(self.device)
-            scales = [0.667, 1.0, 0.8]
-            sid = (
-                test_utt.speaker_id.to(self.device)
-                if test_utt.speaker_id is not None
-                else None
-            )
-            test_audio = self(text, text_lengths, scales, sid=sid).detach()
-
-            # Scale to make louder in [-1, 1]
-            test_audio = test_audio * (1.0 / max(0.01, abs(test_audio.max())))
-
-            tag = test_utt.text or str(utt_idx)
-            self.logger.experiment.add_audio(
-                tag, 
-                test_audio,
-                self.global_step,
-                sample_rate=self.hparams.sample_rate
-            )
+        # # Generate audio examples
+        # for utt_idx, test_utt in enumerate(self._test_dataset):
+        #     text = test_utt.phoneme_ids.unsqueeze(0).to(self.device)
+        #     text_lengths = torch.LongTensor([len(test_utt.phoneme_ids)]).to(self.device)
+        #     scales = [0.667, 1.0, 0.8]
+        #     sid = (
+        #         test_utt.speaker_id.to(self.device)
+        #         if test_utt.speaker_id is not None
+        #         else None
+        #     )
+        #     test_audio = self(text, text_lengths, scales, sid=sid).detach()
+
+        #     # Scale to make louder in [-1, 1]
+        #     test_audio = test_audio * (1.0 / max(0.01, abs(test_audio.max())))
+
+        #     tag = test_utt.text or str(utt_idx)
+        #     self.logger.experiment.add_audio(
+        #         tag, 
+        #         test_audio,
+        #         self.global_step,
+        #         sample_rate=self.hparams.sample_rate
+        #     )
 
         return val_loss
 
diff --git a/src/python/requirements.txt b/src/python/requirements.txt
index 010c8b3a2..ee8d0a2a9 100644
--- a/src/python/requirements.txt
+++ b/src/python/requirements.txt
@@ -1,10 +1,12 @@
---extra-index-url https://download.pytorch.org/whl/cu117
+--extra-index-url https://download.pytorch.org/whl/cu121
 
 cython>=0.29.0,<1
 piper-phonemize~=1.1.0
 librosa>=0.9.2,<1
 numpy==1.24
 onnxruntime>=1.11.0
-pytorch-lightning~=1.7.7
-torch==1.13.1+cu117
-torchmetrics==1.1.1
\ No newline at end of file
+pytorch-lightning~=2.4.0
+torch==2.4.0+cu121
+torchmetrics==1.4.2
+onnx==1.16.2
+onnxruntime-gpu==1.19.2
\ No newline at end of file

From 404b844ce3fa22fa91b735aa337b6548d16f5b79 Mon Sep 17 00:00:00 2001
From: Matt Jeanes <mattjeanes23@gmail.com>
Date: Sat, 14 Sep 2024 21:02:22 +0100
Subject: [PATCH 02/17] add arguments for monitor and monitor mode

---
 src/python/piper_train/__main__.py | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/src/python/piper_train/__main__.py b/src/python/piper_train/__main__.py
index 9cd3b8030..50162eb85 100644
--- a/src/python/piper_train/__main__.py
+++ b/src/python/piper_train/__main__.py
@@ -84,6 +84,18 @@ def main():
         default=None,
         help="Always save the last checkpoint."
     )
+    parser.add_argument(
+        "--monitor",
+        type=str,
+        default="val_loss",
+        help="Metric to monitor."
+    )
+    parser.add_argument(
+        "--monitor_mode",
+        type=str,
+        default="min",
+        help="Mode to monitor."
+    )
     args = parser.parse_args()
     _LOGGER.debug(args)
 
@@ -124,8 +136,8 @@ def main():
         trainer.callbacks = [ModelCheckpoint(
             every_n_epochs=args.checkpoint_epochs,
             save_top_k=args.num_ckpt,
-            monitor="val_loss",
-            mode="min",
+            monitor=args.monitor,
+            mode=args.monitor_mode,
             save_last=args.save_last
         )]
         _LOGGER.debug(

From 91eb9c1382f7e22e8b245393bc5d01d39257d1ec Mon Sep 17 00:00:00 2001
From: Matt Jeanes <mattjeanes23@gmail.com>
Date: Sat, 14 Sep 2024 22:18:10 +0100
Subject: [PATCH 03/17] fix --precision argument type (int -> str)

---
 src/python/piper_train/__main__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/python/piper_train/__main__.py b/src/python/piper_train/__main__.py
index 50162eb85..b885c9846 100644
--- a/src/python/piper_train/__main__.py
+++ b/src/python/piper_train/__main__.py
@@ -65,7 +65,7 @@ def main():
     )
     parser.add_argument(
         "--precision",
-        type=int,
+        type=str,
     )
     parser.add_argument(
         "--num_ckpt",

From bdd6b0c135040e90758951de01ca9ab0ede80ae8 Mon Sep 17 00:00:00 2001
From: Matt Jeanes <mattjeanes23@gmail.com>
Date: Sun, 15 Sep 2024 19:53:37 +0100
Subject: [PATCH 04/17] early stopping / proper progress bar / dynamic learning
 rate

---
 .gitignore                               |  2 +-
 src/python/piper_train/__main__.py       | 27 +++++++++++++++++++-----
 src/python/piper_train/vits/config.py    |  2 +-
 src/python/piper_train/vits/lightning.py | 15 ++++++++-----
 4 files changed, 34 insertions(+), 12 deletions(-)

diff --git a/.gitignore b/.gitignore
index 5093eac47..5ee38aff4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -23,4 +23,4 @@ lightning_logs/
 
 dataset
 training
-last.ckpt
\ No newline at end of file
+*.ckpt
\ No newline at end of file
diff --git a/src/python/piper_train/__main__.py b/src/python/piper_train/__main__.py
index b885c9846..5f5b226ac 100644
--- a/src/python/piper_train/__main__.py
+++ b/src/python/piper_train/__main__.py
@@ -5,7 +5,7 @@
 
 import torch
 from pytorch_lightning import Trainer
-from pytorch_lightning.callbacks import ModelCheckpoint
+from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping, LearningRateMonitor
 
 from .vits.lightning import VitsModel
 
@@ -14,6 +14,7 @@
 
 def main():
     logging.basicConfig(level=logging.DEBUG)
+    logging.getLogger("fsspec").setLevel(logging.WARNING)
 
     parser = argparse.ArgumentParser()
     parser.add_argument(
@@ -129,17 +130,18 @@ def main():
     # Filter the arguments
     filtered_args = {key: value for key, value in vars(args).items() if key in allowed_args}
 
-    # Pass the filtered arguments to Trainer
+    # Initialize callbacks
+    callbacks = []
 
-    trainer = Trainer(**filtered_args)
     if args.checkpoint_epochs is not None:
-        trainer.callbacks = [ModelCheckpoint(
+        checkpoint_callback = ModelCheckpoint(
             every_n_epochs=args.checkpoint_epochs,
             save_top_k=args.num_ckpt,
             monitor=args.monitor,
             mode=args.monitor_mode,
             save_last=args.save_last
-        )]
+        )
+        callbacks.append(checkpoint_callback)
         _LOGGER.debug(
             "Checkpoints will be saved every %s epoch(s)", args.checkpoint_epochs
         )
@@ -147,6 +149,21 @@ def main():
             "%s Checkpoints will be saved", args.num_ckpt
         )
 
+    # Early stopping callback
+    early_stopping_callback = EarlyStopping(
+        monitor='val_loss',
+        patience=10,
+        verbose=True,
+        mode='min'
+    )
+    callbacks.append(early_stopping_callback)
+
+    # Learning rate monitor callback
+    lr_monitor_callback = LearningRateMonitor(logging_interval='epoch')
+    callbacks.append(lr_monitor_callback)
+
+    trainer = Trainer(**filtered_args, callbacks=callbacks)
+
     dict_args = vars(args)
     if args.quality == "x-low":
         dict_args["hidden_channels"] = 96
diff --git a/src/python/piper_train/vits/config.py b/src/python/piper_train/vits/config.py
index e878f02cd..6de3c8616 100644
--- a/src/python/piper_train/vits/config.py
+++ b/src/python/piper_train/vits/config.py
@@ -116,7 +116,7 @@ class TrainingConfig:
     eps: float = 1e-9
     # batch_size: int = 32
     fp16_run: bool = False
-    lr_decay: float = 0.999875
+    #lr_decay: float = 0.999875 # Disable fixed learning rate decay as it's handled by ReduceLROnPlateau
     init_lr_ratio: float = 1.0
     warmup_epochs: int = 0
     c_mel: int = 45
diff --git a/src/python/piper_train/vits/lightning.py b/src/python/piper_train/vits/lightning.py
index f93495ba8..efa4e190e 100644
--- a/src/python/piper_train/vits/lightning.py
+++ b/src/python/piper_train/vits/lightning.py
@@ -292,7 +292,7 @@ def training_step_d(self, batch: Batch):
     def validation_step(self, batch: Batch, batch_idx: int):
         val_loss = self.training_step_g(batch) + self.training_step_d(batch)
         self.log("val_loss", val_loss)
-        print(f"Epoch: {self.current_epoch}. Steps: {self.global_step}")
+
         # # Generate audio examples
         # for utt_idx, test_utt in enumerate(self._test_dataset):
         #     text = test_utt.phoneme_ids.unsqueeze(0).to(self.device)
@@ -316,6 +316,11 @@ def validation_step(self, batch: Batch, batch_idx: int):
         #         sample_rate=self.hparams.sample_rate
         #     )
 
+        # Step the scheduler with the validation loss
+        scheduler_g, scheduler_d = self.lr_schedulers()
+        scheduler_g.step(val_loss)
+        scheduler_d.step(val_loss)
+
         return val_loss
 
     def configure_optimizers(self):
@@ -334,11 +339,11 @@ def configure_optimizers(self):
             ),
         ]
         schedulers = [
-            torch.optim.lr_scheduler.ExponentialLR(
-                optimizers[0], gamma=self.hparams.lr_decay
+            torch.optim.lr_scheduler.ReduceLROnPlateau(
+                optimizers[0], mode='min', factor=0.1, patience=10, verbose=True
             ),
-            torch.optim.lr_scheduler.ExponentialLR(
-                optimizers[1], gamma=self.hparams.lr_decay
+            torch.optim.lr_scheduler.ReduceLROnPlateau(
+                optimizers[1], mode='min', factor=0.1, patience=10, verbose=True
             ),
         ]
 

From 6455cae1cb4fbf5c8f87554241967c320464a172 Mon Sep 17 00:00:00 2001
From: Matt Jeanes <mattjeanes23@gmail.com>
Date: Sun, 15 Sep 2024 20:16:14 +0100
Subject: [PATCH 05/17] add options and adjust defaults

---
 src/python/piper_train/__main__.py       |  8 +++++++-
 src/python/piper_train/vits/lightning.py | 10 ++++++++--
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/src/python/piper_train/__main__.py b/src/python/piper_train/__main__.py
index 5f5b226ac..36252b43d 100644
--- a/src/python/piper_train/__main__.py
+++ b/src/python/piper_train/__main__.py
@@ -97,6 +97,12 @@ def main():
         default="min",
         help="Mode to monitor."
     )
+    parser.add_argument(
+        "--early_stop_patience",
+        type=int,
+        default=20,
+        help="Early stopping patience."
+    )
     args = parser.parse_args()
     _LOGGER.debug(args)
 
@@ -152,7 +158,7 @@ def main():
     # Early stopping callback
     early_stopping_callback = EarlyStopping(
         monitor='val_loss',
-        patience=10,
+        patience=args.early_stop_patience,
         verbose=True,
         mode='min'
     )
diff --git a/src/python/piper_train/vits/lightning.py b/src/python/piper_train/vits/lightning.py
index efa4e190e..cb134b824 100644
--- a/src/python/piper_train/vits/lightning.py
+++ b/src/python/piper_train/vits/lightning.py
@@ -63,6 +63,8 @@ def __init__(
         eps: float = 1e-9,
         batch_size: int = 1,
         lr_decay: float = 0.999875,
+        lr_reduce_patience: int = 10,
+        lr_reduce_factor: float = 0.5,
         init_lr_ratio: float = 1.0,
         warmup_epochs: int = 0,
         c_mel: int = 45,
@@ -338,12 +340,13 @@ def configure_optimizers(self):
                 eps=self.hparams.eps,
             ),
         ]
+        print("TESTING", self.hparams.lr_reduce_factor, self.hparams.lr_reduce_patience)
         schedulers = [
             torch.optim.lr_scheduler.ReduceLROnPlateau(
-                optimizers[0], mode='min', factor=0.1, patience=10, verbose=True
+                optimizers[0], mode='min', factor=self.hparams.lr_reduce_factor, patience=self.hparams.lr_reduce_patience, verbose=True
             ),
             torch.optim.lr_scheduler.ReduceLROnPlateau(
-                optimizers[1], mode='min', factor=0.1, patience=10, verbose=True
+                optimizers[1], mode='min', factor=self.hparams.lr_reduce_factor, patience=self.hparams.lr_reduce_patience, verbose=True
             ),
         ]
 
@@ -366,5 +369,8 @@ def add_model_specific_args(parent_parser):
         parser.add_argument("--filter-channels", type=int, default=768)
         parser.add_argument("--n-layers", type=int, default=6)
         parser.add_argument("--n-heads", type=int, default=2)
+
+        parser.add_argument("--lr-reduce-factor", type=float, default=0.5)
+        parser.add_argument("--lr-reduce-patience", type=int, default=10)
         #
         return parent_parser

From 08465f5d5ee04c626e5f30ed3ab11ac540ca3518 Mon Sep 17 00:00:00 2001
From: Matt Jeanes <mattjeanes23@gmail.com>
Date: Sun, 15 Sep 2024 21:33:23 +0100
Subject: [PATCH 06/17] improve logging and Tensor Core support

---
 src/python/piper_train/__main__.py       | 11 +++++++++++
 src/python/piper_train/vits/lightning.py | 17 ++++++++++++-----
 2 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/src/python/piper_train/__main__.py b/src/python/piper_train/__main__.py
index 36252b43d..0eb6a2227 100644
--- a/src/python/piper_train/__main__.py
+++ b/src/python/piper_train/__main__.py
@@ -112,6 +112,17 @@ def main():
 
     torch.backends.cudnn.benchmark = True
     torch.manual_seed(args.seed)
+    
+    # Function to check if the GPU supports Tensor Cores
+    def supports_tensor_cores():
+        # Assuming that Tensor Cores are supported if the compute capability is 7.0 or higher
+        # This is a simplification; you might need a more detailed check based on your specific requirements
+        return torch.cuda.get_device_capability(0)[0] >= 7
+
+    # Set the float32 matrix multiplication precision based on GPU support for Tensor Cores
+    if supports_tensor_cores():
+        # Set to 'high' or 'medium' based on your preference
+        torch.set_float32_matmul_precision('high')
 
     config_path = args.dataset_dir / "config.json"
     dataset_path = args.dataset_dir / "dataset.jsonl"
diff --git a/src/python/piper_train/vits/lightning.py b/src/python/piper_train/vits/lightning.py
index cb134b824..9d5329da6 100644
--- a/src/python/piper_train/vits/lightning.py
+++ b/src/python/piper_train/vits/lightning.py
@@ -270,7 +270,11 @@ def training_step_g(self, batch: Batch):
             loss_gen, _losses_gen = generator_loss(y_d_hat_g)
             loss_gen_all = loss_gen + loss_fm + loss_mel + loss_dur + loss_kl
 
-            self.log("loss_gen_all", loss_gen_all)
+            loss_gen_lr = self.trainer.optimizers[0].param_groups[0]['lr']
+
+            self.log("gen_loss", loss_gen_all)
+            self.log("gen_lr", loss_gen_lr)
+            self.log("step", self.global_step, prog_bar=True)
 
             return loss_gen_all
 
@@ -287,12 +291,16 @@ def training_step_d(self, batch: Batch):
             )
             loss_disc_all = loss_disc
 
-            self.log("loss_disc_all", loss_disc_all)
+            loss_disc_lr = self.trainer.optimizers[1].param_groups[0]['lr']
+            self.log("disc_loss", loss_disc_all)
+            self.log("disc_lr", loss_disc_lr)
+            self.log("step", self.global_step, prog_bar=True)
 
             return loss_disc_all
 
     def validation_step(self, batch: Batch, batch_idx: int):
         val_loss = self.training_step_g(batch) + self.training_step_d(batch)
+        self.log("step", self.global_step, prog_bar=True)
         self.log("val_loss", val_loss)
 
         # # Generate audio examples
@@ -340,13 +348,12 @@ def configure_optimizers(self):
                 eps=self.hparams.eps,
             ),
         ]
-        print("TESTING", self.hparams.lr_reduce_factor, self.hparams.lr_reduce_patience)
         schedulers = [
             torch.optim.lr_scheduler.ReduceLROnPlateau(
-                optimizers[0], mode='min', factor=self.hparams.lr_reduce_factor, patience=self.hparams.lr_reduce_patience, verbose=True
+                optimizers[0], mode='min', factor=self.hparams.lr_reduce_factor, patience=self.hparams.lr_reduce_patience
             ),
             torch.optim.lr_scheduler.ReduceLROnPlateau(
-                optimizers[1], mode='min', factor=self.hparams.lr_reduce_factor, patience=self.hparams.lr_reduce_patience, verbose=True
+                optimizers[1], mode='min', factor=self.hparams.lr_reduce_factor, patience=self.hparams.lr_reduce_patience
             ),
         ]
 

From 105450f84ed62a7a9083ddeacbe76f64c4097d4c Mon Sep 17 00:00:00 2001
From: Matt Jeanes <mattjeanes23@gmail.com>
Date: Sun, 15 Sep 2024 22:29:26 +0100
Subject: [PATCH 07/17] add optional plot with --show-plot

---
 src/python/piper_train/__main__.py       |  2 +
 src/python/piper_train/vits/lightning.py | 60 ++++++++++++++++++++++++
 src/python/requirements.txt              |  4 +-
 3 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/src/python/piper_train/__main__.py b/src/python/piper_train/__main__.py
index 0eb6a2227..f615f6b2b 100644
--- a/src/python/piper_train/__main__.py
+++ b/src/python/piper_train/__main__.py
@@ -15,6 +15,8 @@
 def main():
     logging.basicConfig(level=logging.DEBUG)
     logging.getLogger("fsspec").setLevel(logging.WARNING)
+    logging.getLogger("matplotlib").setLevel(logging.WARNING)
+    logging.getLogger("PIL").setLevel(logging.WARNING)
 
     parser = argparse.ArgumentParser()
     parser.add_argument(
diff --git a/src/python/piper_train/vits/lightning.py b/src/python/piper_train/vits/lightning.py
index 9d5329da6..a6cba70f1 100644
--- a/src/python/piper_train/vits/lightning.py
+++ b/src/python/piper_train/vits/lightning.py
@@ -7,6 +7,8 @@
 from torch import autocast
 from torch.nn import functional as F
 from torch.utils.data import DataLoader, Dataset, random_split
+import matplotlib.pyplot as plt
+from IPython.display import display, clear_output
 
 from .commons import slice_segments
 from .dataset import Batch, PiperDataset, UtteranceCollate
@@ -75,6 +77,7 @@ def __init__(
         num_test_examples: int = 5,
         validation_split: float = 0.1,
         max_phoneme_ids: Optional[int] = None,
+        show_plot = False,
         **kwargs,
     ):
         super().__init__()
@@ -121,6 +124,14 @@ def __init__(
         self._y = None
         self._y_hat = None
 
+        if self.hparams.show_plot:
+            # Initialize plot
+            self.fig, self.ax = plt.subplots()
+            self.gen_losses = []
+            self.disc_losses = []
+            self.val_losses = []
+            self.epochs = []
+
     def _load_datasets(
         self,
         validation_split: float,
@@ -205,6 +216,8 @@ def training_step(self, batch: Batch, batch_idx: int):
         self.manual_backward(loss_disc_all)
         opt_d.step()
 
+        return {"loss_gen": loss_gen_all, "loss_disc": loss_disc_all}
+
     def training_step_g(self, batch: Batch):
         x, x_lengths, y, _, spec, spec_lengths, speaker_ids = (
             batch.phoneme_ids,
@@ -300,6 +313,7 @@ def training_step_d(self, batch: Batch):
 
     def validation_step(self, batch: Batch, batch_idx: int):
         val_loss = self.training_step_g(batch) + self.training_step_d(batch)
+
         self.log("step", self.global_step, prog_bar=True)
         self.log("val_loss", val_loss)
 
@@ -332,6 +346,31 @@ def validation_step(self, batch: Batch, batch_idx: int):
         scheduler_d.step(val_loss)
 
         return val_loss
+    
+    def on_train_epoch_end(self):
+        if not self.hparams.show_plot:
+            return
+
+        avg_gen_loss = self.trainer.callback_metrics.get("gen_loss")
+        avg_disc_loss = self.trainer.callback_metrics.get("disc_loss")
+
+        avg_gen_loss_cpu = avg_gen_loss.detach().cpu() if avg_gen_loss.is_cuda else avg_gen_loss.detach()
+        self.gen_losses.append(avg_gen_loss_cpu)
+
+        avg_disc_loss_cpu = avg_disc_loss.detach().cpu() if avg_disc_loss.is_cuda else avg_disc_loss.detach()
+        self.disc_losses.append(avg_disc_loss_cpu)
+
+        # Capture validation loss
+        val_loss = self.trainer.callback_metrics.get("val_loss")
+        if val_loss is not None:
+            val_loss_cpu = val_loss.detach().cpu() if val_loss.is_cuda else val_loss.detach()
+            self.val_losses.append(val_loss_cpu)
+
+        # Update epochs for plot
+        self.epochs.append(self.current_epoch)
+
+        # Update plot
+        self.update_plot()
 
     def configure_optimizers(self):
         optimizers = [
@@ -359,6 +398,25 @@ def configure_optimizers(self):
 
         return optimizers, schedulers
 
+    def update_plot(self):
+        if not self.hparams.show_plot:
+            raise ValueError("show_plot is not enabled")
+        self.ax.clear()
+
+        self.ax.plot(self.epochs, self.gen_losses, label='Generator Loss')
+        self.ax.plot(self.epochs, self.disc_losses, label='Discriminator Loss')
+        self.ax.plot(self.epochs, self.val_losses, label='Validation Loss')
+        self.ax.set_xlabel('Epoch')
+        self.ax.set_ylabel('Loss')
+        self.ax.legend()
+        title = F'Training Progress - Epoch: {self.current_epoch}'
+        self.ax.set_title(title)
+        self.ax.get_figure().canvas.manager.set_window_title(title)
+        self.ax.grid(True)
+        plt.draw()
+        clear_output(wait=True)
+        plt.pause(0.01)
+
     @staticmethod
     def add_model_specific_args(parent_parser):
         parser = parent_parser.add_argument_group("VitsModel")
@@ -379,5 +437,7 @@ def add_model_specific_args(parent_parser):
 
         parser.add_argument("--lr-reduce-factor", type=float, default=0.5)
         parser.add_argument("--lr-reduce-patience", type=int, default=10)
+        
+        parser.add_argument("--show-plot", type=bool, default=False)
         #
         return parent_parser
diff --git a/src/python/requirements.txt b/src/python/requirements.txt
index ee8d0a2a9..8fdb5280f 100644
--- a/src/python/requirements.txt
+++ b/src/python/requirements.txt
@@ -9,4 +9,6 @@ pytorch-lightning~=2.4.0
 torch==2.4.0+cu121
 torchmetrics==1.4.2
 onnx==1.16.2
-onnxruntime-gpu==1.19.2
\ No newline at end of file
+onnxruntime-gpu==1.19.2
+matplotlib==3.9.2
+ipython==8.27.0
\ No newline at end of file

From 35b245dc976937297a38a7a5230c90d635b150ff Mon Sep 17 00:00:00 2001
From: Matt Jeanes <mattjeanes23@gmail.com>
Date: Sun, 15 Sep 2024 23:00:01 +0100
Subject: [PATCH 08/17] allow saving plot to file as well as / instead of
 drawing it to screen

---
 src/python/piper_train/vits/lightning.py | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/src/python/piper_train/vits/lightning.py b/src/python/piper_train/vits/lightning.py
index a6cba70f1..fead8a945 100644
--- a/src/python/piper_train/vits/lightning.py
+++ b/src/python/piper_train/vits/lightning.py
@@ -8,7 +8,6 @@
 from torch.nn import functional as F
 from torch.utils.data import DataLoader, Dataset, random_split
 import matplotlib.pyplot as plt
-from IPython.display import display, clear_output
 
 from .commons import slice_segments
 from .dataset import Batch, PiperDataset, UtteranceCollate
@@ -124,7 +123,7 @@ def __init__(
         self._y = None
         self._y_hat = None
 
-        if self.hparams.show_plot:
+        if self.hparams.show_plot or self.hparams.plot_save_path:
             # Initialize plot
             self.fig, self.ax = plt.subplots()
             self.gen_losses = []
@@ -348,7 +347,7 @@ def validation_step(self, batch: Batch, batch_idx: int):
         return val_loss
     
     def on_train_epoch_end(self):
-        if not self.hparams.show_plot:
+        if not self.hparams.show_plot and not self.hparams.plot_save_path:
             return
 
         avg_gen_loss = self.trainer.callback_metrics.get("gen_loss")
@@ -399,8 +398,8 @@ def configure_optimizers(self):
         return optimizers, schedulers
 
     def update_plot(self):
-        if not self.hparams.show_plot:
-            raise ValueError("show_plot is not enabled")
+        if not self.hparams.show_plot and not self.hparams.plot_save_path:
+            raise ValueError("show_plot or plot_save_path must be set to update plot")
         self.ax.clear()
 
         self.ax.plot(self.epochs, self.gen_losses, label='Generator Loss')
@@ -413,9 +412,13 @@ def update_plot(self):
         self.ax.set_title(title)
         self.ax.get_figure().canvas.manager.set_window_title(title)
         self.ax.grid(True)
-        plt.draw()
-        clear_output(wait=True)
-        plt.pause(0.01)
+
+        if self.hparams.show_plot:
+            plt.draw()
+            plt.pause(0.01)
+
+        if self.hparams.plot_save_path:
+            self.ax.get_figure().savefig(self.hparams.plot_save_path)
 
     @staticmethod
     def add_model_specific_args(parent_parser):
@@ -439,5 +442,6 @@ def add_model_specific_args(parent_parser):
         parser.add_argument("--lr-reduce-patience", type=int, default=10)
         
         parser.add_argument("--show-plot", type=bool, default=False)
-        #
+        parser.add_argument("--plot-save-path", type=str, default="plot.png")
+
         return parent_parser

From a58d09e76650903345be93ebe86dc2b8ed489f1b Mon Sep 17 00:00:00 2001
From: Matt Jeanes <mattjeanes23@gmail.com>
Date: Sun, 15 Sep 2024 23:04:35 +0100
Subject: [PATCH 09/17] remove ipython requirement and default
 --plot-save-path to None

---
 src/python/piper_train/vits/lightning.py | 2 +-
 src/python/requirements.txt              | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/python/piper_train/vits/lightning.py b/src/python/piper_train/vits/lightning.py
index fead8a945..726ffd6d0 100644
--- a/src/python/piper_train/vits/lightning.py
+++ b/src/python/piper_train/vits/lightning.py
@@ -442,6 +442,6 @@ def add_model_specific_args(parent_parser):
         parser.add_argument("--lr-reduce-patience", type=int, default=10)
         
         parser.add_argument("--show-plot", type=bool, default=False)
-        parser.add_argument("--plot-save-path", type=str, default="plot.png")
+        parser.add_argument("--plot-save-path", type=str, default=None)
 
         return parent_parser
diff --git a/src/python/requirements.txt b/src/python/requirements.txt
index 8fdb5280f..6bf7ffc7c 100644
--- a/src/python/requirements.txt
+++ b/src/python/requirements.txt
@@ -10,5 +10,4 @@ torch==2.4.0+cu121
 torchmetrics==1.4.2
 onnx==1.16.2
 onnxruntime-gpu==1.19.2
-matplotlib==3.9.2
-ipython==8.27.0
\ No newline at end of file
+matplotlib==3.9.2
\ No newline at end of file

From a1585926e35dd182c36bb7abb6b95c967655f7c4 Mon Sep 17 00:00:00 2001
From: Matt Jeanes <mattjeanes23@gmail.com>
Date: Mon, 16 Sep 2024 01:12:39 +0100
Subject: [PATCH 10/17] fix setting learning rate on existing models with new
 override parameter and add weight decay option

---
 src/python/piper_train/vits/lightning.py | 34 ++++++++++++++++++------
 1 file changed, 26 insertions(+), 8 deletions(-)

diff --git a/src/python/piper_train/vits/lightning.py b/src/python/piper_train/vits/lightning.py
index 726ffd6d0..a67e1368d 100644
--- a/src/python/piper_train/vits/lightning.py
+++ b/src/python/piper_train/vits/lightning.py
@@ -60,6 +60,8 @@ def __init__(
         # training
         dataset: Optional[List[Union[str, Path]]] = None,
         learning_rate: float = 2e-4,
+        override_learning_rate: bool = False,
+        weight_decay: float = 1e-6,
         betas: Tuple[float, float] = (0.8, 0.99),
         eps: float = 1e-9,
         batch_size: int = 1,
@@ -203,6 +205,16 @@ def training_step(self, batch: Batch, batch_idx: int):
         # Manually access optimizers
         opt_g, opt_d = self.optimizers()
 
+        if self.first_epoch:
+            if self.hparams.override_learning_rate:
+                _LOGGER.info("First epoch, overriding learning rate to %f", self.hparams.learning_rate)
+                for param_group in opt_g.param_groups:
+                    param_group['lr'] = self.hparams.learning_rate
+                for param_group in opt_d.param_groups:
+                    param_group['lr'] = self.hparams.learning_rate
+                self.first_epoch = False
+
+
         # Perform generator step
         loss_gen_all = self.training_step_g(batch)
         opt_g.zero_grad()
@@ -215,6 +227,11 @@ def training_step(self, batch: Batch, batch_idx: int):
         self.manual_backward(loss_disc_all)
         opt_d.step()
 
+        # Log learning rates
+        self.log("gen_lr", opt_g.param_groups[0]['lr'])
+        self.log("disc_lr", opt_d.param_groups[0]['lr'])
+        self.log("step", self.global_step, prog_bar=True)
+
         return {"loss_gen": loss_gen_all, "loss_disc": loss_disc_all}
 
     def training_step_g(self, batch: Batch):
@@ -282,11 +299,7 @@ def training_step_g(self, batch: Batch):
             loss_gen, _losses_gen = generator_loss(y_d_hat_g)
             loss_gen_all = loss_gen + loss_fm + loss_mel + loss_dur + loss_kl
 
-            loss_gen_lr = self.trainer.optimizers[0].param_groups[0]['lr']
-
             self.log("gen_loss", loss_gen_all)
-            self.log("gen_lr", loss_gen_lr)
-            self.log("step", self.global_step, prog_bar=True)
 
             return loss_gen_all
 
@@ -303,17 +316,13 @@ def training_step_d(self, batch: Batch):
             )
             loss_disc_all = loss_disc
 
-            loss_disc_lr = self.trainer.optimizers[1].param_groups[0]['lr']
             self.log("disc_loss", loss_disc_all)
-            self.log("disc_lr", loss_disc_lr)
-            self.log("step", self.global_step, prog_bar=True)
 
             return loss_disc_all
 
     def validation_step(self, batch: Batch, batch_idx: int):
         val_loss = self.training_step_g(batch) + self.training_step_d(batch)
 
-        self.log("step", self.global_step, prog_bar=True)
         self.log("val_loss", val_loss)
 
         # # Generate audio examples
@@ -371,6 +380,9 @@ def on_train_epoch_end(self):
         # Update plot
         self.update_plot()
 
+    def on_train_start(self):
+        self.first_epoch = True
+
     def configure_optimizers(self):
         optimizers = [
             torch.optim.AdamW(
@@ -378,12 +390,14 @@ def configure_optimizers(self):
                 lr=self.hparams.learning_rate,
                 betas=self.hparams.betas,
                 eps=self.hparams.eps,
+                weight_decay=self.hparams.weight_decay,
             ),
             torch.optim.AdamW(
                 self.model_d.parameters(),
                 lr=self.hparams.learning_rate,
                 betas=self.hparams.betas,
                 eps=self.hparams.eps,
+                weight_decay=self.hparams.weight_decay,
             ),
         ]
         schedulers = [
@@ -444,4 +458,8 @@ def add_model_specific_args(parent_parser):
         parser.add_argument("--show-plot", type=bool, default=False)
         parser.add_argument("--plot-save-path", type=str, default=None)
 
+        parser.add_argument("--learning-rate", type=float, default=2e-4)
+        parser.add_argument("--weight-decay", type=float, default=1e-6)
+        parser.add_argument("--override-learning-rate", type=bool, default=False)
+
         return parent_parser

From 2ec1ca0f6f8efdfc019b2e4ddc2be991858b0208 Mon Sep 17 00:00:00 2001
From: Matt Jeanes <mattjeanes23@gmail.com>
Date: Mon, 16 Sep 2024 01:37:36 +0100
Subject: [PATCH 11/17] graph learning rates

---
 src/python/piper_train/vits/lightning.py | 35 +++++++++++++++++++-----
 1 file changed, 28 insertions(+), 7 deletions(-)

diff --git a/src/python/piper_train/vits/lightning.py b/src/python/piper_train/vits/lightning.py
index a67e1368d..35b465d35 100644
--- a/src/python/piper_train/vits/lightning.py
+++ b/src/python/piper_train/vits/lightning.py
@@ -132,6 +132,8 @@ def __init__(
             self.disc_losses = []
             self.val_losses = []
             self.epochs = []
+            self.gen_lrs = []
+            self.disc_lrs = []
 
     def _load_datasets(
         self,
@@ -370,9 +372,19 @@ def on_train_epoch_end(self):
 
         # Capture validation loss
         val_loss = self.trainer.callback_metrics.get("val_loss")
-        if val_loss is not None:
-            val_loss_cpu = val_loss.detach().cpu() if val_loss.is_cuda else val_loss.detach()
-            self.val_losses.append(val_loss_cpu)
+        val_loss_cpu = val_loss.detach().cpu() if val_loss.is_cuda else val_loss.detach()
+        self.val_losses.append(val_loss_cpu)
+
+
+        # Capture learning rate
+        gen_lr = self.trainer.callback_metrics.get("gen_lr")
+        disc_lr = self.trainer.callback_metrics.get("disc_lr")
+
+        gen_lr_cpu = gen_lr.detach().cpu() if gen_lr.is_cuda else gen_lr.detach()
+        disc_lr_cpu = disc_lr.detach().cpu() if disc_lr.is_cuda else disc_lr.detach()
+
+        self.gen_lrs.append(gen_lr_cpu)
+        self.disc_lrs.append(disc_lr_cpu)
 
         # Update epochs for plot
         self.epochs.append(self.current_epoch)
@@ -416,12 +428,21 @@ def update_plot(self):
             raise ValueError("show_plot or plot_save_path must be set to update plot")
         self.ax.clear()
 
-        self.ax.plot(self.epochs, self.gen_losses, label='Generator Loss')
-        self.ax.plot(self.epochs, self.disc_losses, label='Discriminator Loss')
-        self.ax.plot(self.epochs, self.val_losses, label='Validation Loss')
+        self.ax.plot(self.epochs, self.gen_losses, label='Generator Loss', color='tab:blue')
+        self.ax.plot(self.epochs, self.disc_losses, label='Discriminator Loss', color='tab:orange')
+        self.ax.plot(self.epochs, self.val_losses, label='Validation Loss', color='tab:green')
         self.ax.set_xlabel('Epoch')
         self.ax.set_ylabel('Loss')
-        self.ax.legend()
+        self.ax.legend(loc='upper left')
+
+        # Create a secondary y-axis for the learning rate
+        ax2 = self.ax.twinx()
+        ax2.plot(self.epochs, self.gen_lrs, label='Generator Learning Rate', color='tab:red')
+        ax2.plot(self.epochs, self.disc_lrs, label='Discriminator Learning Rate', color='tab:purple')
+        ax2.set_xlabel('Epoch')
+        ax2.set_ylabel('Learning Rate')
+        ax2.legend(loc='upper right')
+
         title = F'Training Progress - Epoch: {self.current_epoch}'
         self.ax.set_title(title)
         self.ax.get_figure().canvas.manager.set_window_title(title)

From 77c876bff16ef0519ecb421793c11b0bc74cd3b0 Mon Sep 17 00:00:00 2001
From: Matt Jeanes <mattjeanes23@gmail.com>
Date: Mon, 16 Sep 2024 02:39:18 +0100
Subject: [PATCH 12/17] hopefully fix graph

---
 src/python/piper_train/vits/lightning.py | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/src/python/piper_train/vits/lightning.py b/src/python/piper_train/vits/lightning.py
index 35b465d35..5ed8d2e1f 100644
--- a/src/python/piper_train/vits/lightning.py
+++ b/src/python/piper_train/vits/lightning.py
@@ -128,6 +128,7 @@ def __init__(
         if self.hparams.show_plot or self.hparams.plot_save_path:
             # Initialize plot
             self.fig, self.ax = plt.subplots()
+            self.ax2 = None
             self.gen_losses = []
             self.disc_losses = []
             self.val_losses = []
@@ -424,8 +425,6 @@ def configure_optimizers(self):
         return optimizers, schedulers
 
     def update_plot(self):
-        if not self.hparams.show_plot and not self.hparams.plot_save_path:
-            raise ValueError("show_plot or plot_save_path must be set to update plot")
         self.ax.clear()
 
         self.ax.plot(self.epochs, self.gen_losses, label='Generator Loss', color='tab:blue')
@@ -433,15 +432,18 @@ def update_plot(self):
         self.ax.plot(self.epochs, self.val_losses, label='Validation Loss', color='tab:green')
         self.ax.set_xlabel('Epoch')
         self.ax.set_ylabel('Loss')
-        self.ax.legend(loc='upper left')
 
         # Create a secondary y-axis for the learning rate
-        ax2 = self.ax.twinx()
-        ax2.plot(self.epochs, self.gen_lrs, label='Generator Learning Rate', color='tab:red')
-        ax2.plot(self.epochs, self.disc_lrs, label='Discriminator Learning Rate', color='tab:purple')
-        ax2.set_xlabel('Epoch')
-        ax2.set_ylabel('Learning Rate')
-        ax2.legend(loc='upper right')
+        if self.ax2 is not None:
+            self.ax2.remove()
+        self.ax2 = self.ax.twinx()
+        self.ax2.plot(self.epochs, self.gen_lrs, label='Generator Learning Rate', color='tab:red')
+        self.ax2.plot(self.epochs, self.disc_lrs, label='Discriminator Learning Rate', color='tab:purple')
+        self.ax2.set_xlabel('Epoch')
+        self.ax2.set_ylabel('Learning Rate')
+
+        self.ax.legend(loc='upper left')
+        self.ax2.legend(loc='upper right')
 
         title = F'Training Progress - Epoch: {self.current_epoch}'
         self.ax.set_title(title)

From c9720ea74ce73f91d49b95e1b59decc537389e90 Mon Sep 17 00:00:00 2001
From: Matt Jeanes <mattjeanes23@gmail.com>
Date: Mon, 16 Sep 2024 02:59:53 +0100
Subject: [PATCH 13/17] add optional gradient clipping (--grad-clip)

---
 src/python/piper_train/vits/lightning.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/src/python/piper_train/vits/lightning.py b/src/python/piper_train/vits/lightning.py
index 5ed8d2e1f..981f492d4 100644
--- a/src/python/piper_train/vits/lightning.py
+++ b/src/python/piper_train/vits/lightning.py
@@ -222,12 +222,22 @@ def training_step(self, batch: Batch, batch_idx: int):
         loss_gen_all = self.training_step_g(batch)
         opt_g.zero_grad()
         self.manual_backward(loss_gen_all)
+
+         # Gradient clipping for generator
+        if self.hparams.grad_clip is not None:
+            torch.nn.utils.clip_grad_norm_(self.model_g.parameters(), self.hparams.grad_clip)
+
         opt_g.step()
 
         # Perform discriminator step
         loss_disc_all = self.training_step_d(batch)
         opt_d.zero_grad()
         self.manual_backward(loss_disc_all)
+
+        # Gradient clipping for discriminator
+        if self.hparams.grad_clip is not None:
+            torch.nn.utils.clip_grad_norm_(self.model_d.parameters(), self.hparams.grad_clip)
+
         opt_d.step()
 
         # Log learning rates
@@ -484,5 +494,6 @@ def add_model_specific_args(parent_parser):
         parser.add_argument("--learning-rate", type=float, default=2e-4)
         parser.add_argument("--weight-decay", type=float, default=1e-6)
         parser.add_argument("--override-learning-rate", type=bool, default=False)
+        parser.add_argument("--grad-clip", type=float, default=None)
 
         return parent_parser

From afbdbcc601913a056c037e13c0fd0b5ea345a985 Mon Sep 17 00:00:00 2001
From: Matt Jeanes <mattjeanes23@gmail.com>
Date: Mon, 16 Sep 2024 19:28:52 +0100
Subject: [PATCH 14/17] add option for reduceLRonplateau, set other params to
 defaults as before

---
 src/python/piper_train/__main__.py       | 19 +++++++------
 src/python/piper_train/vits/config.py    |  2 +-
 src/python/piper_train/vits/lightning.py | 35 +++++++++++++++++-------
 3 files changed, 36 insertions(+), 20 deletions(-)

diff --git a/src/python/piper_train/__main__.py b/src/python/piper_train/__main__.py
index f615f6b2b..5d5f522a2 100644
--- a/src/python/piper_train/__main__.py
+++ b/src/python/piper_train/__main__.py
@@ -102,7 +102,7 @@ def main():
     parser.add_argument(
         "--early_stop_patience",
         type=int,
-        default=20,
+        default=0,
         help="Early stopping patience."
     )
     args = parser.parse_args()
@@ -168,14 +168,15 @@ def supports_tensor_cores():
             "%s Checkpoints will be saved", args.num_ckpt
         )
 
-    # Early stopping callback
-    early_stopping_callback = EarlyStopping(
-        monitor='val_loss',
-        patience=args.early_stop_patience,
-        verbose=True,
-        mode='min'
-    )
-    callbacks.append(early_stopping_callback)
+    if args.early_stop_patience > 0:
+        # Early stopping callback
+        early_stopping_callback = EarlyStopping(
+            monitor='val_loss',
+            patience=args.early_stop_patience,
+            verbose=True,
+            mode='min'
+        )
+        callbacks.append(early_stopping_callback)
 
     # Learning rate monitor callback
     lr_monitor_callback = LearningRateMonitor(logging_interval='epoch')
diff --git a/src/python/piper_train/vits/config.py b/src/python/piper_train/vits/config.py
index 6de3c8616..e878f02cd 100644
--- a/src/python/piper_train/vits/config.py
+++ b/src/python/piper_train/vits/config.py
@@ -116,7 +116,7 @@ class TrainingConfig:
     eps: float = 1e-9
     # batch_size: int = 32
     fp16_run: bool = False
-    #lr_decay: float = 0.999875 # Disable fixed learning rate decay as it's handled by ReduceLROnPlateau
+    lr_decay: float = 0.999875
     init_lr_ratio: float = 1.0
     warmup_epochs: int = 0
     c_mel: int = 45
diff --git a/src/python/piper_train/vits/lightning.py b/src/python/piper_train/vits/lightning.py
index 981f492d4..f9db89f46 100644
--- a/src/python/piper_train/vits/lightning.py
+++ b/src/python/piper_train/vits/lightning.py
@@ -61,11 +61,12 @@ def __init__(
         dataset: Optional[List[Union[str, Path]]] = None,
         learning_rate: float = 2e-4,
         override_learning_rate: bool = False,
-        weight_decay: float = 1e-6,
+        weight_decay: float = 1e-2,
         betas: Tuple[float, float] = (0.8, 0.99),
         eps: float = 1e-9,
         batch_size: int = 1,
         lr_decay: float = 0.999875,
+        lr_reduce_enabled: bool = False,
         lr_reduce_patience: int = 10,
         lr_reduce_factor: float = 0.5,
         init_lr_ratio: float = 1.0,
@@ -423,14 +424,26 @@ def configure_optimizers(self):
                 weight_decay=self.hparams.weight_decay,
             ),
         ]
-        schedulers = [
-            torch.optim.lr_scheduler.ReduceLROnPlateau(
-                optimizers[0], mode='min', factor=self.hparams.lr_reduce_factor, patience=self.hparams.lr_reduce_patience
-            ),
-            torch.optim.lr_scheduler.ReduceLROnPlateau(
-                optimizers[1], mode='min', factor=self.hparams.lr_reduce_factor, patience=self.hparams.lr_reduce_patience
-            ),
-        ]
+
+
+        if self.hparams.lr_reduce_enabled:
+            schedulers = [
+                torch.optim.lr_scheduler.ReduceLROnPlateau(
+                    optimizers[0], mode='min', factor=self.hparams.lr_reduce_factor, patience=self.hparams.lr_reduce_patience
+                ),
+                torch.optim.lr_scheduler.ReduceLROnPlateau(
+                    optimizers[1], mode='min', factor=self.hparams.lr_reduce_factor, patience=self.hparams.lr_reduce_patience
+                ),
+            ]
+        else:
+            schedulers = [
+                torch.optim.lr_scheduler.ExponentialLR(
+                    optimizers[0], gamma=self.hparams.lr_decay
+                ),
+                torch.optim.lr_scheduler.ExponentialLR(
+                    optimizers[1], gamma=self.hparams.lr_decay
+                )
+            ]
 
         return optimizers, schedulers
 
@@ -485,6 +498,8 @@ def add_model_specific_args(parent_parser):
         parser.add_argument("--n-layers", type=int, default=6)
         parser.add_argument("--n-heads", type=int, default=2)
 
+        parser.add_argument("--lr-decay", type=float, default=0.999875)
+        parser.add_argument("--lr-reduce-enabled", type=bool, default=False)
         parser.add_argument("--lr-reduce-factor", type=float, default=0.5)
         parser.add_argument("--lr-reduce-patience", type=int, default=10)
         
@@ -492,7 +507,7 @@ def add_model_specific_args(parent_parser):
         parser.add_argument("--plot-save-path", type=str, default=None)
 
         parser.add_argument("--learning-rate", type=float, default=2e-4)
-        parser.add_argument("--weight-decay", type=float, default=1e-6)
+        parser.add_argument("--weight-decay", type=float, default=1e-2)
         parser.add_argument("--override-learning-rate", type=bool, default=False)
         parser.add_argument("--grad-clip", type=float, default=None)
 

From beded97b704f4c67fd8414563b5a11bf1f391ae2 Mon Sep 17 00:00:00 2001
From: Matt Jeanes <mattjeanes23@gmail.com>
Date: Mon, 16 Sep 2024 19:34:30 +0100
Subject: [PATCH 15/17] only step LR schedulers on val_loss when
 lr_reduce_enabled is set

---
 src/python/piper_train/vits/lightning.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/python/piper_train/vits/lightning.py b/src/python/piper_train/vits/lightning.py
index f9db89f46..232732e1e 100644
--- a/src/python/piper_train/vits/lightning.py
+++ b/src/python/piper_train/vits/lightning.py
@@ -362,10 +362,11 @@ def validation_step(self, batch: Batch, batch_idx: int):
         #         sample_rate=self.hparams.sample_rate
         #     )
 
-        # Step the scheduler with the validation loss
-        scheduler_g, scheduler_d = self.lr_schedulers()
-        scheduler_g.step(val_loss)
-        scheduler_d.step(val_loss)
+        if self.hparams.lr_reduce_enabled:
+            # Step the scheduler with the validation loss
+            scheduler_g, scheduler_d = self.lr_schedulers()
+            scheduler_g.step(val_loss)
+            scheduler_d.step(val_loss)
 
         return val_loss
     

From 9f752ee4f50992523a653dddcf22c04c393da7fb Mon Sep 17 00:00:00 2001
From: Matt Jeanes <mattjeanes23@gmail.com>
Date: Tue, 17 Sep 2024 20:30:47 +0100
Subject: [PATCH 16/17] add --random_seed option

---
 src/python/piper_train/__main__.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/src/python/piper_train/__main__.py b/src/python/piper_train/__main__.py
index 5d5f522a2..f00db9434 100644
--- a/src/python/piper_train/__main__.py
+++ b/src/python/piper_train/__main__.py
@@ -62,6 +62,11 @@ def main():
         type=int,
         default=1234
     )
+    parser.add_argument(
+        "--random_seed",
+        type=lambda value: value.strip().lower() in ("1", "true", "yes", "y", "on"),
+        default=False
+    )
     parser.add_argument(
         "--resume_from_checkpoint",
         type=str,
@@ -113,7 +118,13 @@ def main():
         args.default_root_dir = args.dataset_dir
 
     torch.backends.cudnn.benchmark = True
-    torch.manual_seed(args.seed)
+
+    if args.random_seed:
+        seed = torch.seed()
+        _LOGGER.debug("Using random seed: %s", seed)
+    else:
+        torch.manual_seed(args.seed)
+        _LOGGER.debug("Using manual seed: %s", args.seed)
     
     # Function to check if the GPU supports Tensor Cores
     def supports_tensor_cores():

From 5afcab05631ab5894eac1cae578e4c593f8ac051 Mon Sep 17 00:00:00 2001
From: Mateo Cedillo <54605382+rmcpantoja@users.noreply.github.com>
Date: Fri, 13 Dec 2024 07:16:35 -0500
Subject: [PATCH 17/17] HiFi-GAN parametrization according to newer torch.

---
 src/python/piper_train/vits/models.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/python/piper_train/vits/models.py b/src/python/piper_train/vits/models.py
index 68ef7ba59..18bf65d6c 100644
--- a/src/python/piper_train/vits/models.py
+++ b/src/python/piper_train/vits/models.py
@@ -5,8 +5,9 @@
 from torch import nn
 from torch.nn import Conv1d, Conv2d, ConvTranspose1d
 from torch.nn import functional as F
-from torch.nn.utils import remove_weight_norm, spectral_norm, weight_norm
-
+from torch.nn.utils import spectral_norm
+from torch.nn.utils.parametrizations import weight_norm
+from torch.nn.utils.parametrize import remove_parametrizations
 from . import attentions, commons, modules, monotonic_align
 from .commons import get_padding, init_weights
 
@@ -370,9 +371,10 @@ def forward(self, x, g=None):
     def remove_weight_norm(self):
         print("Removing weight norm...")
         for l in self.ups:
-            remove_weight_norm(l)
+            # remove_parametrizations requires the name of the parametrized tensor.
+            remove_parametrizations(l, "weight")
         for l in self.resblocks:
             l.remove_weight_norm()
 
 
 class DiscriminatorP(torch.nn.Module):