diff --git a/mttl/datamodule/mt_seq_to_seq_module.py b/mttl/datamodule/mt_seq_to_seq_module.py
index 5dd13ef48..4fcd401b0 100644
--- a/mttl/datamodule/mt_seq_to_seq_module.py
+++ b/mttl/datamodule/mt_seq_to_seq_module.py
@@ -96,10 +96,10 @@ def augment_few_shot(
     dataset, num_samples, tokenizer=None, max_input_length=None, seed=42
 ):
     """Augment the dataset with few-shot examples."""
-    import tqdm
+    from tqdm.auto import tqdm
 
     augmented_dataset = []
-    for source in tqdm.tqdm(dataset.unique("task_name")):
+    for source in tqdm(dataset.unique("task_name")):
         augmented_dataset.append(
             Dataset.from_list(
                 augment_few_shot_task(
diff --git a/mttl/datamodule/ni_data_module.py b/mttl/datamodule/ni_data_module.py
index 058938139..9f2a29ac3 100644
--- a/mttl/datamodule/ni_data_module.py
+++ b/mttl/datamodule/ni_data_module.py
@@ -8,8 +8,8 @@
 
 import numpy as np
 import torch
-import tqdm
 from torch.utils.data import DataLoader
+from tqdm.auto import tqdm
 from transformers import AutoTokenizer
 
 from mttl.datamodule.base import DataModule, DatasetConfig, DefaultCollator
@@ -324,7 +324,7 @@ def _check_test_references(self):
             eval_instances[instance["id"]] = instance
         eval_ids = list(eval_instances.keys())
 
-        for element in tqdm.tqdm(
+        for element in tqdm(
             self.test_dataset,
             desc="Checking test instances",
             total=len(self.test_dataset),
diff --git a/mttl/evaluators/code_evaluator.py b/mttl/evaluators/code_evaluator.py
index 1d868251a..3dc50668f 100644
--- a/mttl/evaluators/code_evaluator.py
+++ b/mttl/evaluators/code_evaluator.py
@@ -1,7 +1,7 @@
 import os
 
-import tqdm
 from evaluate import load
+from tqdm.auto import tqdm
 
 from mttl.evaluators.base import GenerativeEvaluator, switch_to_eval_mode
 from mttl.logging import logger
@@ -71,7 +71,7 @@ def evaluate(
         if self.use_vllm:
             return self.evaluate_with_vllm(model, dataloader, num_batches, verbose)
 
-        pbar = tqdm.tqdm(
+        pbar = tqdm(
             enumerate(dataloader),
             total=len(dataloader),
         )
diff --git a/mttl/evaluators/em_evaluator.py b/mttl/evaluators/em_evaluator.py
index a265fa551..07e26f5e3 100644
--- a/mttl/evaluators/em_evaluator.py
+++ b/mttl/evaluators/em_evaluator.py
@@ -1,5 +1,5 @@
 import numpy as np
-import tqdm
+from tqdm.auto import tqdm
 
 from mttl.dataloader.ni_metrics import compute_metrics
 from mttl.evaluators.base import (
@@ -46,7 +46,7 @@ def evaluate(
         if self.use_vllm:
             return self.evaluate_with_vllm(model, dataloader, num_batches, verbose)
 
-        pbar = tqdm.tqdm(
+        pbar = tqdm(
             enumerate(dataloader),
             total=len(dataloader),
         )
diff --git a/mttl/evaluators/loglike_evaluator.py b/mttl/evaluators/loglike_evaluator.py
index 7a0c89a7a..ee7b33adc 100644
--- a/mttl/evaluators/loglike_evaluator.py
+++ b/mttl/evaluators/loglike_evaluator.py
@@ -1,6 +1,6 @@
 import numpy as np
 import torch
-import tqdm
+from tqdm.auto import tqdm
 
 from mttl.evaluators.base import Evaluator, switch_to_eval_mode
 from mttl.logging import logger
@@ -31,7 +31,7 @@ def evaluate(
         if self.use_vllm:
             return self.evaluate_with_vllm(model, dataloader, num_batches, verbose)
 
-        pbar = tqdm.tqdm(
+        pbar = tqdm(
             enumerate(dataloader),
             total=len(dataloader),
         )
diff --git a/mttl/evaluators/mmlu_evaluator.py b/mttl/evaluators/mmlu_evaluator.py
index a072b603b..3c77a7295 100644
--- a/mttl/evaluators/mmlu_evaluator.py
+++ b/mttl/evaluators/mmlu_evaluator.py
@@ -5,7 +5,7 @@
 import click
 import numpy as np
 import torch
-import tqdm
+from tqdm.auto import tqdm
 
 from mttl.dataloader.ni_metrics import compute_metrics
 from mttl.evaluators.base import (
@@ -117,7 +117,7 @@ def evaluate(
 
         dataloader = self.get_dataloader(split, subsample, shuffle)
 
-        pbar = tqdm.tqdm(
+        pbar = tqdm(
             enumerate(dataloader),
             total=len(dataloader),
         )
diff --git a/mttl/evaluators/ni_evaluator.py b/mttl/evaluators/ni_evaluator.py
index 572e021cc..d02b59ffb 100644
--- a/mttl/evaluators/ni_evaluator.py
+++ b/mttl/evaluators/ni_evaluator.py
@@ -3,7 +3,7 @@
 from pathlib import Path
 
 import numpy as np
-import tqdm
+from tqdm.auto import tqdm
 
 from mttl.dataloader.ni_metrics import compute_grouped_metrics, compute_metrics
 from mttl.evaluators.base import GenerativeEvaluator, mean_stderr, switch_to_eval_mode
@@ -96,7 +96,7 @@ def evaluate(
             path = re.sub(r"/[^/]*$", "", output_path)
             Path(path).mkdir(parents=True, exist_ok=True)
 
-        pbar = tqdm.tqdm(
+        pbar = tqdm(
             enumerate(dataloader),
             total=len(dataloader),
         )
diff --git a/mttl/evaluators/rouge_evaluator.py b/mttl/evaluators/rouge_evaluator.py
index d7fe254ad..a8045f37b 100644
--- a/mttl/evaluators/rouge_evaluator.py
+++ b/mttl/evaluators/rouge_evaluator.py
@@ -1,7 +1,7 @@
 from dataclasses import dataclass
 
 import numpy as np
-import tqdm
+from tqdm.auto import tqdm
 
 from mttl.evaluators.base import GenerativeEvaluator, switch_to_eval_mode
 from mttl.evaluators.ni_evaluator import compute_metrics
@@ -48,7 +48,7 @@ def evaluate(
         if self.use_vllm:
             return self.evaluate_with_vllm(model, dataloader, num_batches, verbose)
 
-        pbar = tqdm.tqdm(
+        pbar = tqdm(
             enumerate(dataloader),
             total=len(dataloader),
         )
diff --git a/mttl/models/expert_model.py b/mttl/models/expert_model.py
index ec9cc868f..1fb2a28bc 100644
--- a/mttl/models/expert_model.py
+++ b/mttl/models/expert_model.py
@@ -251,7 +251,7 @@ def delete_expert_container(self):
     def add_experts_from_library(self, library):
         import concurrent.futures
 
-        import tqdm
+        from tqdm.auto import tqdm
 
         if type(library) == str:
             from mttl.models.library.expert_library import ExpertLibrary
@@ -269,7 +269,7 @@ def add_module(self, module_name):
            futures.append(executor.submit(partial(add_module, self), element))
 
         # Progress bar setup
-        with tqdm.tqdm(
+        with tqdm(
             total=len(library), desc="Adding experts...", unit="expert"
         ) as progress_bar:
             for result in concurrent.futures.as_completed(futures):
diff --git a/mttl/models/library/library_transforms.py b/mttl/models/library/library_transforms.py
index 02508c708..74d902c8b 100644
--- a/mttl/models/library/library_transforms.py
+++ b/mttl/models/library/library_transforms.py
@@ -14,7 +14,7 @@
 from pytorch_lightning import Trainer
 from sklearn.cluster import KMeans
 from sklearn.metrics.pairwise import cosine_similarity
-from tqdm import tqdm
+from tqdm.auto import tqdm
 
 from mttl.datamodule.base import get_datamodule
 from mttl.logging import logger
@@ -33,56 +33,7 @@
 from mttl.models.utils import transfer_batch_to_device
 from mttl.registrable import Registrable
 from mttl.serializable import Serializable
-
-
-def train_phatgoose(args, model, datamodule):
-    """Mini-training loop for phatgoose."""
-    import tqdm
-
-    torch.manual_seed(args.seed)
-    if torch.cuda.is_available():
-        torch.cuda.manual_seed(args.seed)
-
-    (optimizer, scheduler), _ = get_optimizer_and_scheduler(
-        model, args, num_train_examples=len(datamodule.train_dataset)
-    )
-    iter_train = iter(datamodule.train_dataloader())
-
-    bar = tqdm.tqdm(range(args.total_steps))
-    running_loss = 0.0
-    for step in bar:
-        loss_accum = 0.0
-        model.train()
-        optimizer.zero_grad()
-
-        for micro_step in range(args.gradient_accumulation_steps):
-            try:
-                batch = next(iter_train)
-            except StopIteration:
-                iter_train = iter(datamodule.train_dataloader())
-                batch = next(iter_train)
-
-            with torch.autocast(
-                device_type=model.device.type,
-                dtype=model.dtype,
-            ):
-                batch = transfer_batch_to_device(batch, model.device)
-                loss = model.forward(**batch).loss
-                loss = loss / args.gradient_accumulation_steps
-                loss_accum += loss.detach()
-                loss.backward()
-
-        if loss_accum:
-            norm = torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
-            running_loss += loss_accum.item()
-            optimizer.step()
-            scheduler.step()
-            if model.device.type == "cuda":
-                torch.cuda.synchronize()
-            bar.set_description_str(
-                f"Step {step + 1}/{args.total_steps}, Loss: {running_loss / (step + 1):.4f}, Lr: {scheduler.get_last_lr()[0]:.4f}"
-            )
-    return model
+from mttl.train_utils import train_model
 
 
 class LibraryTransform(abc.ABC, Registrable):
@@ -750,7 +701,7 @@ def transform(
                 frozen_sum += value.sum()
                 value.requires_grad = False
 
-        train_phatgoose(training_config, model, dm)
+        train_model(training_config, model, dm)
 
         # for checksum
         frozen_sum_after, unfrozen_sum_after = 0, 0
diff --git a/mttl/models/lightning/callbacks.py b/mttl/models/lightning/callbacks.py
index 14b40cdf3..afaa366ab 100644
--- a/mttl/models/lightning/callbacks.py
+++ b/mttl/models/lightning/callbacks.py
@@ -6,12 +6,12 @@
 
 import pytorch_lightning as pl
 import torch
-import tqdm
 from pytorch_lightning import LightningModule, Trainer
 from pytorch_lightning import callbacks as cb
 from pytorch_lightning.callbacks.progress.tqdm_progress import Tqdm
 from pytorch_lightning.utilities.rank_zero import rank_zero_only
 from torch.optim import Optimizer
+from tqdm.auto import tqdm
 from transformers.utils import ModelOutput
 
 import wandb
@@ -232,7 +232,7 @@ def test(self, pl_module: LightningModule):
         total_loss, deno = 0.0, 0.0
 
         with torch.no_grad():
-            for i, batch in tqdm.tqdm(
+            for i, batch in tqdm(
                 enumerate(self.dataloader),
                 total=len(self.dataloader),
                 desc=f"Test {self.name}",
diff --git a/mttl/models/openai.py b/mttl/models/openai.py
index 883be3084..5318b973d 100644
--- a/mttl/models/openai.py
+++ b/mttl/models/openai.py
@@ -277,7 +277,7 @@ def generate(
         progress=True,
         **kwargs,
     ):
-        import tqdm
+        from tqdm.auto import tqdm
 
         if type(inputs) is not list:
             inputs = [inputs]
@@ -286,7 +286,7 @@ def generate(
         generation_options = self.generation_options.copy()
         generation_options.update(**kwargs)
 
-        progress_bar = tqdm.tqdm(inputs, disable=not progress)
+        progress_bar = tqdm(inputs, disable=not progress)
 
         if self.engine in (
             "gpt-3.5-turbo",
diff --git a/mttl/models/ranker/baseline_rankers.py b/mttl/models/ranker/baseline_rankers.py
index 7b8c39b49..b7b3ad582 100644
--- a/mttl/models/ranker/baseline_rankers.py
+++ b/mttl/models/ranker/baseline_rankers.py
@@ -43,7 +43,7 @@ def __init__(self, **kwargs):
         self.vectorizer = None
 
     def train(self):
-        import tqdm
+        from tqdm.auto import tqdm
 
         self.dataset = (
             DatasetLibrary.pull_dataset(self.dataset_name, split="train")
@@ -54,7 +54,7 @@ def train(self):
             norm="l2", sublinear_tf=True, stop_words="english"
         )
         self.train_features = self.vectorizer.fit_transform(
-            tqdm.tqdm(self.dataset["source"])
+            tqdm(self.dataset["source"])
         )
         self.task_names = list(self.dataset["task_name"])
 
diff --git a/mttl/train_utils.py b/mttl/train_utils.py
new file mode 100644
index 000000000..132398f91
--- /dev/null
+++ b/mttl/train_utils.py
@@ -0,0 +1,123 @@
+import os
+
+import torch
+from tqdm.auto import tqdm
+
+from mttl.datamodule.base import DataModule
+from mttl.models.base_model import WEIGHTS_NAME, BaseExpertModel
+from mttl.models.get_optimizer import get_optimizer_and_scheduler
+from mttl.models.utils import transfer_batch_to_device
+
+
+@torch.no_grad()
+def evaluate_model(dataloader, model):
+    """Evaluation loop."""
+    model.eval()
+    total_loss = 0.0
+    total_samples = 0
+    for batch in dataloader:
+        with torch.autocast(
+            device_type=model.device.type,
+            dtype=model.dtype,
+        ):
+            batch = transfer_batch_to_device(batch, model.device)
+            output = model.forward(**batch)
+            total_loss += output.loss.item()
+            total_samples += 1
+    return total_loss / total_samples
+
+
+def train_model(
+    args: "TrainingArguments",
+    model: BaseExpertModel,
+    datamodule: DataModule,
+    do_test=False,
+) -> BaseExpertModel:
+    """Mini-training loop."""
+    torch.manual_seed(args.seed)
+    if torch.cuda.is_available():
+        torch.cuda.manual_seed(args.seed)
+
+    (optimizer, scheduler), _ = get_optimizer_and_scheduler(
+        model, args, num_train_examples=len(datamodule.train_dataset)
+    )
+    dataloader = datamodule.train_dataloader()
+    num_train_steps = len(dataloader)
+    iter_train = iter(dataloader)
+
+    if args.total_steps == -1:
+        if args.num_train_epochs == -1:
+            raise ValueError("Either total_steps or num_train_epochs must be defined.")
+        args.total_steps = args.num_train_epochs * num_train_steps
+
+    if args.eval_every_n_epoch != -1:
+        args.eval_every = num_train_steps * args.eval_every_n_epoch
+
+    bar = tqdm(range(args.total_steps))
+    best_val_loss = float("inf")
+    running_loss = 0.0
+
+    for step in bar:
+        loss_accum = 0.0
+        model.train()
+        optimizer.zero_grad()
+
+        for micro_step in range(args.gradient_accumulation_steps):
+            try:
+                batch = next(iter_train)
+            except StopIteration:
+                iter_train = iter(dataloader)
+                batch = next(iter_train)
+
+            with torch.autocast(
+                device_type=model.device.type,
+                dtype=model.dtype,
+            ):
+                batch = transfer_batch_to_device(batch, model.device)
+                loss = model.forward(**batch).loss
+                loss = loss / args.gradient_accumulation_steps
+                loss_accum += loss.detach()
+                loss.backward()
+
+        if loss_accum:
+            norm = torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
+            running_loss += loss_accum.item()
+            optimizer.step()
+            scheduler.step()
+            if model.device.type == "cuda":
+                torch.cuda.synchronize()
+
+            bar.set_description_str(
+                f"Step {step + 1}/{args.total_steps}, Loss: {running_loss / (step + 1):.4f}, Lr: {scheduler.get_last_lr()[0]:.4f}"
+            )
+
+        # eval and save best model
+        if (
+            args.eval_every > 0
+            and step % args.eval_every == 0
+            and datamodule.dev_dataset
+        ):
+            val_loss = evaluate_model(datamodule.val_dataloader(), model)
+            if val_loss < best_val_loss:
+                best_val_loss = val_loss
+                if args.output_dir:
+                    model.save_pretrained(args.output_dir + "/best_model")
+            running_loss = 0.0
+
+    # reload best model
+    if args.output_dir and os.path.exists(
+        args.output_dir + f"/best_model/{WEIGHTS_NAME}"
+    ):
+        model.load_state_dict(
+            torch.load(
+                args.output_dir + f"/best_model/{WEIGHTS_NAME}", weights_only=True
+            ),
+            strict=False,
+        )
+
+    # do test evaluation
+    if do_test and datamodule.test_dataset:
+        test_loss = evaluate_model(datamodule.test_dataloader(), model)
+        print(f"Test loss: {test_loss:.4f}")
+
+    return model
diff --git a/mttl/vllm_engines/engines.py b/mttl/vllm_engines/engines.py
index f37724558..19e4ffe25 100644
--- a/mttl/vllm_engines/engines.py
+++ b/mttl/vllm_engines/engines.py
@@ -5,8 +5,8 @@
 
 import numpy as np
 import torch
-import tqdm
 from torch.utils.data import DataLoader
+from tqdm.auto import tqdm
 
 from mttl.logging import logger
 from mttl.models.modifiers.base import MergeableModifierMixin
@@ -137,9 +137,7 @@ def eval(
         }
 
         # we explicitly add requests here, so that we can keep track of the request id
-        for request_id, batch in enumerate(
-            tqdm.tqdm(dataloader, total=len(dataloader))
-        ):
+        for request_id, batch in enumerate(tqdm(dataloader, total=len(dataloader))):
             for context, label, task_name in zip(
                 batch["sources_texts"], batch["labels_texts"], batch["task_names"]
             ):
diff --git a/tests/test_library_transforms.py b/tests/test_library_transforms.py
index 2536b077d..6f7f0c0df 100644
--- a/tests/test_library_transforms.py
+++ b/tests/test_library_transforms.py
@@ -129,7 +129,7 @@ def test_phatgoose(tiny_flan, tmp_path, create_dummy_expert, monkeypatch):
         "warmup_steps": 0,
         "modify_layers": "k_proj|v_proj|q_proj|o_proj",
         "trainable_param_names": ".*lora_[ab].*",
-        "output_dir": tmp_path,
+        "output_dir": str(tmp_path),
         "precision": "32",
         "model": "EleutherAI/gpt-neo-125m",
         "dataset": dataset_id,
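
Usage note: a minimal sketch of how the relocated loop is meant to be invoked after this change, mirroring the PhatgooseTransform.transform call site above. The surrounding setup is assumed, not part of this diff:

    from mttl.train_utils import train_model

    # `training_config`, `model`, and `dm` are assumed to be constructed as in
    # PhatgooseTransform.transform (a TrainingArguments-style config, a
    # BaseExpertModel, and a DataModule). train_model replaces the old inline
    # train_phatgoose loop one-for-one and returns the trained model, reloading
    # the best checkpoint from `output_dir` when validation was enabled.
    model = train_model(training_config, model, dm, do_test=False)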