Commit 1f81ea9

Merge pull request #139 from microsoft/tqdm-auto

Tqdm auto

sordonia authored Nov 13, 2024
2 parents 9e7f882 + bd14696
Showing 16 changed files with 153 additions and 81 deletions.
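The change is mechanical across the repository: each `import tqdm` paired with `tqdm.tqdm(...)` call sites becomes `from tqdm.auto import tqdm` paired with bare `tqdm(...)` calls. `tqdm.auto` resolves to the ipywidgets-based notebook bar when running under Jupyter and falls back to the plain console bar otherwise, so the same progress code renders correctly in both environments. A minimal sketch of the adopted pattern (the loop body here is illustrative, not from the diff):

from tqdm.auto import tqdm  # resolves to the notebook or console frontend at import time

for task_name in tqdm(["task_a", "task_b"], desc="Tasks"):
    pass  # per-task work would go here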
4 changes: 2 additions & 2 deletions mttl/datamodule/mt_seq_to_seq_module.py
@@ -96,10 +96,10 @@ def augment_few_shot(
     dataset, num_samples, tokenizer=None, max_input_length=None, seed=42
 ):
     """Augment the dataset with few-shot examples."""
-    import tqdm
+    from tqdm.auto import tqdm
 
     augmented_dataset = []
-    for source in tqdm.tqdm(dataset.unique("task_name")):
+    for source in tqdm(dataset.unique("task_name")):
         augmented_dataset.append(
             Dataset.from_list(
                 augment_few_shot_task(
4 changes: 2 additions & 2 deletions mttl/datamodule/ni_data_module.py
@@ -8,8 +8,8 @@
 
 import numpy as np
 import torch
-import tqdm
 from torch.utils.data import DataLoader
+from tqdm.auto import tqdm
 from transformers import AutoTokenizer
 
 from mttl.datamodule.base import DataModule, DatasetConfig, DefaultCollator
@@ -324,7 +324,7 @@ def _check_test_references(self):
             eval_instances[instance["id"]] = instance
 
         eval_ids = list(eval_instances.keys())
-        for element in tqdm.tqdm(
+        for element in tqdm(
             self.test_dataset,
             desc="Checking test instances",
             total=len(self.test_dataset),
4 changes: 2 additions & 2 deletions mttl/evaluators/code_evaluator.py
@@ -1,7 +1,7 @@
 import os
 
-import tqdm
 from evaluate import load
+from tqdm.auto import tqdm
 
 from mttl.evaluators.base import GenerativeEvaluator, switch_to_eval_mode
 from mttl.logging import logger
@@ -71,7 +71,7 @@ def evaluate(
         if self.use_vllm:
             return self.evaluate_with_vllm(model, dataloader, num_batches, verbose)
 
-        pbar = tqdm.tqdm(
+        pbar = tqdm(
             enumerate(dataloader),
             total=len(dataloader),
         )
4 changes: 2 additions & 2 deletions mttl/evaluators/em_evaluator.py
@@ -1,5 +1,5 @@
 import numpy as np
-import tqdm
+from tqdm.auto import tqdm
 
 from mttl.dataloader.ni_metrics import compute_metrics
 from mttl.evaluators.base import (
@@ -46,7 +46,7 @@ def evaluate(
         if self.use_vllm:
             return self.evaluate_with_vllm(model, dataloader, num_batches, verbose)
 
-        pbar = tqdm.tqdm(
+        pbar = tqdm(
             enumerate(dataloader),
             total=len(dataloader),
         )
4 changes: 2 additions & 2 deletions mttl/evaluators/loglike_evaluator.py
@@ -1,6 +1,6 @@
 import numpy as np
 import torch
-import tqdm
+from tqdm.auto import tqdm
 
 from mttl.evaluators.base import Evaluator, switch_to_eval_mode
 from mttl.logging import logger
@@ -31,7 +31,7 @@ def evaluate(
         if self.use_vllm:
             return self.evaluate_with_vllm(model, dataloader, num_batches, verbose)
 
-        pbar = tqdm.tqdm(
+        pbar = tqdm(
             enumerate(dataloader),
             total=len(dataloader),
         )
4 changes: 2 additions & 2 deletions mttl/evaluators/mmlu_evaluator.py
@@ -5,7 +5,7 @@
 import click
 import numpy as np
 import torch
-import tqdm
+from tqdm.auto import tqdm
 
 from mttl.dataloader.ni_metrics import compute_metrics
 from mttl.evaluators.base import (
@@ -117,7 +117,7 @@ def evaluate(
 
         dataloader = self.get_dataloader(split, subsample, shuffle)
 
-        pbar = tqdm.tqdm(
+        pbar = tqdm(
             enumerate(dataloader),
             total=len(dataloader),
         )
4 changes: 2 additions & 2 deletions mttl/evaluators/ni_evaluator.py
@@ -3,7 +3,7 @@
 from pathlib import Path
 
 import numpy as np
-import tqdm
+from tqdm.auto import tqdm
 
 from mttl.dataloader.ni_metrics import compute_grouped_metrics, compute_metrics
 from mttl.evaluators.base import GenerativeEvaluator, mean_stderr, switch_to_eval_mode
@@ -96,7 +96,7 @@ def evaluate(
             path = re.sub(r"/[^/]*$", "", output_path)
             Path(path).mkdir(parents=True, exist_ok=True)
 
-        pbar = tqdm.tqdm(
+        pbar = tqdm(
             enumerate(dataloader),
             total=len(dataloader),
         )
4 changes: 2 additions & 2 deletions mttl/evaluators/rouge_evaluator.py
@@ -1,7 +1,7 @@
 from dataclasses import dataclass
 
 import numpy as np
-import tqdm
+from tqdm.auto import tqdm
 
 from mttl.evaluators.base import GenerativeEvaluator, switch_to_eval_mode
 from mttl.evaluators.ni_evaluator import compute_metrics
@@ -48,7 +48,7 @@ def evaluate(
         if self.use_vllm:
             return self.evaluate_with_vllm(model, dataloader, num_batches, verbose)
 
-        pbar = tqdm.tqdm(
+        pbar = tqdm(
             enumerate(dataloader),
             total=len(dataloader),
         )
4 changes: 2 additions & 2 deletions mttl/models/expert_model.py
@@ -251,7 +251,7 @@ def delete_expert_container(self):
     def add_experts_from_library(self, library):
         import concurrent.futures
 
-        import tqdm
+        from tqdm.auto import tqdm
 
         if type(library) == str:
             from mttl.models.library.expert_library import ExpertLibrary
@@ -269,7 +269,7 @@ def add_module(self, module_name):
             futures.append(executor.submit(partial(add_module, self), element))
 
         # Progress bar setup
-        with tqdm.tqdm(
+        with tqdm(
             total=len(library), desc="Adding experts...", unit="expert"
         ) as progress_bar:
             for result in concurrent.futures.as_completed(futures):
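For context, the hunk above keeps a single bar in step with worker completion rather than submission order. A self-contained sketch of that pattern follows; `work_fn`, the inputs, and the `update(1)` call are hypothetical here, since the loop body is truncated in the hunk:

import concurrent.futures

from tqdm.auto import tqdm

def work_fn(x):
    # Stand-in for loading one expert; returns something checkable.
    return x * x

items = list(range(8))
with concurrent.futures.ThreadPoolExecutor() as executor:
    futures = [executor.submit(work_fn, x) for x in items]
    # The bar advances as futures finish, in completion order.
    with tqdm(total=len(items), desc="Adding experts...", unit="expert") as progress_bar:
        for future in concurrent.futures.as_completed(futures):
            result = future.result()  # re-raises worker exceptions, if any
            progress_bar.update(1)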
55 changes: 3 additions & 52 deletions mttl/models/library/library_transforms.py
@@ -14,7 +14,7 @@
 from pytorch_lightning import Trainer
 from sklearn.cluster import KMeans
 from sklearn.metrics.pairwise import cosine_similarity
-from tqdm import tqdm
+from tqdm.auto import tqdm
 
 from mttl.datamodule.base import get_datamodule
 from mttl.logging import logger
@@ -33,56 +33,7 @@
 from mttl.models.utils import transfer_batch_to_device
 from mttl.registrable import Registrable
 from mttl.serializable import Serializable
-
-
-def train_phatgoose(args, model, datamodule):
-    """Mini-training loop for phatgoose."""
-    import tqdm
-
-    torch.manual_seed(args.seed)
-    if torch.cuda.is_available():
-        torch.cuda.manual_seed(args.seed)
-
-    (optimizer, scheduler), _ = get_optimizer_and_scheduler(
-        model, args, num_train_examples=len(datamodule.train_dataset)
-    )
-    iter_train = iter(datamodule.train_dataloader())
-
-    bar = tqdm.tqdm(range(args.total_steps))
-    running_loss = 0.0
-    for step in bar:
-        loss_accum = 0.0
-        model.train()
-        optimizer.zero_grad()
-
-        for micro_step in range(args.gradient_accumulation_steps):
-            try:
-                batch = next(iter_train)
-            except StopIteration:
-                iter_train = iter(datamodule.train_dataloader())
-                batch = next(iter_train)
-
-            with torch.autocast(
-                device_type=model.device.type,
-                dtype=model.dtype,
-            ):
-                batch = transfer_batch_to_device(batch, model.device)
-                loss = model.forward(**batch).loss
-                loss = loss / args.gradient_accumulation_steps
-                loss_accum += loss.detach()
-                loss.backward()
-
-        if loss_accum:
-            norm = torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
-            running_loss += loss_accum.item()
-            optimizer.step()
-            scheduler.step()
-            if model.device.type == "cuda":
-                torch.cuda.synchronize()
-            bar.set_description_str(
-                f"Step {step + 1}/{args.total_steps}, Loss: {running_loss / (step + 1):.4f}, Lr: {scheduler.get_last_lr()[0]:.4f}"
-            )
-    return model
+from mttl.train_utils import train_model
@@ -750,7 +701,7 @@ def transform(
             frozen_sum += value.sum()
             value.requires_grad = False
 
-        train_phatgoose(training_config, model, dm)
+        train_model(training_config, model, dm)
 
         # for checksum
         frozen_sum_after, unfrozen_sum_after = 0, 0
4 changes: 2 additions & 2 deletions mttl/models/lightning/callbacks.py
@@ -6,12 +6,12 @@
 
 import pytorch_lightning as pl
 import torch
-import tqdm
 from pytorch_lightning import LightningModule, Trainer
 from pytorch_lightning import callbacks as cb
 from pytorch_lightning.callbacks.progress.tqdm_progress import Tqdm
 from pytorch_lightning.utilities.rank_zero import rank_zero_only
 from torch.optim import Optimizer
+from tqdm.auto import tqdm
 from transformers.utils import ModelOutput
 
 import wandb
@@ -232,7 +232,7 @@ def test(self, pl_module: LightningModule):
 
         total_loss, deno = 0.0, 0.0
         with torch.no_grad():
-            for i, batch in tqdm.tqdm(
+            for i, batch in tqdm(
                 enumerate(self.dataloader),
                 total=len(self.dataloader),
                 desc=f"Test {self.name}",
4 changes: 2 additions & 2 deletions mttl/models/openai.py
@@ -277,7 +277,7 @@ def generate(
         progress=True,
         **kwargs,
     ):
-        import tqdm
+        from tqdm.auto import tqdm
 
         if type(inputs) is not list:
             inputs = [inputs]
@@ -286,7 +286,7 @@
         generation_options = self.generation_options.copy()
         generation_options.update(**kwargs)
 
-        progress_bar = tqdm.tqdm(inputs, disable=not progress)
+        progress_bar = tqdm(inputs, disable=not progress)
 
         if self.engine in (
             "gpt-3.5-turbo",
4 changes: 2 additions & 2 deletions mttl/models/ranker/baseline_rankers.py
@@ -43,7 +43,7 @@ def __init__(self, **kwargs):
         self.vectorizer = None
 
     def train(self):
-        import tqdm
+        from tqdm.auto import tqdm
 
         self.dataset = (
             DatasetLibrary.pull_dataset(self.dataset_name, split="train")
@@ -54,7 +54,7 @@
             norm="l2", sublinear_tf=True, stop_words="english"
        )
         self.train_features = self.vectorizer.fit_transform(
-            tqdm.tqdm(self.dataset["source"])
+            tqdm(self.dataset["source"])
         )
         self.task_names = list(self.dataset["task_name"])
 
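Note that `fit_transform` accepts any iterable of documents, so wrapping the corpus in `tqdm` is enough to get a progress bar during vectorization. A minimal standalone sketch of the pattern above, using a made-up corpus:

from sklearn.feature_extraction.text import TfidfVectorizer
from tqdm.auto import tqdm

corpus = ["first document", "second document", "another short text"]
vectorizer = TfidfVectorizer(norm="l2", sublinear_tf=True, stop_words="english")
# The vectorizer consumes the wrapped iterable in one pass; the bar tracks it.
features = vectorizer.fit_transform(tqdm(corpus, desc="Vectorizing"))
print(features.shape)  # (3, vocabulary size)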
(Diffs for the remaining 3 of the 16 changed files are not shown.)
