From d3d2096bed3329feeb1d9336f64a999742593b99 Mon Sep 17 00:00:00 2001
From: Alessandro Sordoni
Date: Mon, 11 Nov 2024 21:33:38 -0800
Subject: [PATCH] working!

---
 mttl/models/library/library_transforms.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/mttl/models/library/library_transforms.py b/mttl/models/library/library_transforms.py
index 782e8bec..d2cf51da 100644
--- a/mttl/models/library/library_transforms.py
+++ b/mttl/models/library/library_transforms.py
@@ -622,15 +622,12 @@ def transform(

 @dataclass
 class PhatgooseConfig(LibraryTransformConfig):
-    n_steps: int = 100
+    n_steps: int = 200
     learning_rate: float = 3e-3
-    warmup_ratio: float = 0.1  # 0.9999999 # 0.1
+    warmup_ratio: float = 0.1
     micro_batch_size: int = 1
     batch_size: int = 1

-    def __post_init__(self):
-        self.gradient_accumulation_steps = self.batch_size // self.micro_batch_size
-

 @LibraryTransform.register("phatgoose", PhatgooseConfig)
 class PhatgooseTransform(HiddenStateComputer):
@@ -699,7 +696,7 @@ def transform(
             training_config.train_batch_size = self.config.batch_size
             training_config.micro_batch_size = self.config.micro_batch_size
             training_config.gradient_accumulation_steps = (
-                self.config.gradient_accumulation_steps
+                self.config.batch_size // self.config.micro_batch_size
             )
             training_config.dataset = expert.expert_info.dataset
