|
18 | 18 | min_learning_rate=1e-6, |
19 | 19 | full_training=FinetuneFullTrainingLimits( |
20 | 20 | max_batch_size=96, |
| 21 | + max_batch_size_dpo=48, |
21 | 22 | min_batch_size=8, |
22 | 23 | ), |
23 | 24 | lora_training=FinetuneLoraTrainingLimits( |
24 | 25 | max_batch_size=128, |
| 26 | + max_batch_size_dpo=64, |
25 | 27 | min_batch_size=8, |
26 | 28 | max_rank=64, |
27 | 29 | target_modules=["q", "k", "v", "o", "mlp"], |
@@ -83,6 +85,40 @@ def test_lora_request(): |
83 | 85 | assert request.batch_size == _MODEL_LIMITS.lora_training.max_batch_size |
84 | 86 |
|
85 | 87 |
|
def test_dpo_request_lora():
    """DPO fine-tuning with LoRA enabled should pick the LoRA-specific
    DPO batch-size cap and the maximal LoRA hyperparameters."""
    request = create_finetune_request(
        model_limits=_MODEL_LIMITS,
        model=_MODEL_NAME,
        training_file=_TRAINING_FILE,
        training_method="dpo",
        lora=True,
    )

    lora_limits = _MODEL_LIMITS.lora_training
    training_type = request.training_type

    assert training_type.type == "Lora"
    assert training_type.lora_r == lora_limits.max_rank
    # Convention: alpha defaults to twice the rank.
    assert training_type.lora_alpha == lora_limits.max_rank * 2
    assert training_type.lora_dropout == 0.0
    assert training_type.lora_trainable_modules == "all-linear"
    # DPO uses its own (smaller) batch-size ceiling.
    assert request.batch_size == lora_limits.max_batch_size_dpo
| 104 | + |
def test_dpo_request():
    """DPO fine-tuning with ``lora=False`` should produce full-weight
    training capped at the full-training DPO batch size."""
    request = create_finetune_request(
        model_limits=_MODEL_LIMITS,
        model=_MODEL_NAME,
        training_file=_TRAINING_FILE,
        training_method="dpo",
        lora=False,
    )

    # lora=False requests full-weight training: the training type must be
    # "Full" and there are no LoRA hyperparameters to inspect.  (The previous
    # "Lora" assertions here were copy-pasted from test_dpo_request_lora and
    # contradicted the full_training batch-size check below.)
    assert request.training_type.type == "Full"
    assert request.batch_size == _MODEL_LIMITS.full_training.max_batch_size_dpo
| 121 | + |
86 | 122 | def test_from_checkpoint_request(): |
87 | 123 | request = create_finetune_request( |
88 | 124 | model_limits=_MODEL_LIMITS, |
|
0 commit comments