@@ -72,7 +72,12 @@ def create_finetune_request(
7272 train_on_inputs : bool | Literal ["auto" ] | None = None ,
7373 training_method : str = "sft" ,
7474 dpo_beta : float | None = None ,
75+ dpo_normalize_logratios_by_length : bool = False ,
76+ rpo_alpha : float | None = None ,
77+ simpo_gamma : float | None = None ,
7578 from_checkpoint : str | None = None ,
79+ hf_api_token : str | None = None ,
80+ hf_output_repo_name : str | None = None ,
7681) -> FinetuneRequest :
7782 if model is not None and from_checkpoint is not None :
7883 raise ValueError (
@@ -182,6 +187,21 @@ def create_finetune_request(
182187
183188 if dpo_beta is not None and training_method != "dpo" :
184189 raise ValueError ("dpo_beta is only supported for DPO training" )
190+ if dpo_normalize_logratios_by_length and training_method != "dpo" :
191+ raise ValueError (
192+ "dpo_normalize_logratios_by_length=True is only supported for DPO training"
193+ )
194+ if rpo_alpha is not None :
195+ if training_method != "dpo" :
196+ raise ValueError ("rpo_alpha is only supported for DPO training" )
197+ if not rpo_alpha >= 0.0 :
198+ raise ValueError (f"rpo_alpha should be non-negative (got { rpo_alpha } )" )
199+
200+ if simpo_gamma is not None :
201+ if training_method != "dpo" :
202+ raise ValueError ("simpo_gamma is only supported for DPO training" )
203+ if not simpo_gamma >= 0.0 :
204+ raise ValueError (f"simpo_gamma should be non-negative (got { simpo_gamma } )" )
185205
186206 lr_scheduler : FinetuneLRScheduler
187207 if lr_scheduler_type == "cosine" :
@@ -204,7 +224,24 @@ def create_finetune_request(
204224 if training_method == "sft" :
205225 training_method_cls = TrainingMethodSFT (train_on_inputs = train_on_inputs )
206226 elif training_method == "dpo" :
207- training_method_cls = TrainingMethodDPO (dpo_beta = dpo_beta )
227+ if simpo_gamma is not None and simpo_gamma > 0 :
228+ dpo_reference_free = True
229+ dpo_normalize_logratios_by_length = True
230+ rprint (
231+ f"Parameter simpo_gamma was set to { simpo_gamma } . "
232+ "SimPO training detected. Reference logits will not be used "
233+ "and length normalization of log-probabilities will be enabled."
234+ )
235+ else :
236+ dpo_reference_free = False
237+
238+ training_method_cls = TrainingMethodDPO (
239+ dpo_beta = dpo_beta ,
240+ dpo_normalize_logratios_by_length = dpo_normalize_logratios_by_length ,
241+ dpo_reference_free = dpo_reference_free ,
242+ rpo_alpha = rpo_alpha ,
243+ simpo_gamma = simpo_gamma ,
244+ )
208245
209246 finetune_request = FinetuneRequest (
210247 model = model ,
@@ -227,6 +264,8 @@ def create_finetune_request(
227264 wandb_name = wandb_name ,
228265 training_method = training_method_cls ,
229266 from_checkpoint = from_checkpoint ,
267+ hf_api_token = hf_api_token ,
268+ hf_output_repo_name = hf_output_repo_name ,
230269 )
231270
232271 return finetune_request
@@ -302,7 +341,12 @@ def create(
302341 train_on_inputs : bool | Literal ["auto" ] | None = None ,
303342 training_method : str = "sft" ,
304343 dpo_beta : float | None = None ,
344+ dpo_normalize_logratios_by_length : bool = False ,
345+ rpo_alpha : float | None = None ,
346+ simpo_gamma : float | None = None ,
305347 from_checkpoint : str | None = None ,
348+ hf_api_token : str | None = None ,
349+ hf_output_repo_name : str | None = None ,
306350 ) -> FinetuneResponse :
307351 """
308352 Method to initiate a fine-tuning job
@@ -353,9 +397,14 @@ def create(
353397 training_method (str, optional): Training method. Defaults to "sft".
354398 Supported methods: "sft", "dpo".
355399 dpo_beta (float, optional): DPO beta parameter. Defaults to None.
400+ dpo_normalize_logratios_by_length (bool, optional): Whether or not to normalize logratios by sample length. Defaults to False.
401+ rpo_alpha (float, optional): RPO alpha parameter of DPO training to include NLL in the loss. Defaults to None.
402+ simpo_gamma (float, optional): SimPO gamma parameter. Defaults to None.
356403 from_checkpoint (str, optional): The checkpoint identifier to continue training from a previous fine-tuning job.
357404 The format: {$JOB_ID/$OUTPUT_MODEL_NAME}:{$STEP}.
358405 The step value is optional, without it the final checkpoint will be used.
406+ hf_api_token (str, optional): API key for the Hugging Face Hub. Defaults to None.
407+ hf_output_repo_name (str, optional): HF repo to upload the fine-tuned model to. Defaults to None.
359408
360409 Returns:
361410 FinetuneResponse: Object containing information about fine-tuning job.
@@ -405,7 +454,12 @@ def create(
405454 train_on_inputs = train_on_inputs ,
406455 training_method = training_method ,
407456 dpo_beta = dpo_beta ,
457+ dpo_normalize_logratios_by_length = dpo_normalize_logratios_by_length ,
458+ rpo_alpha = rpo_alpha ,
459+ simpo_gamma = simpo_gamma ,
408460 from_checkpoint = from_checkpoint ,
461+ hf_api_token = hf_api_token ,
462+ hf_output_repo_name = hf_output_repo_name ,
409463 )
410464
411465 if verbose :
@@ -714,7 +768,12 @@ async def create(
714768 train_on_inputs : bool | Literal ["auto" ] | None = None ,
715769 training_method : str = "sft" ,
716770 dpo_beta : float | None = None ,
771+ dpo_normalize_logratios_by_length : bool = False ,
772+ rpo_alpha : float | None = None ,
773+ simpo_gamma : float | None = None ,
717774 from_checkpoint : str | None = None ,
775+ hf_api_token : str | None = None ,
776+ hf_output_repo_name : str | None = None ,
718777 ) -> FinetuneResponse :
719778 """
720779 Async method to initiate a fine-tuning job
@@ -765,9 +824,14 @@ async def create(
765824 training_method (str, optional): Training method. Defaults to "sft".
766825 Supported methods: "sft", "dpo".
767826 dpo_beta (float, optional): DPO beta parameter. Defaults to None.
827+ dpo_normalize_logratios_by_length (bool, optional): Whether or not to normalize logratios by sample length. Defaults to False.
828+ rpo_alpha (float, optional): RPO alpha parameter of DPO training to include NLL in the loss. Defaults to None.
829+ simpo_gamma (float, optional): SimPO gamma parameter. Defaults to None.
768830 from_checkpoint (str, optional): The checkpoint identifier to continue training from a previous fine-tuning job.
769831 The format: {$JOB_ID/$OUTPUT_MODEL_NAME}:{$STEP}.
770832 The step value is optional, without it the final checkpoint will be used.
833+ hf_api_token (str, optional): API key for the Hugging Face Hub. Defaults to None.
834+ hf_output_repo_name (str, optional): HF repo to upload the fine-tuned model to. Defaults to None.
771835
772836 Returns:
773837 FinetuneResponse: Object containing information about fine-tuning job.
@@ -817,7 +881,12 @@ async def create(
817881 train_on_inputs = train_on_inputs ,
818882 training_method = training_method ,
819883 dpo_beta = dpo_beta ,
884+ dpo_normalize_logratios_by_length = dpo_normalize_logratios_by_length ,
885+ rpo_alpha = rpo_alpha ,
886+ simpo_gamma = simpo_gamma ,
820887 from_checkpoint = from_checkpoint ,
888+ hf_api_token = hf_api_token ,
889+ hf_output_repo_name = hf_output_repo_name ,
821890 )
822891
823892 if verbose :
0 commit comments