23 changes: 17 additions & 6 deletions examples/multimodal_audio/whisper_example.py
@@ -1,6 +1,10 @@
 import torch
 from datasets import load_dataset
-from transformers import WhisperForConditionalGeneration, WhisperProcessor
+from transformers import (
+    WhisperForConditionalGeneration,
+    WhisperProcessor,
+    default_data_collator,
+)
 
 from llmcompressor import oneshot
 from llmcompressor.modifiers.quantization import GPTQModifier
@@ -55,20 +59,27 @@ def process(sample):
         return_tensors="pt",
     )
 
-    inputs["input_features"] = inputs["input_features"].to(dtype=model.dtype)
+    # treat labels as calibration prefill
+    inputs["decoder_input_ids"] = inputs["labels"]
+    del inputs["labels"]
+
+    # strip extra dim added by multimodal processors
+    inputs = {key: value[0] for key, value in inputs.items()}
 
     return inputs
 
 
 ds = ds.map(process, remove_columns=ds.column_names)
 
 
-# Define a oneshot data collator for multimodal inputs.
-def data_collator(batch):
-    assert len(batch) == 1
-    return {key: torch.tensor(value) for key, value in batch[0].items()}
+# Patch: mismatch between processor and model dtype
+def data_collator(features):
+    for feature in features:
+        feature["input_features"] = torch.tensor(
+            feature["input_features"], dtype=model.dtype
+        )
+
+    return default_data_collator(features, return_tensors="pt")
 
 
 # Recipe
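
For context, here is a minimal self-contained sketch of the collator pattern introduced above. The toy shapes and the MODEL_DTYPE stand-in are illustrative assumptions, not part of the diff:

import torch
from transformers import default_data_collator

MODEL_DTYPE = torch.bfloat16  # stand-in for model.dtype in the example

def data_collator(features):
    # Cast each sample's float32 features to the model dtype, then let
    # transformers' default_data_collator stack the samples into batch tensors.
    for feature in features:
        feature["input_features"] = torch.tensor(
            feature["input_features"], dtype=MODEL_DTYPE
        )
    return default_data_collator(features, return_tensors="pt")

# Two toy samples with matching shapes collate into a [2, 2, 4] batch.
features = [
    {"input_features": [[0.1] * 4] * 2, "decoder_input_ids": [1, 2, 3]},
    {"input_features": [[0.2] * 4] * 2, "decoder_input_ids": [4, 5, 6]},
]
batch = data_collator(features)
print(batch["input_features"].dtype)  # torch.bfloat16
print(batch["input_features"].shape)  # torch.Size([2, 2, 4])

Unlike the removed collator, which asserted len(batch) == 1, this version composes with calibration batch sizes greater than one.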
18 changes: 5 additions & 13 deletions examples/multimodal_vision/gemma3_example.py
@@ -1,5 +1,4 @@
 import requests
-import torch
 from PIL import Image
 from transformers import AutoProcessor, Gemma3ForConditionalGeneration
 
@@ -13,17 +12,11 @@
 processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
 
 # Oneshot arguments
-DATASET_ID = "flickr30k"
-DATASET_SPLIT = {"calibration": "test[:512]"}
+BATCH_SIZE = 4
 NUM_CALIBRATION_SAMPLES = 512
 MAX_SEQUENCE_LENGTH = 2048
 
 
-# Define a oneshot data collator for multimodal inputs.
-def data_collator(batch):
-    assert len(batch) == 1
-    return {key: torch.tensor(value) for key, value in batch[0].items()}
-
+DATASET_ID = "flickr30k"
+DATASET_SPLIT = {"calibration": f"test[:{NUM_CALIBRATION_SAMPLES}]"}
 
 # Recipe
 recipe = [
@@ -41,14 +34,13 @@ def data_collator(batch):
 # Perform oneshot
 oneshot(
     model=model,
-    tokenizer=model_id,
+    processor=processor,
     dataset=DATASET_ID,
     splits=DATASET_SPLIT,
     recipe=recipe,
+    batch_size=BATCH_SIZE,
     max_seq_length=MAX_SEQUENCE_LENGTH,
     num_calibration_samples=NUM_CALIBRATION_SAMPLES,
     trust_remote_code_model=True,
-    data_collator=data_collator,
 )
 
 # Confirm generations of the quantized model look sane.
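
The gemma3 example now relies on the built-in collator, with batching handled by the library via BATCH_SIZE = 4. As a rough sketch of why non-uniform sequence lengths matter once batch_size > 1, a hypothetical truncation-style collator (not the library's implementation) could look like:

import torch

def truncation_collator(features):
    # Trim every sequence to the shortest in the batch so the samples stack
    # into one rectangular tensor; longer samples lose their tails.
    min_len = min(len(f["input_ids"]) for f in features)
    return {
        key: torch.tensor([f[key][:min_len] for f in features])
        for key in features[0]
    }

batch = truncation_collator(
    [
        {"input_ids": [1, 2, 3, 4], "attention_mask": [1, 1, 1, 1]},
        {"input_ids": [5, 6], "attention_mask": [1, 1]},
    ]
)
print(batch["input_ids"].shape)  # torch.Size([2, 2])

A padding-style collator would instead extend the short samples, trading wasted compute for lossless batches.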
15 changes: 4 additions & 11 deletions examples/multimodal_vision/internvl3_example.py
@@ -37,20 +37,14 @@ def preprocess_and_tokenize(example):
         return_dict=True,
         return_tensors="pt",
     )
-    return inputs
-
-
-ds = ds.map(preprocess_and_tokenize)
+    # remove extra dim added by multimodal processors
+    inputs = {key: value[0] for key, value in inputs.items()}
 
+    return inputs
 
-def data_collator(batch):
-    assert len(batch) == 1
-    item = {key: value for key, value in batch[0].items()}
-    item["attention_mask"] = torch.tensor([item["attention_mask"]])
-    item["input_ids"] = torch.LongTensor([item["input_ids"]])
-
-    return item
 
+ds = ds.map(preprocess_and_tokenize, remove_columns=ds.column_names)
 
 # Recipe
 recipe = GPTQModifier(
@@ -68,7 +62,6 @@ def data_collator(batch):
     max_seq_length=MAX_SEQUENCE_LENGTH,
     num_calibration_samples=NUM_CALIBRATION_SAMPLES,
     trust_remote_code_model=True,
-    data_collator=data_collator,
 )
 
 # Save to disk compressed.
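
The internvl3 change moves batch-dimension handling out of the collator and into preprocessing. A small illustration of the extra dimension being stripped (the shapes here are assumptions for demonstration):

import torch

# Processors called with return_tensors="pt" return batched tensors of shape
# [1, ...] even for a single sample.
inputs = {
    "input_ids": torch.tensor([[101, 7592, 102]]),  # shape [1, 3]
    "pixel_values": torch.zeros(1, 3, 448, 448),    # shape [1, 3, 448, 448]
}

# Indexing with [0] stores unbatched samples in the dataset, so a standard
# collator can re-batch them later.
inputs = {key: value[0] for key, value in inputs.items()}
print(inputs["input_ids"].shape)     # torch.Size([3])
print(inputs["pixel_values"].shape)  # torch.Size([3, 448, 448])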
8 changes: 0 additions & 8 deletions examples/multimodal_vision/llava_example.py
@@ -1,5 +1,4 @@
 import requests
-import torch
 from PIL import Image
 from transformers import AutoProcessor, LlavaForConditionalGeneration
 
@@ -19,12 +18,6 @@
 MAX_SEQUENCE_LENGTH = 2048
 
 
-# Define a oneshot data collator for multimodal inputs.
-def data_collator(batch):
-    assert len(batch) == 1
-    return {key: torch.tensor(value) for key, value in batch[0].items()}
-
-
 # Recipe
 recipe = [
     GPTQModifier(
@@ -44,7 +37,6 @@ def data_collator(batch):
     max_seq_length=MAX_SEQUENCE_LENGTH,
     num_calibration_samples=NUM_CALIBRATION_SAMPLES,
     trust_remote_code_model=True,
-    data_collator=data_collator,
     sequential_targets=["LlamaDecoderLayer"],
 )
 
22 changes: 11 additions & 11 deletions examples/multimodal_vision/mistral3_example.py
@@ -4,7 +4,11 @@
 import requests
 import torch
 from PIL import Image
-from transformers import AutoProcessor, Mistral3ForConditionalGeneration
+from transformers import (
+    AutoProcessor,
+    Mistral3ForConditionalGeneration,
+    default_data_collator,
+)
 
 from llmcompressor import oneshot
 from llmcompressor.modifiers.quantization import GPTQModifier
@@ -27,17 +31,13 @@
 MAX_SEQUENCE_LENGTH = 2048
 
 
-# Define a oneshot data collator for multimodal inputs.
-def data_collator(batch):
-    assert len(batch) == 1
-    return {
-        key: (
-            torch.tensor(value)
-            if key != "pixel_values"
-            else torch.tensor(value, dtype=model.dtype)
-        )
-        for key, value in batch[0].items()
-    }
+# Patch: mismatch between processor and model dtype
+def data_collator(features):
+    for feature in features:
+        feature["pixel_values"] = torch.tensor(
+            feature["pixel_values"], dtype=model.dtype
+        )
+    return default_data_collator(features, return_tensors="pt")
 
 
 # Recipe
8 changes: 0 additions & 8 deletions examples/multimodal_vision/mllama_example.py
@@ -1,5 +1,4 @@
 import requests
-import torch
 from PIL import Image
 from transformers import AutoProcessor, MllamaForConditionalGeneration
 
@@ -19,12 +18,6 @@
 MAX_SEQUENCE_LENGTH = 2048
 
 
-# Define a oneshot data collator for multimodal inputs.
-def data_collator(batch):
-    assert len(batch) == 1
-    return {key: torch.tensor(value) for key, value in batch[0].items()}
-
-
 # Recipe
 recipe = [
     GPTQModifier(
@@ -44,7 +37,6 @@ def data_collator(batch):
     max_seq_length=MAX_SEQUENCE_LENGTH,
     num_calibration_samples=NUM_CALIBRATION_SAMPLES,
     trust_remote_code_model=True,
-    data_collator=data_collator,
     sequential_targets=["MllamaSelfAttentionDecoderLayer"],
 )
 
25 changes: 13 additions & 12 deletions examples/multimodal_vision/pixtral_example.py
@@ -1,7 +1,11 @@
 import requests
 import torch
 from PIL import Image
-from transformers import AutoProcessor, LlavaForConditionalGeneration
+from transformers import (
+    AutoProcessor,
+    LlavaForConditionalGeneration,
+    default_data_collator,
+)
 
 from llmcompressor import oneshot
 from llmcompressor.modifiers.quantization import GPTQModifier
@@ -19,16 +23,13 @@
 MAX_SEQUENCE_LENGTH = 2048
 
 
-# Define a oneshot data collator for multimodal inputs.
-# NOTE: for transformers<4.48.0, please squeeze the first dimension of `pixel_values`
-# by appending `[0]` to the end of line 32
-def data_collator(batch):
-    assert len(batch) == 1
-    return {
-        "input_ids": torch.LongTensor(batch[0]["input_ids"]),
-        "attention_mask": torch.tensor(batch[0]["attention_mask"]),
-        "pixel_values": torch.tensor(batch[0]["pixel_values"]),
-    }
+# Patch: mismatch between processor and model dtype
+def data_collator(features):
+    for feature in features:
+        feature["pixel_values"] = torch.tensor(
+            feature["pixel_values"], dtype=model.dtype
+        )
+    return default_data_collator(features, return_tensors="pt")
 
 
 # Recipe
@@ -46,11 +47,11 @@ def data_collator(batch):
     tokenizer=model_id,
     dataset=DATASET_ID,
     splits={"calibration": f"{DATASET_SPLIT}[:{NUM_CALIBRATION_SAMPLES}]"},
+    data_collator=data_collator,
     recipe=recipe,
     max_seq_length=MAX_SEQUENCE_LENGTH,
     num_calibration_samples=NUM_CALIBRATION_SAMPLES,
     trust_remote_code_model=True,
-    data_collator=data_collator,
     sequential_targets=["MistralDecoderLayer"],
 )
 
42 changes: 33 additions & 9 deletions src/llmcompressor/args/dataset_arguments.py
@@ -8,9 +8,7 @@
"""

from dataclasses import dataclass, field
from typing import Any, Callable

from transformers import DefaultDataCollator
from typing import Callable


@dataclass
@@ -69,9 +67,27 @@ class CustomDatasetArguments(DVCDatasetArguments):
         },
     )
 
-    data_collator: Callable[[Any], Any] = field(
-        default_factory=lambda: DefaultDataCollator(),
-        metadata={"help": "The function to used to form a batch from the dataset"},
+    batch_size: int = field(
+        default=1,
+        metadata={
+            "help": (
+                "Calibration batch size. During calibration, LLM Compressor disables "
+                "lm_head output computations to reduce memory usage from large "
+                "batch sizes. Large batch sizes may result in excess padding or "
+                "truncation, depending on the data_collator"
+            )
+        },
+    )
+
+    data_collator: str | Callable = field(
+        default="truncation",
+        metadata={
+            "help": (
+                "The function used to form a batch from the dataset. Can also "
+                "specify 'truncation' or 'padding' to truncate or pad non-uniform "
+                "sequence lengths in a batch. Defaults to 'truncation'."
+            )
+        },
     )
 
 
@@ -126,8 +142,8 @@ class DatasetArguments(CustomDatasetArguments):
         default=512,
         metadata={"help": "Number of samples to use for one-shot calibration"},
     )
-    shuffle_calibration_samples: bool | None = field(
-        default=True,
+    shuffle_calibration_samples: bool = field(
+        default=False,
         metadata={
             "help": "whether to shuffle the dataset before selecting calibration data"
         },
@@ -142,7 +158,7 @@ class DatasetArguments(CustomDatasetArguments):
     )
     preprocessing_num_workers: int | None = field(
         default=None,
-        metadata={"help": "The number of processes to use for the preprocessing."},
+        metadata={"help": "The number of workers to use for dataset processing."},
     )
     pad_to_max_length: bool = field(
         default=True,
@@ -214,6 +230,14 @@ class DatasetArguments(CustomDatasetArguments):
"definition"
},
)
offload_sequential_activations: bool = field(
default=True,
metadata={
"help": "Whether to offload intermediate activations between sequential "
"layers to the CPU. Disabling offloading is much faster, but uses "
"signficiantly more memory. Default is True."
},
)
quantization_aware_calibration: bool = field(
default=True,
metadata={
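
Taken together, a calibration call exercising the new arguments might look like the sketch below. The model, processor, and recipe are assumed to be defined as in the examples above; the argument names come from this diff:

from llmcompressor import oneshot

oneshot(
    model=model,
    processor=processor,
    dataset="flickr30k",
    splits={"calibration": "test[:512]"},
    recipe=recipe,
    num_calibration_samples=512,
    batch_size=4,  # new: calibration batch size (default 1)
    data_collator="padding",  # new: "truncation" (default) or a custom callable
    offload_sequential_activations=False,  # new: faster, but uses more memory
)

Note that the diff also flips the shuffle_calibration_samples default from True to False, so calibration samples are taken in dataset order unless shuffling is requested explicitly.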