@@ -120,7 +120,7 @@ def __init__(
             low_gpu_mem_usage: bool = False,
             low_cpu_mem_usage: bool = False,
             iters: int = 200,
-            seqlen: int = 2048,
+            seqlen: int = None,
             nsamples: int = 128,
             sampler: str = "rand",
             seed: int = 42,
@@ -136,7 +136,7 @@ def __init__(
             act_dynamic: bool = True,
             to_quant_block_names: Union[str, list] = None,
             enable_norm_bias_tuning: bool = False,
-            truncation: bool = False,
+            truncation: bool = None,
             enable_torch_compile: bool = None,
             **kwargs,
     ):
@@ -152,10 +152,6 @@ def __init__(
 
         dataset = self.template.default_dataset if dataset is None else dataset
 
-        if nsamples % batch_size != 0:
-            nsamples = (nsamples // batch_size + 1) * batch_size
-            logger.warning(f"'nsamples' is not divisible by 'batch_size', will adjusted to {nsamples}")
-
         from ..calib_dataset import CALIB_DATASETS
         from .mllm_dataset import MLLM_DATASET
         if isinstance(dataset, str):
@@ -170,17 +166,31 @@ def __init__(
 
             if dataset in MLLM_DATASET.keys():
                 truncation = False
-                batch_size = 1
                 seqlen = 512 if seqlen is None else seqlen
+                if batch_size != 1:
+                    logger.warning(
+                        f"reset batch_size({batch_size}) to 1 and "
+                        f"gradient_accumulate_steps({gradient_accumulate_steps}) "
+                        f"to {batch_size * gradient_accumulate_steps}, "
+                        f"because batch_size={batch_size} cannot be used for {dataset}")
+                    gradient_accumulate_steps = batch_size * gradient_accumulate_steps
+                    batch_size = 1
         if quant_nontext_module and batch_size != 1:
-            logger.warning(f"batch_size({batch_size}) cannot be used for calibrating non-text modules,"
-                           "reset to 1")
+            logger.warning(
+                f"reset batch_size({batch_size}) to 1 and "
+                f"gradient_accumulate_steps({gradient_accumulate_steps}) "
+                f"to {batch_size * gradient_accumulate_steps}, "
+                f"because batch_size={batch_size} cannot be used for calibrating non-text modules.")
             gradient_accumulate_steps = batch_size * gradient_accumulate_steps
             batch_size = 1
         seqlen = 2048 if seqlen is None else seqlen
         truncation = True if truncation is None else truncation
         self.truncation = truncation
 
+        if nsamples % batch_size != 0:
+            nsamples = (nsamples // batch_size + 1) * batch_size
+            logger.warning(f"'nsamples' is not divisible by 'batch_size', will be adjusted to {nsamples}")
+
         super(AutoRoundMLLM, self).__init__(
             model=model,
             tokenizer=tokenizer,
@@ -259,7 +269,7 @@ def calib(self, nsamples, bs):
                 m = m.to(self.device)
 
         total = nsamples if not hasattr(self.dataloader, "len") else min(nsamples, len(self.dataloader))
-        with tqdm(range(1, total + 1), desc="calib") as pbar:
+        with tqdm(range(1, total + 1), desc="cache block inputs") as pbar:
             for data in self.dataloader:
                 if data is None:
                     pbar.update(1)
@@ -337,7 +347,7 @@ def calib(self, nsamples, bs):
             exit(-1)
         elif total_cnt < nsamples:
             logger.warning(
-                f"Insufficient number of samples collected may affect the quantification. "
+                f"Insufficient number of samples collected may affect the quantization. "
                 f"target samples count is {nsamples}, while valid samples count is {total_cnt}"
             )
         if total_cnt < self.batch_size:
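
For reference, a minimal standalone sketch (not part of the patch) of the calibration-batching rules this change enforces: when a multimodal dataset or non-text-module calibration cannot run with batching, the original batch size is folded into gradient_accumulate_steps and batch_size is reset to 1, and nsamples is then rounded up to a multiple of the effective batch size. The helper name below is hypothetical; only the arithmetic mirrors the __init__ logic above.

# Hypothetical helper mirroring the adjustments made in AutoRoundMLLM.__init__ above.
def adjust_calib_config(batch_size: int, gradient_accumulate_steps: int,
                        nsamples: int, force_batch_one: bool):
    """Fold batch_size into gradient accumulation when batching is unsupported,
    then round nsamples up to a multiple of the (possibly new) batch size."""
    if force_batch_one and batch_size != 1:
        gradient_accumulate_steps = batch_size * gradient_accumulate_steps
        batch_size = 1
    if nsamples % batch_size != 0:
        nsamples = (nsamples // batch_size + 1) * batch_size
    return batch_size, gradient_accumulate_steps, nsamples

# e.g. batch_size=4, gradient_accumulate_steps=2, nsamples=10 -> (1, 8, 10)
print(adjust_calib_config(4, 2, 10, force_batch_one=True))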