@@ -312,6 +312,7 @@ def rtn_quantize(
         model: fake quantized ONNXModel
     """
     model = model if isinstance(model, BaseModel) else ONNXModel(model)
+    base_dir = os.path.dirname(model.model_path) if model.model_path is not None else ""
     new_nodes = []
     remove_nodes = []
     for node in model.nodes():
@@ -321,7 +322,7 @@ def rtn_quantize(
             and weight_config.get(node.name, {}) != "fp32"
         ):
             weight_tensor = model.get_initializer(node.input[1])
-            weight = numpy_helper.to_array(weight_tensor, base_dir=os.path.dirname(model.model_path)).copy()
+            weight = numpy_helper.to_array(weight_tensor, base_dir=base_dir).copy()
             if len(weight.shape) != 2:
                 continue
 
@@ -401,6 +402,7 @@ def apply_awq_scale(model, weight_config, absorb_pairs, output_dicts, num_bits,
     new_added_mul_nodes = []
     replace_input = []
     updated_nodes = []
+    base_dir = os.path.dirname(model.model_path) if model.model_path is not None else ""
 
     for parent, nodes in absorb_pairs.items():
         if any([node.input[0] not in output_dicts for node in nodes]):
@@ -434,7 +436,7 @@ def apply_awq_scale(model, weight_config, absorb_pairs, output_dicts, num_bits,
             if weight_config.get(node.name, {}) == "fp32":
                 continue
 
-            weight = numpy_helper.to_array(model.get_initializer(node.input[1]), os.path.dirname(model.model_path))
+            weight = numpy_helper.to_array(model.get_initializer(node.input[1]), base_dir)
             if len(weight.shape) != 2:
                 continue
 
@@ -476,7 +478,7 @@ def apply_awq_scale(model, weight_config, absorb_pairs, output_dicts, num_bits,
 
             init_share_num = model.get_initializer_share_num(node.input[1])
             weight_tensor = model.get_initializer(node.input[1])
-            tensor = numpy_helper.to_array(weight_tensor, os.path.dirname(model.model_path))
+            tensor = numpy_helper.to_array(weight_tensor, base_dir)
 
             tensor = tensor.T * best_scale
             tensor = (tensor.T).astype("float32")
@@ -497,7 +499,7 @@ def apply_awq_scale(model, weight_config, absorb_pairs, output_dicts, num_bits,
             ) == len(nodes):
                 for idx in [1, 2]:
                     tensor = numpy_helper.to_array(
-                        model.get_initializer(parent.input[idx]), os.path.dirname(model.model_path)
+                        model.get_initializer(parent.input[idx]), base_dir
                     )
                     new_tensor = tensor / np.reshape(best_scale, (1, -1))
                     model.set_initializer(parent.input[idx], new_tensor.astype(tensor.dtype), raw=True)
@@ -511,7 +513,7 @@ def apply_awq_scale(model, weight_config, absorb_pairs, output_dicts, num_bits,
             ):  # pragma: no cover
                 for inp in parent.input:
                     if model.get_initializer(inp) is not None:
-                        tensor = numpy_helper.to_array(model.get_initializer(inp), os.path.dirname(model.model_path))
+                        tensor = numpy_helper.to_array(model.get_initializer(inp), base_dir)
                         new_tensor = tensor / np.reshape(best_scale, (1, -1))
                         model.set_initializer(inp, new_tensor.astype(tensor.dtype), raw=True)
                 updated_nodes.append(parent.name)
@@ -520,7 +522,7 @@ def apply_awq_scale(model, weight_config, absorb_pairs, output_dicts, num_bits,
             elif parent.op_type in ["Conv", "FusedConv"] and len(model.input_name_to_nodes[nodes[0].input[0]]) == len(
                 nodes
             ):  # pragma: no cover
-                tensor = numpy_helper.to_array(model.get_initializer(parent.input[2]), os.path.dirname(model.model_path))
+                tensor = numpy_helper.to_array(model.get_initializer(parent.input[2]), base_dir)
                 new_tensor = tensor / np.reshape(best_scale, (1, -1))
                 model.set_initializer(parent.input[2], new_tensor.astype(tensor.dtype), raw=True)
                 updated_nodes.append(parent.name)
@@ -558,6 +560,7 @@ def apply_awq_scale(model, weight_config, absorb_pairs, output_dicts, num_bits,
 
 def apply_awq_clip(model, weight_config, absorb_pairs, output_dicts, num_bits, group_size, scheme):
     """Apply clip for weight by checking mse."""
+    base_dir = os.path.dirname(model.model_path) if model.model_path is not None else ""
     ratios = {}
     for parent, nodes in absorb_pairs.items():
         if any([node.input[0] not in output_dicts for node in nodes]):
@@ -577,7 +580,7 @@ def apply_awq_clip(model, weight_config, absorb_pairs, output_dicts, num_bits, g
                     scheme = weight_config[node.name]["scheme"]
 
                 org_weight = numpy_helper.to_array(
-                    model.get_initializer(node.input[1]), base_dir=os.path.dirname(model.model_path)
+                    model.get_initializer(node.input[1]), base_dir=base_dir
                 )
                 org_w_shape = org_weight.shape  # ic, oc
                 group_size = group_size if group_size != -1 else org_w_shape[0]
@@ -983,6 +986,7 @@ def gptq_quantize(
         model: fake quantized ONNXModel
     """
     model = model if isinstance(model, BaseModel) else ONNXModel(model)
+    base_dir = os.path.dirname(model.model_path) if model.model_path is not None else ""
     output_dicts = {}
 
     inputs, so = prepare_inputs(model, n_samples, dataloader)
@@ -1028,7 +1032,7 @@ def gptq_quantize(
                 and weight_config.get(node.name, {}).get("algorithm", "GPTQ") == "GPTQ"
             ):
                 weight = numpy_helper.to_array(
-                    model.get_initializer(model.get_node(node.name).input[1]), os.path.dirname(model.model_path)
+                    model.get_initializer(model.get_node(node.name).input[1]), base_dir
                 ).copy()
                 if len(weight.shape) != 2:
                     continue
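
Every hunk applies the same pattern: the inline `os.path.dirname(model.model_path)` at each `numpy_helper.to_array` call site is replaced by a `base_dir` computed once per function, falling back to `""` when the model has no backing file. The point is that `to_array` only consults `base_dir` to resolve tensors stored as external data, while `os.path.dirname(None)` raises a `TypeError` for purely in-memory models. A minimal sketch of the guarded pattern, using a hypothetical in-memory model built with `onnx.helper` rather than the project's `ONNXModel` wrapper:

```python
import os

import numpy as np
from onnx import TensorProto, helper, numpy_helper

# Build a tiny MatMul model entirely in memory; it has no file on disk,
# which is the case the `base_dir` guard protects against.
weight = numpy_helper.from_array(np.ones((4, 4), dtype=np.float32), name="W")
graph = helper.make_graph(
    [helper.make_node("MatMul", ["X", "W"], ["Y"])],
    "demo",
    [helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 4])],
    [helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 4])],
    initializer=[weight],
)
model = helper.make_model(graph)
model_path = None  # stands in for ONNXModel.model_path of an in-memory model

# Without the guard, os.path.dirname(None) raises TypeError.
# With it, inline tensors still load; base_dir is only needed when a
# tensor's raw data lives in an external file next to the model.
base_dir = os.path.dirname(model_path) if model_path is not None else ""
array = numpy_helper.to_array(model.graph.initializer[0], base_dir=base_dir)
assert array.shape == (4, 4)
```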