Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion QEfficient/base/modeling_qeff.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,7 @@ def _compile(
custom_io: Optional[Dict[str, str]] = None,
mdp_ts_num_devices: int = 1,
num_speculative_tokens: Optional[int] = None,
mxfp6_matmul: bool = constants.DEFAULT_AIC_MXFP6_MATMUL,
enable_qnn: Optional[bool] = False,
qnn_config: Optional[str] = None,
**compiler_options,
Expand All @@ -307,6 +308,7 @@ def _compile(
:custom_io (dict): Custom IO to specify the inputs and outputs in formats different from the default.
:mdp_ts_num_devices (int): Number of devices to partition across when using Multi-Device Partitioning with tensor-slicing.
:num_speculative_tokens (int, optional): Number of speculative tokens to take as input for Speculative Decoding Target Language Model.
:mxfp6_matmul (bool): Use MXFP6 to compress weights for MatMul nodes to run faster on device. ``Defaults to False.``
:enable_qnn (bool): Enables QNN Compilation. ``Defaults to False.``
:qnn_config (str): Path of QNN Config parameters file. Any extra parameters for QNN compilation can be passed via this file. ``Defaults to None.``
:compiler_options: Pass any compiler option as input.
Expand Down Expand Up @@ -337,7 +339,7 @@ def _compile(
custom_io=custom_io,
device_group=list(range(mdp_ts_num_devices)),
num_cores=compiler_options.get("aic_num_cores", constants.DEFAULT_AIC_NUM_CORES),
mxfp6=compiler_options.get("mxfp6_matmul", constants.DEFAULT_AIC_MXPF6_MATMUL),
mxfp6=mxfp6_matmul,
mxint8=mxint8_kv_cache,
qnn_config=qnn_config,
)
Expand All @@ -349,6 +351,9 @@ def _compile(
if mdp_ts_json_path := compiler_options.pop("mdp_load_partition_config", None):
command.append(f"-mdp-load-partition-config={mdp_ts_json_path}")

if mxfp6_matmul:
command.append("-mxfp6-matmul")

for key, value in compiler_options.items():
option = "-" + key.replace("_", "-")
if isinstance(value, bool):
Expand Down
11 changes: 10 additions & 1 deletion QEfficient/compile/qnn_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,17 @@ def parse_qnn_config(self):
for key, value in config_data.items():
if key == QnnConstants.CONVERTER_ARGS_EXTENSION_STR:
self.check_extension_arg(key, value, QnnConstants.IMMUTABLE_CONVERTER_ARGS)
if key == QnnConstants.CONTEXT_BIN_ARGS_EXTENSION_STR:
elif key == QnnConstants.CONTEXT_BIN_ARGS_EXTENSION_STR:
self.check_extension_arg(key, value, QnnConstants.IMMUTABLE_CONTEXT_BIN_GEN_ARGS)
elif key == QnnConstants.QNN_COMPILATION_BACKEND_STR:
immutable_param = [
sub_key for sub_key in value.keys() if sub_key in QnnConstants.IMMUTABLE_COMPILATION_BACKEND_ARGS
]
if immutable_param:
raise AttributeError(
f"Immutable Parameters {immutable_param} found in {QnnConstants.QNN_COMPILATION_BACKEND_STR}. Please remove them from QNN Configuration file."
)

self.qnn_config[key] = value

def create_qnn_tensor_slicing_json(self) -> str:
Expand Down
6 changes: 5 additions & 1 deletion QEfficient/utils/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@

# Compiler defaults
DEFAULT_AIC_NUM_CORES = 16
DEFAULT_AIC_MXPF6_MATMUL = False
DEFAULT_AIC_MXFP6_MATMUL = False
# Hashing defaults
HASH_HEXDIGEST_STR_LEN = 16
KWARGS_INCLUSION_LIST = [
Expand Down Expand Up @@ -207,6 +207,10 @@ class QnnConstants:
"--config_file ",
]

IMMUTABLE_COMPILATION_BACKEND_ARGS = [
"compiler_mxfp6_matmul_weights",
]

QNN_SAMPLE_CONFIG = {
"converter_args_extension": "--onnx_defer_loading",
"context_binary_generator_args_extension": "--log_level debug",
Expand Down
Loading