Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
efe5e74
Diffusers support (#604)
quic-amitraj Dec 9, 2025
002618a
Subfunction fixes for KV cache transform (#655)
abhishek-singh591 Dec 10, 2025
455f0cd
[Test]: subfunction test moved to qaic Test Stage (#665)
abukhoy Dec 11, 2025
404651f
Prefill+decode gpt oss (#608)
ochougul Dec 14, 2025
2258c83
Updated tests of onnx_subfunction (#668)
quic-amitraj Dec 16, 2025
11df7e4
Extend on-device sampling support for dual QPC VLMs (#597)
quic-xiyushi Dec 17, 2025
768c7aa
test: Verify ONNX subfunction usage through model inspection instead …
vbaddi Dec 17, 2025
8d036ee
HOTFIX: Testing the Finetune base CI failure by installing pytorch2.9…
quic-dhirajku Dec 18, 2025
884e77a
Add Support for Guided Decoding to On Device Sampling (#624)
quic-sanising Dec 18, 2025
396c034
Added all the changes for logger
abhishek-singh591 Dec 19, 2025
49c667f
lint and format error fix
abhishek-singh591 Dec 19, 2025
d4120aa
added function which can be used to print table containing time in ea…
abhishek-singh591 Dec 19, 2025
db9ff76
added function which can be used to print table containing time in ea…
abhishek-singh591 Dec 19, 2025
c40a635
minor fixes
abhishek-singh591 Jan 2, 2026
74d6cf5
Prefill+decode gpt oss (#608)
ochougul Dec 14, 2025
0a5ab86
Updated tests of onnx_subfunction (#668)
quic-amitraj Dec 22, 2025
1969db4
Extend on-device sampling support for dual QPC VLMs (#597)
quic-xiyushi Dec 17, 2025
f080e29
Add Support for Guided Decoding to On Device Sampling (#624)
quic-sanising Dec 18, 2025
a5729ae
Adding memory profiling (#674)
quic-rishinr Dec 19, 2025
0dc4121
HOTFIX: Modified replicate_kv_heads.py script to not run ONNXRT infer…
quic-dhirajku Dec 19, 2025
2e7817f
Add automatic CCL list generation for prefill and decode when user do…
vjanfaza Dec 19, 2025
fa5f540
Adding WAN Lightning support (#669)
tv-karthikeya Dec 20, 2025
2eb237c
Added blocking support to flux (#679)
quic-amitraj Dec 22, 2025
da78151
fixed new NPI for changed ONNX names (#684)
ochougul Dec 22, 2025
b365f36
Updated compile command for subfunction (#681)
quic-amitraj Dec 22, 2025
f2429f3
Disagg hotfix gpt oss (#689)
ochougul Dec 23, 2025
0e51a5b
Minor fixes
abhishek-singh591 Jan 2, 2026
aa91d79
Merge branch 'main' into logger
abhishek-singh591 Jan 2, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion QEfficient/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,17 @@
from QEfficient.peft import QEffAutoPeftModelForCausalLM
from QEfficient.transformers.transform import transform
from QEfficient.utils import custom_format_warning
from QEfficient.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("INFRA", loglevel="INFO")

# custom warning for the better logging experience
warnings.formatwarning = custom_format_warning


# custom warning for the better logging experience
warnings.formatwarning = custom_format_warning


# custom warning for the better logging experience
warnings.formatwarning = custom_format_warning
Expand Down
6 changes: 4 additions & 2 deletions QEfficient/base/modeling_qeff.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

import gc
import inspect
import logging
import shutil
import subprocess
import warnings
Expand Down Expand Up @@ -35,8 +34,9 @@
load_json,
)
from QEfficient.utils.export_utils import export_wrapper
from QEfficient.utils.logging_utils import QEFFLogger

logger = logging.getLogger(__name__)
logger = QEFFLogger.get_logger("INFRA", loglevel="INFO")


class QEFFBaseModel(ABC):
Expand Down Expand Up @@ -326,6 +326,7 @@ def _export(
self.prefill_onnx_path = onnx_path
else:
self.onnx_path = onnx_path
logger.info("Model export is finished and saved at: %s", onnx_path)
return onnx_path

def get_onnx_path(
Expand Down Expand Up @@ -539,4 +540,5 @@ def _compile(
logger.info("Hashed parameters exported successfully.")

self.qpc_path = qpc_path
logger.info("Model compilation is finished and saved at: %s", qpc_path)
return qpc_path
4 changes: 3 additions & 1 deletion QEfficient/base/pytorch_transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@

from torch import nn

from QEfficient.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("INFRA", loglevel="INFO")


class PytorchTransform:
Expand Down
4 changes: 3 additions & 1 deletion QEfficient/cloud/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@
from QEfficient.base.common import QEFFCommonLoader
from QEfficient.utils import check_and_assign_cache_dir
from QEfficient.utils.custom_yaml import generate_custom_io
from QEfficient.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("INFRA", loglevel="INFO")

# Specifically for Docker images.
ROOT_DIR = os.path.dirname(os.path.abspath(""))
Expand Down
4 changes: 3 additions & 1 deletion QEfficient/cloud/finetune.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,12 @@
from QEfficient.finetune.utils.dataset_utils import get_dataloader, get_longest_seq_length
from QEfficient.finetune.utils.device_map import get_device_map
from QEfficient.finetune.utils.helper import Task_Mode, get_world_size
from QEfficient.finetune.utils.logging_utils import logger
from QEfficient.finetune.utils.parser import get_finetune_parser
from QEfficient.finetune.utils.train_utils import print_model_size, print_trainable_parameters, train
from QEfficient.utils._utils import hf_download
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("FT", loglevel="INFO")

# Try importing QAIC-specific module, proceed without it if unavailable
try:
Expand Down
4 changes: 3 additions & 1 deletion QEfficient/cloud/infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@

from QEfficient.base.common import QEFFCommonLoader
from QEfficient.utils import check_and_assign_cache_dir, load_hf_processor, load_hf_tokenizer
from QEfficient.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("INFRA", loglevel="INFO")


# TODO: Remove after adding support for VLM's compile and execute
Expand Down
4 changes: 3 additions & 1 deletion QEfficient/compile/compile_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@
from QEfficient.compile.qnn_compiler import compile as qnn_compile
from QEfficient.utils import constants
from QEfficient.utils._utils import load_json, load_yaml
from QEfficient.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("INFRA", loglevel="INFO")


def create_and_dump_specializations(
Expand Down
4 changes: 3 additions & 1 deletion QEfficient/compile/qnn_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@
generate_qnn_specialization,
)
from QEfficient.utils.hash_utils import to_hashable
from QEfficient.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("INFRA", loglevel="INFO")


class QNN:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,9 @@
_get_qkv_projections,
)

from QEfficient.diffusers.models.modeling_utils import compute_blocked_attention, get_attention_blocking_config
from QEfficient.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("MODEL", loglevel="INFO")


def qeff_apply_rotary_emb(
Expand Down Expand Up @@ -90,9 +91,9 @@
key = qeff_apply_rotary_emb(key, image_rotary_emb)

# Get blocking configuration
blocking_mode, head_block_size, num_kv_blocks, num_q_blocks = get_attention_blocking_config()

Check failure on line 94 in QEfficient/diffusers/models/transformers/transformer_flux.py

View workflow job for this annotation

GitHub Actions / lint

Ruff (F821)

QEfficient/diffusers/models/transformers/transformer_flux.py:94:71: F821 Undefined name `get_attention_blocking_config`
# Apply blocking using pipeline_utils
hidden_states = compute_blocked_attention(

Check failure on line 96 in QEfficient/diffusers/models/transformers/transformer_flux.py

View workflow job for this annotation

GitHub Actions / lint

Ruff (F821)

QEfficient/diffusers/models/transformers/transformer_flux.py:96:25: F821 Undefined name `compute_blocked_attention`
query.transpose(1, 2),
key.transpose(1, 2),
value.transpose(1, 2),
Expand Down
4 changes: 3 additions & 1 deletion QEfficient/diffusers/pipelines/flux/pipeline_flux.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,9 @@
set_module_device_ids,
)
from QEfficient.generation.cloud_infer import QAICInferenceSession
from QEfficient.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("MODEL", loglevel="INFO")


class QEffFluxPipeline:
Expand Down
4 changes: 3 additions & 1 deletion QEfficient/diffusers/pipelines/pipeline_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@
from tqdm import tqdm

from QEfficient.utils._utils import load_json
from QEfficient.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("MODEL", loglevel="INFO")


def calculate_compressed_latent_dimension(height: int, width: int, vae_scale_factor: int) -> int:
Expand Down
4 changes: 3 additions & 1 deletion QEfficient/exporter/export_hf_to_cloud_ai_100.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@
from QEfficient.utils import load_hf_tokenizer
from QEfficient.utils.constants import QEFF_MODELS_DIR, Constants
from QEfficient.utils.generate_inputs import InputHandler
from QEfficient.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("INFRA", loglevel="INFO")


def convert_to_cloud_bertstyle(
Expand Down
4 changes: 3 additions & 1 deletion QEfficient/finetune/dataset/alpaca_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@
import torch
from torch.utils.data import Dataset

from QEfficient.finetune.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("FT", loglevel="INFO")

PROMPT_DICT = {
"prompt_input": (
Expand Down
4 changes: 3 additions & 1 deletion QEfficient/finetune/dataset/custom_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@
import logging
from pathlib import Path

from QEfficient.finetune.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("FT", loglevel="INFO")


def load_module_from_py_file(py_file: str) -> object:
Expand Down
4 changes: 3 additions & 1 deletion QEfficient/finetune/dataset/grammar_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@
from datasets import load_dataset
from torch.utils.data import Dataset

from QEfficient.finetune.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("FT", loglevel="INFO")


class grammar(Dataset):
Expand Down
4 changes: 3 additions & 1 deletion QEfficient/finetune/utils/config_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@
from QEfficient.finetune.configs.training import TrainConfig
from QEfficient.finetune.dataset.dataset_config import DATASET_PREPROC
from QEfficient.finetune.utils.helper import Peft_Method
from QEfficient.finetune.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("FT", loglevel="INFO")


def update_config(config, **kwargs):
Expand Down
4 changes: 3 additions & 1 deletion QEfficient/finetune/utils/dataset_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@
from QEfficient.finetune.data.sampler import DistributedLengthBasedBatchSampler
from QEfficient.finetune.dataset.dataset_config import DATALOADER_COLLATE_FUNC, DATASET_PREPROC
from QEfficient.finetune.utils.helper import get_world_size
from QEfficient.finetune.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("FT", loglevel="INFO")


def get_preprocessed_dataset(
Expand Down
4 changes: 3 additions & 1 deletion QEfficient/finetune/utils/plot_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@

import matplotlib.pyplot as plt

from QEfficient.finetune.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("FT", loglevel="INFO")


def plot_metric(data, metric_name, x_label, y_label, title, colors):
Expand Down
4 changes: 3 additions & 1 deletion QEfficient/generation/embedding_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@

from QEfficient.generation.cloud_infer import QAICInferenceSession
from QEfficient.utils import constants
from QEfficient.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("INFRA", loglevel="INFO")


class VisionHandler:
Expand Down
5 changes: 4 additions & 1 deletion QEfficient/generation/text_generation_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,11 @@
from QEfficient.generation.cloud_infer import QAICInferenceSession
from QEfficient.utils import padding_check_and_fix
from QEfficient.utils.constants import Constants
from QEfficient.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger
from QEfficient.utils.sampler_utils import validate_sampler_inputs

logger = QEFFLogger.get_logger("INFRA", loglevel="INFO")


@dataclass
class PerfMetrics:
Expand Down Expand Up @@ -1316,4 +1318,5 @@ def generate(
generated_ids=self._qaic_model.generated_ids,
perf_metrics=perf_metrics,
)
logger.info("Text Generated finised")
return latency_stats
4 changes: 3 additions & 1 deletion QEfficient/generation/vlm_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,9 @@
)
from QEfficient.utils import LRUCache
from QEfficient.utils.constants import Constants
from QEfficient.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("INFRA", loglevel="INFO")


class VisionLanguageGeneration(QEffTextGenerationBase):
Expand Down
4 changes: 2 additions & 2 deletions QEfficient/peft/auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
# ----------------------------------------------------------------------------

import hashlib
import logging
import warnings
from typing import List, Optional, Union

Expand All @@ -32,8 +31,9 @@
from QEfficient.utils import constants
from QEfficient.utils._utils import get_padding_shape_from_config
from QEfficient.utils.hash_utils import to_hashable
from QEfficient.utils.logging_utils import QEFFLogger

logger = logging.getLogger(__name__)
logger = QEFFLogger.get_logger("FT", loglevel="INFO")


class QEffAutoPeftModelForCausalLM(QEFFBaseModel):
Expand Down
4 changes: 3 additions & 1 deletion QEfficient/peft/lora/auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@
from QEfficient.peft.lora.pytorch_transforms import LoraModelInputsTransform, TargetModulesTransform
from QEfficient.utils import constants, get_padding_shape_from_config
from QEfficient.utils.hash_utils import to_hashable
from QEfficient.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("FT", loglevel="INFO")


class QEffAutoLoraModelForCausalLM(QEFFAutoModelForCausalLM):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@
from QEfficient.transformers.cache_utils import QEffHybridCacheForGPTOSS
from QEfficient.transformers.modeling_attn_mask_utils import _create_causal_mask
from QEfficient.utils.constants import MIN_MASKED_ATTENTION_VALUE
from QEfficient.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("MODEL", loglevel="INFO")


class QEffGptOssExperts(GptOssExperts):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@

from QEfficient.utils import constants
from QEfficient.utils._utils import IOInfo, get_padding_shape_from_config
from QEfficient.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("MODEL", loglevel="INFO")


class QEffInternEncoderWrapper(nn.Module):
Expand Down
4 changes: 3 additions & 1 deletion QEfficient/transformers/models/llava/modeling_llava.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@
)

from QEfficient.utils._utils import IOInfo
from QEfficient.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("MODEL", loglevel="INFO")

BS = 1
FBS = 4
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@

from QEfficient.utils import constants
from QEfficient.utils._utils import IOInfo
from QEfficient.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("MODEL", loglevel="INFO")

BS = constants.ONNX_EXPORT_EXAMPLE_BATCH_SIZE
FBS = constants.ONNX_EXPORT_EXAMPLE_FBS
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@

from QEfficient.utils import constants
from QEfficient.utils._utils import IOInfo, get_padding_shape_from_config
from QEfficient.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("MODEL", loglevel="INFO")


def custom_cumsum(tensor):
Expand Down
4 changes: 3 additions & 1 deletion QEfficient/transformers/models/modeling_auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,11 @@
get_padding_shape_from_config,
)
from QEfficient.utils.check_ccl_specializations import process_ccl_specializations
from QEfficient.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger
from QEfficient.utils.sampler_utils import get_sampling_inputs_and_outputs

logger = QEFFLogger.get_logger("MODEL", loglevel="INFO")


class QEFFTransformersBase(QEFFBaseModel):
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,9 @@
from QEfficient.utils import constants
from QEfficient.utils._utils import IOInfo, get_padding_shape_from_config
from QEfficient.utils.constants import MIN_MASKED_ATTENTION_VALUE
from QEfficient.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("MODEL", loglevel="INFO")


def qeff_apply_rotary_pos_emb(q, k, cos, sin, position_ids, mrope_section, unsqueeze_dim=1):
Expand Down
4 changes: 3 additions & 1 deletion QEfficient/transformers/quantizers/quantizer_awq.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@
replace_linear_layer_with_target_layer,
replace_quantization_scales,
)
from QEfficient.utils.logging_utils import logger
from QEfficient.utils.logging_utils import QEFFLogger

logger = QEFFLogger.get_logger("MODEL", loglevel="INFO")


class QEffAwqConfig(AwqConfig):
Expand Down
Loading
Loading