diff --git a/QEfficient/__init__.py b/QEfficient/__init__.py
index 3c9f68efd..e71bee177 100644
--- a/QEfficient/__init__.py
+++ b/QEfficient/__init__.py
@@ -35,7 +35,9 @@
 from QEfficient.peft import QEffAutoPeftModelForCausalLM
 from QEfficient.transformers.transform import transform
 from QEfficient.utils import custom_format_warning
-from QEfficient.utils.logging_utils import logger
+from QEfficient.utils.logging_utils import QEFFLogger
+
+logger = QEFFLogger.get_logger("INFRA", loglevel="INFO")
 
 # custom warning for the better logging experience
 warnings.formatwarning = custom_format_warning
diff --git a/QEfficient/base/modeling_qeff.py b/QEfficient/base/modeling_qeff.py
index b5c838a94..1155734aa 100644
--- a/QEfficient/base/modeling_qeff.py
+++ b/QEfficient/base/modeling_qeff.py
@@ -7,7 +7,6 @@
 import gc
 import inspect
-import logging
 import shutil
 import subprocess
 import warnings
 
@@ -35,8 +34,9 @@
     load_json,
 )
 from QEfficient.utils.export_utils import export_wrapper
+from QEfficient.utils.logging_utils import QEFFLogger
 
-logger = logging.getLogger(__name__)
+logger = QEFFLogger.get_logger("INFRA", loglevel="INFO")
 
 
 class QEFFBaseModel(ABC):
@@ -326,6 +326,7 @@ def _export(
             self.prefill_onnx_path = onnx_path
         else:
             self.onnx_path = onnx_path
+        logger.info("Model export finished; saved at: %s", onnx_path)
         return onnx_path
 
     def get_onnx_path(
@@ -539,4 +540,5 @@
         logger.info("Hashed parameters exported successfully.")
 
         self.qpc_path = qpc_path
+        logger.info("Model compilation finished; saved at: %s", qpc_path)
         return qpc_path
diff --git a/QEfficient/base/pytorch_transforms.py b/QEfficient/base/pytorch_transforms.py
index e503a057f..dd8d9c0b8 100644
--- a/QEfficient/base/pytorch_transforms.py
+++ b/QEfficient/base/pytorch_transforms.py
@@ -9,7 +9,9 @@
 
 from torch import nn
 
-from QEfficient.utils.logging_utils import logger
+from QEfficient.utils.logging_utils import QEFFLogger
+
+logger = QEFFLogger.get_logger("INFRA", loglevel="INFO")
 
 
 class PytorchTransform:
diff --git a/QEfficient/cloud/export.py b/QEfficient/cloud/export.py
index a5e0b6e19..48d10c700 100644
--- a/QEfficient/cloud/export.py
+++ b/QEfficient/cloud/export.py
@@ -12,7 +12,9 @@
 from QEfficient.base.common import QEFFCommonLoader
 from QEfficient.utils import check_and_assign_cache_dir
 from QEfficient.utils.custom_yaml import generate_custom_io
-from QEfficient.utils.logging_utils import logger
+from QEfficient.utils.logging_utils import QEFFLogger
+
+logger = QEFFLogger.get_logger("INFRA", loglevel="INFO")
 
 # Specifically for Docker images.
 ROOT_DIR = os.path.dirname(os.path.abspath(""))
diff --git a/QEfficient/cloud/finetune.py b/QEfficient/cloud/finetune.py
index 35ebbde32..3e67e3354 100644
--- a/QEfficient/cloud/finetune.py
+++ b/QEfficient/cloud/finetune.py
@@ -29,10 +29,12 @@
 from QEfficient.finetune.utils.dataset_utils import get_dataloader, get_longest_seq_length
 from QEfficient.finetune.utils.device_map import get_device_map
 from QEfficient.finetune.utils.helper import Task_Mode, get_world_size
-from QEfficient.finetune.utils.logging_utils import logger
 from QEfficient.finetune.utils.parser import get_finetune_parser
 from QEfficient.finetune.utils.train_utils import print_model_size, print_trainable_parameters, train
 from QEfficient.utils._utils import hf_download
+from QEfficient.utils.logging_utils import QEFFLogger
+
+logger = QEFFLogger.get_logger("FT", loglevel="INFO")
 
 # Try importing QAIC-specific module, proceed without it if unavailable
 try:
diff --git a/QEfficient/cloud/infer.py b/QEfficient/cloud/infer.py
index ef05d29ab..3a5e20ca6 100644
--- a/QEfficient/cloud/infer.py
+++ b/QEfficient/cloud/infer.py
@@ -17,7 +17,9 @@
 
 from QEfficient.base.common import QEFFCommonLoader
 from QEfficient.utils import check_and_assign_cache_dir, load_hf_processor, load_hf_tokenizer
-from QEfficient.utils.logging_utils import logger
+from QEfficient.utils.logging_utils import QEFFLogger
+
+logger = QEFFLogger.get_logger("INFRA", loglevel="INFO")
 
 # TODO: Remove after adding support for VLM's compile and execute
 
diff --git a/QEfficient/compile/compile_helper.py b/QEfficient/compile/compile_helper.py
index 5de21f876..bf83013f4 100644
--- a/QEfficient/compile/compile_helper.py
+++ b/QEfficient/compile/compile_helper.py
@@ -15,7 +15,9 @@
 from QEfficient.compile.qnn_compiler import compile as qnn_compile
 from QEfficient.utils import constants
 from QEfficient.utils._utils import load_json, load_yaml
-from QEfficient.utils.logging_utils import logger
+from QEfficient.utils.logging_utils import QEFFLogger
+
+logger = QEFFLogger.get_logger("INFRA", loglevel="INFO")
 
 
 def create_and_dump_specializations(
diff --git a/QEfficient/compile/qnn_compiler.py b/QEfficient/compile/qnn_compiler.py
index e2ec20364..9fcadb6d0 100644
--- a/QEfficient/compile/qnn_compiler.py
+++ b/QEfficient/compile/qnn_compiler.py
@@ -18,7 +18,9 @@
     generate_qnn_specialization,
 )
 from QEfficient.utils.hash_utils import to_hashable
-from QEfficient.utils.logging_utils import logger
+from QEfficient.utils.logging_utils import QEFFLogger
+
+logger = QEFFLogger.get_logger("INFRA", loglevel="INFO")
 
 
 class QNN:
diff --git a/QEfficient/diffusers/models/transformers/transformer_flux.py b/QEfficient/diffusers/models/transformers/transformer_flux.py
index 40b7e3e7e..c76ed2e29 100644
--- a/QEfficient/diffusers/models/transformers/transformer_flux.py
+++ b/QEfficient/diffusers/models/transformers/transformer_flux.py
@@ -18,8 +18,10 @@
     _get_qkv_projections,
 )
 
 from QEfficient.diffusers.models.modeling_utils import compute_blocked_attention, get_attention_blocking_config
-from QEfficient.utils.logging_utils import logger
+from QEfficient.utils.logging_utils import QEFFLogger
+
+logger = QEFFLogger.get_logger("MODEL", loglevel="INFO")
 
 
 def qeff_apply_rotary_emb(
diff --git a/QEfficient/diffusers/pipelines/flux/pipeline_flux.py b/QEfficient/diffusers/pipelines/flux/pipeline_flux.py
index eeb260c53..52401eae9 100644
--- a/QEfficient/diffusers/pipelines/flux/pipeline_flux.py
+++ b/QEfficient/diffusers/pipelines/flux/pipeline_flux.py
@@ -38,7 +38,9 @@
     set_module_device_ids,
 )
 from QEfficient.generation.cloud_infer import QAICInferenceSession
-from QEfficient.utils.logging_utils import logger
+from QEfficient.utils.logging_utils import QEFFLogger
+
+logger = QEFFLogger.get_logger("MODEL", loglevel="INFO")
 
 
 class QEffFluxPipeline:
diff --git a/QEfficient/diffusers/pipelines/pipeline_utils.py b/QEfficient/diffusers/pipelines/pipeline_utils.py
index 135a6bd07..71ba9da4d 100644
--- a/QEfficient/diffusers/pipelines/pipeline_utils.py
+++ b/QEfficient/diffusers/pipelines/pipeline_utils.py
@@ -18,7 +18,9 @@
 from tqdm import tqdm
 
 from QEfficient.utils._utils import load_json
-from QEfficient.utils.logging_utils import logger
+from QEfficient.utils.logging_utils import QEFFLogger
+
+logger = QEFFLogger.get_logger("MODEL", loglevel="INFO")
 
 
 def calculate_compressed_latent_dimension(height: int, width: int, vae_scale_factor: int) -> int:
diff --git a/QEfficient/exporter/export_hf_to_cloud_ai_100.py b/QEfficient/exporter/export_hf_to_cloud_ai_100.py
index 2547d9db3..73f9d00cf 100644
--- a/QEfficient/exporter/export_hf_to_cloud_ai_100.py
+++ b/QEfficient/exporter/export_hf_to_cloud_ai_100.py
@@ -20,7 +20,9 @@
 from QEfficient.utils import load_hf_tokenizer
 from QEfficient.utils.constants import QEFF_MODELS_DIR, Constants
 from QEfficient.utils.generate_inputs import InputHandler
-from QEfficient.utils.logging_utils import logger
+from QEfficient.utils.logging_utils import QEFFLogger
+
+logger = QEFFLogger.get_logger("INFRA", loglevel="INFO")
 
 
 def convert_to_cloud_bertstyle(
diff --git a/QEfficient/finetune/dataset/alpaca_dataset.py b/QEfficient/finetune/dataset/alpaca_dataset.py
index ff44860eb..f25fa9767 100644
--- a/QEfficient/finetune/dataset/alpaca_dataset.py
+++ b/QEfficient/finetune/dataset/alpaca_dataset.py
@@ -11,7 +11,9 @@
 import torch
 from torch.utils.data import Dataset
 
-from QEfficient.finetune.utils.logging_utils import logger
+from QEfficient.utils.logging_utils import QEFFLogger
+
+logger = QEFFLogger.get_logger("FT", loglevel="INFO")
 
 PROMPT_DICT = {
     "prompt_input": (
diff --git a/QEfficient/finetune/dataset/custom_dataset.py b/QEfficient/finetune/dataset/custom_dataset.py
index ef76e83ed..2a98a3ca4 100644
--- a/QEfficient/finetune/dataset/custom_dataset.py
+++ b/QEfficient/finetune/dataset/custom_dataset.py
@@ -9,7 +9,9 @@
 import logging
 from pathlib import Path
 
-from QEfficient.finetune.utils.logging_utils import logger
+from QEfficient.utils.logging_utils import QEFFLogger
+
+logger = QEFFLogger.get_logger("FT", loglevel="INFO")
 
 
 def load_module_from_py_file(py_file: str) -> object:
diff --git a/QEfficient/finetune/dataset/grammar_dataset.py b/QEfficient/finetune/dataset/grammar_dataset.py
index 8fb3eb152..4a2e4658d 100644
--- a/QEfficient/finetune/dataset/grammar_dataset.py
+++ b/QEfficient/finetune/dataset/grammar_dataset.py
@@ -10,7 +10,9 @@
 from datasets import load_dataset
 from torch.utils.data import Dataset
 
-from QEfficient.finetune.utils.logging_utils import logger
+from QEfficient.utils.logging_utils import QEFFLogger
+
+logger = QEFFLogger.get_logger("FT", loglevel="INFO")
 
 
 class grammar(Dataset):
diff --git a/QEfficient/finetune/utils/config_utils.py b/QEfficient/finetune/utils/config_utils.py
index 0c8b3d827..89d1753d9 100644
--- a/QEfficient/finetune/utils/config_utils.py
+++ b/QEfficient/finetune/utils/config_utils.py
@@ -20,7 +20,9 @@
 from QEfficient.finetune.configs.training import TrainConfig
 from QEfficient.finetune.dataset.dataset_config import DATASET_PREPROC
 from QEfficient.finetune.utils.helper import Peft_Method
-from QEfficient.finetune.utils.logging_utils import logger
+from QEfficient.utils.logging_utils import QEFFLogger
+
+logger = QEFFLogger.get_logger("FT", loglevel="INFO")
 
 
 def update_config(config, **kwargs):
diff --git a/QEfficient/finetune/utils/dataset_utils.py b/QEfficient/finetune/utils/dataset_utils.py
index 01c1e32aa..9e15a7322 100644
--- a/QEfficient/finetune/utils/dataset_utils.py
+++ b/QEfficient/finetune/utils/dataset_utils.py
@@ -16,7 +16,9 @@
 from QEfficient.finetune.data.sampler import DistributedLengthBasedBatchSampler
 from QEfficient.finetune.dataset.dataset_config import DATALOADER_COLLATE_FUNC, DATASET_PREPROC
 from QEfficient.finetune.utils.helper import get_world_size
-from QEfficient.finetune.utils.logging_utils import logger
+from QEfficient.utils.logging_utils import QEFFLogger
+
+logger = QEFFLogger.get_logger("FT", loglevel="INFO")
 
 
 def get_preprocessed_dataset(
diff --git a/QEfficient/finetune/utils/plot_metrics.py b/QEfficient/finetune/utils/plot_metrics.py
index 1e22bc6a8..4ea307dff 100644
--- a/QEfficient/finetune/utils/plot_metrics.py
+++ b/QEfficient/finetune/utils/plot_metrics.py
@@ -11,7 +11,9 @@
 
 import matplotlib.pyplot as plt
 
-from QEfficient.finetune.utils.logging_utils import logger
+from QEfficient.utils.logging_utils import QEFFLogger
+
+logger = QEFFLogger.get_logger("FT", loglevel="INFO")
 
 
 def plot_metric(data, metric_name, x_label, y_label, title, colors):
diff --git a/QEfficient/generation/embedding_handler.py b/QEfficient/generation/embedding_handler.py
index e07b5dd04..c33906b61 100644
--- a/QEfficient/generation/embedding_handler.py
+++ b/QEfficient/generation/embedding_handler.py
@@ -23,7 +23,9 @@
 
 from QEfficient.generation.cloud_infer import QAICInferenceSession
 from QEfficient.utils import constants
-from QEfficient.utils.logging_utils import logger
+from QEfficient.utils.logging_utils import QEFFLogger
+
+logger = QEFFLogger.get_logger("INFRA", loglevel="INFO")
 
 
 class VisionHandler:
diff --git a/QEfficient/generation/text_generation_inference.py b/QEfficient/generation/text_generation_inference.py
index de10c9b88..0103beec3 100755
--- a/QEfficient/generation/text_generation_inference.py
+++ b/QEfficient/generation/text_generation_inference.py
@@ -19,9 +19,11 @@
 from QEfficient.generation.cloud_infer import QAICInferenceSession
 from QEfficient.utils import padding_check_and_fix
 from QEfficient.utils.constants import Constants
-from QEfficient.utils.logging_utils import logger
+from QEfficient.utils.logging_utils import QEFFLogger
 from QEfficient.utils.sampler_utils import validate_sampler_inputs
 
+logger = QEFFLogger.get_logger("INFRA", loglevel="INFO")
+
 
 @dataclass
 class PerfMetrics:
@@ -1316,4 +1318,5 @@ def generate(
             generated_ids=self._qaic_model.generated_ids,
             perf_metrics=perf_metrics,
         )
+        logger.info("Text generation finished")
         return latency_stats
diff --git a/QEfficient/generation/vlm_generation.py b/QEfficient/generation/vlm_generation.py
index adacc373e..3dfd68efb 100644
--- a/QEfficient/generation/vlm_generation.py
+++ b/QEfficient/generation/vlm_generation.py
@@ -37,7 +37,9 @@
 )
 from QEfficient.utils import LRUCache
 from QEfficient.utils.constants import Constants
-from QEfficient.utils.logging_utils import logger
+from QEfficient.utils.logging_utils import QEFFLogger
+
+logger = QEFFLogger.get_logger("INFRA", loglevel="INFO")
 
 
 class VisionLanguageGeneration(QEffTextGenerationBase):
diff --git a/QEfficient/peft/auto.py b/QEfficient/peft/auto.py
index 6c7173072..28c19601e 100644
--- a/QEfficient/peft/auto.py
+++ b/QEfficient/peft/auto.py
@@ -6,7 +6,6 @@
 # ----------------------------------------------------------------------------
 
 import hashlib
-import logging
 import warnings
 from typing import List, Optional, Union
 
@@ -32,8 +31,9 @@
 from QEfficient.utils import constants
 from QEfficient.utils._utils import get_padding_shape_from_config
 from QEfficient.utils.hash_utils import to_hashable
+from QEfficient.utils.logging_utils import QEFFLogger
 
-logger = logging.getLogger(__name__)
+logger = QEFFLogger.get_logger("FT", loglevel="INFO")
 
 
 class QEffAutoPeftModelForCausalLM(QEFFBaseModel):
diff --git a/QEfficient/peft/lora/auto.py b/QEfficient/peft/lora/auto.py
index 8ff8335f5..07f61c1e9 100644
--- a/QEfficient/peft/lora/auto.py
+++ b/QEfficient/peft/lora/auto.py
@@ -19,7 +19,9 @@
 from QEfficient.peft.lora.pytorch_transforms import LoraModelInputsTransform, TargetModulesTransform
 from QEfficient.utils import constants, get_padding_shape_from_config
 from QEfficient.utils.hash_utils import to_hashable
-from QEfficient.utils.logging_utils import logger
+from QEfficient.utils.logging_utils import QEFFLogger
+
+logger = QEFFLogger.get_logger("FT", loglevel="INFO")
 
 
 class QEffAutoLoraModelForCausalLM(QEFFAutoModelForCausalLM):
diff --git a/QEfficient/transformers/models/gpt_oss/modeling_gpt_oss.py b/QEfficient/transformers/models/gpt_oss/modeling_gpt_oss.py
index 3efe890b8..48eaf6f54 100644
--- a/QEfficient/transformers/models/gpt_oss/modeling_gpt_oss.py
+++ b/QEfficient/transformers/models/gpt_oss/modeling_gpt_oss.py
@@ -33,7 +33,9 @@
 from QEfficient.transformers.cache_utils import QEffHybridCacheForGPTOSS
 from QEfficient.transformers.modeling_attn_mask_utils import _create_causal_mask
 from QEfficient.utils.constants import MIN_MASKED_ATTENTION_VALUE
-from QEfficient.utils.logging_utils import logger
+from QEfficient.utils.logging_utils import QEFFLogger
+
+logger = QEFFLogger.get_logger("MODEL", loglevel="INFO")
 
 
 class QEffGptOssExperts(GptOssExperts):
diff --git a/QEfficient/transformers/models/internvl/modeling_internvl.py b/QEfficient/transformers/models/internvl/modeling_internvl.py
index b47db7eda..2b30c2822 100644
--- a/QEfficient/transformers/models/internvl/modeling_internvl.py
+++ b/QEfficient/transformers/models/internvl/modeling_internvl.py
@@ -13,7 +13,9 @@
 
 from QEfficient.utils import constants
 from QEfficient.utils._utils import IOInfo, get_padding_shape_from_config
-from QEfficient.utils.logging_utils import logger
+from QEfficient.utils.logging_utils import QEFFLogger
+
+logger = QEFFLogger.get_logger("MODEL", loglevel="INFO")
 
 
 class QEffInternEncoderWrapper(nn.Module):
diff --git a/QEfficient/transformers/models/llava/modeling_llava.py b/QEfficient/transformers/models/llava/modeling_llava.py
index abdb77ea5..ec8c7909d 100644
--- a/QEfficient/transformers/models/llava/modeling_llava.py
+++ b/QEfficient/transformers/models/llava/modeling_llava.py
@@ -15,7 +15,9 @@
 )
 
 from QEfficient.utils._utils import IOInfo
-from QEfficient.utils.logging_utils import logger
+from QEfficient.utils.logging_utils import QEFFLogger
+
+logger = QEFFLogger.get_logger("MODEL", loglevel="INFO")
 
 BS = 1
 FBS = 4
diff --git a/QEfficient/transformers/models/llava_next/modeling_llava_next.py b/QEfficient/transformers/models/llava_next/modeling_llava_next.py
index 627f7393e..3fe5d03bd 100755
--- a/QEfficient/transformers/models/llava_next/modeling_llava_next.py
+++ b/QEfficient/transformers/models/llava_next/modeling_llava_next.py
@@ -18,7 +18,9 @@
 
 from QEfficient.utils import constants
 from QEfficient.utils._utils import IOInfo
-from QEfficient.utils.logging_utils import logger
+from QEfficient.utils.logging_utils import QEFFLogger
+
+logger = QEFFLogger.get_logger("MODEL", loglevel="INFO")
 
 BS = constants.ONNX_EXPORT_EXAMPLE_BATCH_SIZE
 FBS = constants.ONNX_EXPORT_EXAMPLE_FBS
diff --git a/QEfficient/transformers/models/mistral3/modeling_mistral3.py b/QEfficient/transformers/models/mistral3/modeling_mistral3.py
index d2149b6bd..a277ce67c 100644
--- a/QEfficient/transformers/models/mistral3/modeling_mistral3.py
+++ b/QEfficient/transformers/models/mistral3/modeling_mistral3.py
@@ -21,7 +21,9 @@
 
 from QEfficient.utils import constants
 from QEfficient.utils._utils import IOInfo, get_padding_shape_from_config
-from QEfficient.utils.logging_utils import logger
+from QEfficient.utils.logging_utils import QEFFLogger
+
+logger = QEFFLogger.get_logger("MODEL", loglevel="INFO")
 
 
 def custom_cumsum(tensor):
diff --git a/QEfficient/transformers/models/modeling_auto.py b/QEfficient/transformers/models/modeling_auto.py
index 236f6c9f5..4dd37c18c 100644
--- a/QEfficient/transformers/models/modeling_auto.py
+++ b/QEfficient/transformers/models/modeling_auto.py
@@ -69,9 +69,11 @@
     get_padding_shape_from_config,
 )
 from QEfficient.utils.check_ccl_specializations import process_ccl_specializations
-from QEfficient.utils.logging_utils import logger
+from QEfficient.utils.logging_utils import QEFFLogger
 from QEfficient.utils.sampler_utils import get_sampling_inputs_and_outputs
 
+logger = QEFFLogger.get_logger("MODEL", loglevel="INFO")
+
 
 class QEFFTransformersBase(QEFFBaseModel):
     """
diff --git a/QEfficient/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py b/QEfficient/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py
index 21d2e026e..6c5cd854d 100644
--- a/QEfficient/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py
+++ b/QEfficient/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py
@@ -38,7 +38,9 @@
 from QEfficient.utils import constants
 from QEfficient.utils._utils import IOInfo, get_padding_shape_from_config
 from QEfficient.utils.constants import MIN_MASKED_ATTENTION_VALUE
-from QEfficient.utils.logging_utils import logger
+from QEfficient.utils.logging_utils import QEFFLogger
+
+logger = QEFFLogger.get_logger("MODEL", loglevel="INFO")
 
 
 def qeff_apply_rotary_pos_emb(q, k, cos, sin, position_ids, mrope_section, unsqueeze_dim=1):
diff --git a/QEfficient/transformers/quantizers/quantizer_awq.py b/QEfficient/transformers/quantizers/quantizer_awq.py
index ef8a03521..0968bdd27 100644
--- a/QEfficient/transformers/quantizers/quantizer_awq.py
+++ b/QEfficient/transformers/quantizers/quantizer_awq.py
@@ -15,7 +15,9 @@
     replace_linear_layer_with_target_layer,
     replace_quantization_scales,
 )
-from QEfficient.utils.logging_utils import logger
+from QEfficient.utils.logging_utils import QEFFLogger
+
+logger = QEFFLogger.get_logger("MODEL", loglevel="INFO")
 
 
 class QEffAwqConfig(AwqConfig):
diff --git a/QEfficient/transformers/quantizers/quantizer_compressed_tensors.py b/QEfficient/transformers/quantizers/quantizer_compressed_tensors.py
index e7e14166d..86d7ae139 100644
--- a/QEfficient/transformers/quantizers/quantizer_compressed_tensors.py
+++ b/QEfficient/transformers/quantizers/quantizer_compressed_tensors.py
@@ -14,7 +14,9 @@
 from transformers.utils.quantization_config import CompressedTensorsConfig, QuantizationConfigMixin, QuantizationMethod
 
 from QEfficient.transformers.quantizers.quantizer_utils import get_keys_to_not_convert
-from QEfficient.utils.logging_utils import logger
+from QEfficient.utils.logging_utils import QEFFLogger
+
+logger = QEFFLogger.get_logger("MODEL", loglevel="INFO")
 
 FP8_DTYPE = torch.float8_e4m3fn
diff --git a/QEfficient/transformers/quantizers/quantizer_gptq.py b/QEfficient/transformers/quantizers/quantizer_gptq.py
index 8a0bea1a2..073f370e1 100644
--- a/QEfficient/transformers/quantizers/quantizer_gptq.py
+++ b/QEfficient/transformers/quantizers/quantizer_gptq.py
@@ -15,7 +15,9 @@
     repack_zeros,
     replace_linear_layer_with_target_layer,
 )
-from QEfficient.utils.logging_utils import logger
+from QEfficient.utils.logging_utils import QEFFLogger
+
+logger = QEFFLogger.get_logger("MODEL", loglevel="INFO")
 
 
 class QEffGPTQConfig(GPTQConfig):
diff --git a/QEfficient/transformers/quantizers/quantizer_mxfp4.py b/QEfficient/transformers/quantizers/quantizer_mxfp4.py
index 2ffba1bea..bfbfe473e 100644
--- a/QEfficient/transformers/quantizers/quantizer_mxfp4.py
+++ b/QEfficient/transformers/quantizers/quantizer_mxfp4.py
@@ -14,7 +14,9 @@
 from transformers.utils.quantization_config import Mxfp4Config
 
 from QEfficient.transformers.quantizers.quantizer_utils import convert_moe_packed_tensors, get_keys_to_not_convert
-from QEfficient.utils.logging_utils import logger
+from QEfficient.utils.logging_utils import QEFFLogger
+
+logger = QEFFLogger.get_logger("MODEL", loglevel="INFO")
 
 
 class QEffMxfp4GptOssExperts(nn.Module):
diff --git a/QEfficient/transformers/transform.py b/QEfficient/transformers/transform.py
index 11d7c1dfd..ecbc16a38 100644
--- a/QEfficient/transformers/transform.py
+++ b/QEfficient/transformers/transform.py
@@ -13,7 +13,9 @@
 from QEfficient.base.modeling_qeff import QEFFBaseModel
 from QEfficient.transformers.cache_utils import QEffDynamicCache
 from QEfficient.transformers.modeling_utils import TransformersToQEffModulesDict
-from QEfficient.utils.logging_utils import logger
+from QEfficient.utils.logging_utils import QEFFLogger
+
+logger = QEFFLogger.get_logger("INFRA", loglevel="INFO")
 
 
 def replace_module_with_qeff_layers(model: nn.Module) -> None:
diff --git a/QEfficient/utils/_utils.py b/QEfficient/utils/_utils.py
index 26bae7a34..21d60fa1c 100644
--- a/QEfficient/utils/_utils.py
+++ b/QEfficient/utils/_utils.py
@@ -28,7 +28,9 @@
 
 from QEfficient.utils.constants import KWARGS_INCLUSION_LIST, QEFF_MODELS_DIR, Constants, QnnConstants
 from QEfficient.utils.hash_utils import json_serializable
-from QEfficient.utils.logging_utils import logger
+from QEfficient.utils.logging_utils import QEFFLogger
+
+logger = QEFFLogger.get_logger("INFRA", loglevel="INFO")
 
 
 class LRUCache:
diff --git a/QEfficient/utils/device_utils.py b/QEfficient/utils/device_utils.py
index a76dfae8a..72e5b423e 100644
--- a/QEfficient/utils/device_utils.py
+++ b/QEfficient/utils/device_utils.py
@@ -10,7 +10,9 @@
 import subprocess
 
 from QEfficient.utils.constants import Constants
-from QEfficient.utils.logging_utils import logger
+from QEfficient.utils.logging_utils import QEFFLogger
+
+logger = QEFFLogger.get_logger("INFRA", loglevel="INFO")
 
 
 def is_networks_loaded(stdout):
diff --git a/QEfficient/utils/export_utils.py b/QEfficient/utils/export_utils.py
index 33ba694cf..140a961e2 100644
--- a/QEfficient/utils/export_utils.py
+++ b/QEfficient/utils/export_utils.py
@@ -17,9 +17,11 @@
 from QEfficient.transformers.models.pytorch_transforms import get_decoder_layer_classes_for_export
 from QEfficient.utils.cache import QEFF_HOME
 from QEfficient.utils.hash_utils import create_export_hash
-from QEfficient.utils.logging_utils import logger
+from QEfficient.utils.logging_utils import QEFFLogger
 from QEfficient.utils.torch_patches import apply_torch_patches, undo_torch_patches
 
+logger = QEFFLogger.get_logger("INFRA", loglevel="INFO")
+
 
 def export_wrapper(func):
     """
diff --git a/QEfficient/utils/logging_utils.py b/QEfficient/utils/logging_utils.py
index d2086d830..f2659dff6 100644
--- a/QEfficient/utils/logging_utils.py
+++ b/QEfficient/utils/logging_utils.py
@@ -5,54 +5,332 @@
 #
 # -----------------------------------------------------------------------------
 
+import json
 import logging
+import os
+import queue
+import threading
+from datetime import datetime
+from logging.handlers import RotatingFileHandler
+from typing import Any, Dict, List, Optional
+
+from tabulate import tabulate
 
-class QEffFormatter(logging.Formatter):
-    """
-    Formatter class used to set colors for printing different logging levels of messages on console.
+
+class JSONNamespaceFormatter(logging.Formatter):
     """
+    Custom formatter to output log records in JSON format with metadata.
+
+    Methods:
+        format(record): Formats a log record into a JSON string.
 
-    cyan: str = "\x1b[38;5;14m"
-    yellow: str = "\x1b[33;20m"
-    red: str = "\x1b[31;20m"
-    bold_red: str = "\x1b[31;1m"
-    reset: str = "\x1b[0m"
-    common_format: str = "%(levelname)s - %(name)s - %(message)s"  # type: ignore
-    format_with_line_info = "%(levelname)s - %(name)s - %(message)s (%(filename)s:%(lineno)d)"  # type: ignore
-
-    FORMATS = {
-        logging.DEBUG: cyan + format_with_line_info + reset,
-        logging.INFO: cyan + common_format + reset,
-        logging.WARNING: yellow + common_format + reset,
-        logging.ERROR: red + format_with_line_info + reset,
-        logging.CRITICAL: bold_red + format_with_line_info + reset,
-    }
+    Parameters:
+        record (logging.LogRecord): The log record to format.
+
+    Returns:
+        str: JSON-formatted log string.
+    """
 
     def format(self, record):
+        log_record = {
+            "date": datetime.fromtimestamp(record.created).strftime("%Y-%m-%d"),
+            "time": datetime.fromtimestamp(record.created).strftime("%H:%M:%S"),
+            "level": record.levelname,
+            "namespace": getattr(record, "namespace", "default"),
+            "file": record.filename,
+            "line": record.lineno,
+            "message": record.getMessage(),
+        }
+        return json.dumps(log_record)
+
+
+class QEFFLoggerThread(threading.Thread):
+    """
+    Background thread to handle logging asynchronously using a queue.
+
+    Attributes:
+        logger (logging.Logger): Logger instance to handle log records.
+        log_queue (queue.Queue): Queue from which log records are consumed.
+        running (bool): Flag to control thread execution.
+    """
+
+    def __init__(self, logger, log_queue):
+        """
+        Initialize the logging thread.
+
+        Parameters:
+            logger (logging.Logger): Logger instance.
+            log_queue (queue.Queue): Queue for log records.
+        """
+        super().__init__(daemon=True)
+        self.logger = logger
+        self.log_queue = log_queue
+        self.running = True
+
+    def run(self):
+        """
+        Continuously process log records from the queue and pass them to the logger.
+        """
+        while self.running:
+            try:
+                record = self.log_queue.get(timeout=1)
+                self.logger.handle(record)
+            except queue.Empty:
+                continue
+
+    def stop(self):
         """
-        Overriding the base class method to Choose format based on log level.
+        Stop the logging thread gracefully.
         """
-        log_fmt = self.FORMATS.get(record.levelno)
-        formatter = logging.Formatter(log_fmt)
-        return formatter.format(record)
+        self.running = False
 
 
-def create_logger() -> logging.Logger:
+class QEFFLogger:
     """
-    Creates a logger object with Colored QEffFormatter.
+    Singleton logger class for structured logging with namespace support.
+
+    Class Attributes:
+        _instance (Optional[logging.Logger]): Singleton logger instance.
+        _logfile (Optional[str]): Path to the log file.
+        _log_queue (queue.Queue): Queue for asynchronous logging.
+        _logger_thread (Optional[QEFFLoggerThread]): Background logging thread.
     """
-    logger = logging.getLogger("QEfficient")
-    # create console handler and set level to debug
-    ch = logging.StreamHandler()
-    ch.setLevel(logging.INFO)
-    # define formatter
-    ch.setFormatter(QEffFormatter())
 
+    _instance: Optional[logging.Logger] = None
+    _logfile: Optional[str] = None
+    _log_queue: queue.Queue = queue.Queue()
+    _logger_thread: Optional[QEFFLoggerThread] = None
+
+    def __init__(self, loglevel: Optional[str] = "INFO", log_path: Optional[str] = None):
+        """
+        Initialize the logger instance with the specified log level and path.
+
+        Parameters:
+            loglevel (str): Logging level (e.g., "INFO", "DEBUG").
+            log_path (str): Optional path to the log file.
+        """
+        if QEFFLogger._instance is None:
+            self.loglevel = loglevel
+            self.log_path = log_path
+            self.logger = self._initialize_logger()
+            QEFFLogger._instance = self.logger
+            QEFFLogger._logger_thread = QEFFLoggerThread(self.logger, QEFFLogger._log_queue)
+            QEFFLogger._logger_thread.start()
+
+    def _initialize_logger(self) -> logging.Logger:
+        """
+        Set up the logger with a rotating file handler and JSON formatter.
+
+        Returns:
+            logging.Logger: Configured logger instance.
+        """
+        if self.log_path is None:
+            log_dir = os.path.expanduser("~/.cache/qefficient_logs")
+            os.makedirs(log_dir, exist_ok=True)
+            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+            self.log_path = os.path.join(log_dir, f"QEFF_{timestamp}.log")
+
+        QEFFLogger._logfile = self.log_path
+
+        numeric_level = getattr(logging, self.loglevel.upper(), None)
+        if not isinstance(numeric_level, int):
+            raise ValueError(f"Invalid log level: {self.loglevel}")
+
+        logger = logging.getLogger("QEFF_LOGGER")
+        logger.setLevel(numeric_level)
+
+        if not logger.handlers:
+            handler = RotatingFileHandler(self.log_path, maxBytes=5 * 1024 * 1024, backupCount=10)
+            handler.setFormatter(JSONNamespaceFormatter())
+            logger.addHandler(handler)
+
+        return logger
+
+    @classmethod
+    def get_logger(
+        cls, namespace: str, loglevel: Optional[str] = "INFO", log_path: Optional[str] = None
+    ) -> logging.LoggerAdapter:
+        """
+        Retrieve a logger adapter with a specific namespace.
+
+        Parameters:
+            namespace (str): Logical grouping for the log.
+            loglevel (str): Logging level; applied only on first initialization.
+            log_path (str): Optional path to the log file; applied only on first initialization.
+
+        Returns:
+            logging.LoggerAdapter: Logger adapter bound to the namespace.
+        """
+        if cls._instance is None:
+            cls(loglevel, log_path)
+        return logging.LoggerAdapter(cls._instance, {"namespace": namespace})
+
+    @classmethod
+    def log(cls, level: str, namespace: str, msg: str, fn: str = "", lno: int = 0, func: str = ""):
+        """
+        Log a message with the specified level and metadata.
+
+        Parameters:
+            level (str): Logging level (e.g., "INFO", "ERROR").
+            namespace (str): Logical grouping for the log.
+            msg (str): Log message.
+            fn (str): Filename where the log is generated.
+            lno (int): Line number in the file.
+            func (str): Function name.
+        """
+        if cls._instance is None:
+            raise RuntimeError("Logger has not been initialized. Call get_logger() first.")
+
+        level_num = getattr(logging, level.upper(), None)
+        if not isinstance(level_num, int):
+            raise ValueError(f"Invalid log level: {level}")
+
+        record = cls._instance.makeRecord(
+            name="QEFF_LOGGER",
+            level=level_num,
+            fn=fn,
+            lno=lno,
+            msg=msg,
+            args=(),
+            exc_info=None,
+            func=func,
+            extra={"namespace": namespace},
+        )
+        cls._log_queue.put(record)
+
+    @classmethod
+    def set_loglevel(cls, loglevel: Optional[str] = "INFO"):
+        """
+        Update the log level of the logger.
+
+        Parameters:
+            loglevel (str): New log level to set.
+        """
+        if cls._instance is None:
+            raise RuntimeError("Logger has not been initialized yet. Call get_logger() first.")
+
+        numeric_level = getattr(logging, loglevel.upper(), None)
+        if not isinstance(numeric_level, int):
+            raise ValueError(f"Invalid log level: {loglevel}")
+
+        cls._instance.setLevel(numeric_level)
+
+    @classmethod
+    def close_logger(cls):
+        """
+        Gracefully shut down the logger and its thread.
+        """
+        if cls._logger_thread:
+            cls._logger_thread.stop()
+            cls._logger_thread.join()
+            cls._logger_thread = None
+
+        if cls._instance:
+            handlers = cls._instance.handlers[:]
+            for handler in handlers:
+                handler.close()
+                cls._instance.removeHandler(handler)
+            cls._instance = None
+            cls._logfile = None
+
+    @classmethod
+    def _parse_dt(cls, date_str: str, time_str: str) -> datetime:
+        """Parse 'YYYY-MM-DD' and 'HH:MM:SS' into a datetime."""
+        return datetime.strptime(f"{date_str} {time_str}", "%Y-%m-%d %H:%M:%S")
+
+    @classmethod
+    def print_table(cls) -> None:
+        """
+        Parse the line-delimited JSON log in cls._logfile and print a timing table with t1 as the baseline (0.0 s):
+          - Model Loading     : t2 - t1
+          - Model Exporting   : t3 - t2
+          - Model Compilation : t4 - t3
+          - Text Generation   : t5 - t4
+          - Total Time        : t5 - t1
+
+        Milestones (matched against the pipeline's log messages):
+          t1: first log line timestamp (baseline)
+          t2: "PyTorch export successful"
+          t3: "Transformed ONNX saved"
+          t4: "Model compilation finished; saved"
+          t5: "Text generation finished"
+        If t5 is missing, fall back to "specialization_file_path" as the readiness marker.
+ """ + path = cls._logfile + if not path: + raise FileNotFoundError("Log file path is not set (cls._logfile is None).") + if not os.path.exists(path): + raise FileNotFoundError(f"Log file does not exist: {path}") + + t_start: Optional[datetime] = None + t_export_done: Optional[datetime] = None + t_onnx_saved: Optional[datetime] = None + t_compile_done: Optional[datetime] = None + t_text_done: Optional[datetime] = None + t_text_ready: Optional[datetime] = None + + with open(path, "r", encoding="utf-8") as f: + for line in f: + line = line.strip() + if not line: + continue + try: + rec: Dict[str, Any] = json.loads(line) + except json.JSONDecodeError: + continue + + date_str = rec.get("date") + time_str = rec.get("time") + msg = rec.get("message", "") + if not date_str or not time_str: + continue + + ts = cls._parse_dt(date_str, time_str) + + if t_start is None: + t_start = ts + + if ("PyTorch export successful" in msg) and (t_export_done is None): + t_export_done = ts + + if ("Transformed ONNX saved" in msg) and (t_onnx_saved is None): + t_onnx_saved = ts + + if ("Model compilation is finished and saved" in msg) and (t_compile_done is None): + t_compile_done = ts + + if ("Text Generated finised" in msg) and (t_text_done is None): + t_text_done = ts + + if ("specialization_file_path" in msg) and (t_text_ready is None): + t_text_ready = ts + + if t_start is None: + raise ValueError("Could not determine start time (no valid log lines with date/time).") + + if t_text_done is None: + t_text_done = t_text_ready + + t_export_done = t_export_done or t_start + t_onnx_saved = t_onnx_saved or t_export_done + t_compile_done = t_compile_done or t_onnx_saved + t_text_done = t_text_done or t_compile_done + + def to_offset_seconds(t: datetime) -> float: + return (t - t_start).total_seconds() - logger.addHandler(ch) - return logger + o1 = 0.0 + o2 = to_offset_seconds(t_export_done) + o3 = to_offset_seconds(t_onnx_saved) + o4 = to_offset_seconds(t_compile_done) + o5 = to_offset_seconds(t_text_done) + timing_data: List[List[Any]] = [ + ["Model Loading", max(0.0, o2 - o1)], + ["Model Exporting", max(0.0, o3 - o2)], + ["Model Compilation", max(0.0, o4 - o3)], + ["Text Generation", max(0.0, o5 - o4)], + ["Total Time", max(0.0, o5 - o1)], + ] -# Define the logger object that can be used for logging purposes throughout the module. 
-logger = create_logger()
+        print(tabulate(timing_data, headers=["Step", "Time (s)"], tablefmt="github", floatfmt=".3f"))
diff --git a/QEfficient/utils/sampler_utils.py b/QEfficient/utils/sampler_utils.py
index 82a0843bc..847266ae8 100644
--- a/QEfficient/utils/sampler_utils.py
+++ b/QEfficient/utils/sampler_utils.py
@@ -11,7 +11,9 @@
 
 from QEfficient.utils import constants
 from QEfficient.utils.constants import Constants
-from QEfficient.utils.logging_utils import logger
+from QEfficient.utils.logging_utils import QEFFLogger
+
+logger = QEFFLogger.get_logger("INFRA", loglevel="INFO")
 
 
 def validate_sampler_inputs(
diff --git a/examples/onboarding_guide/causallm/example_pytorch_transforms.py b/examples/onboarding_guide/causallm/example_pytorch_transforms.py
index ff62588f9..503efc12d 100644
--- a/examples/onboarding_guide/causallm/example_pytorch_transforms.py
+++ b/examples/onboarding_guide/causallm/example_pytorch_transforms.py
@@ -27,12 +27,6 @@
 from types import MethodType
 from typing import Callable, Optional, Tuple, Union
 
-from QEfficient.transformers.models.blueprint.modeling_blueprint import (
-    QEffBlueprintAttention,
-    QEffBlueprintDecoderLayer,
-    QEffBlueprintForCausalLM,
-    QEffBlueprintModel,
-)
 from torch import nn
 
 # Example imports for three representative models
@@ -62,6 +56,12 @@
 from QEfficient.base.pytorch_transforms import ExternalModuleMapperTransform, ModuleMappingTransform
 from QEfficient.customop import CustomRMSNormAIC
 from QEfficient.transformers.embeddings.embedding_utils import POOLING_MAP, PooledModel, validate_user_pooling_function
+from QEfficient.transformers.models.blueprint.modeling_blueprint import (
+    QEffBlueprintAttention,
+    QEffBlueprintDecoderLayer,
+    QEffBlueprintForCausalLM,
+    QEffBlueprintModel,
+)
 from QEfficient.transformers.models.llama.modeling_llama import (
     QEffLlamaAttention,
     QEffLlamaDecoderLayer,
diff --git a/scripts/Jenkinsfile b/scripts/Jenkinsfile
index 3420c025b..67f57c6dd 100644
--- a/scripts/Jenkinsfile
+++ b/scripts/Jenkinsfile
@@ -252,4 +252,4 @@ pipeline {
         //     deleteDir()
         // }
     }
-}
\ No newline at end of file
+}
diff --git a/scripts/finetune/run_ft_model.py b/scripts/finetune/run_ft_model.py
index f5b64e717..97ffb80cb 100644
--- a/scripts/finetune/run_ft_model.py
+++ b/scripts/finetune/run_ft_model.py
@@ -14,7 +14,9 @@
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
 from QEfficient.finetune.configs.training import TrainConfig
-from QEfficient.finetune.utils.logging_utils import logger
+from QEfficient.utils.logging_utils import QEFFLogger
+
+logger = QEFFLogger.get_logger("FT", loglevel="INFO")
 
 # Suppress all warnings
 warnings.filterwarnings("ignore")
diff --git a/scripts/perplexity_computation/calculate_perplexity.py b/scripts/perplexity_computation/calculate_perplexity.py
index e2988a0ae..04ce624ec 100644
--- a/scripts/perplexity_computation/calculate_perplexity.py
+++ b/scripts/perplexity_computation/calculate_perplexity.py
@@ -18,8 +18,9 @@
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
 from QEfficient.generation.cloud_infer import QAICInferenceSession
+from QEfficient.utils.logging_utils import QEFFLogger
 
-logger = logging.getLogger(__name__)
+logger = QEFFLogger.get_logger("INFRA", loglevel="INFO")
 
 
 # 1. Data Loading
diff --git a/tests/conftest.py b/tests/conftest.py
index ba0f341fe..b5037b7d0 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -13,9 +13,11 @@
 from transformers import AutoConfig
 
 from QEfficient.utils.constants import QEFF_MODELS_DIR
-from QEfficient.utils.logging_utils import logger
+from QEfficient.utils.logging_utils import QEFFLogger
 from QEfficient.utils.test_utils import ModelConfig
 
+logger = QEFFLogger.get_logger("INFRA", loglevel="INFO")
+
 
 def get_custom_model_config_dict(configs):
     """
diff --git a/tests/transformers/test_transformer_pytorch_transforms.py b/tests/transformers/test_transformer_pytorch_transforms.py
index eb05b3f95..279d49bdf 100644
--- a/tests/transformers/test_transformer_pytorch_transforms.py
+++ b/tests/transformers/test_transformer_pytorch_transforms.py
@@ -20,7 +20,9 @@
 from QEfficient.transformers.quantizers.quant_transforms import AwqToMatmulNbitsTransform, GPTQToMatmulNbitsTransform
 from QEfficient.transformers.spd.turbo import ResBlock
 from QEfficient.utils._utils import get_padding_shape_from_config
-from QEfficient.utils.logging_utils import logger
+from QEfficient.utils.logging_utils import QEFFLogger
+
+logger = QEFFLogger.get_logger("INFRA", loglevel="INFO")
 
 KVCacheTransformTestConfigs = [
     ("llama", 3, 32, 128, {"num_key_value_heads": 8, "intermediate_size": 512}, 0.8),
diff --git a/tests/utils/test_logger.py b/tests/utils/test_logger.py
new file mode 100644
index 000000000..e6bd51c5c
--- /dev/null
+++ b/tests/utils/test_logger.py
@@ -0,0 +1,44 @@
+# -----------------------------------------------------------------------------
+#
+# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+# SPDX-License-Identifier: BSD-3-Clause
+#
+# -----------------------------------------------------------------------------
+
+import threading
+import time
+
+from QEfficient.utils.logging_utils import QEFFLogger
+
+# -------------------------------
+# Define namespace once
+# -------------------------------
+NAMESPACE = "MODEL"
+
+
+# -------------------------------
+# Worker function for threads
+# -------------------------------
+def log_worker(logger, thread_id):
+    for i in range(5):
+        logger.info(f"Thread-{thread_id} logging message {i}")
+        time.sleep(0.1)
+
+
+# -------------------------------
+# Concurrent logging smoke test
+# -------------------------------
+def test_concurrent_logging():
+    logger = QEFFLogger.get_logger(NAMESPACE, "DEBUG")
+
+    threads = []
+    for t_id in range(3):
+        t = threading.Thread(target=log_worker, args=(logger, t_id))
+        threads.append(t)
+        t.start()
+
+    for t in threads:
+        t.join()
+
+    # Graceful shutdown of the background logging thread
+    QEFFLogger.close_logger()
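diff --git a/examples/logging_usage_example.py b/examples/logging_usage_example.py
new file mode 100644
--- /dev/null
+++ b/examples/logging_usage_example.py
@@ -0,0 +1,29 @@
+# -----------------------------------------------------------------------------
+#
+# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+# SPDX-License-Identifier: BSD-3-Clause
+#
+# -----------------------------------------------------------------------------
+
+# Illustrative usage sketch for the QEFFLogger API introduced in this change.
+# This file and its flow are hypothetical documentation aids, not part of the
+# original patch; the calls below mirror QEfficient/utils/logging_utils.py.
+
+from QEfficient.utils.logging_utils import QEFFLogger
+
+# The first get_logger() call initializes the singleton: a rotating JSON file
+# handler under ~/.cache/qefficient_logs plus a background logging thread.
+logger = QEFFLogger.get_logger("INFRA", loglevel="INFO")
+logger.info("Pipeline started")
+
+# Class-level logging enqueues a record for the background thread to handle.
+QEFFLogger.log("DEBUG", "MODEL", "Loaded weights", fn="example.py", lno=1)
+
+# The shared logger's level can be changed at runtime.
+QEFFLogger.set_loglevel("DEBUG")
+
+# Summarize milestone timings parsed from the JSON log file.
+QEFFLogger.print_table()
+
+# Stop the background thread and close file handlers.
+QEFFLogger.close_logger()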