1313# limitations under the License.
1414
1515
16+ import logging
1617import warnings
1718from pathlib import Path
1819from typing import Any , Callable , Dict , List , Optional , Union
3536 UnavailableError ,
3637)
3738
38- try :
39- from nemo .utils import logging
40- except (ImportError , ModuleNotFoundError ):
41- import logging
42-
43- logging = logging .getLogger (__name__ )
39+ logger = logging .getLogger (__name__ )
4440
4541try :
4642 import modelopt .torch .quantization as mtq
@@ -90,15 +86,15 @@ def wrapper(*args, **kwargs):
9086try :
9187 from pytriton .decorators import batch
9288except Exception :
93- logging .warning ("PyTriton is not available." )
89+ logger .warning ("PyTriton is not available." )
9490 use_pytriton = False
9591
9692
9793use_onnxruntime = True
9894try :
9995 import onnxruntime
10096except Exception :
101- logging .warning ("onnxruntime is not available." )
97+ logger .warning ("onnxruntime is not available." )
10298 use_onnxruntime = False
10399
104100
@@ -255,7 +251,7 @@ def _export_to_onnx(
255251 verbose = verbose ,
256252 opset_version = opset ,
257253 )
258- logging .info (f"Successfully exported PyTorch model to ONNX model { self .onnx_model_path } " )
254+ logger .info (f"Successfully exported PyTorch model to ONNX model { self .onnx_model_path } " )
259255
260256 existing_directory_path = Path (self .onnx_model_dir ) / "tokenizer"
261257 existing_directory_path .mkdir (exist_ok = True )
@@ -285,7 +281,7 @@ def export_onnx_to_trt(
285281 if not HAVE_TENSORRT :
286282 raise UnavailableError (MISSING_TENSORRT_MSG )
287283
288- logging .info (f"Building TRT engine from ONNX model ({ self .onnx_model_path } )" )
284+ logger .info (f"Building TRT engine from ONNX model ({ self .onnx_model_path } )" )
289285 trt_logger = trt .Logger (trt .Logger .WARNING )
290286 builder = trt .Builder (trt_logger )
291287 network = builder .create_network (1 << int (trt .NetworkDefinitionCreationFlag .EXPLICIT_BATCH ))
@@ -295,9 +291,9 @@ def export_onnx_to_trt(
295291 # we use parse_from_file() instead of parse() because it can be used for both single
296292 # file models as well as externally stored models (required when model >2GiB)
297293 if not parser .parse_from_file (self .onnx_model_path ):
298- logging .warning ("ONNX model could not be parsed" )
294+ logger .warning ("ONNX model could not be parsed" )
299295 for error in range (parser .num_errors ):
300- logging .error (parser .get_error (error ))
296+ logger .error (parser .get_error (error ))
301297 return
302298
303299 if profiles :
@@ -316,22 +312,22 @@ def export_onnx_to_trt(
316312 config .add_optimization_profile (optimization_profile )
317313
318314 if trt_dtype == "fp16" :
319- logging .info ("Setting Build Flag FP16" )
315+ logger .info ("Setting Build Flag FP16" )
320316 config .set_flag (trt .BuilderFlag .FP16 )
321317 elif trt_dtype == "fp8" :
322318 # With FP8 export we want to also enable FP16 layers as a fallback instead of FP32
323- logging .info ("Setting Build Flag FP8 and FP16" )
319+ logger .info ("Setting Build Flag FP8 and FP16" )
324320 config .set_flag (trt .BuilderFlag .FP8 )
325321 config .set_flag (trt .BuilderFlag .FP16 )
326322 validate_fp8_network (network )
327323
328324 # patch network
329325 if override_layernorm_precision_to_fp32 :
330- logging .info ("Overriding TensorRT network LayerNorm precision to float32." )
326+ logger .info ("Overriding TensorRT network LayerNorm precision to float32." )
331327 self ._override_layernorm_precision_to_fp32 (network )
332328
333329 if override_layers_to_fp32 :
334- logging .info ("Overriding some layers to float32." )
330+ logger .info ("Overriding some layers to float32." )
335331 self ._override_layers_to_fp32 (network , override_layers_to_fp32 )
336332
337333 try :
@@ -343,7 +339,7 @@ def export_onnx_to_trt(
343339 except KeyError :
344340 error_msg = "Unknown profiling verbosity value."
345341 raise ValueError (error_msg )
346- logging .info (f"Setting Profiling Verbosity to { config .profiling_verbosity } " )
342+ logger .info (f"Setting Profiling Verbosity to { config .profiling_verbosity } " )
347343
348344 if trt_builder_flags is not None :
349345 for flag in trt_builder_flags :
@@ -357,7 +353,7 @@ def export_onnx_to_trt(
357353 trt_model_path .mkdir (parents = True , exist_ok = True )
358354 trt_model_path = trt_model_path / "model.plan"
359355 trt_model_path .write_bytes (engine_string )
360- logging .info (f"Successfully exported ONNX model ({ self .onnx_model_path } ) to TRT engine ({ trt_model_path } )" )
356+ logger .info (f"Successfully exported ONNX model ({ self .onnx_model_path } ) to TRT engine ({ trt_model_path } )" )
361357
362358 def _override_layer_precision_to_fp32 (self , layer : trt .ILayer ) -> None :
363359 if not HAVE_TENSORRT :
@@ -378,7 +374,7 @@ def _override_layers_to_fp32(self, network: trt.INetworkDefinition, fp32_layer_p
378374 trt .float16 ,
379375 }:
380376 if layer .type in {trt .LayerType .CAST }:
381- logging .info (f"Skipping overriding { layer .type } layer { i } { layer_name } dtype" )
377+ logger .info (f"Skipping overriding { layer .type } layer { i } { layer_name } dtype" )
382378 continue
383379 if any (
384380 layer .get_input (input_idx ).dtype in {trt .float32 , trt .float16 }
@@ -387,11 +383,11 @@ def _override_layers_to_fp32(self, network: trt.INetworkDefinition, fp32_layer_p
387383 # Note: Assigning to layer.precision (even the same value) sets precision_is_set=True,
388384 # which prevents TensorRT from changing this layer's precision
389385 layer .precision = trt .float32
390- logging .info (f"Setting layer { i } { layer_name } (type: { layer .type } ) precision to FP32" )
386+ logger .info (f"Setting layer { i } { layer_name } (type: { layer .type } ) precision to FP32" )
391387 for j in range (layer .num_outputs ):
392388 if layer .get_output_type (j ) in {trt .float32 , trt .float16 }:
393389 layer .set_output_type (j , trt .float32 )
394- logging .info (f"Setting layer { i } { layer_name } (type: { layer .type } ) output type { j } to FP32" )
390+ logger .info (f"Setting layer { i } { layer_name } (type: { layer .type } ) output type { j } to FP32" )
395391
396392 def _override_layernorm_precision_to_fp32 (self , network : trt .INetworkDefinition ) -> None :
397393 """Set the precision of LayerNorm subgraphs to FP32 to preserve accuracy.
@@ -506,9 +502,9 @@ def quantize(
506502 )
507503 quant_cfg = QUANT_CFG_CHOICES [quant_cfg ]
508504
509- logging .info ("Starting quantization..." )
505+ logger .info ("Starting quantization..." )
510506 mtq .quantize (self .model , quant_cfg , forward_loop = forward_loop )
511- logging .info ("Quantization is completed." )
507+ logger .info ("Quantization is completed." )
512508
513509 @property
514510 def get_model (self ):
0 commit comments