@@ -46,10 +46,10 @@ def _substitute_process_id(path: str) -> str:
     os.environ.get("FLASHINFER_LOGDEST_DBG", "stdout")
 )
 
-# Enable cuDNN, cuBLAS, and cuBLASLt API logging when FlashInfer logging level >= 3
+# Enable cuDNN, cuBLAS, and cuBLASLt API logging when FlashInfer logging level >= 5
 # Only override if the user hasn't already configured the logging switch
 # If the switch is not set, we override both the switch and destination as a bundle
-if _API_LOG_LEVEL >= 3:
+if _API_LOG_LEVEL >= 5:
     # cuBLAS logging: Check switch, set both switch and destination
     if "CUBLAS_LOGINFO_DBG" not in os.environ:
         os.environ["CUBLAS_LOGINFO_DBG"] = "1"
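With the gate raised from level 3 to level 5, the backend-library logging bundle is now opt-in at the highest level only. A minimal usage sketch (the file name `my_cublas.log` is illustrative; the env vars must be set before `flashinfer` is imported, since the switches are configured at import time):

```python
# Hedged sketch: opting into the cuDNN/cuBLAS/cuBLASLt bundle at level 5.
import os

os.environ["FLASHINFER_LOGLEVEL_DBG"] = "5"
# To keep control of cuBLAS logging yourself, set the switch up front;
# per the hunk above, the module then leaves the whole bundle alone.
os.environ["CUBLAS_LOGINFO_DBG"] = "1"
os.environ["CUBLAS_LOGDEST_DBG"] = "my_cublas.log"  # illustrative file name

import flashinfer  # noqa: E402 -- imported after env setup on purpose
```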
@@ -104,7 +104,7 @@ def _setup_logger():
     else:
         handler = logging.FileHandler(_API_LOG_DEST, mode="a")
 
-    # Use a simple formatter (we'll format the detailed content ourselves)
+    # Use a simple formatter (we'll add timestamps manually to key lines)
     formatter = logging.Formatter("%(message)s")
     handler.setFormatter(formatter)
@@ -116,14 +116,21 @@ def _setup_logger():
 _setup_logger()
 
 
+def _get_timestamp() -> str:
+    """Get current timestamp in the format [YYYY-MM-DD HH:MM:SS]."""
+    from datetime import datetime
+
+    return datetime.now().strftime("[%Y-%m-%d %H:%M:%S]")
+
+
 def _log_system_info():
     """Log system information once at module initialization."""
     if _API_LOG_LEVEL == 0:
         return
 
     lines = []
     lines.append("=" * 80)
-    lines.append(" FlashInfer API Logging - System Information")
+    lines.append(f" {_get_timestamp()} FlashInfer API Logging - System Information")
     lines.append("=" * 80)
 
     try:
@@ -179,9 +186,9 @@ def _log_system_info():
     lines.append(f"PyTorch version: {torch.__version__}")
 
     # cuDNN/cuBLAS/cuBLASLt logging status
-    if _API_LOG_LEVEL >= 3:
+    if _API_LOG_LEVEL >= 5:
         lines.append("")
-        lines.append("cuDNN/cuBLAS/cuBLASLt Logging: Enabled (Level 3)")
+        lines.append("cuDNN/cuBLAS/cuBLASLt Logging: Enabled (Level 5)")
         cublas_info = os.environ.get("CUBLAS_LOGINFO_DBG", "not set")
         cublas_dest = os.environ.get("CUBLAS_LOGDEST_DBG", "not set")
         cublaslt_level = os.environ.get("CUBLASLT_LOG_LEVEL", "not set")
@@ -249,7 +256,7 @@ def _format_value(value: Any, level: int, indent: int = 0) -> str:
         if level == 1:
             return f"{indent_str}Tensor(...)"
 
-        # Level 2+: Show metadata
+        # Level 3+: Show metadata
         lines = [f"{indent_str}Tensor("]
         lines.append(f"{indent_str}shape={tuple(value.shape)}")
         lines.append(f"{indent_str}stride={tuple(value.stride())}")
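For reference, a sketch of the metadata block levels 3+ would emit for a small tensor; field names follow the hunk above, while exact spacing and any fields elided by the diff (dtype, device) may differ:

```python
# Illustrative only: reproduce the level-3+ metadata fields for a sample tensor.
import torch

t = torch.zeros(4, 8)
print("Tensor(")
print(f"shape={tuple(t.shape)}")            # (4, 8)
print(f"stride={tuple(t.stride())}")        # (8, 1)
print(f"requires_grad={t.requires_grad}")   # False
print(f"is_contiguous={t.is_contiguous()}") # True
print(")")
```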
@@ -258,8 +265,8 @@ def _format_value(value: Any, level: int, indent: int = 0) -> str:
         lines.append(f"{indent_str}requires_grad={value.requires_grad}")
         lines.append(f"{indent_str}is_contiguous={value.is_contiguous()}")
 
-        # Level 3: Add statistics
-        if level >= 3:
+        # Level 5: Add statistics
+        if level >= 5:
             try:
                 # Skip statistics if we're in CUDA graph capture mode
                 # (operations like .min()/.max()/.mean() cause synchronization issues)
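The capture guard this comment describes can be expressed with PyTorch's stream-capture query; a minimal sketch under that assumption (`_safe_stats` is a hypothetical helper, not this module's API):

```python
# Sketch of level-5 statistics gated on CUDA graph capture; assumes
# torch.cuda.is_current_stream_capturing() is the check in use.
import torch

def _safe_stats(t: torch.Tensor) -> str:
    if t.is_cuda and torch.cuda.is_current_stream_capturing():
        # .min()/.max()/.mean() synchronize and would break graph capture
        return "[statistics skipped: CUDA graph capture in progress]"
    f = t.detach().float()
    return (
        f"min={f.min().item():.6g} max={f.max().item():.6g} "
        f"mean={f.mean().item():.6g} nan_count={torch.isnan(f).sum().item()}"
    )
```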
@@ -452,11 +459,11 @@ def _log_function_inputs(
     kwargs : dict
         Keyword arguments
     level : int
-        Logging level (2 or 3)
+        Logging level (3 or 5)
     """
     lines = []
     lines.append("=" * 80)
-    lines.append(f"FlashInfer API Call: {func_name}")
+    lines.append(f"{_get_timestamp()} FlashInfer API Call: {func_name}")
     lines.append("-" * 80)
 
     # Log explicitly provided inputs
@@ -499,7 +506,7 @@ def _log_function_outputs(func_name: str, result: Any, level: int) -> None:
     result : Any
         Function return value
     level : int
-        Logging level (2 or 3)
+        Logging level (3 or 5)
     """
     lines = []
     # Log outputs
@@ -524,8 +531,8 @@ def flashinfer_api_log(func: Callable = None) -> Callable:
     FLASHINFER_LOGLEVEL_DBG : int (default: 0)
         - 0: No logging (zero overhead - decorator returns original function)
         - 1: Log function name only (logged BEFORE execution - crash-safe)
-        - 2: Log function name + inputs/outputs with metadata (inputs logged BEFORE execution - crash-safe)
-        - 3: Log function name + inputs/outputs with metadata + tensor statistics (inputs logged BEFORE execution - crash-safe)
+        - 3: Log function name + inputs/outputs with metadata (inputs logged BEFORE execution - crash-safe)
+        - 5: Log function name + inputs/outputs with metadata + tensor statistics (inputs logged BEFORE execution - crash-safe)
 
     FLASHINFER_LOGDEST_DBG : str (default: "stdout")
         - "stdout": Log to standard output
@@ -543,18 +550,20 @@ def flashinfer_api_log(func: Callable = None) -> Callable:
 
     Notes
     -----
+    - Key header lines include a timestamp in the format: [YYYY-MM-DD HH:MM:SS]
+      (e.g., "FlashInfer API Call: function_name", "FlashInfer API Logging - System Information")
     - When FLASHINFER_LOGLEVEL_DBG=0, the decorator has truly zero overhead
       as it returns the original function unchanged.
     - Function names and inputs are logged BEFORE execution:
       - Level 1: Function name only
-      - Levels 2-3: Function name + inputs with metadata
+      - Levels 3-5: Function name + inputs with metadata
       This means critical debugging information is preserved even if the function
       crashes (e.g., CUDA illegal memory access, out-of-bounds, etc.).
-    - Outputs are logged AFTER successful execution for levels 2 and 3.
-    - **CUDA Graph Compatibility**: At level 3, tensor statistics (min/max/mean/nan_count)
+    - Outputs are logged AFTER successful execution for levels 3 and 5.
+    - **CUDA Graph Compatibility**: At level 5, tensor statistics (min/max/mean/nan_count)
       are automatically skipped during CUDA graph capture to avoid synchronization issues.
       The message "[statistics skipped: CUDA graph capture in progress]" will be logged.
-    - **cuDNN/cuBLAS/cuBLASLt Integration**: At level 3, if not already set by the user, the following
+    - **cuDNN/cuBLAS/cuBLASLt Integration**: At level 5, if not already set by the user, the following
       environment variables are automatically configured to enable cuDNN, cuBLAS, and cuBLASLt logging:
       - CUBLAS_LOGINFO_DBG=1, CUBLAS_LOGDEST_DBG=flashinfer_cublas_log_%i.txt
       - CUBLASLT_LOG_LEVEL=2, CUBLASLT_LOG_FILE=flashinfer_cublaslt_log_%i.txt
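The `%i` in those destinations is a per-process token, so concurrent processes don't clobber each other's logs. A plausible substitution helper in the spirit of `_substitute_process_id` from the first hunk (the real implementation may differ):

```python
# Hypothetical re-implementation of the %i -> pid substitution.
import os

def substitute_process_id(path: str) -> str:
    """Replace every %i in a log path with the current process id."""
    return path.replace("%i", str(os.getpid()))

# "flashinfer_cublas_log_%i.txt" -> e.g. "flashinfer_cublas_log_12345.txt"
print(substitute_process_id("flashinfer_cublas_log_%i.txt"))
```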
@@ -588,20 +597,22 @@ def wrapper(*args, **kwargs):
         try:
             if _API_LOG_LEVEL == 1:
                 # Level 1: Just log function name before execution (crash-safe)
-                _logger.debug(f"FlashInfer API Call: {func_name}")
-            elif _API_LOG_LEVEL >= 2:
-                # Level 2+: Log full inputs before execution (crash-safe)
+                _logger.debug(
+                    f"{_get_timestamp()} FlashInfer API Call: {func_name}"
+                )
+            elif _API_LOG_LEVEL >= 3:
+                # Level 3+: Log full inputs before execution (crash-safe)
                 _log_function_inputs(f, func_name, args, kwargs, _API_LOG_LEVEL)
         except Exception as e:
             _logger.error(f"[LOGGING ERROR in {func_name} (pre-execution)]: {e}")
 
         # Call the original function (may crash here with CUDA errors)
         result = f(*args, **kwargs)
 
-        # Log outputs AFTER successful execution (level 2+ only)
+        # Log outputs AFTER successful execution (level 3+ only)
         try:
-            if _API_LOG_LEVEL >= 2:
-                # Level 2+: Log outputs (inputs were already logged above)
+            if _API_LOG_LEVEL >= 3:
+                # Level 3+: Log outputs (inputs were already logged above)
                 _log_function_outputs(func_name, result, _API_LOG_LEVEL)
         except Exception as e:
             _logger.error(f"[LOGGING ERROR in {func_name} (outputs)]: {e}")
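Putting the remapped levels together, a hedged end-to-end sketch of decorating a toy function (`scale` is illustrative, not a FlashInfer API, and the `flashinfer.logging` import path is assumed):

```python
# Assumes FLASHINFER_LOGLEVEL_DBG was set before flashinfer was imported.
import torch
from flashinfer.logging import flashinfer_api_log  # module path assumed

@flashinfer_api_log
def scale(x: torch.Tensor, alpha: float = 2.0) -> torch.Tensor:
    return x * alpha

out = scale(torch.ones(2, 3))
# Level 1: one timestamped "FlashInfer API Call: scale" line, before execution.
# Level 3: adds input/output metadata blocks (shape, stride, requires_grad, ...).
# Level 5: also logs min/max/mean/nan_count unless a CUDA graph is capturing.
```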