@@ -46,10 +46,10 @@ def _substitute_process_id(path: str) -> str:
     os.environ.get("FLASHINFER_LOGDEST_DBG", "stdout")
 )
 
-# Enable cuDNN, cuBLAS, and cuBLASLt API logging when FlashInfer logging level >= 3
+# Enable cuDNN, cuBLAS, and cuBLASLt API logging when FlashInfer logging level >= 5
 # Only override if the user hasn't already configured the logging switch
 # If the switch is not set, we override both the switch and destination as a bundle
-if _API_LOG_LEVEL >= 3:
+if _API_LOG_LEVEL >= 5:
     # cuBLAS logging: Check switch, set both switch and destination
     if "CUBLAS_LOGINFO_DBG" not in os.environ:
         os.environ["CUBLAS_LOGINFO_DBG"] = "1"
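With the gate raised from level 3 to level 5, the backend-library logging bundle is now opt-in at the highest level only. A minimal usage sketch (the file name `my_cublas.log` is illustrative; the env vars must be set before `flashinfer` is imported, since the switches are configured at import time):

```python
# Hedged sketch: opting into the cuDNN/cuBLAS/cuBLASLt bundle at level 5.
import os

os.environ["FLASHINFER_LOGLEVEL_DBG"] = "5"
# To keep control of cuBLAS logging yourself, set the switch up front;
# per the hunk above, the module then leaves the whole bundle alone.
os.environ["CUBLAS_LOGINFO_DBG"] = "1"
os.environ["CUBLAS_LOGDEST_DBG"] = "my_cublas.log"  # illustrative file name

import flashinfer  # noqa: E402 -- imported after env setup on purpose
```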
@@ -104,7 +104,7 @@ def _setup_logger():
     else:
         handler = logging.FileHandler(_API_LOG_DEST, mode="a")
 
-    # Use a simple formatter (we'll format the detailed content ourselves)
+    # Use a simple formatter (we'll add timestamps manually to key lines)
     formatter = logging.Formatter("%(message)s")
     handler.setFormatter(formatter)
@@ -116,14 +116,21 @@ def _setup_logger():
 _setup_logger()
 
 
+def _get_timestamp() -> str:
+    """Get current timestamp in the format [YYYY-MM-DD HH:MM:SS]."""
+    from datetime import datetime
+
+    return datetime.now().strftime("[%Y-%m-%d %H:%M:%S]")
+
+
 def _log_system_info():
     """Log system information once at module initialization."""
     if _API_LOG_LEVEL == 0:
         return
 
     lines = []
     lines.append("=" * 80)
-    lines.append(" FlashInfer API Logging - System Information")
+    lines.append(f" {_get_timestamp()} FlashInfer API Logging - System Information")
     lines.append("=" * 80)
 
     try:
@@ -179,9 +186,9 @@ def _log_system_info():
     lines.append(f"PyTorch version: {torch.__version__}")
 
     # cuDNN/cuBLAS/cuBLASLt logging status
-    if _API_LOG_LEVEL >= 3:
+    if _API_LOG_LEVEL >= 5:
         lines.append("")
-        lines.append("cuDNN/cuBLAS/cuBLASLt Logging: Enabled (Level 3)")
+        lines.append("cuDNN/cuBLAS/cuBLASLt Logging: Enabled (Level 5)")
         cublas_info = os.environ.get("CUBLAS_LOGINFO_DBG", "not set")
         cublas_dest = os.environ.get("CUBLAS_LOGDEST_DBG", "not set")
         cublaslt_level = os.environ.get("CUBLASLT_LOG_LEVEL", "not set")
@@ -249,7 +256,7 @@ def _format_value(value: Any, level: int, indent: int = 0) -> str:
         if level == 1:
             return f"{indent_str}Tensor(...)"
 
-        # Level 2+: Show metadata
+        # Level 3+: Show metadata
         lines = [f"{indent_str}Tensor("]
         lines.append(f"{indent_str}shape={tuple(value.shape)}")
         lines.append(f"{indent_str}stride={tuple(value.stride())}")
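For reference, a sketch of the metadata block levels 3+ would emit for a small tensor; field names follow the hunk above, while exact spacing and any fields elided by the diff (dtype, device) may differ:

```python
# Illustrative only: reproduce the level-3+ metadata fields for a sample tensor.
import torch

t = torch.zeros(4, 8)
print("Tensor(")
print(f"shape={tuple(t.shape)}")            # (4, 8)
print(f"stride={tuple(t.stride())}")        # (8, 1)
print(f"requires_grad={t.requires_grad}")   # False
print(f"is_contiguous={t.is_contiguous()}") # True
print(")")
```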
@@ -258,8 +265,8 @@ def _format_value(value: Any, level: int, indent: int = 0) -> str:
         lines.append(f"{indent_str}requires_grad={value.requires_grad}")
         lines.append(f"{indent_str}is_contiguous={value.is_contiguous()}")
 
-        # Level 3: Add statistics
-        if level >= 3:
+        # Level 5: Add statistics
+        if level >= 5:
             try:
                 # Skip statistics if we're in CUDA graph capture mode
                 # (operations like .min()/.max()/.mean() cause synchronization issues)
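The capture guard this comment describes can be expressed with PyTorch's stream-capture query; a minimal sketch under that assumption (`_safe_stats` is a hypothetical helper, not this module's API):

```python
# Sketch of level-5 statistics gated on CUDA graph capture; assumes
# torch.cuda.is_current_stream_capturing() is the check in use.
import torch

def _safe_stats(t: torch.Tensor) -> str:
    if t.is_cuda and torch.cuda.is_current_stream_capturing():
        # .min()/.max()/.mean() synchronize and would break graph capture
        return "[statistics skipped: CUDA graph capture in progress]"
    f = t.detach().float()
    return (
        f"min={f.min().item():.6g} max={f.max().item():.6g} "
        f"mean={f.mean().item():.6g} nan_count={torch.isnan(f).sum().item()}"
    )
```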
@@ -452,11 +459,11 @@ def _log_function_inputs(
     kwargs : dict
         Keyword arguments
     level : int
-        Logging level (2 or 3)
+        Logging level (3 or 5)
     """
     lines = []
     lines.append("=" * 80)
-    lines.append(f"FlashInfer API Call: {func_name}")
+    lines.append(f"{_get_timestamp()} FlashInfer API Call: {func_name}")
     lines.append("-" * 80)
 
     # Log explicitly provided inputs
@@ -499,7 +506,7 @@ def _log_function_outputs(func_name: str, result: Any, level: int) -> None:
     result : Any
         Function return value
     level : int
-        Logging level (2 or 3)
+        Logging level (3 or 5)
     """
     lines = []
     # Log outputs
@@ -524,8 +531,8 @@ def flashinfer_api_log(func: Callable = None) -> Callable:
     FLASHINFER_LOGLEVEL_DBG : int (default: 0)
         - 0: No logging (zero overhead - decorator returns original function)
         - 1: Log function name only (logged BEFORE execution - crash-safe)
-        - 2: Log function name + inputs/outputs with metadata (inputs logged BEFORE execution - crash-safe)
-        - 3: Log function name + inputs/outputs with metadata + tensor statistics (inputs logged BEFORE execution - crash-safe)
+        - 3: Log function name + inputs/outputs with metadata (inputs logged BEFORE execution - crash-safe)
+        - 5: Log function name + inputs/outputs with metadata + tensor statistics (inputs logged BEFORE execution - crash-safe)
 
     FLASHINFER_LOGDEST_DBG : str (default: "stdout")
         - "stdout": Log to standard output
@@ -543,18 +550,20 @@ def flashinfer_api_log(func: Callable = None) -> Callable:
 
     Notes
     -----
+    - Key header lines include a timestamp in the format: [YYYY-MM-DD HH:MM:SS]
+      (e.g., "FlashInfer API Call: function_name", "FlashInfer API Logging - System Information")
     - When FLASHINFER_LOGLEVEL_DBG=0, the decorator has truly zero overhead
       as it returns the original function unchanged.
     - Function names and inputs are logged BEFORE execution:
       - Level 1: Function name only
-      - Levels 2-3: Function name + inputs with metadata
+      - Levels 3-5: Function name + inputs with metadata
       This means critical debugging information is preserved even if the function
       crashes (e.g., CUDA illegal memory access, out-of-bounds, etc.).
-    - Outputs are logged AFTER successful execution for levels 2 and 3.
-    - **CUDA Graph Compatibility**: At level 3, tensor statistics (min/max/mean/nan_count)
+    - Outputs are logged AFTER successful execution for levels 3 and 5.
+    - **CUDA Graph Compatibility**: At level 5, tensor statistics (min/max/mean/nan_count)
       are automatically skipped during CUDA graph capture to avoid synchronization issues.
       The message "[statistics skipped: CUDA graph capture in progress]" will be logged.
-    - **cuDNN/cuBLAS/cuBLASLt Integration**: At level 3, if not already set by the user, the following
+    - **cuDNN/cuBLAS/cuBLASLt Integration**: At level 5, if not already set by the user, the following
       environment variables are automatically configured to enable cuDNN, cuBLAS, and cuBLASLt logging:
       - CUBLAS_LOGINFO_DBG=1, CUBLAS_LOGDEST_DBG=flashinfer_cublas_log_%i.txt
       - CUBLASLT_LOG_LEVEL=2, CUBLASLT_LOG_FILE=flashinfer_cublaslt_log_%i.txt
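The `%i` in those destinations is a per-process token, so concurrent processes don't clobber each other's logs. A plausible substitution helper in the spirit of `_substitute_process_id` from the first hunk (the real implementation may differ):

```python
# Hypothetical re-implementation of the %i -> pid substitution.
import os

def substitute_process_id(path: str) -> str:
    """Replace every %i in a log path with the current process id."""
    return path.replace("%i", str(os.getpid()))

# "flashinfer_cublas_log_%i.txt" -> e.g. "flashinfer_cublas_log_12345.txt"
print(substitute_process_id("flashinfer_cublas_log_%i.txt"))
```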
@@ -588,20 +597,22 @@ def wrapper(*args, **kwargs):
         try:
             if _API_LOG_LEVEL == 1:
                 # Level 1: Just log function name before execution (crash-safe)
-                _logger.debug(f"FlashInfer API Call: {func_name}")
-            elif _API_LOG_LEVEL >= 2:
-                # Level 2+: Log full inputs before execution (crash-safe)
+                _logger.debug(
+                    f"{_get_timestamp()} FlashInfer API Call: {func_name}"
+                )
+            elif _API_LOG_LEVEL >= 3:
+                # Level 3+: Log full inputs before execution (crash-safe)
                 _log_function_inputs(f, func_name, args, kwargs, _API_LOG_LEVEL)
         except Exception as e:
             _logger.error(f"[LOGGING ERROR in {func_name} (pre-execution)]: {e}")
 
         # Call the original function (may crash here with CUDA errors)
         result = f(*args, **kwargs)
 
-        # Log outputs AFTER successful execution (level 2+ only)
+        # Log outputs AFTER successful execution (level 3+ only)
         try:
-            if _API_LOG_LEVEL >= 2:
-                # Level 2+: Log outputs (inputs were already logged above)
+            if _API_LOG_LEVEL >= 3:
+                # Level 3+: Log outputs (inputs were already logged above)
                 _log_function_outputs(func_name, result, _API_LOG_LEVEL)
         except Exception as e:
             _logger.error(f"[LOGGING ERROR in {func_name} (outputs)]: {e}")
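Putting the remapped levels together, a hedged end-to-end sketch of decorating a toy function (`scale` is illustrative, not a FlashInfer API, and the `flashinfer.logging` import path is assumed):

```python
# Assumes FLASHINFER_LOGLEVEL_DBG was set before flashinfer was imported.
import torch
from flashinfer.logging import flashinfer_api_log  # module path assumed

@flashinfer_api_log
def scale(x: torch.Tensor, alpha: float = 2.0) -> torch.Tensor:
    return x * alpha

out = scale(torch.ones(2, 3))
# Level 1: one timestamped "FlashInfer API Call: scale" line, before execution.
# Level 3: adds input/output metadata blocks (shape, stride, requires_grad, ...).
# Level 5: also logs min/max/mean/nan_count unless a CUDA graph is capturing.
```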