diff --git a/include/env_config.h b/include/env_config.h index e7b4443..121ae58 100644 --- a/include/env_config.h +++ b/include/env_config.h @@ -72,3 +72,7 @@ void init_analysis(const std::string& analysis_str); // 1 = NDJSON+Zstd (compressed JSON) // 2 = NDJSON only (uncompressed JSON, good for debugging) extern int trace_format_ndjson; + +// Zstd compression level (1-22, higher = better compression but slower) +// Default: 22 (maximum compression, smallest output, slowest) +extern int zstd_compression_level; diff --git a/include/trace_writer.h b/include/trace_writer.h index 1c9d2c9..b99b926 100644 --- a/include/trace_writer.h +++ b/include/trace_writer.h @@ -159,7 +159,7 @@ class TraceWriter { // ========== Mode 1 (Zstd compression) support ========== ZSTD_CCtx* zstd_ctx_; // Zstd compression context std::vector compressed_buffer_; // Pre-allocated compression output buffer - int compression_level_; // Zstd compression level (1-22, default 9) + int compression_level_; // Zstd compression level (1-22, default 22) public: /** diff --git a/pr.md b/pr.md new file mode 100644 index 0000000..f3ca399 --- /dev/null +++ b/pr.md @@ -0,0 +1,41 @@ +## Summary + +Add `CUTRACER_ZSTD_LEVEL` environment variable to configure zstd compression level (1-22). This allows users to trade off compression speed vs compression ratio based on their use case. 
+ +## Changes + +- `include/env_config.h`: Add extern declaration for `zstd_compression_level` +- `src/env_config.cu`: Add environment variable reading logic with validation (range 1-22) +- `src/trace_writer.cpp`: Use configurable compression level instead of hardcoded value 22 +- `readme.md`: Document new `CUTRACER_ZSTD_LEVEL` environment variable + +## Configuration + +`CUTRACER_ZSTD_LEVEL`: Zstd compression level (1-22, default 22) +- Lower values (1-3): Faster compression, slightly larger output +- Higher values (19-22): Maximum compression, slower but smallest output +- Default of 22 provides maximum compression for smallest output + +## Motivation + +The default compression level of 22 (maximum) prioritizes compression ratio over speed. For use cases where compression speed is more important, users can set a lower level (e.g., 3) to get nearly the same compression ratio with significantly faster compression. This change allows users to choose the trade-off that best suits their workflow. + +## Example Usage + +```bash +# Fast compression (level 1) +CUTRACER_ZSTD_LEVEL=1 CUDA_INJECTION64_PATH=~/CUTracer/lib/cutracer.so ./app + +# Maximum compression (level 22, default) +CUDA_INJECTION64_PATH=~/CUTracer/lib/cutracer.so ./app + +# Balanced compression (level 9) +CUTRACER_ZSTD_LEVEL=9 CUDA_INJECTION64_PATH=~/CUTracer/lib/cutracer.so ./app +``` + +## Test Plan + +1. Build CUTracer: `make -j$(nproc)` +2. Verify default level works: Run with `TRACE_FORMAT_NDJSON=1` and check output +3. Verify custom level: Set `CUTRACER_ZSTD_LEVEL=1` and verify faster compression +4. 
Verify validation: Set invalid value (e.g., 25) and verify warning + fallback to default diff --git a/readme.md b/readme.md index 8ece312..a4cfce6 100644 --- a/readme.md +++ b/readme.md @@ -83,6 +83,10 @@ CUDA_INJECTION64_PATH=~/CUTracer/lib/cutracer.so \ - **1** (default): NDJSON+Zstd compressed (`.ndjson.zst`, ~12x compression, 92% space savings) - 0: Plain text (`.log`, legacy format, verbose) - 2: NDJSON uncompressed (`.ndjson`, for debugging) +- `CUTRACER_ZSTD_LEVEL`: Zstd compression level (1-22, default 22) + - Lower values (1-3): Faster compression, slightly larger output + - Higher values (19-22): Maximum compression, slower but smallest output + - Default of 22 provides maximum compression for smallest output Note: The tool sets `CUDA_MANAGED_FORCE_DEVICE_ALLOC=1` to simplify channel memory handling. diff --git a/src/env_config.cu b/src/env_config.cu index 7f653e9..8a1cc37 100644 --- a/src/env_config.cu +++ b/src/env_config.cu @@ -28,6 +28,9 @@ std::unordered_set enabled_analysis_types; // Trace format configuration variable int trace_format_ndjson; +// Zstd compression level +int zstd_compression_level; + /** * @brief Parses a comma-separated string of kernel name filters for substring matching. * @@ -226,6 +229,15 @@ void init_config_from_env() { trace_format_ndjson = 0; } + // Zstd compression level (only used when trace_format_ndjson == 1) + get_var_int(zstd_compression_level, "CUTRACER_ZSTD_LEVEL", 22, "Zstd compression level (1-22, default 22)"); + + // Validate compression level range + if (zstd_compression_level < 1 || zstd_compression_level > 22) { + printf("WARNING: Invalid CUTRACER_ZSTD_LEVEL=%d. 
Using default=22.\n", zstd_compression_level); + zstd_compression_level = 22; + } + std::string pad(100, '-'); loprintf("%s\n", pad.c_str()); } diff --git a/src/trace_writer.cpp b/src/trace_writer.cpp index 00a5fec..d31bed1 100644 --- a/src/trace_writer.cpp +++ b/src/trace_writer.cpp @@ -16,6 +16,8 @@ #include #include +#include "env_config.h" + // ============================================================================ // Constructor & Destructor // ============================================================================ @@ -28,7 +30,7 @@ TraceWriter::TraceWriter(const std::string& filename, int trace_mode, size_t buf trace_mode_(trace_mode), enabled_(true), zstd_ctx_(nullptr), - compression_level_(22) { // Maximum compression level for best compression ratio + compression_level_(zstd_compression_level) { // Use configurable compression level from env_config // Validate trace mode if (trace_mode < 0 || trace_mode > 2) {