Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 34 additions & 6 deletions csrc/nv_internal/cpp/common/envUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -222,11 +222,6 @@ bool getEnvDisaggLayerwise() {
return disaggLayerwise;
}

// Reports the boolean value of the TRTLLM_PARALLEL_CACHE_SEND environment
// variable as interpreted by getBoolEnv. The lookup runs once; the result is
// held in a function-local static and reused by later calls.
bool getEnvParallelCacheSend() {
  static bool const cachedFlag = getBoolEnv("TRTLLM_PARALLEL_CACHE_SEND");
  return cachedFlag;
}

bool getEnvRequestKVCacheConcurrent() {
static bool const requestKVCacheConcurrent = getBoolEnv("TRTLLM_REQUEST_KV_CACHE_CONCURRENT");
return requestKVCacheConcurrent;
Expand Down Expand Up @@ -277,7 +272,7 @@ size_t getEnvAllReduceWorkspaceSize() {
return workspaceSize;
}

std::string getEnvKVCacheTransferOutputPath() {
// Returns the value of TRTLLM_KVCACHE_TIME_OUTPUT_PATH, or an empty string when
// getStrEnv yields no value. The string lives in a function-local static, so
// the returned reference stays valid for the rest of the program.
std::string const& getEnvKVCacheTimeOutputPath() {
  static std::string cachedPath = [] {
    auto const fromEnv = getStrEnv("TRTLLM_KVCACHE_TIME_OUTPUT_PATH");
    return fromEnv.value_or("");
  }();
  return cachedPath;
}
Expand Down Expand Up @@ -328,4 +323,37 @@ uint16_t getEnvNixlPort() {

// Reports the boolean value of TRTLLM_DISAGG_BENCHMARK_GEN_ONLY as interpreted
// by getBoolEnv. The value is not cached: getBoolEnv is invoked on every call.
bool getEnvDisaggBenchmarkGenOnly() {
  bool const genOnly = getBoolEnv("TRTLLM_DISAGG_BENCHMARK_GEN_ONLY");
  return genOnly;
}

// Reports whether the TLLM_MOE_A2A_ONE_BLOCK_PER_TOKEN switch is on.
//
// The variable is read once through getIntEnv and cached in a function-local
// static. When getIntEnv yields no value the answer is true (the default);
// otherwise the answer is true for any non-zero integer and false for zero.
bool getEnvMoeA2AOneBlockPerToken() {
  static std::optional<int32_t> const cachedSetting = getIntEnv("TLLM_MOE_A2A_ONE_BLOCK_PER_TOKEN");
  // Absent -> enabled; present -> enabled unless it is exactly 0.
  return !cachedSetting.has_value() || *cachedSetting != 0;
}

// Normalizes a requested block size (threads per block) into a usable value.
//
// @param val  Requested block size; empty when the caller has no value.
// @return     A multiple of 32 in the range [32, 1024]:
//               - empty or non-positive input falls back to the default, 256;
//               - input above 1024 is clamped to 1024;
//               - the result is then rounded UP to the next multiple of 32
//                 (the warp size), e.g. 1 -> 32, 33 -> 64, 1000 -> 1024.
static int sanitizeBlockSize(std::optional<int32_t> const& val) {
  constexpr int kDefaultBlock = 256;
  constexpr int kMaxBlock = 1024;
  constexpr int kWarpSize = 32;

  int block = val.value_or(kDefaultBlock);
  if (block <= 0) {
    block = kDefaultBlock;
  }
  if (block > kMaxBlock) {
    block = kMaxBlock;
  }
  // block is now in [1, kMaxBlock], so rounding up lands in
  // [kWarpSize, kMaxBlock] and can never produce 0; no further check is needed.
  return (block + kWarpSize - 1) / kWarpSize * kWarpSize;
}

// Returns the block size configured through TLLM_MOE_A2A_DISPATCH_BLOCK_SIZE.
// The variable is read with getIntEnv and passed through sanitizeBlockSize
// exactly once; the sanitized value is cached in a function-local static.
int getEnvMoeA2ADispatchBlockSize() {
  static int const cachedBlockSize = [] {
    auto const requested = getIntEnv("TLLM_MOE_A2A_DISPATCH_BLOCK_SIZE");
    return sanitizeBlockSize(requested);
  }();
  return cachedBlockSize;
}

// Returns the block size configured through TLLM_MOE_A2A_COMBINE_BLOCK_SIZE.
// The variable is read with getIntEnv and passed through sanitizeBlockSize
// exactly once; the sanitized value is cached in a function-local static.
int getEnvMoeA2ACombineBlockSize() {
  static int const cachedBlockSize = [] {
    auto const requested = getIntEnv("TLLM_MOE_A2A_COMBINE_BLOCK_SIZE");
    return sanitizeBlockSize(requested);
  }();
  return cachedBlockSize;
}

// Reports the boolean value of TRTLLM_EPLB_FORCE_GDRCOPY as interpreted by
// getBoolEnv. The value is not cached: getBoolEnv is invoked on every call.
bool getEnvEplbForceGdrcopy() {
  bool const forceGdrcopy = getBoolEnv("TRTLLM_EPLB_FORCE_GDRCOPY");
  return forceGdrcopy;
}

} // namespace tensorrt_llm::common
11 changes: 10 additions & 1 deletion csrc/nv_internal/tensorrt_llm/common/envUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ bool getEnvDisableKVCacheTransferOverlap();

bool getEnvEnableReceiveKVCacheParallel();

std::string getEnvKVCacheTransferOutputPath();
std::string const& getEnvKVCacheTimeOutputPath();

bool getEnvTryZCopyForKVCacheTransfer();

Expand Down Expand Up @@ -92,4 +92,13 @@ size_t getEnvKVCacheSendMaxConcurrenceNum();

size_t getEnvMemSizeForKVCacheTransferBuffer();

// Whether to use one block per token for MoE A2A kernels (default true).
bool getEnvMoeA2AOneBlockPerToken();

// TODO: Temporary; for development purposes only.
// Block size (threads per block) for MoE A2A Dispatch kernels (default 256 if unset or invalid)
int getEnvMoeA2ADispatchBlockSize();
// Block size (threads per block) for MoE A2A Combine kernels (default 256 if unset or invalid)
int getEnvMoeA2ACombineBlockSize();

} // namespace tensorrt_llm::common
Loading