Skip to content

Commit 198e041

Browse files
committed
add flag for EPLB to force using GDRCopy
Signed-off-by: Dongxu Yang <[email protected]>
1 parent a7c2c8c commit 198e041

File tree

3 files changed

+20
-2
lines changed

3 files changed

+20
-2
lines changed

cpp/tensorrt_llm/common/envUtils.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -450,4 +450,9 @@ bool getEnvDisableChunkedAttentionInGenPhase()
450450
return getBoolEnv("TRTLLM_DISABLE_CHUNKED_ATTENTION_IN_GEN_PHASE");
451451
}
452452

453+
// Whether EPLB is forced to use GDRCopy. Reads the TRTLLM_EPLB_FORCE_GDRCOPY environment
// variable through getBoolEnv. When it is set, HostAccessibleDeviceAllocator skips the
// "GPU memory is a NUMA node" check and treats the GPU memory NUMA id as -1.
bool getEnvEplbForceGdrcopy()
454+
{
455+
return getBoolEnv("TRTLLM_EPLB_FORCE_GDRCOPY");
456+
}
457+
453458
} // namespace tensorrt_llm::common

cpp/tensorrt_llm/common/envUtils.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,4 +136,6 @@ bool getEnvDisaggBenchmarkGenOnly();
136136
// Whether to disable the chunked-attention in the generation phase.
137137
bool getEnvDisableChunkedAttentionInGenPhase();
138138

139+
// Whether to force EPLB to use GDRCopy, ignoring any NUMA node for GPU memory.
bool getEnvEplbForceGdrcopy();
140+
139141
} // namespace tensorrt_llm::common

cpp/tensorrt_llm/runtime/moeLoadBalancer/hostAccessibleDeviceAllocator.cpp

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include "topologyDetector.h"
2828

2929
#include "tensorrt_llm/common/cudaUtils.h"
30+
#include "tensorrt_llm/common/envUtils.h"
3031
#include "tensorrt_llm/common/logger.h"
3132

3233
namespace tensorrt_llm::runtime
@@ -169,7 +170,8 @@ bool HostAccessibleDeviceAllocator::mAllowManagedFallback = false;
169170

170171
bool HostAccessibleDeviceAllocator::isSupported()
171172
{
172-
if (TopologyDetector::getInstance().getCurrentGpuMemoryNumaId() >= 0)
173+
if (!tensorrt_llm::common::getEnvEplbForceGdrcopy()
174+
&& TopologyDetector::getInstance().getCurrentGpuMemoryNumaId() >= 0)
173175
{
174176
// we are on systems that GPU memory is also a NUMA node.
175177
return true;
@@ -195,7 +197,16 @@ void HostAccessibleDeviceAllocator::init()
195197
}
196198

197199
TLLM_CUDA_CHECK(cudaGetDevice(&mDevId));
198-
mGpuMemNumaId = TopologyDetector::getInstance().getCurrentGpuMemoryNumaId();
200+
if (tensorrt_llm::common::getEnvEplbForceGdrcopy())
201+
{
202+
mGpuMemNumaId = -1;
203+
TLLM_LOG_INFO("Force using GDRCopy for EPLB, ignore NUMA node for GPU memory.");
204+
}
205+
else
206+
{
207+
mGpuMemNumaId = TopologyDetector::getInstance().getCurrentGpuMemoryNumaId();
208+
}
209+
199210
if (mGpuMemNumaId < 0)
200211
{
201212
// We only use GDRCopy when there is no NUMA node for GPU memory.

0 commit comments

Comments
 (0)