Skip to content

Commit 00eaf5f

Browse files
authored
[None][feat] add flag for EPLB to force using GDRCopy (#8650)
Signed-off-by: Dongxu Yang <[email protected]>
1 parent 19ca7b1 commit 00eaf5f

File tree

3 files changed

+20
-2
lines changed

3 files changed

+20
-2
lines changed

cpp/tensorrt_llm/common/envUtils.cpp

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -495,4 +495,9 @@ int getEnvMoeA2ACombineBlockSize()
495495
return kBlock;
496496
}
497497

498+
// Reads the TRTLLM_EPLB_FORCE_GDRCOPY environment flag through getBoolEnv and returns the result.
// In this commit, HostAccessibleDeviceAllocator consults it to skip the GPU-memory NUMA-node
// path and take the GDRCopy path instead.
// NOTE(review): the exact values getBoolEnv treats as "true" are not visible here; confirm in getBoolEnv.
bool getEnvEplbForceGdrcopy()
499+
{
500+
return getBoolEnv("TRTLLM_EPLB_FORCE_GDRCOPY");
501+
}
502+
498503
} // namespace tensorrt_llm::common

cpp/tensorrt_llm/common/envUtils.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -147,4 +147,6 @@ int getEnvMoeA2ACombineBlockSize();
147147

148148
bool getEnvKVCacheTransferAllBlocksForWindow();
149149

150+
// Whether EPLB should be forced to use GDRCopy (controlled by the TRTLLM_EPLB_FORCE_GDRCOPY environment variable).
bool getEnvEplbForceGdrcopy();
151+
150152
} // namespace tensorrt_llm::common

cpp/tensorrt_llm/runtime/moeLoadBalancer/hostAccessibleDeviceAllocator.cpp

Lines changed: 13 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,7 @@
2727
#include "topologyDetector.h"
2828

2929
#include "tensorrt_llm/common/cudaUtils.h"
30+
#include "tensorrt_llm/common/envUtils.h"
3031
#include "tensorrt_llm/common/logger.h"
3132

3233
namespace tensorrt_llm::runtime
@@ -169,7 +170,8 @@ bool HostAccessibleDeviceAllocator::mAllowManagedFallback = false;
169170

170171
bool HostAccessibleDeviceAllocator::isSupported()
171172
{
172-
if (TopologyDetector::getInstance().getCurrentGpuMemoryNumaId() >= 0)
173+
if (!tensorrt_llm::common::getEnvEplbForceGdrcopy()
174+
&& TopologyDetector::getInstance().getCurrentGpuMemoryNumaId() >= 0)
173175
{
174176
// We are on a system where GPU memory is also exposed as a NUMA node.
175177
return true;
@@ -195,7 +197,16 @@ void HostAccessibleDeviceAllocator::init()
195197
}
196198

197199
TLLM_CUDA_CHECK(cudaGetDevice(&mDevId));
198-
mGpuMemNumaId = TopologyDetector::getInstance().getCurrentGpuMemoryNumaId();
200+
if (tensorrt_llm::common::getEnvEplbForceGdrcopy())
201+
{
202+
mGpuMemNumaId = -1;
203+
TLLM_LOG_INFO("Force using GDRCopy for EPLB, ignore NUMA node for GPU memory.");
204+
}
205+
else
206+
{
207+
mGpuMemNumaId = TopologyDetector::getInstance().getCurrentGpuMemoryNumaId();
208+
}
209+
199210
if (mGpuMemNumaId < 0)
200211
{
201212
// We only use GDRCopy when there is no NUMA node for GPU memory.

0 commit comments

Comments
 (0)