3 files changed, +20 -2 lines changed

@@ -495,4 +495,9 @@ int getEnvMoeA2ACombineBlockSize()
     return kBlock;
 }
 
+bool getEnvEplbForceGdrcopy()
+{
+    return getBoolEnv("TRTLLM_EPLB_FORCE_GDRCOPY");
+}
+
 } // namespace tensorrt_llm::common

@@ -147,4 +147,6 @@ int getEnvMoeA2ACombineBlockSize();
 
 bool getEnvKVCacheTransferAllBlocksForWindow();
 
+bool getEnvEplbForceGdrcopy();
+
 } // namespace tensorrt_llm::common

@@ -27,6 +27,7 @@
 #include "topologyDetector.h"
 
 #include "tensorrt_llm/common/cudaUtils.h"
+#include "tensorrt_llm/common/envUtils.h"
 #include "tensorrt_llm/common/logger.h"
 
 namespace tensorrt_llm::runtime
@@ -169,7 +170,8 @@ bool HostAccessibleDeviceAllocator::mAllowManagedFallback = false;
 
 bool HostAccessibleDeviceAllocator::isSupported()
 {
-    if (TopologyDetector::getInstance().getCurrentGpuMemoryNumaId() >= 0)
+    if (!tensorrt_llm::common::getEnvEplbForceGdrcopy()
+        && TopologyDetector::getInstance().getCurrentGpuMemoryNumaId() >= 0)
     {
         // we are on systems that GPU memory is also a NUMA node.
         return true;
@@ -195,7 +197,16 @@ void HostAccessibleDeviceAllocator::init()
     }
 
     TLLM_CUDA_CHECK(cudaGetDevice(&mDevId));
-    mGpuMemNumaId = TopologyDetector::getInstance().getCurrentGpuMemoryNumaId();
+    if (tensorrt_llm::common::getEnvEplbForceGdrcopy())
+    {
+        mGpuMemNumaId = -1;
+        TLLM_LOG_INFO("Force using GDRCopy for EPLB, ignore NUMA node for GPU memory.");
+    }
+    else
+    {
+        mGpuMemNumaId = TopologyDetector::getInstance().getCurrentGpuMemoryNumaId();
+    }
+
     if (mGpuMemNumaId < 0)
     {
         // We only use GDRCopy when there is no NUMA node for GPU memory.
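
For context, the new flag follows the same pattern as the other getEnv* helpers: a boolean read from the process environment via getBoolEnv(). The standalone sketch below is illustrative only; getBoolEnvSketch() and its "anything other than 0/false counts as set" semantics are assumptions, since the actual getBoolEnv() implementation is not part of this diff.

// Minimal standalone sketch (assumed semantics, not the actual envUtils code)
#include <cstdlib>
#include <cstring>
#include <iostream>

// Hypothetical stand-in for tensorrt_llm::common::getBoolEnv(): treat the flag
// as set when the variable exists and is not "0" or "false".
static bool getBoolEnvSketch(char const* name)
{
    char const* value = std::getenv(name);
    if (value == nullptr)
    {
        return false;
    }
    return std::strcmp(value, "0") != 0 && std::strcmp(value, "false") != 0;
}

int main()
{
    // Example invocation: TRTLLM_EPLB_FORCE_GDRCOPY=1 ./env_flag_demo
    bool const forceGdrcopy = getBoolEnvSketch("TRTLLM_EPLB_FORCE_GDRCOPY");
    std::cout << "force GDRCopy for EPLB: " << (forceGdrcopy ? "yes" : "no") << '\n';
    return 0;
}

With the variable set, init() skips the NUMA query and leaves mGpuMemNumaId at -1, so the allocator takes the GDRCopy path even on systems where GPU memory is exposed as a NUMA node.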