File tree Expand file tree Collapse file tree 3 files changed +20
-2
lines changed Expand file tree Collapse file tree 3 files changed +20
-2
lines changed Original file line number Diff line number Diff line change @@ -450,4 +450,9 @@ bool getEnvDisableChunkedAttentionInGenPhase()
450450 return getBoolEnv (" TRTLLM_DISABLE_CHUNKED_ATTENTION_IN_GEN_PHASE" );
451451}
452452
// Returns the boolean value of the EPLB "force GDRCopy" environment switch, as parsed by getBoolEnv.
// In this change the allocator consults it in two places: isSupported() skips its GPU-memory NUMA
// shortcut when the switch is on, and init() sets mGpuMemNumaId to -1 instead of querying the
// topology detector, which sends it down the "no NUMA node for GPU memory" branch.
// NOTE(review): the leading space inside the string literal below looks like a page-extraction
// artifact rather than part of the real variable name -- confirm against the actual source file.
// NOTE(review): the value is re-read through getBoolEnv on every call here; whether getBoolEnv caches
// is not visible from this view.
453+ bool getEnvEplbForceGdrcopy ()
454+ {
455+ return getBoolEnv (" TRTLLM_EPLB_FORCE_GDRCOPY" );
456+ }
457+
453458} // namespace tensorrt_llm::common
Original file line number Diff line number Diff line change @@ -136,4 +136,6 @@ bool getEnvDisaggBenchmarkGenOnly();
136136// Whether to disable the chunked-attention in the generation phase.
137137bool getEnvDisableChunkedAttentionInGenPhase ();
138138
// Whether to force EPLB to use GDRCopy, ignoring any NUMA node reported for GPU memory.
139+ bool getEnvEplbForceGdrcopy ();
140+
139141} // namespace tensorrt_llm::common
Original file line number Diff line number Diff line change 2727#include " topologyDetector.h"
2828
2929#include " tensorrt_llm/common/cudaUtils.h"
30+ #include " tensorrt_llm/common/envUtils.h"
3031#include " tensorrt_llm/common/logger.h"
3132
3233namespace tensorrt_llm ::runtime
@@ -169,7 +170,8 @@ bool HostAccessibleDeviceAllocator::mAllowManagedFallback = false;
169170
170171bool HostAccessibleDeviceAllocator::isSupported ()
171172{
172- if (TopologyDetector::getInstance ().getCurrentGpuMemoryNumaId () >= 0 )
173+ if (!tensorrt_llm::common::getEnvEplbForceGdrcopy ()
174+ && TopologyDetector::getInstance ().getCurrentGpuMemoryNumaId () >= 0 )
173175 {
174176 // we are on systems that GPU memory is also a NUMA node.
175177 return true ;
@@ -195,7 +197,16 @@ void HostAccessibleDeviceAllocator::init()
195197 }
196198
197199 TLLM_CUDA_CHECK (cudaGetDevice (&mDevId ));
198- mGpuMemNumaId = TopologyDetector::getInstance ().getCurrentGpuMemoryNumaId ();
200+ if (tensorrt_llm::common::getEnvEplbForceGdrcopy ())
201+ {
202+ mGpuMemNumaId = -1 ;
203+ TLLM_LOG_INFO (" Force using GDRCopy for EPLB, ignore NUMA node for GPU memory." );
204+ }
205+ else
206+ {
207+ mGpuMemNumaId = TopologyDetector::getInstance ().getCurrentGpuMemoryNumaId ();
208+ }
209+
199210 if (mGpuMemNumaId < 0 )
200211 {
201212 // We only use GDRCopy when there is no NUMA node for GPU memory.
You can’t perform that action at this time.
0 commit comments