Skip to content

Commit c500e1d

Browse files
authored
Merge pull request #70 from InfiniTensor/fix_alignment
查询获取设备对齐值
2 parents 0bc8305 + 322835d commit c500e1d

File tree

1 file changed

+7
-4
lines changed

1 file changed

+7
-4
lines changed

src/02hardware/src/devices/nvidia/device.cc

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -24,20 +24,23 @@ namespace refactor::hardware {
2424
size_t free, total;
2525
CUDA_ASSERT(cudaMemGetInfo(&free, &total));
2626
auto size = std::min(free, std::max(5ul << 30, total * 4 / 5));
27-
fmt::println("initializing Nvidia GPU {}, memory {} / {}, alloc {}",
28-
card, free, total, size);
27+
cudaDeviceProp prop;
28+
CUDA_ASSERT(cudaGetDeviceProperties(&prop, 0));
29+
size_t alignment = prop.textureAlignment;
30+
fmt::println("initializing Nvidia GPU {}, memory {} / {}, alloc {}, alignment {}",
31+
card, free, total, size, alignment);
2932
return std::make_shared<MemPool>(
3033
std::make_shared<NvidiaMemory>(),
3134
size,
32-
256ul);
35+
alignment);
3336
#else
3437
RUNTIME_ERROR("CUDA is not enabled");
3538
#endif
3639
}
3740

3841
/// Constructs an Nvidia device for the given `card` index.
/// The Device base is initialized with `card` and the result of `cudaMemory(card)`.
Nvidia::Nvidia(int32_t card) : Device(card, cudaMemory(card)) {}
3942

40-
void Nvidia::setContext() const {
43+
void Nvidia::setContext() const {
4144
#ifdef USE_CUDA
4245
CUDA_ASSERT(cudaSetDevice(_card));
4346
#endif

0 commit comments

Comments
 (0)