@@ -25,9 +25,32 @@ void RTDEF(CUFLaunchKernel)(const void *kernel, intptr_t gridX, intptr_t gridY,
25
25
blockDim.x = blockX;
26
26
blockDim.y = blockY;
27
27
blockDim.z = blockZ;
28
- cudaStream_t stream = 0 ;
28
+ cudaStream_t stream = 0 ; // TODO stream management
29
29
CUDA_REPORT_IF_ERROR (
30
30
cudaLaunchKernel (kernel, gridDim, blockDim, params, smem, stream));
31
31
}
32
32
33
+ void RTDEF (CUFLaunchClusterKernel)(const void *kernel, intptr_t clusterX,
34
+ intptr_t clusterY, intptr_t clusterZ, intptr_t gridX, intptr_t gridY,
35
+ intptr_t gridZ, intptr_t blockX, intptr_t blockY, intptr_t blockZ,
36
+ int32_t smem, void **params, void **extra) {
37
+ cudaLaunchConfig_t config;
38
+ config.gridDim .x = gridX;
39
+ config.gridDim .y = gridY;
40
+ config.gridDim .z = gridZ;
41
+ config.blockDim .x = blockX;
42
+ config.blockDim .y = blockY;
43
+ config.blockDim .z = blockZ;
44
+ config.dynamicSmemBytes = smem;
45
+ config.stream = 0 ; // TODO stream managment
46
+ cudaLaunchAttribute launchAttr[1 ];
47
+ launchAttr[0 ].id = cudaLaunchAttributeClusterDimension;
48
+ launchAttr[0 ].val .clusterDim .x = clusterX;
49
+ launchAttr[0 ].val .clusterDim .y = clusterY;
50
+ launchAttr[0 ].val .clusterDim .z = clusterZ;
51
+ config.numAttrs = 1 ;
52
+ config.attrs = launchAttr;
53
+ CUDA_REPORT_IF_ERROR (cudaLaunchKernelExC (&config, kernel, params));
54
+ }
55
+
33
56
} // extern "C"
0 commit comments