|
| 1 | + |
| 2 | +#include "kul/gpu.hpp" |
| 3 | + |
// Problem size: a WIDTH x HEIGHT grid of elements, flattened to NUM values.
static constexpr size_t WIDTH  = 1024;
static constexpr size_t HEIGHT = 1024;
static constexpr size_t NUM    = WIDTH * HEIGHT;

// Threads per block along each launch dimension.
static constexpr size_t THREADS_PER_BLOCK_X = 16;
static constexpr size_t THREADS_PER_BLOCK_Y = 16;

// The kernel in this file indexes its buffers without a bounds check, so the
// launch must cover the problem exactly: each dimension has to be a whole
// number of blocks.
// NOTE(review): assumes kul::gpu::Launcher tiles WIDTH x HEIGHT threads into
// blocks of THREADS_PER_BLOCK_X x THREADS_PER_BLOCK_Y - confirm against kul::gpu.
static_assert(THREADS_PER_BLOCK_X > 0 && THREADS_PER_BLOCK_Y > 0,
              "block dimensions must be non-zero");
static_assert(WIDTH % THREADS_PER_BLOCK_X == 0, "WIDTH must be a multiple of THREADS_PER_BLOCK_X");
static_assert(HEIGHT % THREADS_PER_BLOCK_Y == 0,
              "HEIGHT must be a multiple of THREADS_PER_BLOCK_Y");

// Element indices are held in an `int` by the kernel and the element count is
// passed to DevClass as a std::uint32_t, so NUM has to stay below 2^31.
static_assert(NUM < (size_t{1} << 31), "NUM must fit in a 32-bit signed index");
| 7 | + |
| 8 | +template<typename Float, bool GPU = false> |
| 9 | +struct DevClass : kul::gpu::DeviceClass<GPU> |
| 10 | +{ |
| 11 | + using Super = kul::gpu::DeviceClass<GPU>; |
| 12 | + using gpu_t = DevClass<Float, true>; |
| 13 | + |
| 14 | + template<typename T> |
| 15 | + using container_t = typename Super::template container_t<T>; |
| 16 | + |
| 17 | + template<bool gpu = GPU, std::enable_if_t<!gpu, bool> = 0> |
| 18 | + DevClass(std::uint32_t nbr) |
| 19 | + : data{nbr} |
| 20 | + { |
| 21 | + } |
| 22 | + |
| 23 | + template<bool gpu = GPU, std::enable_if_t<!gpu, bool> = 0> |
| 24 | + DevClass(std::vector<Float> const& in) |
| 25 | + : data{in} |
| 26 | + { |
| 27 | + } |
| 28 | + |
| 29 | + template<bool gpu = GPU, std::enable_if_t<!gpu, bool> = 0> |
| 30 | + auto operator()() |
| 31 | + { |
| 32 | + return Super::template alloc<gpu_t>(data); |
| 33 | + } |
| 34 | + |
| 35 | + template<bool gpu = GPU, std::enable_if_t<gpu, bool> = 0> |
| 36 | + auto& operator[](int i) __device__ { return data[i]; } |
| 37 | + template<bool gpu = GPU, std::enable_if_t<gpu, bool> = 0> |
| 38 | + auto const& operator[](int i) const __device__ { return data[i]; } |
| 39 | + |
| 40 | + container_t<Float> data; |
| 41 | +}; |
| 42 | + |
| 43 | +template <typename T> |
| 44 | +using GPUClass = typename ::DevClass<T>::gpu_t; |
| 45 | + |
| 46 | +template <typename T> |
| 47 | +__global__ void vectoradd(GPUClass<T>* a, GPUClass<T> const* b, GPUClass<T> const* c) { |
| 48 | + int i = kul::gpu::idx(); |
| 49 | + (*a)[i] = (*b)[i] + (*c)[i]; |
| 50 | +} |
| 51 | + |
| 52 | +template<typename Float> |
| 53 | +size_t test(){ |
| 54 | + std::vector<Float> hostB(NUM), hostC(NUM); |
| 55 | + for (size_t i = 0; i < NUM; i++) hostB[i] = i; |
| 56 | + for (size_t i = 0; i < NUM; i++) hostC[i] = i * 100.0f; |
| 57 | + DevClass<Float> devA(NUM), devB(hostB), devC(hostC); |
| 58 | + kul::gpu::Launcher{WIDTH, HEIGHT, THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y}( |
| 59 | + vectoradd<Float>, devA(), devB(), devC()); |
| 60 | + auto hostA = devA.data(); |
| 61 | + for (size_t i = 0; i < NUM; i++) |
| 62 | + if (hostA[i] != (hostB[i] + hostC[i])) return 1; |
| 63 | + return 0; |
| 64 | +} |
| 65 | + |
| 66 | +int main() { |
| 67 | + kul::gpu::prinfo(); |
| 68 | + return test<float>() + test<double>(); |
| 69 | +} |
0 commit comments