Skip to content

Commit a77be09

Browse files
authored
next (#19)
1 parent 8b44c8c commit a77be09

26 files changed

+1145
-161
lines changed

.github/workflows/build.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,10 @@ jobs:
1010
build:
1111
runs-on: ubuntu-latest
1212
steps:
13-
- uses: actions/checkout@v2
13+
- uses: actions/checkout@v4
1414

1515
- name: test
1616
run: |
1717
curl -Lo mkn https://github.com/mkn/mkn/releases/download/latest/mkn_nix
1818
chmod +x mkn
19-
KLOG=3 ./mkn clean build run -dtKOp cpu -a "-std=c++17" -g 0 test -W 9
19+
KLOG=3 ./mkn clean build run -dtKOgp cpu -a "-std=c++20" test -W 9

.sublime-project

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
{
1010
"ClangFormat":
1111
{
12-
"binary": "clang-format-15",
12+
"binary": "clang-format",
1313
"format_on_save": true,
1414
"style": "file"
1515
},

LICENSE.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
Copyright (c) 2020, Philip Deegan.
1+
Copyright (c) 2024, Philip Deegan.
22
All rights reserved.
33

44
Redistribution and use in source and binary forms, with or without

inc/mkn/gpu.hpp

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/**
2-
Copyright (c) 2020, Philip Deegan.
2+
Copyright (c) 2024, Philip Deegan.
33
All rights reserved.
44
55
Redistribution and use in source and binary forms, with or without
@@ -31,27 +31,23 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3131
#ifndef _MKN_GPU_HPP_
3232
#define _MKN_GPU_HPP_
3333

34-
#if defined(MKN_GPU_ROCM)
35-
#include "mkn/gpu/rocm.hpp"
36-
#elif defined(MKN_GPU_CUDA)
37-
#include "mkn/gpu/cuda.hpp"
38-
#elif defined(MKN_GPU_CPU)
39-
#include "mkn/gpu/cpu.hpp"
40-
#elif !defined(MKN_GPU_FN_PER_NS) || MKN_GPU_FN_PER_NS == 0
41-
#error "UNKNOWN GPU / define MKN_GPU_ROCM or MKN_GPU_CUDA"
42-
#endif
34+
#include "mkn/gpu/defines.hpp"
4335

4436
namespace mkn::gpu {
4537

4638
__device__ uint32_t idx() {
47-
#if defined(MKN_GPU_ROCM)
39+
#if MKN_GPU_ROCM
4840
return mkn::gpu::hip::idx();
49-
#elif defined(MKN_GPU_CUDA)
41+
42+
#elif MKN_GPU_CUDA
5043
return mkn::gpu::cuda::idx();
51-
#elif defined(MKN_GPU_CPU)
44+
45+
#elif MKN_GPU_CPU
5246
return mkn::gpu::cpu::idx();
47+
5348
#else
5449
#error "UNKNOWN GPU / define MKN_GPU_ROCM or MKN_GPU_CUDA"
50+
5551
#endif
5652
}
5753

inc/mkn/gpu/alloc.hpp

Lines changed: 84 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/**
2-
Copyright (c) 2020, Philip Deegan.
2+
Copyright (c) 2024, Philip Deegan.
33
All rights reserved.
44
55
Redistribution and use in source and binary forms, with or without
@@ -31,9 +31,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3131
#ifndef _MKN_GPU_ALLOC_HPP_
3232
#define _MKN_GPU_ALLOC_HPP_
3333

34-
template <typename T, std::int32_t alignment = 32>
35-
class ManagedAllocator {
36-
using This = ManagedAllocator<T, alignment>;
34+
template <typename T, std::int32_t alignment>
35+
class MknGPUAllocator {
36+
using This = MknGPUAllocator<T, alignment>;
3737

3838
public:
3939
using pointer = T*;
@@ -44,7 +44,7 @@ class ManagedAllocator {
4444

4545
template <typename U>
4646
struct rebind {
47-
using other = ManagedAllocator<U, alignment>;
47+
using other = MknGPUAllocator<U, alignment>;
4848
};
4949

5050
T* allocate(std::size_t const n) const {
@@ -70,20 +70,91 @@ class ManagedAllocator {
7070
}
7171
};
7272

73+
template <typename T, std::int32_t alignment = 32>
74+
class NoConstructAllocator : public MknGPUAllocator<T, alignment> {
75+
public:
76+
template <typename U>
77+
struct rebind {
78+
using other = NoConstructAllocator<U, alignment>;
79+
};
80+
81+
template <typename U, typename... Args>
82+
void construct(U* /*ptr*/, Args&&... /*args*/) {} // nothing
83+
template <typename U>
84+
void construct(U* /*ptr*/) noexcept(std::is_nothrow_default_constructible<U>::value) {}
85+
};
86+
87+
template <typename T, std::int32_t align>
88+
std::vector<T, MknGPUAllocator<T, align>>& as_super(
89+
std::vector<T, NoConstructAllocator<T, align>>& v) {
90+
return *reinterpret_cast<std::vector<T, MknGPUAllocator<T, align>>*>(&v);
91+
}
92+
93+
template <typename T, std::int32_t alignment = 32>
94+
class ManagedAllocator : public MknGPUAllocator<T, alignment> {
95+
public:
96+
template <typename U>
97+
struct rebind {
98+
using other = ManagedAllocator<U, alignment>;
99+
};
100+
};
101+
102+
template <typename T, std::int32_t align>
103+
std::vector<T, MknGPUAllocator<T, align>>& as_super(std::vector<T, ManagedAllocator<T, align>>& v) {
104+
return *reinterpret_cast<std::vector<T, MknGPUAllocator<T, align>>*>(&v);
105+
}
106+
73107
template <typename T, typename Size>
74-
void copy(T* const dst, T const* const src, Size size) {
75-
auto dst_p = Pointer{dst};
76-
auto src_p = Pointer{src};
108+
void copy(T* dst, T* src, Size size) {
109+
assert(dst and src);
77110

78-
bool to_send = dst_p.is_device_ptr() && src_p.is_host_ptr();
79-
bool to_take = dst_p.is_host_ptr() && src_p.is_device_ptr();
111+
Pointer src_p{src};
112+
Pointer dst_p{dst};
80113

81-
if (to_send)
114+
auto to_send = [&]() { return dst_p.is_device_ptr() && src_p.is_host_ptr(); };
115+
auto to_take = [&]() { return dst_p.is_host_ptr() && src_p.is_device_ptr(); };
116+
auto on_host = [&]() { return dst_p.is_host_ptr() && src_p.is_host_ptr(); };
117+
auto on_device = [&]() { return dst_p.is_device_ptr() && src_p.is_device_ptr(); };
118+
119+
if (on_host())
120+
std::copy(src, src + size, dst);
121+
else if (on_device())
122+
copy_on_device(dst, src, size);
123+
else if (to_send())
82124
send(dst, src, size);
83-
else if (to_take)
84-
take(dst, src, size);
125+
else if (to_take())
126+
take(src, dst, size);
85127
else
86128
throw std::runtime_error("Unsupported operation (PR welcome)");
87129
}
88130

131+
template <typename T, std::int32_t align>
132+
auto& reserve(std::vector<T, NoConstructAllocator<T, align>>& v, std::size_t const& s,
133+
bool mem_copy = true) {
134+
if (s <= v.capacity()) {
135+
v.reserve(s);
136+
return v;
137+
}
138+
std::vector<T, NoConstructAllocator<T, align>> cpy{NoConstructAllocator<T, align>{}};
139+
cpy.reserve(s);
140+
cpy.resize(v.size());
141+
if (mem_copy and v.size()) copy(cpy.data(), v.data(), v.size());
142+
v = std::move(cpy);
143+
return v;
144+
}
145+
146+
template <typename T, std::int32_t align>
147+
auto& resize(std::vector<T, NoConstructAllocator<T, align>>& v, std::size_t const& s,
148+
bool mem_copy = true) {
149+
if (s <= v.capacity()) {
150+
v.resize(s);
151+
return v;
152+
}
153+
std::vector<T, NoConstructAllocator<T, align>> cpy{NoConstructAllocator<T, align>{}};
154+
cpy.resize(s);
155+
if (mem_copy and v.size()) copy(cpy.data(), v.data(), v.size());
156+
v = std::move(cpy);
157+
return v;
158+
}
159+
89160
#endif /* _MKN_GPU_ALLOC_HPP_ */

inc/mkn/gpu/asio.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/**
2-
Copyright (c) 2020, Philip Deegan.
2+
Copyright (c) 2024, Philip Deegan.
33
All rights reserved.
44
55
Redistribution and use in source and binary forms, with or without

inc/mkn/gpu/cli.hpp

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
/**
2+
Copyright (c) 2024, Philip Deegan.
3+
All rights reserved.
4+
5+
Redistribution and use in source and binary forms, with or without
6+
modification, are permitted provided that the following conditions are
7+
met:
8+
9+
* Redistributions of source code must retain the above copyright
10+
notice, this list of conditions and the following disclaimer.
11+
* Redistributions in binary form must reproduce the above
12+
copyright notice, this list of conditions and the following disclaimer
13+
in the documentation and/or other materials provided with the
14+
distribution.
15+
* Neither the name of Philip Deegan nor the names of its
16+
contributors may be used to endorse or promote products derived from
17+
this software without specific prior written permission.
18+
19+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20+
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23+
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30+
*/
31+
// IWYU pragma: private, include "mkn/gpu.hpp"
32+
#ifndef _MKN_GPU_CLI_HPP_
33+
#define _MKN_GPU_CLI_HPP_
34+
35+
#include "mkn/kul/env.hpp"
36+
#include "mkn/kul/string.hpp"
37+
38+
namespace mkn::gpu {
39+
40+
template <typename Device>
41+
struct Cli {
42+
constexpr static inline char const* MKN_GPU_BX_THREADS = "MKN_GPU_BX_THREADS";
43+
44+
auto bx_threads() const {
45+
if (kul::env::EXISTS(MKN_GPU_BX_THREADS))
46+
return kul::String::INT32(kul::env::GET(MKN_GPU_BX_THREADS));
47+
return dev.maxThreadsPerBlock;
48+
}
49+
50+
Device const& dev;
51+
};
52+
53+
} /* namespace mkn::gpu */
54+
55+
#endif /*_MKN_GPU_CLI_HPP_*/

inc/mkn/gpu/cpu.hpp

Lines changed: 40 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/**
2-
Copyright (c) 2020, Philip Deegan.
2+
Copyright (c) 2024, Philip Deegan.
33
All rights reserved.
44
55
Redistribution and use in source and binary forms, with or without
@@ -38,6 +38,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3838
#include "mkn/kul/assert.hpp"
3939
#include "mkn/kul/threads.hpp"
4040

41+
#include "mkn/gpu/cli.hpp"
4142
#include "mkn/gpu/def.hpp"
4243

4344
#include <cassert>
@@ -82,6 +83,15 @@ struct dim3 {
8283
std::size_t x = 1, y = 1, z = 1;
8384
};
8485

86+
void setLimitMallocHeapSize(std::size_t const& /*bytes*/) {} /*noop*/
87+
88+
void setDevice(std::size_t const& /*dev*/) {} /*noop*/
89+
90+
auto supportsCooperativeLaunch(int const /*dev*/ = 0) {
91+
int supportsCoopLaunch = 0;
92+
return supportsCoopLaunch;
93+
}
94+
8595
struct Stream {
8696
Stream() {}
8797
~Stream() {}
@@ -93,6 +103,24 @@ struct Stream {
93103
std::size_t stream = 0;
94104
};
95105

106+
struct StreamEvent {
107+
StreamEvent(Stream&) {}
108+
~StreamEvent() {}
109+
110+
auto& operator()() { return event; };
111+
auto& record() {
112+
++stage;
113+
return *this;
114+
}
115+
auto& wait() { return *this; }
116+
bool finished() const { return stage == 2; }
117+
void reset() { stage = 0; }
118+
119+
Stream stream;
120+
std::size_t event = 0;
121+
std::uint16_t stage = 0;
122+
};
123+
96124
template <typename T>
97125
struct Pointer {
98126
Pointer(T* _t) : t{_t} {}
@@ -129,7 +157,7 @@ void alloc_managed(T*& p, Size size) {
129157
MKN_GPU_ASSERT(p = reinterpret_cast<T*>(std::malloc(size * sizeof(T))));
130158
}
131159

132-
void destroy(void* p) {
160+
void inline destroy(void* p) {
133161
KLOG(TRC);
134162
std::free(p);
135163
}
@@ -146,6 +174,12 @@ void destroy_host(T*& p) {
146174
std::free(p);
147175
}
148176

177+
template <typename T, typename Size>
178+
void copy_on_device(T* dst, T const* src, Size size = 1) {
179+
KLOG(TRC);
180+
MKN_GPU_ASSERT(std::memcpy(dst, src, size * sizeof(T)));
181+
}
182+
149183
template <typename Size>
150184
void send(void* p, void* t, Size size = 1) {
151185
KLOG(TRC);
@@ -177,7 +211,7 @@ void take_async(T* p, Span& span, Stream& /*stream*/, std::size_t start) {
177211
take(p, span.data(), span.size(), start);
178212
}
179213

180-
void sync() {}
214+
void inline sync() {}
181215

182216
#include "mkn/gpu/alloc.hpp"
183217
#include "mkn/gpu/device.hpp"
@@ -186,7 +220,7 @@ namespace detail {
186220
static thread_local std::size_t idx = 0;
187221
}
188222

189-
template <typename F, typename... Args>
223+
template <bool _sync = true, bool _coop = false, typename F, typename... Args>
190224
void launch(F f, dim3 g, dim3 b, std::size_t /*ds*/, std::size_t /*stream*/, Args&&... args) {
191225
std::size_t N = (g.x * g.y * g.z) * (b.x * b.y * b.z);
192226
KLOG(TRC) << N;
@@ -252,6 +286,8 @@ static void global_gd_kernel(F& f, std::size_t s, Args... args) {
252286

253287
#include "launchers.hpp"
254288

289+
void grid_sync() {}
290+
255291
} /* namespace MKN_GPU_NS */
256292

257293
#undef MKN_GPU_ASSERT

0 commit comments

Comments
 (0)