Skip to content

Commit 92e4020

Browse files
committed
Add no-iostreams version
1 parent 4c0e74c commit 92e4020

File tree

1 file changed

+258
-0
lines changed

1 file changed

+258
-0
lines changed
Lines changed: 258 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,258 @@
1+
#include <algorithm>
2+
#include <array>
3+
#include <cstdint>
4+
#include <limits>
5+
#include <memory>
6+
#include <optional>
7+
#include <cstdio>
8+
9+
// This is a simple conway's game-of-life implementation
10+
// that is constexpr friendly and can work as a benchmark
11+
// for parallel computation models in C++
12+
//
13+
// Notes I learned along the way while learning AdaptiveCpp
14+
//
15+
// AMD GPU Install notes:
16+
// * AMD focuses on LTS ubuntu releases, if you have a different release,
17+
// expect a little pain
18+
// * I had good luck installing the AMDGPU Installer option here:
19+
// https://rocm.docs.amd.com/projects/install-on-linux/en/latest/tutorial/quick-start.html#amdgpu-ubuntu
20+
// * The rocm-gdb package would not install on my OS because of some outdated
21+
// dependencies
22+
// * The amdgpu-install tool will set up the apt repositories that you need
23+
// * If your OS is fully supported, just install the copy level package
24+
// * Honestly, I just kept installing random ROCm packages until I got things
25+
// working,
26+
// which was I think everything except for the gdb package that I could not
27+
// install
28+
//
29+
// After You've Installed ROCm
30+
// * add yourself to the render group
31+
// * consider rebooting probably
32+
// * run `rocminfo` and make sure it sees your GPUs
33+
//
34+
// Other GPUs:
35+
// * I have no input here
36+
//
37+
// Use the "automatic installation script" to install llvm >= 14
38+
// * https://apt.llvm.org/
39+
// * You probably want to install "all"
40+
// ```sh
41+
// wget https://apt.llvm.org/llvm.sh
42+
// chmod +x llvm.sh
43+
// sudo ./llvm.sh <version number> all
44+
// ```
45+
//
46+
// Now Build And Install AdaptiveCpp
47+
// * https://github.com/AdaptiveCpp/AdaptiveCpp/blob/develop/doc/installing.md#a-standard-installation
48+
// * Run `acpp-info` and make sure you get output similar to what `rocminfo`
49+
// gave you
50+
//
51+
// Install nvtop to monitor GPU usage and make sure this is doing what you want.
52+
//
53+
// To Compare with GCC
54+
// * install libtbb-dev
55+
//
56+
// Theoretically you are ready to go now?!
57+
//
58+
//
59+
// To compile with all optimizations and parallel std lib support enabled:
60+
//
61+
// ```sh
62+
// # AdaptiveCpp
63+
// acpp -std=c++23 ./game_of_life.cpp -O3 -march=native --acpp-stdpar
64+
//
65+
// # gcc/clang. If you don't have tbb installed/linked it falls back to single
66+
// # threaded silently
// g++ -std=c++23 ./game_of_life.cpp -O3 -march=native -ltbb
67+
// clang++ -std=c++23 ./game_of_life.cpp -O3 -march=native -ltbb
68+
//
69+
// # Depending on clang version you might need to add -fexperimental-library
70+
// ```
71+
//
72+
// Run, watch nvtop, htop, run with /usr/bin/time to see total CPU utilization,
73+
// etc and see how it scales on your platform
74+
75+
// Modulo that always lands in [0, divisor) for a positive divisor, even when
// the dividend is negative (plain `%` would give a negative remainder there).
// The first `%` brings the value into (-divisor, divisor); adding the divisor
// makes it non-negative and the second `%` folds it back into range.
template <typename Dividend, typename Divisor>
[[nodiscard]] constexpr auto floor_modulo(Dividend dividend, Divisor divisor) {
  const auto remainder = dividend % divisor;
  return (remainder + divisor) % divisor;
}
79+
80+
// Picks the narrowest signed fixed-width integer type whose maximum is at
// least `value`, and returns a value-initialized instance of it. Only the
// return *type* matters; it is consumed through `min_int_t` below.
//
// This is probably unnecessary, but it keeps the `Point` type as compact as
// possible (e.g. only int16 coordinates when that is all a board needs).
template <std::size_t value> auto min_int() {
  // Checked from the widest type down; the first limit that `value` exceeds
  // decides the result.
  if constexpr (value > std::numeric_limits<std::int32_t>::max()) {
    return std::int64_t{};
  } else if constexpr (value > std::numeric_limits<std::int16_t>::max()) {
    return std::int32_t{};
  } else if constexpr (value > std::numeric_limits<std::int8_t>::max()) {
    return std::int16_t{};
  } else {
    return std::int8_t{};
  }
}

// Convenience alias: the type selected by min_int<value>().
template <std::size_t value> using min_int_t = decltype(min_int<value>());
96+
97+
// templated on size mostly to give the compiler extra hints
98+
// about the code, so it knows what it can unroll, etc.
99+
template <std::size_t Width, std::size_t Height> struct GameBoard {
100+
// These are the properly sized things necessary to hold coordinates
101+
// that work with this particular size of board
102+
using x_index_t = min_int_t<Width>;
103+
using y_index_t = min_int_t<Height>;
104+
105+
static constexpr x_index_t width = Width;
106+
static constexpr y_index_t height = Height;
107+
108+
std::array<bool, Width * Height> data;
109+
110+
struct Point {
111+
x_index_t x;
112+
y_index_t y;
113+
[[nodiscard]] constexpr Point operator+(Point rhs) const {
114+
return Point{static_cast<x_index_t>(x + rhs.x),
115+
static_cast<y_index_t>(y + rhs.y)};
116+
}
117+
};
118+
119+
// The 8 relative positions for neighbors for a given point
120+
constexpr static std::array<Point, 8> neighbors{
121+
Point{-1, -1}, Point{0, -1}, Point{1, -1}, Point{-1, 0},
122+
Point{1, 0}, Point{-1, 1}, Point{0, 1}, Point{1, 1}};
123+
124+
// Takes the input point, wraps it vertically/horizontally and takes
125+
// the new location and maps that to the linear address of the point
126+
// in the underlying array
127+
[[nodiscard]] constexpr static std::size_t index(Point p) {
128+
return static_cast<std::size_t>(floor_modulo(p.y, height) * width +
129+
floor_modulo(p.x, width));
130+
}
131+
132+
[[nodiscard]] constexpr bool operator[](Point p) const noexcept {
133+
return data[index(p)];
134+
}
135+
136+
constexpr void set(Point p) noexcept { data[index(p)] = true; }
137+
138+
[[nodiscard]] constexpr std::size_t count_neighbors(Point p) const {
139+
return static_cast<std::size_t>(
140+
std::count_if(neighbors.begin(), neighbors.end(),
141+
[&](auto offset) { return (*this)[p + offset]; }));
142+
}
143+
144+
// Pre-compute all of the Point coordinates that exist in this particular
145+
// gameboard. We use this later to iterate over every location in the
146+
// gameboard.
147+
[[nodiscard]] static auto make_indexes() {
148+
auto result = std::make_unique<std::array<Point, Width * Height>>();
149+
150+
std::size_t output_index = 0;
151+
152+
for (y_index_t y = 0; y < height; ++y) {
153+
for (x_index_t x = 0; x < width; ++x) {
154+
(*result)[output_index] = Point{x, y};
155+
++output_index;
156+
}
157+
}
158+
return result;
159+
};
160+
161+
// https://en.wikipedia.org/wiki/Conway's_Game_of_Life#Examples_of_patterns
162+
163+
// Add a glider at a given location on the game board
164+
constexpr void add_glider(Point p) {
165+
set(p);
166+
set(p + Point{1, 1});
167+
set(p + Point{2, 1});
168+
set(p + Point{0, 2});
169+
set(p + Point{1, 2});
170+
}
171+
};
172+
173+
// Computes one Game of Life generation.
//
// For every entry of `indices`, in order, the next state of that cell is
// derived from `input` and written to consecutive elements of `output.data`.
// `input` is never modified, so the two boards must be distinct objects.
//
//   input   - board holding the current generation
//   output  - board whose `data` receives the next generation
//   indices - range of cell coordinates to evaluate; its iteration order
//             determines which element of `output.data` each result lands in
template <typename BoardType, typename Indices>
constexpr void iterate_board(const BoardType &input, BoardType &output,
                             Indices &indices) {

  // Conway's rules collapse to two cases: any cell with exactly three live
  // neighbors is alive next generation, and a live cell with exactly two
  // live neighbors survives. Everything else is dead.
  const auto rules = [&input](const auto &index) -> bool {
    const auto neighbor_count = input.count_neighbors(index);
    if (neighbor_count == 3) {
      return true;
    }
    return neighbor_count == 2 && input[index];
  };

  std::transform(indices.begin(), indices.end(), output.data.begin(), rules);
}
202+
203+
204+
// Dumps the board to stdout, one text line per board row: '*' for a set
// cell, ' ' for a clear one, and a newline after every row.
template <typename BoardType> auto print_board(const BoardType &board) {
  using point_type = typename BoardType::Point;

  for (int row = 0; row < board.height; ++row) {
    for (int column = 0; column < board.width; ++column) {
      const bool is_set = board[point_type(column, row)];
      putchar(is_set ? '*' : ' ');
    }
    putchar('\n');
  }
}
216+
217+
template <std::size_t Width, std::size_t Height, std::size_t Iterations>
218+
void run_board() {
219+
using board_type = GameBoard<Width, Height>;
220+
221+
// I would consider putting these on the stack, but the GPU engine
222+
// requires pointers that it knows how to work with. With AdaptiveCpp
223+
// it swaps out malloc and owns these pointers in a way that can be used
224+
// with the GPU automagically
225+
226+
auto board1 = std::make_unique<board_type>();
227+
board1->add_glider(typename board_type::Point(1, 3));
228+
board1->add_glider(typename board_type::Point(10, 1));
229+
auto board2 = std::make_unique<board_type>();
230+
231+
const auto indices = board_type::make_indexes();
232+
233+
{
234+
for (int i = 0; i < Iterations; ++i) {
235+
// just swapping buffers back and forth
236+
iterate_board(*board1, *board2, *indices);
237+
std::swap(board1, board2);
238+
}
239+
}
240+
241+
// this exists solely to make sure the compiler doesn't optimize out the
242+
// actual work
243+
if ((*board1)[typename board_type::Point(0, 0)]) {
244+
puts("0,0 is Set!");
245+
} else {
246+
puts("0,0 is Not Set!");
247+
}
248+
}
249+
250+
// Benchmark driver. Board sizes grow while the iteration counts shrink so
// that every run performs the same Width * Height * Iterations =
// 500,000,000 cell updates.
int main() {
  run_board<10, 10, 5'000'000>();  //         100 cells
  run_board<100, 10, 500'000>();   //       1,000 cells
  run_board<100, 100, 50'000>();   //      10,000 cells
  run_board<100, 1000, 5'000>();   //     100,000 cells
  run_board<1000, 1000, 500>();    //   1,000,000 cells
  run_board<10000, 1000, 50>();    //  10,000,000 cells
  run_board<10000, 10000, 5>();    // 100,000,000 cells
  return 0;
}

0 commit comments

Comments
 (0)