-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathrandom.h
209 lines (173 loc) · 7.24 KB
/
random.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
// Cut-down version of MIT-licensed UTL random module Copyright (c) 2023 Dmitri Bogdanov
// https://github.com/DmitriBogdanov/UTL/blob/master/docs/module_random.md
#ifndef RANDOM_H
#define RANDOM_H
#include <array>
#include <bit>
#include <cstdint>
#include <limits>
#include <type_traits>
namespace utlrandom {
// Helper method to crush large uints to uint32_t,
// inspired by Melissa E. O'Neil's randutils https://gist.github.com/imneme/540829265469e673d045
template <class T>
requires(std::is_integral_v<T> && sizeof(T) <= 8)
[[nodiscard]] constexpr auto _crush_to_uint32(T value) -> std::uint32_t {
if constexpr (sizeof(value) <= 4) {
return std::uint32_t(value);
} else {
std::uint64_t res = value;
res *= 0xbc2ad017d719504d;
return static_cast<std::uint32_t>(res ^ (res >> 32));
}
}
template <class ResultType>
[[nodiscard]] constexpr auto _mix_seed(ResultType seed) -> ResultType {
std::uint64_t state = (static_cast<std::uint64_t>(seed) + 0x9E3779B97f4A7C15);
state = (state ^ (state >> 30)) * 0xBF58476D1CE4E5B9;
state = (state ^ (state >> 27)) * 0x94D049BB133111EB;
return static_cast<ResultType>(state ^ (state >> 31));
// some of the 16/32-bit PRNGs have bad correlation on the successive seeds, this usually
// can be alleviated by using a signle iteration of a "good" PRNG to pre-mix the seed
}
template <class T>
constexpr T _default_seed = (std::numeric_limits<T>::max() / 2) + 1;
// an "overall decent" default seed - doesn't gave too many zeroes,
// unlikely to accidentaly match with a user-defined seed
namespace generators {
// Implementation of 32-bit splitmix adopted from MurmurHash3, based on paper by Guy L. Steele,
// Doug Lea, and Christine H. Flood. 2014. "Fast splittable pseudorandom number generators"
// see http://marc-b-reynolds.github.io/shf/2017/09/27/LPRNS.html
// https://gee.cs.oswego.edu/dl/papers/oopsla14.pdf
// https://github.com/umireon/my-random-stuff/blob/e7b17f992955f4dbb02d4016682113b48b2f6ec1/xorshift/splitmix32.c
//
// Performance: Excellent
// Quality: 3/5
// State: 4 bytes
//
// One of the fastest 32-bit generators that requires only a single 'std::uint32_t' of state,
// making it the smallest state available. Some other PRNGs recommend using it for seeding their state.
// 32-bit version is somewhat lacking in terms of quality estimate data (relative to the widely used
// 64-bit version), however it still seems to be quite decent.
//
class SplitMix32 {
public:
using result_type = std::uint64_t;
private:
result_type s{};
public:
constexpr explicit SplitMix32(result_type seed = _default_seed<result_type>) noexcept { this->seed(seed); }
[[nodiscard]] static constexpr auto min() noexcept -> result_type { return 0; }
[[nodiscard]] static constexpr auto max() noexcept -> result_type { return std::numeric_limits<result_type>::max(); }
constexpr void seed(result_type seed) noexcept {
this->s = _mix_seed(seed);
// naively seeded SplitMix32 has a horrible correlation between successive seeds, we can mostly alleviate
// the issue by pre-mixing the seed with a single iteration of a "better" 64-bit algorithm
}
constexpr auto operator()() noexcept -> result_type {
result_type result = (this->s += 0x9e3779b9);
result = (result ^ (result >> 16)) * 0x21f0aaad;
result = (result ^ (result >> 15)) * 0x735a2d97;
return result ^ (result >> 15);
}
};
// Implementation of Xoshiro128++ suggested by David Blackman and Sebastiano Vigna,
// see https://prng.di.unimi.it/
// https://prng.di.unimi.it/xoshiro256plusplus.c
//
// Performance: Good
// Quality: 4/5
// State: 16 bytes
//
// Excellent choice as a general purpose 32-bit PRNG.
// Battle-tested and provides a good statistical quality at an excellent speed.
//
class Xoshiro128PP {
public:
using result_type = std::uint32_t;
private:
std::array<result_type, 4> s{};
public:
constexpr explicit Xoshiro128PP(result_type seed = _default_seed<result_type>) noexcept { this->seed(seed); }
[[nodiscard]] static constexpr auto min() noexcept -> result_type { return 0; }
// while zero-state is considered invalid, PRNG can still produce 0 as a result
[[nodiscard]] static constexpr auto max() noexcept -> result_type { return std::numeric_limits<result_type>::max(); }
constexpr void seed(result_type seed) noexcept {
SplitMix32 splitmix{seed};
this->s[0] = splitmix(); // Xoshiro family recommends using
this->s[1] = splitmix(); // splitmix to initialize its state
this->s[2] = splitmix();
this->s[3] = splitmix();
}
constexpr auto operator()() noexcept -> result_type {
const result_type result = std::rotl(this->s[0] + this->s[3], 7) + this->s[0];
const result_type t = s[1] << 9;
this->s[2] ^= this->s[0];
this->s[3] ^= this->s[1];
this->s[1] ^= this->s[2];
this->s[0] ^= this->s[3];
this->s[2] ^= t;
this->s[3] = std::rotl(this->s[3], 11);
return result;
}
};
} // namespace generators
template <class Gen>
constexpr auto generate_canonical_float(Gen& gen) noexcept(noexcept(gen())) -> float {
using generated_type = typename Gen::result_type;
constexpr generated_type prng_min = Gen::min();
constexpr generated_type prng_max = Gen::max();
static_assert(prng_min < prng_max);
constexpr generated_type prng_range = prng_max - prng_min;
constexpr generated_type type_range = std::numeric_limits<generated_type>::max();
constexpr bool prng_is_bit_uniform = (prng_range == type_range);
constexpr int exponent_bits_32 = 8;
constexpr float mantissa_hex_32 = 0x1.0p-24F; // == 2^-24, corresponds to 24 significant bits of float
// Note 1: Note hexadecimal float literals, 'p' separates hex-base from the exponent
// Note 2: Floats have 'mantissa_size + 1' significant bits due to having a sign bit
static_assert(std::numeric_limits<float>::digits == 24, "Platform not supported, 'float' is expected to be 32-bit.");
static_assert(prng_is_bit_uniform && sizeof(float) == 4 && sizeof(generated_type) == 4);
// 32-bit float, 32-bit uniform PRNG
// => multiplication algorithm tweaked for 32-bit
return (gen() >> exponent_bits_32) * mantissa_hex_32;
}
} // namespace utlrandom
#ifndef GPU_ON
#include <limits>
#include <random>
inline thread_local auto rng{std::mt19937{std::random_device{}()}};
#else
inline thread_local auto rng{utlrandom::generators::Xoshiro128PP{}};
#endif
inline auto rng_seed(auto&& seedval) { rng.seed(seedval); }
[[nodiscard]] inline auto get_rng_random_seed() -> std::int64_t {
#ifndef GPU_ON
return std::random_device{}();
#else
return utlrandom::_crush_to_uint32(std::chrono::high_resolution_clock::now().time_since_epoch().count());
#endif
}
inline auto rng_uniform() -> float {
while (true) {
#ifndef GPU_ON
// use std::generate_canonical in CPU mode, because it seems to be faster
const auto zrand = std::generate_canonical<float, std::numeric_limits<float>::digits>(rng);
#else
// std::random can't be used in device code on nvc++ (long double not supported)
// so use custom generator instead
const auto zrand = utlrandom::generate_canonical_float(rng);
#endif
if (zrand != 1.) {
return zrand;
}
}
}
inline auto rng_uniform_pos() -> float {
while (true) {
const auto zrand = rng_uniform();
if (zrand > 0) {
return zrand;
}
}
}
#endif // RANDOM_H