|
| 1 | +#ifndef CP_ALGO_MATH_SIEVE_HPP |
| 2 | +#define CP_ALGO_MATH_SIEVE_HPP |
| 3 | +#include "../structures/bit_array.hpp" |
| 4 | +#include "../structures/bit_array_util.hpp" |
| 5 | +#include "../util/bit.hpp" |
| 6 | +#include "../util/checkpoint.hpp" |
| 7 | +#include <cstdint> |
| 8 | +#include <cstddef> |
| 9 | +#include <vector> |
| 10 | +#include <span> |
| 11 | +#include <algorithm> |
| 12 | +#include <cassert> |
| 13 | + |
| 14 | +CP_ALGO_BIT_PRAGMA_PUSH |
| 15 | +namespace cp_algo::math { |
| 16 | + using cp_algo::structures::dynamic_bit_array; |
| 17 | + |
| 18 | + constexpr size_t base_threshold = 1 << 20; |
| 19 | + |
| 20 | + const auto base_prime_bits = []() { |
| 21 | + dynamic_bit_array prime(base_threshold); |
| 22 | + prime.set_all(0xAAAAAAAAAAAAAAAA); // set all odd numbers |
| 23 | + prime.reset(1); |
| 24 | + prime.set(2); |
| 25 | + for(size_t i = 3; i * i < base_threshold; i += 2) { |
| 26 | + if (prime[i]) { |
| 27 | + for(size_t j = i * i; j < base_threshold; j += 2 * i) { |
| 28 | + prime.reset(j); |
| 29 | + } |
| 30 | + } |
| 31 | + } |
| 32 | + return prime; |
| 33 | + }(); |
| 34 | + |
| 35 | + const auto base_primes = []() { |
| 36 | + cp_algo::big_vector<uint32_t> primes = {2}; |
| 37 | + for(uint32_t i = 3; i < base_threshold; i += 2) { |
| 38 | + if (base_prime_bits[i]) { |
| 39 | + primes.push_back(i); |
| 40 | + } |
| 41 | + } |
| 42 | + return primes; |
| 43 | + }(); |
| 44 | + |
| 45 | + constexpr size_t max_wheel_size = 1 << 20; |
| 46 | + struct wheel_t { |
| 47 | + dynamic_bit_array mask; |
| 48 | + uint32_t product = 1; |
| 49 | + }; |
| 50 | + |
| 51 | + inline auto make_wheel(std::vector<uint32_t> primes, uint32_t product) { |
| 52 | + assert(product % 64 == 0); |
| 53 | + wheel_t wheel; |
| 54 | + wheel.product = product; |
| 55 | + wheel.mask.resize(product); |
| 56 | + wheel.mask.set_all(); |
| 57 | + for(auto p: primes) { |
| 58 | + for(size_t j = 0; j < wheel.mask.size(); j += p) { |
| 59 | + wheel.mask.reset(j); |
| 60 | + } |
| 61 | + } |
| 62 | + return wheel; |
| 63 | + } |
| 64 | + |
| 65 | + constexpr size_t sqrt_threshold = 1 << 16; |
| 66 | + const auto sqrt_primes = std::span( |
| 67 | + begin(base_primes), |
| 68 | + std::ranges::upper_bound(base_primes, sqrt_threshold) |
| 69 | + ); |
| 70 | + |
| 71 | + inline std::span<uint32_t const> medium_primes = sqrt_primes; |
| 72 | + inline std::vector<wheel_t> wheels; |
| 73 | + |
| 74 | + void gen_wheels(); |
| 75 | + |
| 76 | + inline void sieve_dense(auto &prime, uint32_t l, uint32_t r, wheel_t const& wheel) { |
| 77 | + if (l >= r) return; |
| 78 | + uint32_t wl = l / (uint32_t)dynamic_bit_array::width; |
| 79 | + uint32_t wr = r / (uint32_t)dynamic_bit_array::width + 1; |
| 80 | + uint32_t N = (uint32_t)wheel.mask.words; |
| 81 | + for(uint32_t i = wl; i < wr; i += N) { |
| 82 | + auto block = std::min(N, wr - i); |
| 83 | + for(uint32_t j = 0; j < block; j++) { |
| 84 | + prime.word(i + j) &= wheel.mask.word(j); |
| 85 | + } |
| 86 | + } |
| 87 | + } |
| 88 | + |
| 89 | + constexpr auto add210 = []() { |
| 90 | + std::array<uint8_t, 210> add; |
| 91 | + auto good = [&](int x) { |
| 92 | + return x % 2 && x % 3 && x % 5 && x % 7; |
| 93 | + }; |
| 94 | + for(int i = 0; i < 210; i++) { |
| 95 | + add[i] = 1; |
| 96 | + while (!good(i + add[i])) { |
| 97 | + add[i]++; |
| 98 | + } |
| 99 | + } |
| 100 | + return add; |
| 101 | + }(); |
| 102 | + |
| 103 | + constexpr uint8_t gap210[] = { |
| 104 | + 10, 2, 4, 2, 4, 6, 2, 6, |
| 105 | + 4, 2, 4, 6, 6, 2, 6, 4, |
| 106 | + 2, 6, 4, 6, 8, 4, 2, 4, |
| 107 | + 2, 4, 8, 6, 4, 6, 2, 4, |
| 108 | + 6, 2, 6, 6, 4, 2, 4, 6, |
| 109 | + 2, 6, 4, 2, 4, 2, 10, 2 |
| 110 | + }; |
| 111 | + |
| 112 | + constexpr auto state210 = []() { |
| 113 | + std::array<uint8_t, 210> state; |
| 114 | + int idx = 0; |
| 115 | + for(int i = 0; i < 210; i++) { |
| 116 | + if (i % 2 && i % 3 && i % 5 && i % 7) { |
| 117 | + state[i] = uint8_t(idx++); |
| 118 | + } else { |
| 119 | + state[i] = -1; |
| 120 | + } |
| 121 | + } |
| 122 | + return state; |
| 123 | + }(); |
| 124 | + |
| 125 | + template <class BitArray> |
| 126 | + inline void sieve210(BitArray &prime, uint32_t l, uint32_t r, uint32_t p, uint8_t state) { |
| 127 | + if (l >= r) return; |
| 128 | + while (l < r) { |
| 129 | + prime.reset(l); |
| 130 | + l += gap210[state] * p; |
| 131 | + state = state == 47 ? 0 : state + 1; |
| 132 | + } |
| 133 | + } |
| 134 | + |
| 135 | + inline void gen_wheels() { |
| 136 | + static bool initialized = false; |
| 137 | + if (initialized) return; |
| 138 | + uint32_t product = 64; |
| 139 | + std::vector<uint32_t> current; |
| 140 | + for(size_t i = 1; i < size(sqrt_primes); i++) { |
| 141 | + uint32_t p = sqrt_primes[i]; |
| 142 | + if (product * p > max_wheel_size) { |
| 143 | + if (size(current) == 1) { |
| 144 | + medium_primes = sqrt_primes.subspan(i - 1); |
| 145 | + initialized = true; |
| 146 | + return; |
| 147 | + } |
| 148 | + wheels.push_back(make_wheel(current, product)); |
| 149 | + current = {p}; |
| 150 | + product = 64 * p; |
| 151 | + } else { |
| 152 | + current.push_back(p); |
| 153 | + product *= p; |
| 154 | + } |
| 155 | + } |
| 156 | + assert(false); |
| 157 | + } |
| 158 | + |
| 159 | + // Primes smaller than N |
| 160 | + inline dynamic_bit_array sieve(uint32_t N) { |
| 161 | + dynamic_bit_array prime(N); |
| 162 | + prime.set_all(0xAAAAAAAAAAAAAAAA); |
| 163 | + for(size_t i = 0; i < std::min(prime.words, base_prime_bits.words); i++) { |
| 164 | + prime.word(i) = base_prime_bits.word(i); |
| 165 | + } |
| 166 | + cp_algo::checkpoint("init"); |
| 167 | + gen_wheels(); |
| 168 | + cp_algo::checkpoint("gen wheels"); |
| 169 | + static constexpr uint32_t dense_block = 1 << 23; |
| 170 | + for(uint32_t start = base_threshold; start < N; start += dense_block) { |
| 171 | + uint32_t r = std::min(start + dense_block, N); |
| 172 | + for(auto const& wheel: wheels) { |
| 173 | + auto l = start / wheel.product * wheel.product; |
| 174 | + sieve_dense(prime, l, r, wheel); |
| 175 | + } |
| 176 | + } |
| 177 | + cp_algo::checkpoint("dense sieve"); |
| 178 | + static constexpr uint32_t sparse_block = 1 << 21; |
| 179 | + for(uint32_t start = base_threshold; start < N; start += sparse_block) { |
| 180 | + uint32_t r = std::min(start + sparse_block, N); |
| 181 | + for(auto p: medium_primes) { |
| 182 | + if(p * p >= r) break; |
| 183 | + auto k = std::max(start / p, p); |
| 184 | + if (state210[k % 210] == 0xFF) {k += add210[k % 210];} |
| 185 | + sieve210(prime, k * p, r, p, state210[k % 210]); |
| 186 | + } |
| 187 | + } |
| 188 | + cp_algo::checkpoint("sparse sieve"); |
| 189 | + return prime; |
| 190 | + } |
| 191 | +} |
| 192 | +#pragma GCC pop_options |
| 193 | +#endif // CP_ALGO_MATH_SIEVE_HPP |
0 commit comments