Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .clang-format
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ IndentWidth: 4
UseTab: Never
ColumnLimit: 100
IndentPPDirectives: AfterHash
BreakConstructorInitializers: AfterColon
ConstructorInitializerIndentWidth: 4
IncludeBlocks: Regroup
BraceWrapping:
Expand Down
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
build
build
.vscode
.cache
13 changes: 3 additions & 10 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,22 +11,18 @@ message("-- Found Python ${Python3_EXECUTABLE}")

include(cmake/unicode_data.cmake)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD 23)
set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

add_executable(namesgen tools/names.cpp)

find_package(Threads REQUIRED)
find_package(TBB CONFIG REQUIRED)
find_package(range-v3 CONFIG REQUIRED)
find_package(fmt CONFIG REQUIRED)
find_package(pugixml CONFIG REQUIRED)

target_link_libraries(namesgen PRIVATE
Threads::Threads
pugixml
fmt::fmt
TBB::tbb
range-v3-concepts
)

SET(HEADERS_DIR ${PROJECT_BINARY_DIR}/include/cedilla/)
Expand All @@ -37,9 +33,6 @@ add_executable(namesreversegen

target_link_libraries(namesreversegen PRIVATE
pugixml
fmt::fmt
TBB::tbb
range-v3-concepts
Threads::Threads
)
target_compile_options(namesreversegen PRIVATE "-fopenmp-simd" "-march=native")
Expand Down
17 changes: 9 additions & 8 deletions src/cedilla/base.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include <cstdint>
#include <algorithm>
#include <string_view>
#include <cstdint>

namespace uni::detail {

Expand Down Expand Up @@ -72,7 +73,7 @@ struct compact_range {
std::uint32_t _data[N];
constexpr T value(char32_t cp, T default_value) const {
const auto end = std::end(_data);
auto it = detail::upper_bound(std::begin(_data), end, cp, [](char32_t local_cp, uint32_t v) {
auto it = detail::upper_bound(std::begin(_data), end, cp, [](char32_t local_cp, std::uint32_t v) {
char32_t c = (v >> 8);
return local_cp < c;
});
Expand All @@ -91,7 +92,7 @@ struct compact_list {
std::uint32_t _data[N];
constexpr T value(char32_t cp, T default_value) const {
const auto end = std::end(_data);
auto it = detail::lower_bound(std::begin(_data), end, cp, [](uint32_t v, char32_t local_cp) {
auto it = detail::lower_bound(std::begin(_data), end, cp, [](std::uint32_t v, char32_t local_cp) {
char32_t c = (v >> 8);
return c < local_cp;
});
Expand Down Expand Up @@ -119,9 +120,9 @@ using array_t = typename array<T, N>::type;



template<std::size_t r1_s, std::size_t r2_s, int16_t r2_t_f, int16_t r2_t_b, std::size_t r3_s,
std::size_t r4_s, int16_t r4_t_f, int16_t r4_t_b, std::size_t r5_s, int16_t r5_t_f,
int16_t r5_t_b, std::size_t r6_s>
template<std::size_t r1_s, std::size_t r2_s, std::int16_t r2_t_f, std::int16_t r2_t_b, std::size_t r3_s,
std::size_t r4_s, std::int16_t r4_t_f, std::int16_t r4_t_b, std::size_t r5_s, std::int16_t r5_t_f,
std::int16_t r5_t_b, std::size_t r6_s>
struct bool_trie {

// not tries, just bitmaps for all code points 0..0x7FF (UTF-8 1- and 2-byte sequences)
Expand Down Expand Up @@ -206,7 +207,7 @@ struct range_array {
std::uint32_t _data[N];
constexpr bool lookup(char32_t cp) const {
const auto end = std::end(_data);
auto it = detail::upper_bound(std::begin(_data), end, cp, [](char32_t local_cp, uint32_t v) {
auto it = detail::upper_bound(std::begin(_data), end, cp, [](char32_t local_cp, std::uint32_t v) {
char32_t c = (v >> 8);
return local_cp < c;
});
Expand Down Expand Up @@ -269,7 +270,7 @@ struct pair
template <typename A, typename B>
pair(A, B) -> pair<A, B>;

struct string_with_idx { const char* name; uint32_t value; };
struct string_with_idx { const char* name; std::uint32_t value; };


} // namespace uni::detail
Expand All @@ -292,7 +293,7 @@ constexpr bool numeric_value::is_valid() const {
return _d != 0;
}

constexpr numeric_value::numeric_value(long long n, int16_t d) : _n(n), _d(d) {}
constexpr numeric_value::numeric_value(long long n, std::int16_t d) : _n(n), _d(d) {}


} // namespace uni
5 changes: 3 additions & 2 deletions src/cedilla/synopsys.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#pragma once
#include <string_view>
#include <cstdint>

#ifndef CTRE_UNICODE_SYNOPSYS_WAS_INCLUDED
namespace uni
Expand Down Expand Up @@ -48,10 +49,10 @@ namespace uni

protected:
constexpr numeric_value() = default;
constexpr numeric_value(long long n, int16_t d);
constexpr numeric_value(long long n, std::int16_t d);

long long _n = 0;
int16_t _d = 0;
std::int16_t _d = 0;
friend constexpr numeric_value cp_numeric_value(char32_t cp);
};

Expand Down
4 changes: 2 additions & 2 deletions src/cedilla/unicode.h
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ constexpr version cp_age(char32_t cp) {

constexpr block cp_block(char32_t cp) {
const auto end = std::end(detail::tables::block_data._data);
auto it = detail::upper_bound(std::begin(detail::tables::block_data._data), end, cp, [](char32_t cp_, uint32_t v) {
auto it = detail::upper_bound(std::begin(detail::tables::block_data._data), end, cp, [](char32_t cp_, std::uint32_t v) {
char32_t c = (v >> 8);
return cp_ < c;
});
Expand Down Expand Up @@ -263,7 +263,7 @@ constexpr numeric_value cp_numeric_value(char32_t cp) {
}())) {
return {};
}
int16_t d = 1;
std::int16_t d = 1;
detail::get_numeric_value(cp, detail::tables::numeric_data_d, d);
return numeric_value(res, d);
}
Expand Down
60 changes: 31 additions & 29 deletions src/name_to_cp.hpp
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
#include <string_view>
#include <tuple>
#include <charconv>
#include <cstdint>

namespace uni {
namespace details {
struct node {
char32_t value = 0xFFFFFF;
uint32_t children_offset = 0;
std::uint32_t children_offset = 0;
bool has_sibling = false;
uint32_t size = 0;
std::uint32_t size = 0;
std::string_view name;


Expand All @@ -18,49 +20,49 @@ namespace uni {
return children_offset != 0;
}
};
constexpr node read_node(uint32_t offset) {
constexpr node read_node(std::uint32_t offset) {
using namespace uni::details;
// Offset at which this node starts, captured before `offset` is advanced.
const std::uint32_t origin = offset;
node n;

uint8_t name = index[offset++];
std::uint8_t name = index[offset++];
if(offset + 6 >= sizeof(index))
return n;

const bool long_name = name & 0x40;
const bool has_value = name & 0x80;
name &= ~0xC0;
name = std::uint8_t(name & ~0xC0);
if(long_name) {
uint32_t name_offset = (index[offset++] << 8u);
std::uint32_t name_offset = (index[offset++] << 8u);
name_offset |= index[offset++];
n.name = std::string_view(dict + name_offset, name);
}
else {
n.name = std::string_view(dict + name, 1);
}
if(has_value) {
uint8_t h = index[offset++];
uint8_t m = index[offset++];
uint8_t l = index[offset++];
std::uint8_t h = index[offset++];
std::uint8_t m = index[offset++];
std::uint8_t l = index[offset++];
// Combine the three bytes into 24 bits, then drop the low 3 bits; bits 0x01
// and 0x02 of `l` are read as flags just below. Qualified as std::uint32_t
// to match the other casts in this function.
n.value = std::uint32_t((h << 16u) | (m << 8u) | l) >> 3u;

bool has_children = l & 0x02;
n.has_sibling = l & 0x01;

if(has_children) {
n.children_offset = uint32_t(index[offset++] << 16u);
n.children_offset |= uint32_t(index[offset++] << 8u);
n.children_offset = std::uint32_t(index[offset++] << 16u);
n.children_offset |= std::uint32_t(index[offset++] << 8u);
n.children_offset |= index[offset++];
}
}
else {
uint8_t h = index[offset++];
std::uint8_t h = index[offset++];
n.has_sibling = h & 0x80;
bool has_children = h & 0x40;
h &= ~0xC0;
// Strip the two flag bits tested above (0x80 sibling, 0x40 children) from h
// itself, so only the high bits of the children offset remain. Masking
// `name` here would overwrite h with an unrelated, already-masked byte.
h = std::uint8_t(h & ~0xC0);
if(has_children) {
n.children_offset = (h << 16u);
n.children_offset |= (uint32_t(index[offset++]) << 8u);
n.children_offset |= (std::uint32_t(index[offset++]) << 8u);
n.children_offset |= index[offset++];
}
}
Expand All @@ -69,7 +71,7 @@ namespace uni {
}


constexpr int compare(std::string_view str, std::string_view needle, uint32_t start) {
constexpr int compare(std::string_view str, std::string_view needle, std::uint32_t start) {
std::size_t str_i = start;
std::size_t needle_i = 0;
if(needle.size() == 0)
Expand Down Expand Up @@ -101,14 +103,14 @@ namespace uni {
return -1;
}

constexpr std::tuple<node, bool, uint32_t>
compare_node(uint32_t offset, std::string_view name, uint32_t start = 0) {
constexpr std::tuple<node, bool, std::uint32_t>
compare_node(std::uint32_t offset, std::string_view name, std::uint32_t start = 0) {
auto n = details::read_node(offset);
auto cmp = details::compare(name, n.name, start);
if(cmp == -1) {
return {n, false, 0};
}
start = uint32_t(cmp);
start = std::uint32_t(cmp);
if(name.size() == start)
return {n, true, n.value};
if(n.has_children()) {
Expand Down Expand Up @@ -159,8 +161,8 @@ namespace uni {

struct generated_name_data {
std::string_view prefix;
uint32_t start;
uint32_t end;
std::uint32_t start;
std::uint32_t end;
};

constexpr const generated_name_data generated_name_data_table[] = {
Expand All @@ -183,7 +185,7 @@ namespace uni {
return str.size() >= needle.size() && str.compare(0, needle.size(), needle) == 0;
}

constexpr uint32_t find_syllable(std::string_view str, int & pos, int count, int column) {
constexpr std::uint32_t find_syllable(std::string_view str, int & pos, int count, int column) {
int len = -1;
for (int i = 0; i < count; i++) {
std::string_view s(hangul_syllables[i][column]);
Expand All @@ -196,18 +198,18 @@ namespace uni {
}
if (len == -1)
len = 0;
return uint32_t(len);
return std::uint32_t(len);
}

constexpr const char32_t SBase = 0xAC00;
constexpr const char32_t LBase = 0x1100;
constexpr const char32_t VBase = 0x1161;
constexpr const char32_t TBase = 0x11A7;
constexpr const uint32_t LCount = 19;
constexpr const uint32_t VCount = 21;
constexpr const uint32_t TCount = 28;
constexpr const uint32_t NCount = (VCount * TCount);
constexpr const uint32_t SCount = (LCount * NCount);
constexpr const std::uint32_t LCount = 19;
constexpr const std::uint32_t VCount = 21;
constexpr const std::uint32_t TCount = 28;
constexpr const std::uint32_t NCount = (VCount * TCount);
constexpr const std::uint32_t SCount = (LCount * NCount);
}


Expand All @@ -230,7 +232,7 @@ namespace uni {
if (starts_with(name, item.prefix)) {
auto gn = name;
gn.remove_prefix(item.prefix.size());
uint32_t v = 0;
std::uint32_t v = 0;
const auto end = gn.data() + gn.size();
auto [p, ec] = std::from_chars(gn.data(), end , v, 16);
if(ec != std::errc() || p != end || v < item.start || v > item.end)
Expand All @@ -239,7 +241,7 @@ namespace uni {
}
}

uint32_t offset = 0;
std::uint32_t offset = 0;
for(;;) {
auto [n, res, value] = details::compare_node(offset, name);
if(!n.is_valid())
Expand Down
2 changes: 1 addition & 1 deletion tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,4 @@ endmacro()
create_test(tst_prop_script prop_script.cpp)

create_test(tst_name tst_names.cpp)
target_link_libraries(tst_name fmt::fmt)
target_link_libraries(tst_name)
10 changes: 6 additions & 4 deletions tests/tst_names.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
#include <cedilla/name_to_cp.hpp>
#include <cedilla/cp_to_name.hpp>
#include <iostream>
#include <fmt/format.h>
#include <set>
#include "common.h"
#include <catch2/catch.hpp>
Expand Down Expand Up @@ -36,9 +35,12 @@ TEST_CASE("Verify that all code point have the same name as in the DB") {
continue;
const auto & name = it->second.name;
const auto res = uni::cp_name(c).to_string();
fmt::print("{:0x} : expected {} found {}\n", uint32_t(c)
, name, res );
// We do not use std::print here because these tests are compiled in C++17
// to verify compatibility, unlike the rest of the project.
std::cout << std::hex << std::uint32_t(c)
<< " : expected " << name
<< " found " << res << '\n';
CHECK(res == name);
}
}
}
}
Loading