diff --git a/include/ada/idna/identifier.h b/include/ada/idna/identifier.h index 20428a7..1e5ab04 100644 --- a/include/ada/idna/identifier.h +++ b/include/ada/idna/identifier.h @@ -6,7 +6,6 @@ namespace ada::idna { -// Access the first code point of the input string. // Verify if it is valid name code point given a Unicode code point and a // boolean first: If first is true return the result of checking if code point // is contained in the IdentifierStart set of code points. Otherwise return the @@ -14,7 +13,7 @@ namespace ada::idna { // code points. Returns false if the input is empty or the code point is not // valid. There is minimal Unicode error handling: the input should be valid // UTF-8. https://urlpattern.spec.whatwg.org/#is-a-valid-name-code-point -bool valid_name_code_point(std::string_view input, bool first); +bool valid_name_code_point(char32_t code_point, bool first); } // namespace ada::idna diff --git a/src/identifier.cpp b/src/identifier.cpp index f9e9b74..1e591eb 100644 --- a/src/identifier.cpp +++ b/src/identifier.cpp @@ -62,22 +62,17 @@ bool is_ascii_letter_or_digit(char c) { (c >= '0' && c <= '9'); } -bool valid_name_code_point(std::string_view input, bool first) { - // https://urlpattern.spec.whatwg.org/#is-a-valid-name-code-point - if (input.empty()) { - return false; - } +bool valid_name_code_point(char32_t code_point, bool first) { // https://tc39.es/ecma262/#prod-IdentifierStart // Fast paths: if (first && - (input[0] == '$' || input[0] == '_' || is_ascii_letter(input[0]))) { + (code_point == '$' || code_point == '_' || is_ascii_letter(code_point))) { return true; } - if (!first && (input[0] == '$' || is_ascii_letter_or_digit(input[0]))) { + if (!first && (code_point == '$' || is_ascii_letter_or_digit(code_point))) { return true; } // Slow path... - uint32_t code_point = get_first_code_point(input); if (code_point == 0xffffffff) { return false; // minimal error handling } diff --git a/tests/identifier_tests.cpp b/tests/identifier_tests.cpp index d526986..23c5dcb 100644 --- a/tests/identifier_tests.cpp +++ b/tests/identifier_tests.cpp @@ -1,9 +1,24 @@ #include -#include "ada/idna/identifier.h" +#include "idna.h" + +std::u32string to_utf32(std::string_view ut8_string) { + size_t utf32_length = + ada::idna::utf32_length_from_utf8(ut8_string.data(), ut8_string.size()); + std::u32string utf32(utf32_length, '\0'); + size_t actual_utf32_length = ada::idna::utf8_to_utf32( + ut8_string.data(), ut8_string.size(), utf32.data()); + return utf32; +} void verify(std::string_view input, bool first, bool expected) { - if (ada::idna::valid_name_code_point(input, first) != expected) { + std::u32string first_code_point = to_utf32(input); + if (first_code_point.empty()) { + std::cerr << "bug" << input << std::endl; + exit(-1); + } + if (ada::idna::valid_name_code_point(first_code_point[0], first) != + expected) { std::cerr << "bug" << input << std::endl; exit(-1); }