Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update verifier API #53

Merged
merged 1 commit into from
Jan 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions include/ada/idna/identifier.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,14 @@

namespace ada::idna {

// Access the first code point of the input string.
// Verify whether it is a valid name code point, given a Unicode code point and a
// boolean first: If first is true return the result of checking if code point
// is contained in the IdentifierStart set of code points. Otherwise return the
// result of checking if code point is contained in the IdentifierPart set of
// code points. Returns false if the input is empty or the code point is not
// valid. There is minimal Unicode error handling: the input should be valid
// UTF-8. https://urlpattern.spec.whatwg.org/#is-a-valid-name-code-point
bool valid_name_code_point(std::string_view input, bool first);
bool valid_name_code_point(char32_t code_point, bool first);

} // namespace ada::idna

Expand Down
11 changes: 3 additions & 8 deletions src/identifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,22 +62,17 @@ bool is_ascii_letter_or_digit(char c) {
(c >= '0' && c <= '9');
}

bool valid_name_code_point(std::string_view input, bool first) {
// https://urlpattern.spec.whatwg.org/#is-a-valid-name-code-point
if (input.empty()) {
return false;
}
bool valid_name_code_point(char32_t code_point, bool first) {
// https://tc39.es/ecma262/#prod-IdentifierStart
// Fast paths:
if (first &&
(input[0] == '$' || input[0] == '_' || is_ascii_letter(input[0]))) {
(code_point == '$' || code_point == '_' || is_ascii_letter(code_point))) {
return true;
}
if (!first && (input[0] == '$' || is_ascii_letter_or_digit(input[0]))) {
if (!first && (code_point == '$' || is_ascii_letter_or_digit(code_point))) {
return true;
}
// Slow path...
uint32_t code_point = get_first_code_point(input);
if (code_point == 0xffffffff) {
return false; // minimal error handling
}
Expand Down
19 changes: 17 additions & 2 deletions tests/identifier_tests.cpp
Original file line number Diff line number Diff line change
@@ -1,9 +1,24 @@
#include <iostream>

#include "ada/idna/identifier.h"
#include "idna.h"

// Test helper: converts a UTF-8 encoded string into a UTF-32 string using the
// ada::idna transcoding routines.
//
// The output buffer is first sized from utf32_length_from_utf8() and then
// filled by utf8_to_utf32(). The buffer is finally resized to the length
// reported by utf8_to_utf32() so that the result never carries padding
// U+0000 code points that were not produced by the conversion.
// NOTE(review): this assumes utf8_to_utf32() returns the number of UTF-32
// code units written (and a smaller value, e.g. 0, on invalid input) --
// confirm against the ada::idna documentation.
std::u32string to_utf32(std::string_view ut8_string) {
  const size_t utf32_length =
      ada::idna::utf32_length_from_utf8(ut8_string.data(), ut8_string.size());
  std::u32string utf32(utf32_length, U'\0');
  const size_t actual_utf32_length = ada::idna::utf8_to_utf32(
      ut8_string.data(), ut8_string.size(), utf32.data());
  // Previously the reported length was ignored, so a short or failed
  // conversion still returned a non-empty, zero-filled string and callers
  // checking empty() could not detect it.
  utf32.resize(actual_utf32_length);
  return utf32;
}

void verify(std::string_view input, bool first, bool expected) {
if (ada::idna::valid_name_code_point(input, first) != expected) {
std::u32string first_code_point = to_utf32(input);
if (first_code_point.empty()) {
std::cerr << "bug" << input << std::endl;
exit(-1);
}
if (ada::idna::valid_name_code_point(first_code_point[0], first) !=
expected) {
std::cerr << "bug" << input << std::endl;
exit(-1);
}
Expand Down
Loading