Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Intl.Collator implementation using ICU4C #1413

Closed
wants to merge 13 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ workflows:
- test-e2e
- test-e2e-intl
- test-macos-test262
- test-linux-test262

# Default settings for Apple jobs (apple-runtime, test-apple-runtime)
apple_defaults: &apple_defaults
Expand Down Expand Up @@ -729,3 +730,30 @@ jobs:
cmake --build ./build
cmake --build ./build --target check-hermes
python3 hermes/utils/testsuite/run_testsuite.py --test-intl test262/test -b build/bin

test-linux-test262:
docker:
- image: debian:bookworm
working_directory: /root
steps:
- checkout:
path: hermes
- run:
name: Setup dependencies
command: |
apt update
apt install -y git openssh-client cmake build-essential \
libreadline-dev libicu-dev zip python3
# Check out test262 at a pinned revision to reduce flakiness
git clone https://github.com/tc39/test262
cd test262
git checkout 62626e083bd506124aac6c799464d76c2c42851b
- run:
name: Run test262 with Intl
command: |
cmake -S hermes -B build -DHERMES_ENABLE_INTL=ON -DCMAKE_CXX_FLAGS=-O2 -DCMAKE_C_FLAGS=-O2
cmake --build ./build -j 4
# Not running Hermes test until more of Intl is built out:
# toLocaleLowerCase and toLocaleUpperCase are the two main ones.
# cmake --build ./build --target check-hermes -j 4
python3 hermes/utils/testsuite/run_testsuite.py --test-intl test262/test -b build/bin
27 changes: 25 additions & 2 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -553,7 +553,7 @@ jobs:
cd "$HERMES_WS_DIR"
git clone https://github.com/tc39/test262
cd test262
git checkout 19da3ca0757248f7595ee09d532bb83dd438f2b5
git checkout 62626e083bd506124aac6c799464d76c2c42851b
- name: Build Hermes Compiler
run: |-
cd "$HERMES_WS_DIR"
Expand All @@ -580,10 +580,33 @@ jobs:
# Check out test262 at a pinned revision to reduce flakiness
git clone https://github.com/tc39/test262
cd test262
git checkout 19da3ca0757248f7595ee09d532bb83dd438f2b5
git checkout 62626e083bd506124aac6c799464d76c2c42851b
- name: Run Hermes tests and test262 with Intl
run: |-
cmake -S hermes -B build -GNinja -DHERMES_ENABLE_INTL=ON
cmake --build ./build
cmake --build ./build --target check-hermes
python3 hermes/utils/testsuite/run_testsuite.py --test-intl test262/test -b build/bin
test-linux-test262:
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v4.1.0
with:
path: hermes
- name: Setup dependencies
run: |-
sudo apt update
sudo apt install -y git openssh-client cmake build-essential \
libreadline-dev libicu-dev zip python3
# Check out test262 at a pinned revision to reduce flakiness
git clone https://github.com/tc39/test262
cd test262
git checkout 62626e083bd506124aac6c799464d76c2c42851b
- name: Run test262 with Intl
run: |-
cmake -S hermes -B build -DHERMES_ENABLE_INTL=ON -DCMAKE_CXX_FLAGS=-O2 -DCMAKE_C_FLAGS=-O2
cmake --build ./build -j 4
# Not running Hermes test until more of Intl is built out:
# toLocaleLowerCase and toLocaleUpperCase are the two main ones.
# cmake --build ./build --target check-hermes -j 4
python3 hermes/utils/testsuite/run_testsuite.py --test-intl test262/test -b build/bin
68 changes: 0 additions & 68 deletions include/hermes/Platform/Intl/PlatformIntlShared.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,57 +14,6 @@
namespace hermes {
namespace platform_intl {

/// A locale string paired with the extension key/value strings that go with
/// it.
/// NOTE(review): presumably the result record of a locale-matcher step
/// (e.g. lookupMatcher) -- confirm against the code that produces it.
struct LocaleMatch {
// The locale, held as a char16_t string.
std::u16string locale;
// Extension entries, key -> value. std::map keeps the keys in sorted order.
std::map<std::u16string, std::u16string> extensions;
};

/// Value returned by resolveLocale(): the resolved locale string, a second
/// "data" locale string, and the extension values chosen for it, looked up by
/// key (callers use e.g. extensions.find(u"co")).
struct ResolvedLocale {
// Resolved locale; callers move this into their own locale member.
std::u16string locale;
// NOTE(review): presumably the locale whose data is actually used, as in the
// ECMA-402 ResolveLocale [[dataLocale]] field -- confirm.
std::u16string dataLocale;
// Extension key -> value. Unordered: iteration order is not specified.
std::unordered_map<std::u16string, std::u16string> extensions;
};

/// https://402.ecma-international.org/8.0/#sec-canonicalizelocalelist
vm::CallResult<std::vector<std::u16string>> canonicalizeLocaleList(
vm::Runtime &runtime,
const std::vector<std::u16string> &locales);

/// https://402.ecma-international.org/8.0/#sec-intl.getcanonicallocales
vm::CallResult<std::vector<std::u16string>> getCanonicalLocales(
vm::Runtime &runtime,
const std::vector<std::u16string> &locales);

/// https://402.ecma-international.org/8.0/#sec-bestavailablelocale
std::optional<std::u16string> bestAvailableLocale(
const std::vector<std::u16string> &availableLocales,
const std::u16string &locale);

/// https://402.ecma-international.org/8.0/#sec-lookupsupportedlocales
std::vector<std::u16string> lookupSupportedLocales(
const std::vector<std::u16string> &availableLocales,
const std::vector<std::u16string> &requestedLocales);

/// https://402.ecma-international.org/8.0/#sec-supportedlocales
std::vector<std::u16string> supportedLocales(
const std::vector<std::u16string> &availableLocales,
const std::vector<std::u16string> &requestedLocales);

/// https://402.ecma-international.org/8.0/#sec-getoption
vm::CallResult<std::optional<std::u16string>> getOptionString(
vm::Runtime &runtime,
const Options &options,
const std::u16string &property,
llvh::ArrayRef<std::u16string_view> values,
std::optional<std::u16string_view> fallback);

/// https://402.ecma-international.org/8.0/#sec-getoption
std::optional<bool> getOptionBool(
vm::Runtime &runtime,
const Options &options,
const std::u16string &property,
std::optional<bool> fallback);

/// https://402.ecma-international.org/8.0/#sec-todatetimeoptions
vm::CallResult<Options> toDateTimeOptions(
vm::Runtime &runtime,
Expand All @@ -75,23 +24,6 @@ vm::CallResult<Options> toDateTimeOptions(
/// https://402.ecma-international.org/8.0/#sec-case-sensitivity-and-case-mapping
std::u16string toASCIIUppercase(std::u16string_view tz);

/// https://402.ecma-international.org/8.0/#sec-defaultnumberoption
vm::CallResult<std::optional<uint8_t>> defaultNumberOption(
vm::Runtime &runtime,
const std::u16string &property,
std::optional<Option> value,
const std::uint8_t minimum,
const std::uint8_t maximum,
std::optional<uint8_t> fallback);

/// https://402.ecma-international.org/8.0/#sec-getoption
vm::CallResult<std::optional<uint8_t>> getNumberOption(
vm::Runtime &runtime,
const Options &options,
const std::u16string &property,
const std::uint8_t minimum,
const std::uint8_t maximum,
std::optional<uint8_t> fallback);
} // namespace platform_intl
} // namespace hermes

Expand Down
10 changes: 9 additions & 1 deletion lib/Platform/Intl/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,15 @@ if(HERMES_ENABLE_INTL)
set_target_properties(hermesPlatformIntl PROPERTIES UNITY_BUILD false)
target_compile_options(hermesPlatformIntl PRIVATE -fobjc-arc)
else()
add_hermes_library(hermesPlatformIntl STATIC PlatformIntlICU.cpp PlatformIntlShared.cpp
add_hermes_library(hermesPlatformIntl STATIC
PlatformIntlICU.cpp
PlatformIntlShared.cpp
impl_icu/Collator.cpp
impl_icu/IntlUtils.cpp
impl_icu/LocaleConverter.cpp
impl_icu/LocaleBCP47Object.cpp
impl_icu/LocaleResolver.cpp
impl_icu/OptionHelpers.cpp
LINK_LIBS
hermesBCP47Parser
hermesPublic
Expand Down
153 changes: 97 additions & 56 deletions lib/Platform/Intl/PlatformIntlApple.mm
Original file line number Diff line number Diff line change
Expand Up @@ -271,50 +271,6 @@ ResolvedLocale resolveLocale(
return result;
}

/// https://402.ecma-international.org/8.0/#sec-lookupsupportedlocales
///
/// Filters \p requestedLocales down to the entries for which
/// bestAvailableLocale(\p availableLocales, entry) yields a value.
/// The relative order of \p requestedLocales is preserved, and it is the
/// requested string itself -- not the matched available locale -- that is
/// placed in the result.
/// \return the (possibly empty) list of supported requested locales.
std::vector<std::u16string> lookupSupportedLocales(
const std::vector<std::u16string> &availableLocales,
const std::vector<std::u16string> &requestedLocales) {
// 1. Let subset be a new empty List.
std::vector<std::u16string> subset;
// 2. For each element locale of requestedLocales in List order, do
for (const std::u16string &locale : requestedLocales) {
// a. Let noExtensionsLocale be the String value that is locale with all
// Unicode locale extension sequences removed.
// We can skip this step, see the comment in lookupMatcher.
// b. Let availableLocale be BestAvailableLocale(availableLocales,
// noExtensionsLocale).
std::optional<std::u16string> availableLocale =
bestAvailableLocale(availableLocales, locale);
// c. If availableLocale is not undefined, append locale to the end of
// subset.
if (availableLocale) {
// Only the presence of a match matters; its value is not used.
subset.push_back(locale);
}
}
// 3. Return subset.
return subset;
}

/// https://402.ecma-international.org/8.0/#sec-supportedlocales
///
/// Returns the requested locales that are supported, always by delegating to
/// lookupSupportedLocales(). This overload takes no options argument, so the
/// "localeMatcher" option described in steps 1-2 below is never read or
/// validated here.
std::vector<std::u16string> supportedLocales(
const std::vector<std::u16string> &availableLocales,
const std::vector<std::u16string> &requestedLocales) {
// 1. Set options to ? CoerceOptionsToObject(options).
// 2. Let matcher be ? GetOption(options, "localeMatcher", "string", «
// "lookup", "best fit" », "best fit").
// 3. If matcher is "best fit", then
// a. Let supportedLocales be BestFitSupportedLocales(availableLocales,
// requestedLocales).
// 4. Else,
// a. Let supportedLocales be LookupSupportedLocales(availableLocales,
// requestedLocales).
// 5. Return CreateArrayFromList(supportedLocales).

// We do not implement a BestFitMatcher, so we can just use LookupMatcher.
return lookupSupportedLocales(availableLocales, requestedLocales);
}

/// https://402.ecma-international.org/8.0/#sec-canonicalizelocalelist
vm::CallResult<std::vector<std::u16string>> canonicalizeLocaleList(
vm::Runtime &runtime,
Expand Down Expand Up @@ -482,6 +438,60 @@ ResolvedLocale resolveLocale(
return std::optional<uint8_t>(defaultNumber.getValue());
}

/// https://402.ecma-international.org/8.0/#sec-lookupsupportedlocales
std::vector<std::u16string> lookupSupportedLocales(
const std::vector<std::u16string> &availableLocales,
const std::vector<std::u16string> &requestedLocales) {
// 1. Let subset be a new empty List.
std::vector<std::u16string> subset;
// 2. For each element locale of requestedLocales in List order, do
for (const std::u16string &locale : requestedLocales) {
// a. Let noExtensionsLocale be the String value that is locale with all
// Unicode locale extension sequences removed.
// We can skip this step, see the comment in lookupMatcher.
// b. Let availableLocale be BestAvailableLocale(availableLocales,
// noExtensionsLocale).
std::optional<std::u16string> availableLocale =
bestAvailableLocale(availableLocales, locale);
// c. If availableLocale is not undefined, append locale to the end of
// subset.
if (availableLocale) {
subset.push_back(locale);
}
}
// 3. Return subset.
return subset;
}

/// https://402.ecma-international.org/8.0/#sec-supportedlocales
///
/// Reads the "localeMatcher" option (allowed values "lookup" and "best fit",
/// fallback "best fit") so that any exception reported by getOptionString()
/// is propagated, then returns the supported subset of \p requestedLocales.
/// The matcher value itself is not consulted: no BestFitMatcher exists, so
/// the lookup algorithm is used for both settings.
vm::CallResult<std::vector<std::u16string>> supportedLocales(
    vm::Runtime &runtime,
    const std::vector<std::u16string> &availableLocales,
    const std::vector<std::u16string> &requestedLocales,
    const Options &options) {
  // Steps 1-2: options is already an Options object; fetch and validate
  //   GetOption(options, "localeMatcher", "string",
  //             « "lookup", "best fit" », "best fit").
  auto matcher = getOptionString(
      runtime,
      options,
      u"localeMatcher",
      {u"lookup", u"best fit"},
      u"best fit");
  if (LLVM_UNLIKELY(matcher == vm::ExecutionStatus::EXCEPTION)) {
    return vm::ExecutionStatus::EXCEPTION;
  }

  // Steps 3-5: the spec selects BestFitSupportedLocales for "best fit" and
  // LookupSupportedLocales otherwise, then returns the list as an array.
  // We do not implement a BestFitMatcher, so we can just use LookupMatcher.
  return lookupSupportedLocales(availableLocales, requestedLocales);
}

// Implementation of
// https://402.ecma-international.org/8.0/#sec-todatetimeoptions
vm::CallResult<Options> toDateTimeOptions(
Expand Down Expand Up @@ -985,7 +995,8 @@ uint8_t getCurrencyDigits(std::u16string_view code) {
if (LLVM_UNLIKELY(requestedLocalesRes == vm::ExecutionStatus::EXCEPTION))
return vm::ExecutionStatus::EXCEPTION;
// 3. Return ? SupportedLocales(availableLocales, requestedLocales, options)
return supportedLocales(availableLocales, *requestedLocalesRes);
return supportedLocales(
runtime, availableLocales, *requestedLocalesRes, options);
}

/// https://402.ecma-international.org/8.0/#sec-initializecollator
Expand Down Expand Up @@ -1061,21 +1072,43 @@ uint8_t getCurrencyDigits(std::u16string_view code) {
opt.emplace(u"kf", *caseFirstOpt);
// 18. Let relevantExtensionKeys be %Collator%.[[RelevantExtensionKeys]].
static constexpr std::u16string_view relevantExtensionKeys[] = {
u"co", u"kn", u"kf"};
u"co", u"kf", u"kn"};
static_assert(
isSorted(relevantExtensionKeys),
"keep relevantExtensionKeys sorted for canonical form");
// 19. Let r be ResolveLocale(%Collator%.[[AvailableLocales]],
// requestedLocales, opt,relevantExtensionKeys, localeData).
auto r = resolveLocale(
getAvailableLocales(), *requestedLocalesRes, opt, relevantExtensionKeys);
// 20. Set collator.[[Locale]] to r.[[locale]].
locale_ = std::move(r.locale);
// 21. Let collation be r.[[co]].
auto coIt = r.extensions.find(u"co");
// 22. If collation is null, let collation be "default".
// 23. Set collator.[[Collation]] to collation.
if (coIt == r.extensions.end())
// If usage is search, any specified collation option is dropped
// because in spec steps 5 - 6, when usage is search, the [[SearchLocaleData]]
// is to be used. The only way to specify to the collator that it should
// use the search collation rule is through the collation unicode extension
// in the data locale. Since only one collation unicode extension value can be
// specified in a locale, any specified collation option then needs to be
// dropped in favour of search.
if (usage_ == u"search") {
collation_ = u"default";
else
collation_ = std::move(coIt->second);
// If locale_ has a collation unicode extension, remove it.
auto parsed = ParsedLocaleIdentifier::parse(locale_);
if (parsed.has_value()) {
auto nodeHandle = parsed->unicodeExtensionKeywords.extract(u"co");
if (!nodeHandle.empty()) {
locale_ = parsed->canonicalize();
}
}
} else {
// 21. Let collation be r.[[co]].
auto coIt = r.extensions.find(u"co");
// 22. If collation is null, let collation be "default".
// 23. Set collator.[[Collation]] to collation.
if (coIt == r.extensions.end())
collation_ = u"default";
else
collation_ = std::move(coIt->second);
}
// 24. If relevantExtensionKeys contains "kn", then
// a. Set collator.[[Numeric]] to ! SameValue(r.[[kn]], "true").
auto knIt = r.extensions.find(u"kn");
Expand Down Expand Up @@ -1308,7 +1341,8 @@ uint8_t getCurrencyDigits(std::u16string_view code) {
auto requestedLocales = getCanonicalLocales(runtime, locales);
const std::vector<std::u16string> &availableLocales = getAvailableLocales();
// 3. Return ? SupportedLocales(availableLocales, requestedLocales, options).
return supportedLocales(availableLocales, requestedLocales.getValue());
return supportedLocales(
runtime, availableLocales, requestedLocales.getValue(), options);
}

// Implementation of
Expand Down Expand Up @@ -1393,7 +1427,10 @@ uint8_t getCurrencyDigits(std::u16string_view code) {
// requestedLocales, opt, %DateTimeFormat%.[[RelevantExtensionKeys]],
// localeData).
static constexpr std::u16string_view relevantExtensionKeys[] = {
u"ca", u"nu", u"hc"};
u"ca", u"hc", u"nu"};
static_assert(
isSorted(relevantExtensionKeys),
"keep relevantExtensionKeys sorted for canonical form");
auto r = resolveLocale(
getAvailableLocales(), *requestedLocalesRes, opt, relevantExtensionKeys);
// 18. Set dateTimeFormat.[[Locale]] to r.[[locale]].
Expand Down Expand Up @@ -2043,7 +2080,8 @@ uint8_t getCurrencyDigits(std::u16string_view code) {
// 2. Let requestedLocales be ? CanonicalizeLocaleList(locales).
auto requestedLocales = getCanonicalLocales(runtime, locales);
// 3. Return ? SupportedLocales(availableLocales, requestedLocales, options).
return supportedLocales(availableLocales, requestedLocales.getValue());
return supportedLocales(
runtime, availableLocales, requestedLocales.getValue(), options);
}

// https://402.ecma-international.org/8.0/#sec-setnumberformatunitoptions
Expand Down Expand Up @@ -2296,6 +2334,9 @@ uint8_t getCurrencyDigits(std::u16string_view code) {
// requestedLocales, opt, %NumberFormat%.[[RelevantExtensionKeys]],
// localeData).
static constexpr std::u16string_view relevantExtensionKeys[] = {u"nu"};
static_assert(
isSorted(relevantExtensionKeys),
"keep relevantExtensionKeys sorted for canonical form");
auto r = resolveLocale(
getAvailableLocales(), *requestedLocales, opt, relevantExtensionKeys);
// 11. Set numberFormat.[[Locale]] to r.[[locale]].
Expand Down
Loading