Skip to content

Commit 54d1ecd

Browse files
Update to ICU 76104.3 (#80)
* Update to ICU 76 (original).
* Do not build with the Darwin platform flag, to avoid entering the code path that requires the Darwin internal SDK.
* Fix the Linux build.
* Reapply the renaming expansion, as in #63.
* Fix the Windows build: use the PI value defined in the source in case M_PI isn't defined.
* Add the same flag that we added to Package.swift to CMakeLists.txt.
* Update to C++17.
* Fix the CMake flag.
* Reapply the WASI fix from PR #35.
* Fix the WASI build.
---------
Co-authored-by: Yuta Saito <[email protected]>
1 parent 122f088 commit 54d1ecd

File tree

597 files changed

+895211
-833320
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

597 files changed

+895211
-833320
lines changed

CMakeLists.txt

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ project(SwiftFoundationICU
1919

2020
option(BUILD_SHARED_LIBS "build shared libraries" ON)
2121

22-
set(CMAKE_CXX_STANDARD 14)
22+
set(CMAKE_CXX_STANDARD 17)
2323
set(CMAKE_CXX_STANDARD_REQUIRED ON)
2424

2525
include(CheckLinkerFlag)
@@ -52,7 +52,9 @@ add_compile_definitions(
5252
$<$<COMPILE_LANGUAGE:C,CXX>:U_IO_IMPLEMENTATION>
5353
$<$<COMPILE_LANGUAGE:C,CXX>:ICU_DATA_DIR="/usr/share/icu">
5454
$<$<COMPILE_LANGUAGE:C,CXX>:USE_PACKAGE_DATA=1>
55-
$<$<COMPILE_LANGUAGE:C,CXX>:APPLE_ICU_CHANGES=1>)
55+
$<$<COMPILE_LANGUAGE:C,CXX>:APPLE_ICU_CHANGES=1>
56+
$<$<COMPILE_LANGUAGE:C,CXX>:UCHAR_TYPE=char16_t>
57+
$<$<COMPILE_LANGUAGE:C,CXX>:U_PLATFORM_IS_DARWIN_BASED=0>)
5658

5759
# Linux specific settings
5860
if(CMAKE_SYSTEM_NAME STREQUAL Linux)

Package.swift

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,8 @@ var buildSettings: [CXXSetting] = [
6363
.define("ICU_DATA_DIR", to: "\"/usr/share/icu/\""),
6464
.define("USE_PACKAGE_DATA", to: "1"),
6565
.define("APPLE_ICU_CHANGES", to: "1"),
66+
.define("UCHAR_TYPE", to: "char16_t"),
67+
.define("U_PLATFORM_IS_DARWIN_BASED", to: "0"),
6668

6769
.headerSearchPath("common"),
6870
.headerSearchPath("io"),
@@ -101,5 +103,5 @@ let package = Package(
101103
linkerSettings: linkerSettings
102104
)
103105
],
104-
cxxLanguageStandard: .cxx14
106+
cxxLanguageStandard: .cxx17
105107
)

icuSources/common/appendable.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ Appendable::~Appendable() {}
2525
UBool
2626
Appendable::appendCodePoint(UChar32 c) {
2727
if(c<=0xffff) {
28-
return appendCodeUnit((char16_t)c);
28+
return appendCodeUnit(static_cast<char16_t>(c));
2929
} else {
3030
return appendCodeUnit(U16_LEAD(c)) && appendCodeUnit(U16_TRAIL(c));
3131
}

icuSources/common/bmpset.cpp

Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ static void set32x64Bits(uint32_t table[64], int32_t start, int32_t limit) {
7575
int32_t trail=start&0x3f; // Named for UTF-8 2-byte trail byte with lower 6 bits.
7676

7777
// Set one bit indicating an all-one block.
78-
uint32_t bits=(uint32_t)1<<lead;
78+
uint32_t bits = static_cast<uint32_t>(1) << lead;
7979
if((start+1)==limit) { // Single-character shortcut.
8080
table[trail]|=bits;
8181
return;
@@ -100,9 +100,9 @@ static void set32x64Bits(uint32_t table[64], int32_t start, int32_t limit) {
100100
++lead;
101101
}
102102
if(lead<limitLead) {
103-
bits=~(((unsigned)1<<lead)-1);
103+
bits = ~((static_cast<unsigned>(1) << lead) - 1);
104104
if(limitLead<0x20) {
105-
bits&=((unsigned)1<<limitLead)-1;
105+
bits &= (static_cast<unsigned>(1) << limitLead) - 1;
106106
}
107107
for(trail=0; trail<64; ++trail) {
108108
table[trail]|=bits;
@@ -111,7 +111,7 @@ static void set32x64Bits(uint32_t table[64], int32_t start, int32_t limit) {
111111
// limit<=0x800. If limit==0x800 then limitLead=32 and limitTrail=0.
112112
// In that case, bits=1<<limitLead is undefined but the bits value
113113
// is not used because trail<limitTrail is already false.
114-
bits=(uint32_t)1<<((limitLead == 0x20) ? (limitLead - 1) : limitLead);
114+
bits = static_cast<uint32_t>(1) << ((limitLead == 0x20) ? (limitLead - 1) : limitLead);
115115
for(trail=0; trail<limitTrail; ++trail) {
116116
table[trail]|=bits;
117117
}
@@ -290,22 +290,22 @@ int32_t BMPSet::findCodePoint(UChar32 c, int32_t lo, int32_t hi) const {
290290

291291
UBool
292292
BMPSet::contains(UChar32 c) const {
293-
if((uint32_t)c<=0xff) {
294-
return (UBool)latin1Contains[c];
295-
} else if((uint32_t)c<=0x7ff) {
296-
return (UBool)((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0);
297-
} else if((uint32_t)c<0xd800 || (c>=0xe000 && c<=0xffff)) {
293+
if (static_cast<uint32_t>(c) <= 0xff) {
294+
return latin1Contains[c];
295+
} else if (static_cast<uint32_t>(c) <= 0x7ff) {
296+
return (table7FF[c & 0x3f] & (static_cast<uint32_t>(1) << (c >> 6))) != 0;
297+
} else if (static_cast<uint32_t>(c) < 0xd800 || (c >= 0xe000 && c <= 0xffff)) {
298298
int lead=c>>12;
299299
uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
300300
if(twoBits<=1) {
301301
// All 64 code points with the same bits 15..6
302302
// are either in the set or not.
303-
return (UBool)twoBits;
303+
return twoBits;
304304
} else {
305305
// Look up the code point in its 4k block of code points.
306306
return containsSlow(c, list4kStarts[lead], list4kStarts[lead+1]);
307307
}
308-
} else if((uint32_t)c<=0x10ffff) {
308+
} else if (static_cast<uint32_t>(c) <= 0x10ffff) {
309309
// surrogate or supplementary code point
310310
return containsSlow(c, list4kStarts[0xd], list4kStarts[0x11]);
311311
} else {
@@ -332,7 +332,7 @@ BMPSet::span(const char16_t *s, const char16_t *limit, USetSpanCondition spanCon
332332
break;
333333
}
334334
} else if(c<=0x7ff) {
335-
if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))==0) {
335+
if ((table7FF[c & 0x3f] & (static_cast<uint32_t>(1) << (c >> 6))) == 0) {
336336
break;
337337
}
338338
} else if(c<0xd800 || c>=0xe000) {
@@ -372,7 +372,7 @@ BMPSet::span(const char16_t *s, const char16_t *limit, USetSpanCondition spanCon
372372
break;
373373
}
374374
} else if(c<=0x7ff) {
375-
if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) {
375+
if ((table7FF[c & 0x3f] & (static_cast<uint32_t>(1) << (c >> 6))) != 0) {
376376
break;
377377
}
378378
} else if(c<0xd800 || c>=0xe000) {
@@ -421,7 +421,7 @@ BMPSet::spanBack(const char16_t *s, const char16_t *limit, USetSpanCondition spa
421421
break;
422422
}
423423
} else if(c<=0x7ff) {
424-
if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))==0) {
424+
if ((table7FF[c & 0x3f] & (static_cast<uint32_t>(1) << (c >> 6))) == 0) {
425425
break;
426426
}
427427
} else if(c<0xd800 || c>=0xe000) {
@@ -464,7 +464,7 @@ BMPSet::spanBack(const char16_t *s, const char16_t *limit, USetSpanCondition spa
464464
break;
465465
}
466466
} else if(c<=0x7ff) {
467-
if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) {
467+
if ((table7FF[c & 0x3f] & (static_cast<uint32_t>(1) << (c >> 6))) != 0) {
468468
break;
469469
}
470470
} else if(c<0xd800 || c>=0xe000) {
@@ -527,7 +527,7 @@ BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanConditi
527527
b=*s;
528528
} while(U8_IS_SINGLE(b));
529529
}
530-
length=(int32_t)(limit-s);
530+
length = static_cast<int32_t>(limit - s);
531531
}
532532

533533
if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
@@ -547,7 +547,7 @@ BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanConditi
547547
* the truncated sequence.
548548
*/
549549
b=*(limit-1);
550-
if((int8_t)b<0) {
550+
if (static_cast<int8_t>(b) < 0) {
551551
// b>=0x80: lead or trail byte
552552
if(b<0xc0) {
553553
// single trail byte, check for preceding 3- or 4-byte lead byte
@@ -602,15 +602,15 @@ BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanConditi
602602
if(b>=0xe0) {
603603
if(b<0xf0) {
604604
if( /* handle U+0000..U+FFFF inline */
605-
(t1=(uint8_t)(s[0]-0x80)) <= 0x3f &&
606-
(t2=(uint8_t)(s[1]-0x80)) <= 0x3f
605+
(t1 = static_cast<uint8_t>(s[0] - 0x80)) <= 0x3f &&
606+
(t2 = static_cast<uint8_t>(s[1] - 0x80)) <= 0x3f
607607
) {
608608
b&=0xf;
609609
uint32_t twoBits=(bmpBlockBits[t1]>>b)&0x10001;
610610
if(twoBits<=1) {
611611
// All 64 code points with this lead byte and middle trail byte
612612
// are either in the set or not.
613-
if(twoBits!=(uint32_t)spanCondition) {
613+
if (twoBits != static_cast<uint32_t>(spanCondition)) {
614614
return s-1;
615615
}
616616
} else {
@@ -624,12 +624,12 @@ BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanConditi
624624
continue;
625625
}
626626
} else if( /* handle U+10000..U+10FFFF inline */
627-
(t1=(uint8_t)(s[0]-0x80)) <= 0x3f &&
628-
(t2=(uint8_t)(s[1]-0x80)) <= 0x3f &&
629-
(t3=(uint8_t)(s[2]-0x80)) <= 0x3f
627+
(t1 = static_cast<uint8_t>(s[0] - 0x80)) <= 0x3f &&
628+
(t2 = static_cast<uint8_t>(s[1] - 0x80)) <= 0x3f &&
629+
(t3 = static_cast<uint8_t>(s[2] - 0x80)) <= 0x3f
630630
) {
631631
// Give an illegal sequence the same value as the result of contains(FFFD).
632-
UChar32 c=((UChar32)(b-0xf0)<<18)|((UChar32)t1<<12)|(t2<<6)|t3;
632+
UChar32 c = (static_cast<UChar32>(b - 0xf0) << 18) | (static_cast<UChar32>(t1) << 12) | (t2 << 6) | t3;
633633
if( ( (0x10000<=c && c<=0x10ffff) ?
634634
containsSlow(c, list4kStarts[0x10], list4kStarts[0x11]) :
635635
containsFFFD
@@ -643,9 +643,9 @@ BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanConditi
643643
} else {
644644
if( /* handle U+0000..U+07FF inline */
645645
b>=0xc0 &&
646-
(t1=(uint8_t)(*s-0x80)) <= 0x3f
646+
(t1 = static_cast<uint8_t>(*s - 0x80)) <= 0x3f
647647
) {
648-
if((USetSpanCondition)((table7FF[t1]&((uint32_t)1<<(b&0x1f)))!=0) != spanCondition) {
648+
if (static_cast<USetSpanCondition>((table7FF[t1] & (static_cast<uint32_t>(1) << (b & 0x1f))) != 0) != spanCondition) {
649649
return s-1;
650650
}
651651
++s;
@@ -711,7 +711,7 @@ BMPSet::spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCon
711711
c=utf8_prevCharSafeBody(s, 0, &length, b, -3);
712712
// c is a valid code point, not ASCII, not a surrogate
713713
if(c<=0x7ff) {
714-
if((USetSpanCondition)((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) != spanCondition) {
714+
if (static_cast<USetSpanCondition>((table7FF[c & 0x3f] & (static_cast<uint32_t>(1) << (c >> 6))) != 0) != spanCondition) {
715715
return prev+1;
716716
}
717717
} else if(c<=0xffff) {
@@ -720,7 +720,7 @@ BMPSet::spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCon
720720
if(twoBits<=1) {
721721
// All 64 code points with the same bits 15..6
722722
// are either in the set or not.
723-
if(twoBits!=(uint32_t)spanCondition) {
723+
if (twoBits != static_cast<uint32_t>(spanCondition)) {
724724
return prev+1;
725725
}
726726
} else {

icuSources/common/bmpset.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ class BMPSet : public UMemory {
156156
};
157157

158158
inline UBool BMPSet::containsSlow(UChar32 c, int32_t lo, int32_t hi) const {
159-
return (UBool)(findCodePoint(c, lo, hi) & 1);
159+
return findCodePoint(c, lo, hi) & 1;
160160
}
161161

162162
U_NAMESPACE_END

icuSources/common/brkeng.cpp

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ UnhandledEngine::findBreaks( UText *text,
8686
if (U_FAILURE(status)) return 0;
8787
utext_setNativeIndex(text, startPos);
8888
UChar32 c = utext_current32(text);
89-
while((int32_t)utext_getNativeIndex(text) < endPos && fHandled->contains(c)) {
89+
while (static_cast<int32_t>(utext_getNativeIndex(text)) < endPos && fHandled->contains(c)) {
9090
utext_next32(text); // TODO: recast loop to work with post-increment operations.
9191
c = utext_current32(text);
9292
}
@@ -114,13 +114,11 @@ UnhandledEngine::handleCharacter(UChar32 c) {
114114
*/
115115

116116
ICULanguageBreakFactory::ICULanguageBreakFactory(UErrorCode &/*status*/) {
117-
fEngines = 0;
117+
fEngines = nullptr;
118118
}
119119

120120
ICULanguageBreakFactory::~ICULanguageBreakFactory() {
121-
if (fEngines != 0) {
122-
delete fEngines;
123-
}
121+
delete fEngines;
124122
}
125123

126124
void ICULanguageBreakFactory::ensureEngines(UErrorCode& status) {
@@ -148,7 +146,7 @@ ICULanguageBreakFactory::getEngineFor(UChar32 c, const char* locale) {
148146
Mutex m(&gBreakEngineMutex);
149147
int32_t i = fEngines->size();
150148
while (--i >= 0) {
151-
lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i));
149+
lbe = static_cast<const LanguageBreakEngine*>(fEngines->elementAt(i));
152150
if (lbe != nullptr && lbe->handles(c, locale)) {
153151
return lbe;
154152
}
@@ -261,7 +259,7 @@ ICULanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script) {
261259
CharString ext;
262260
const char16_t *extStart = u_memrchr(dictfname, 0x002e, dictnlength); // last dot
263261
if (extStart != nullptr) {
264-
int32_t len = (int32_t)(extStart - dictfname);
262+
int32_t len = static_cast<int32_t>(extStart - dictfname);
265263
ext.appendInvariantChars(UnicodeString(false, extStart + 1, dictnlength - len - 1), status);
266264
dictnlength = len;
267265
}
@@ -271,18 +269,18 @@ ICULanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script) {
271269
UDataMemory *file = udata_open(U_ICUDATA_BRKITR, ext.data(), dictnbuf.data(), &status);
272270
if (U_SUCCESS(status)) {
273271
// build trie
274-
const uint8_t *data = (const uint8_t *)udata_getMemory(file);
275-
const int32_t *indexes = (const int32_t *)data;
272+
const uint8_t* data = static_cast<const uint8_t*>(udata_getMemory(file));
273+
const int32_t* indexes = reinterpret_cast<const int32_t*>(data);
276274
const int32_t offset = indexes[DictionaryData::IX_STRING_TRIE_OFFSET];
277275
const int32_t trieType = indexes[DictionaryData::IX_TRIE_TYPE] & DictionaryData::TRIE_TYPE_MASK;
278276
DictionaryMatcher *m = nullptr;
279277
if (trieType == DictionaryData::TRIE_TYPE_BYTES) {
280278
const int32_t transform = indexes[DictionaryData::IX_TRANSFORM];
281-
const char *characters = (const char *)(data + offset);
279+
const char* characters = reinterpret_cast<const char*>(data + offset);
282280
m = new BytesDictionaryMatcher(characters, transform, file);
283281
}
284282
else if (trieType == DictionaryData::TRIE_TYPE_UCHARS) {
285-
const char16_t *characters = (const char16_t *)(data + offset);
283+
const char16_t* characters = reinterpret_cast<const char16_t*>(data + offset);
286284
m = new UCharsDictionaryMatcher(characters, file);
287285
}
288286
if (m == nullptr) {
@@ -339,12 +337,12 @@ int32_t BreakEngineWrapper::findBreaks(
339337
// extends towards the start or end of the text, depending on 'reverse'.
340338

341339
utext_setNativeIndex(text, startPos);
342-
int32_t start = (int32_t)utext_getNativeIndex(text);
340+
int32_t start = static_cast<int32_t>(utext_getNativeIndex(text));
343341
int32_t current;
344342
int32_t rangeStart;
345343
int32_t rangeEnd;
346344
UChar32 c = utext_current32(text);
347-
while((current = (int32_t)utext_getNativeIndex(text)) < endPos && delegate->handles(c)) {
345+
while ((current = static_cast<int32_t>(utext_getNativeIndex(text))) < endPos && delegate->handles(c)) {
348346
utext_next32(text); // TODO: recast loop for postincrement
349347
c = utext_current32(text);
350348
}

icuSources/common/brkiter.cpp

Lines changed: 5 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &st
8585
// Get the actual string
8686
brkfname = ures_getString(brkName, &size, &status);
8787
U_ASSERT((size_t)size<sizeof(fnbuff));
88-
if ((size_t)size>=sizeof(fnbuff)) {
88+
if (static_cast<size_t>(size) >= sizeof(fnbuff)) {
8989
size=0;
9090
if (U_SUCCESS(status)) {
9191
status = U_BUFFER_OVERFLOW_ERROR;
@@ -99,7 +99,7 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &st
9999
char16_t* extStart=u_strchr(brkfname, 0x002e);
100100
int len = 0;
101101
if (extStart != nullptr){
102-
len = (int)(extStart-brkfname);
102+
len = static_cast<int>(extStart - brkfname);
103103
u_UCharsToChars(extStart+1, ext, sizeof(ext)); // nul terminates the buff
104104
u_UCharsToChars(brkfname, fnbuff, len);
105105
}
@@ -442,34 +442,19 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
442442
UTRACE_ENTRY(UTRACE_UBRK_CREATE_LINE);
443443
uprv_strcpy(lb_lw, "line");
444444
UErrorCode kvStatus = U_ZERO_ERROR;
445-
CharString value;
446-
CharStringByteSink valueSink(&value);
447-
loc.getKeywordValue("lb", valueSink, kvStatus);
445+
auto value = loc.getKeywordValue<CharString>("lb", kvStatus);
448446
if (U_SUCCESS(kvStatus) && (value == "strict" || value == "normal" || value == "loose")) {
449447
uprv_strcat(lb_lw, "_");
450448
uprv_strcat(lb_lw, value.data());
451449
}
452-
#if APPLE_ICU_CHANGES
453-
// rdar://36667210 Add ubrk_setLineWordOpts to programmatically set @lw options, add lw=keep-hangul support via keyword or function
454-
value.clear();
455-
kvStatus = U_ZERO_ERROR;
456-
loc.getKeywordValue("lw", valueSink, kvStatus);
457-
// lw=phrase is only supported in Japanese.
458-
if (U_SUCCESS(kvStatus) && value == "phrase" && (uprv_strcmp(loc.getLanguage(), "ja") == 0 || uprv_strcmp(loc.getLanguage(), "ko") == 0)) {
459-
uprv_strcat(lb_lw, "_");
460-
uprv_strcat(lb_lw, value.data());
461-
}
462-
#else
463450
// lw=phrase is only supported in Japanese and Korean.
464451
if (uprv_strcmp(loc.getLanguage(), "ja") == 0 || uprv_strcmp(loc.getLanguage(), "ko") == 0) {
465-
value.clear();
466-
loc.getKeywordValue("lw", valueSink, kvStatus);
452+
value = loc.getKeywordValue<CharString>("lw", kvStatus);
467453
if (U_SUCCESS(kvStatus) && value == "phrase") {
468454
uprv_strcat(lb_lw, "_");
469455
uprv_strcat(lb_lw, value.data());
470456
}
471457
}
472-
#endif // APPLE_ICU_CHANGES
473458
result = BreakIterator::buildInstance(loc, lb_lw, status);
474459
#if APPLE_ICU_CHANGES
475460
// rdar://36667210 Add ubrk_setLineWordOpts to programmatically set @lw options, add lw=keep-hangul support via keyword or function
@@ -532,7 +517,7 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
532517
Locale
533518
BreakIterator::getLocale(ULocDataLocaleType type, UErrorCode& status) const {
534519
if (type == ULOC_REQUESTED_LOCALE) {
535-
return Locale(requestLocale);
520+
return {requestLocale};
536521
}
537522
U_LOCALE_BASED(locBased, *this);
538523
return locBased.getLocale(type, status);

0 commit comments

Comments
 (0)