From e3237bb5bde47a19c540389f29a1ece9a6e38198 Mon Sep 17 00:00:00 2001 From: Yuya Hamada Date: Sat, 7 Jun 2025 15:36:38 +0900 Subject: [PATCH 01/11] [RFC] Add a locale for grapheme case-insensitive functions --- ext/intl/grapheme/grapheme_string.c | 33 +++++++++++-------- ext/intl/grapheme/grapheme_util.c | 4 +-- ext/intl/grapheme/grapheme_util.h | 2 +- ext/intl/php_intl.stub.php | 6 ++-- ext/intl/php_intl_arginfo.h | 18 +++++++--- .../grapheme_stripos_locale_dependency.phpt | 14 ++++++++ .../grapheme_stristr_locale_dependency.phpt | 15 +++++++++ .../grapheme_strripos_locale_dependency.phpt | 14 ++++++++ 8 files changed, 82 insertions(+), 24 deletions(-) create mode 100644 ext/intl/tests/grapheme_stripos_locale_dependency.phpt create mode 100644 ext/intl/tests/grapheme_stristr_locale_dependency.phpt create mode 100644 ext/intl/tests/grapheme_strripos_locale_dependency.phpt diff --git a/ext/intl/grapheme/grapheme_string.c b/ext/intl/grapheme/grapheme_string.c index 34dd2ed369cfc..96992c865eb61 100644 --- a/ext/intl/grapheme/grapheme_string.c +++ b/ext/intl/grapheme/grapheme_string.c @@ -84,6 +84,7 @@ PHP_FUNCTION(grapheme_strpos) char *haystack, *needle; size_t haystack_len, needle_len; const char *found; + char *locale = ""; zend_long loffset = 0; int32_t offset = 0; size_t noffset = 0; @@ -121,7 +122,7 @@ PHP_FUNCTION(grapheme_strpos) } /* do utf16 part of the strpos */ - ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 0 /* fIgnoreCase */, 0 /* last */ ); + ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 0 /* fIgnoreCase */, 0, locale /* last */ ); if ( ret_pos >= 0 ) { RETURN_LONG(ret_pos); @@ -134,19 +135,20 @@ PHP_FUNCTION(grapheme_strpos) /* {{{ Find position of first occurrence of a string within another, ignoring case differences */ PHP_FUNCTION(grapheme_stripos) { - char *haystack, *needle; - size_t haystack_len, needle_len; + char *haystack, *needle, *locale = ""; + size_t 
haystack_len, needle_len, locale_len = 0; const char *found; zend_long loffset = 0; int32_t offset = 0; zend_long ret_pos; int is_ascii; - ZEND_PARSE_PARAMETERS_START(2, 3) + ZEND_PARSE_PARAMETERS_START(2, 4) Z_PARAM_STRING(haystack, haystack_len) Z_PARAM_STRING(needle, needle_len) Z_PARAM_OPTIONAL Z_PARAM_LONG(loffset) + Z_PARAM_STRING_OR_NULL(locale, locale_len) ZEND_PARSE_PARAMETERS_END(); if ( OUTSIDE_STRING(loffset, haystack_len) ) { @@ -185,7 +187,7 @@ PHP_FUNCTION(grapheme_stripos) } /* do utf16 part of the strpos */ - ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 1 /* fIgnoreCase */, 0 /*last */ ); + ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 1 /* fIgnoreCase */, 0, locale /*last */ ); if ( ret_pos >= 0 ) { RETURN_LONG(ret_pos); @@ -200,6 +202,7 @@ PHP_FUNCTION(grapheme_stripos) PHP_FUNCTION(grapheme_strrpos) { char *haystack, *needle; + char *locale = ""; size_t haystack_len, needle_len; zend_long loffset = 0; int32_t offset = 0; @@ -242,7 +245,7 @@ PHP_FUNCTION(grapheme_strrpos) /* else we need to continue via utf16 */ } - ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 0 /* f_ignore_case */, 1/* last */); + ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 0 /* f_ignore_case */, 1, locale /* last */); if ( ret_pos >= 0 ) { RETURN_LONG(ret_pos); @@ -257,18 +260,19 @@ PHP_FUNCTION(grapheme_strrpos) /* {{{ Find position of last occurrence of a string within another, ignoring case */ PHP_FUNCTION(grapheme_strripos) { - char *haystack, *needle; - size_t haystack_len, needle_len; + char *haystack, *needle, *locale = ""; + size_t haystack_len, needle_len, locale_len = 0; zend_long loffset = 0; int32_t offset = 0; zend_long ret_pos; int is_ascii; - ZEND_PARSE_PARAMETERS_START(2, 3) + ZEND_PARSE_PARAMETERS_START(2, 4) Z_PARAM_STRING(haystack, haystack_len) Z_PARAM_STRING(needle, needle_len) 
Z_PARAM_OPTIONAL Z_PARAM_LONG(loffset) + Z_PARAM_STRING_OR_NULL(locale, locale_len) ZEND_PARSE_PARAMETERS_END(); if ( OUTSIDE_STRING(loffset, haystack_len) ) { @@ -309,7 +313,7 @@ PHP_FUNCTION(grapheme_strripos) /* else we need to continue via utf16 */ } - ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 1 /* f_ignore_case */, 1 /*last */); + ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 1 /* f_ignore_case */, 1, locale /*last */); if ( ret_pos >= 0 ) { RETURN_LONG(ret_pos); @@ -537,17 +541,18 @@ PHP_FUNCTION(grapheme_substr) /* {{{ strstr_common_handler */ static void strstr_common_handler(INTERNAL_FUNCTION_PARAMETERS, int f_ignore_case) { - char *haystack, *needle; + char *haystack, *needle, *locale = ""; const char *found; - size_t haystack_len, needle_len; + size_t haystack_len, needle_len, locale_len = 0; int32_t ret_pos, uchar_pos; bool part = false; - ZEND_PARSE_PARAMETERS_START(2, 3) + ZEND_PARSE_PARAMETERS_START(2, 4) Z_PARAM_STRING(haystack, haystack_len) Z_PARAM_STRING(needle, needle_len) Z_PARAM_OPTIONAL Z_PARAM_BOOL(part) + Z_PARAM_STRING_OR_NULL(locale, locale_len) ZEND_PARSE_PARAMETERS_END(); if ( !f_ignore_case ) { @@ -574,7 +579,7 @@ static void strstr_common_handler(INTERNAL_FUNCTION_PARAMETERS, int f_ignore_cas } /* need to work in utf16 */ - ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, 0, &uchar_pos, f_ignore_case, 0 /*last */ ); + ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, 0, &uchar_pos, f_ignore_case, 0, locale /*last */ ); if ( ret_pos < 0 ) { RETURN_FALSE; diff --git a/ext/intl/grapheme/grapheme_util.c b/ext/intl/grapheme/grapheme_util.c index 501b9dfb221d0..87facb9c35bbb 100644 --- a/ext/intl/grapheme/grapheme_util.c +++ b/ext/intl/grapheme/grapheme_util.c @@ -94,7 +94,7 @@ void grapheme_substr_ascii(char *str, size_t str_len, int32_t f, int32_t l, char /* {{{ grapheme_strpos_utf16 - strrpos 
using utf16*/ -int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle, size_t needle_len, int32_t offset, int32_t *puchar_pos, int f_ignore_case, int last) +int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle, size_t needle_len, int32_t offset, int32_t *puchar_pos, int f_ignore_case, int last, char* locale) { UChar *uhaystack = NULL, *uneedle = NULL; int32_t uhaystack_len = 0, uneedle_len = 0, char_pos, ret_pos, offset_pos = 0; @@ -136,7 +136,7 @@ int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle, } status = U_ZERO_ERROR; - src = usearch_open(uneedle, uneedle_len, uhaystack, uhaystack_len, "", bi, &status); + src = usearch_open(uneedle, uneedle_len, uhaystack, uhaystack_len, locale, bi, &status); STRPOS_CHECK_STATUS(status, "Error creating search object"); if(f_ignore_case) { diff --git a/ext/intl/grapheme/grapheme_util.h b/ext/intl/grapheme/grapheme_util.h index d03194621acf3..00ef90cdffef0 100644 --- a/ext/intl/grapheme/grapheme_util.h +++ b/ext/intl/grapheme/grapheme_util.h @@ -26,7 +26,7 @@ void grapheme_substr_ascii(char *str, size_t str_len, int32_t f, int32_t l, char zend_long grapheme_strrpos_ascii(char *haystack, size_t haystack_len, char *needle, size_t needle_len, int32_t offset); int32_t grapheme_strrpos_utf16(char *haystack, size_t haystack_len, char *needle, size_t needle_len, int32_t offset, int f_ignore_case); -int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle, size_t needle_len, int32_t offset, int *puchar_pos, int f_ignore_case, int last); +int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle, size_t needle_len, int32_t offset, int *puchar_pos, int f_ignore_case, int last, char* locale); int32_t grapheme_split_string(const UChar *text, int32_t text_length, int boundary_array[], int boundary_array_len ); diff --git a/ext/intl/php_intl.stub.php b/ext/intl/php_intl.stub.php index dfb05a2b50ac5..1bdd100c6ff2a 
100644 --- a/ext/intl/php_intl.stub.php +++ b/ext/intl/php_intl.stub.php @@ -433,17 +433,17 @@ function grapheme_strlen(string $string): int|false|null {} function grapheme_strpos(string $haystack, string $needle, int $offset = 0): int|false {} -function grapheme_stripos(string $haystack, string $needle, int $offset = 0): int|false {} +function grapheme_stripos(string $haystack, string $needle, int $offset = 0, ?string $locale = null): int|false {} function grapheme_strrpos(string $haystack, string $needle, int $offset = 0): int|false {} -function grapheme_strripos(string $haystack, string $needle, int $offset = 0): int|false {} +function grapheme_strripos(string $haystack, string $needle, int $offset = 0, ?string $locale = null): int|false {} function grapheme_substr(string $string, int $offset, ?int $length = null): string|false {} function grapheme_strstr(string $haystack, string $needle, bool $beforeNeedle = false): string|false {} -function grapheme_stristr(string $haystack, string $needle, bool $beforeNeedle = false): string|false {} +function grapheme_stristr(string $haystack, string $needle, bool $beforeNeedle = false, ?string $locale = null): string|false {} function grapheme_str_split(string $string, int $length = 1): array|false {} diff --git a/ext/intl/php_intl_arginfo.h b/ext/intl/php_intl_arginfo.h index b710084910733..d9924b2eacf01 100644 --- a/ext/intl/php_intl_arginfo.h +++ b/ext/intl/php_intl_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. 
- * Stub hash: 0d5b028a1ab8f35e8ee1b51ce3141b6ef782af28 */ + * Stub hash: 5a5f639aa38192b427a66eb2c3facd326192e0b1 */ ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_intlcal_create_instance, 0, 0, IntlCalendar, 1) ZEND_ARG_INFO_WITH_DEFAULT_VALUE(0, timezone, "null") @@ -464,11 +464,16 @@ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_grapheme_strpos, 0, 2, MAY_BE_LO ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, offset, IS_LONG, 0, "0") ZEND_END_ARG_INFO() -#define arginfo_grapheme_stripos arginfo_grapheme_strpos +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_grapheme_stripos, 0, 2, MAY_BE_LONG|MAY_BE_FALSE) + ZEND_ARG_TYPE_INFO(0, haystack, IS_STRING, 0) + ZEND_ARG_TYPE_INFO(0, needle, IS_STRING, 0) + ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, offset, IS_LONG, 0, "0") + ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, locale, IS_STRING, 1, "null") +ZEND_END_ARG_INFO() #define arginfo_grapheme_strrpos arginfo_grapheme_strpos -#define arginfo_grapheme_strripos arginfo_grapheme_strpos +#define arginfo_grapheme_strripos arginfo_grapheme_stripos ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_grapheme_substr, 0, 2, MAY_BE_STRING|MAY_BE_FALSE) ZEND_ARG_TYPE_INFO(0, string, IS_STRING, 0) @@ -482,7 +487,12 @@ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_grapheme_strstr, 0, 2, MAY_BE_ST ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, beforeNeedle, _IS_BOOL, 0, "false") ZEND_END_ARG_INFO() -#define arginfo_grapheme_stristr arginfo_grapheme_strstr +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_grapheme_stristr, 0, 2, MAY_BE_STRING|MAY_BE_FALSE) + ZEND_ARG_TYPE_INFO(0, haystack, IS_STRING, 0) + ZEND_ARG_TYPE_INFO(0, needle, IS_STRING, 0) + ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, beforeNeedle, _IS_BOOL, 0, "false") + ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, locale, IS_STRING, 1, "null") +ZEND_END_ARG_INFO() ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_grapheme_str_split, 0, 1, MAY_BE_ARRAY|MAY_BE_FALSE) ZEND_ARG_TYPE_INFO(0, string, IS_STRING, 0) diff --git 
a/ext/intl/tests/grapheme_stripos_locale_dependency.phpt b/ext/intl/tests/grapheme_stripos_locale_dependency.phpt new file mode 100644 index 0000000000000..e868e6f085c03 --- /dev/null +++ b/ext/intl/tests/grapheme_stripos_locale_dependency.phpt @@ -0,0 +1,14 @@ +--TEST-- +grapheme_stripos() function locale dependency test +--EXTENSIONS-- +intl +--FILE-- + +--EXPECT-- +int(0) +int(0) +bool(false) diff --git a/ext/intl/tests/grapheme_stristr_locale_dependency.phpt b/ext/intl/tests/grapheme_stristr_locale_dependency.phpt new file mode 100644 index 0000000000000..ea329f7d18c15 --- /dev/null +++ b/ext/intl/tests/grapheme_stristr_locale_dependency.phpt @@ -0,0 +1,15 @@ +--TEST-- +grapheme_stristr() function locale dependency test +--EXTENSIONS-- +intl +--FILE-- + +--EXPECT-- +string(3) "abc" +string(1) "i" +bool(false) + diff --git a/ext/intl/tests/grapheme_strripos_locale_dependency.phpt b/ext/intl/tests/grapheme_strripos_locale_dependency.phpt new file mode 100644 index 0000000000000..aecc74b5ab001 --- /dev/null +++ b/ext/intl/tests/grapheme_strripos_locale_dependency.phpt @@ -0,0 +1,14 @@ +--TEST-- +grapheme_strripos() function locale dependency test +--EXTENSIONS-- +intl +--FILE-- + +--EXPECT-- +int(0) +int(0) +bool(false) From 7973e725819446bc62266c01571bfcee3fa08db0 Mon Sep 17 00:00:00 2001 From: Yuya Hamada Date: Sat, 7 Jun 2025 17:38:51 +0900 Subject: [PATCH 02/11] Separate parameter of grapheme_strstr and grapheme_stristr --- ext/intl/grapheme/grapheme_string.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/ext/intl/grapheme/grapheme_string.c b/ext/intl/grapheme/grapheme_string.c index 96992c865eb61..be62fc648a88f 100644 --- a/ext/intl/grapheme/grapheme_string.c +++ b/ext/intl/grapheme/grapheme_string.c @@ -547,13 +547,23 @@ static void strstr_common_handler(INTERNAL_FUNCTION_PARAMETERS, int f_ignore_cas int32_t ret_pos, uchar_pos; bool part = false; - ZEND_PARSE_PARAMETERS_START(2, 4) - Z_PARAM_STRING(haystack, 
haystack_len) - Z_PARAM_STRING(needle, needle_len) - Z_PARAM_OPTIONAL - Z_PARAM_BOOL(part) - Z_PARAM_STRING_OR_NULL(locale, locale_len) - ZEND_PARSE_PARAMETERS_END(); + if (f_ignore_case == 1) { + ZEND_PARSE_PARAMETERS_START(2, 4) + Z_PARAM_STRING(haystack, haystack_len) + Z_PARAM_STRING(needle, needle_len) + Z_PARAM_OPTIONAL + Z_PARAM_BOOL(part) + Z_PARAM_STRING_OR_NULL(locale, locale_len) + ZEND_PARSE_PARAMETERS_END(); + } else { + ZEND_PARSE_PARAMETERS_START(2, 3) + Z_PARAM_STRING(haystack, haystack_len) + Z_PARAM_STRING(needle, needle_len) + Z_PARAM_OPTIONAL + Z_PARAM_BOOL(part) + Z_PARAM_STRING_OR_NULL(locale, locale_len) + ZEND_PARSE_PARAMETERS_END(); + } if ( !f_ignore_case ) { From 608d448b47e37fde5219661a532c41ba77c08ea4 Mon Sep 17 00:00:00 2001 From: Yuya Hamada Date: Mon, 16 Jun 2025 15:27:36 +0900 Subject: [PATCH 03/11] Add locale for grapheme_levenshtein function --- ext/intl/grapheme/grapheme_string.c | 5 ++++- ext/intl/php_intl.stub.php | 2 +- ext/intl/php_intl_arginfo.h | 3 ++- ext/intl/tests/grapheme_levenshtein.phpt | 7 +++++++ 4 files changed, 14 insertions(+), 3 deletions(-) diff --git a/ext/intl/grapheme/grapheme_string.c b/ext/intl/grapheme/grapheme_string.c index be62fc648a88f..776b7a3ec6ae7 100644 --- a/ext/intl/grapheme/grapheme_string.c +++ b/ext/intl/grapheme/grapheme_string.c @@ -934,6 +934,8 @@ PHP_FUNCTION(grapheme_levenshtein) zend_long cost_ins = 1; zend_long cost_rep = 1; zend_long cost_del = 1; + char *locale = ""; + size_t locale_len = 0; ZEND_PARSE_PARAMETERS_START(2, 5) Z_PARAM_STR(string1) @@ -942,6 +944,7 @@ PHP_FUNCTION(grapheme_levenshtein) Z_PARAM_LONG(cost_ins) Z_PARAM_LONG(cost_rep) Z_PARAM_LONG(cost_del) + Z_PARAM_STRING_OR_NULL(locale, locale_len) ZEND_PARSE_PARAMETERS_END(); if (cost_ins <= 0 || cost_ins > UINT_MAX / 4) { @@ -1058,7 +1061,7 @@ PHP_FUNCTION(grapheme_levenshtein) RETVAL_FALSE; goto out_bi2; } - UCollator *collator = ucol_open("", &ustatus); + UCollator *collator = ucol_open(locale, &ustatus); if 
(U_FAILURE(ustatus)) { intl_error_set_code(NULL, ustatus); diff --git a/ext/intl/php_intl.stub.php b/ext/intl/php_intl.stub.php index 1bdd100c6ff2a..6db2dfcf96dd4 100644 --- a/ext/intl/php_intl.stub.php +++ b/ext/intl/php_intl.stub.php @@ -447,7 +447,7 @@ function grapheme_stristr(string $haystack, string $needle, bool $beforeNeedle = function grapheme_str_split(string $string, int $length = 1): array|false {} -function grapheme_levenshtein(string $string1, string $string2, int $insertion_cost = 1, int $replacement_cost = 1, int $deletion_cost = 1): int|false {} +function grapheme_levenshtein(string $string1, string $string2, int $insertion_cost = 1, int $replacement_cost = 1, int $deletion_cost = 1, ?string $locale = null): int|false {} /** @param int $next */ function grapheme_extract(string $haystack, int $size, int $type = GRAPHEME_EXTR_COUNT, int $offset = 0, &$next = null): string|false {} diff --git a/ext/intl/php_intl_arginfo.h b/ext/intl/php_intl_arginfo.h index d9924b2eacf01..e014c54b3aa15 100644 --- a/ext/intl/php_intl_arginfo.h +++ b/ext/intl/php_intl_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. 
- * Stub hash: 5a5f639aa38192b427a66eb2c3facd326192e0b1 */ + * Stub hash: b063534d9cc4b624fe742672858c30828db6a9b9 */ ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_intlcal_create_instance, 0, 0, IntlCalendar, 1) ZEND_ARG_INFO_WITH_DEFAULT_VALUE(0, timezone, "null") @@ -505,6 +505,7 @@ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_grapheme_levenshtein, 0, 2, MAY_ ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, insertion_cost, IS_LONG, 0, "1") ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, replacement_cost, IS_LONG, 0, "1") ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, deletion_cost, IS_LONG, 0, "1") + ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, locale, IS_STRING, 1, "null") ZEND_END_ARG_INFO() ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_grapheme_extract, 0, 2, MAY_BE_STRING|MAY_BE_FALSE) diff --git a/ext/intl/tests/grapheme_levenshtein.phpt b/ext/intl/tests/grapheme_levenshtein.phpt index 4ff7dbb607bcd..719e177d7afc4 100644 --- a/ext/intl/tests/grapheme_levenshtein.phpt +++ b/ext/intl/tests/grapheme_levenshtein.phpt @@ -80,6 +80,10 @@ try { } catch (ValueError $e) { echo $e->getMessage() . PHP_EOL; } + +echo '--- Locale string ---' . 
\PHP_EOL; +var_dump(grapheme_stripos("i", "\u{0130}", 0, "tr_TR")); +var_dump(grapheme_stripos("i", "\u{0130}", 0, "en_US")); ?> --EXPECTF-- --- Equal --- @@ -126,3 +130,6 @@ int(0) grapheme_levenshtein(): Argument #3 ($insertion_cost) must be greater than 0 and less than or equal to %d grapheme_levenshtein(): Argument #4 ($replacement_cost) must be greater than 0 and less than or equal to %d grapheme_levenshtein(): Argument #5 ($deletion_cost) must be greater than 0 and less than or equal to %d +--- Locale string --- +int(0) +bool(false) From ff994a501455b394bc8c21be8ddebac88006cad1 Mon Sep 17 00:00:00 2001 From: Yuya Hamada Date: Tue, 1 Jul 2025 01:06:07 +0900 Subject: [PATCH 04/11] Fix signatures --- ext/intl/grapheme/grapheme_string.c | 12 +++++------- ext/intl/php_intl.stub.php | 8 ++++---- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/ext/intl/grapheme/grapheme_string.c b/ext/intl/grapheme/grapheme_string.c index 776b7a3ec6ae7..ba1075ea4f634 100644 --- a/ext/intl/grapheme/grapheme_string.c +++ b/ext/intl/grapheme/grapheme_string.c @@ -84,7 +84,6 @@ PHP_FUNCTION(grapheme_strpos) char *haystack, *needle; size_t haystack_len, needle_len; const char *found; - char *locale = ""; zend_long loffset = 0; int32_t offset = 0; size_t noffset = 0; @@ -122,7 +121,7 @@ PHP_FUNCTION(grapheme_strpos) } /* do utf16 part of the strpos */ - ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 0 /* fIgnoreCase */, 0, locale /* last */ ); + ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 0 /* fIgnoreCase */, 0, "" /* last */ ); if ( ret_pos >= 0 ) { RETURN_LONG(ret_pos); @@ -148,7 +147,7 @@ PHP_FUNCTION(grapheme_stripos) Z_PARAM_STRING(needle, needle_len) Z_PARAM_OPTIONAL Z_PARAM_LONG(loffset) - Z_PARAM_STRING_OR_NULL(locale, locale_len) + Z_PARAM_STRING(locale, locale_len) ZEND_PARSE_PARAMETERS_END(); if ( OUTSIDE_STRING(loffset, haystack_len) ) { @@ -272,7 +271,7 @@ 
PHP_FUNCTION(grapheme_strripos) Z_PARAM_STRING(needle, needle_len) Z_PARAM_OPTIONAL Z_PARAM_LONG(loffset) - Z_PARAM_STRING_OR_NULL(locale, locale_len) + Z_PARAM_STRING(locale, locale_len) ZEND_PARSE_PARAMETERS_END(); if ( OUTSIDE_STRING(loffset, haystack_len) ) { @@ -553,7 +552,7 @@ static void strstr_common_handler(INTERNAL_FUNCTION_PARAMETERS, int f_ignore_cas Z_PARAM_STRING(needle, needle_len) Z_PARAM_OPTIONAL Z_PARAM_BOOL(part) - Z_PARAM_STRING_OR_NULL(locale, locale_len) + Z_PARAM_STRING(locale, locale_len) ZEND_PARSE_PARAMETERS_END(); } else { ZEND_PARSE_PARAMETERS_START(2, 3) @@ -561,7 +560,6 @@ static void strstr_common_handler(INTERNAL_FUNCTION_PARAMETERS, int f_ignore_cas Z_PARAM_STRING(needle, needle_len) Z_PARAM_OPTIONAL Z_PARAM_BOOL(part) - Z_PARAM_STRING_OR_NULL(locale, locale_len) ZEND_PARSE_PARAMETERS_END(); } @@ -944,7 +942,7 @@ PHP_FUNCTION(grapheme_levenshtein) Z_PARAM_LONG(cost_ins) Z_PARAM_LONG(cost_rep) Z_PARAM_LONG(cost_del) - Z_PARAM_STRING_OR_NULL(locale, locale_len) + Z_PARAM_STRING(locale, locale_len) ZEND_PARSE_PARAMETERS_END(); if (cost_ins <= 0 || cost_ins > UINT_MAX / 4) { diff --git a/ext/intl/php_intl.stub.php b/ext/intl/php_intl.stub.php index 6db2dfcf96dd4..ba9b0ddb566a3 100644 --- a/ext/intl/php_intl.stub.php +++ b/ext/intl/php_intl.stub.php @@ -433,21 +433,21 @@ function grapheme_strlen(string $string): int|false|null {} function grapheme_strpos(string $haystack, string $needle, int $offset = 0): int|false {} -function grapheme_stripos(string $haystack, string $needle, int $offset = 0, ?string $locale = null): int|false {} +function grapheme_stripos(string $haystack, string $needle, int $offset = 0, string $locale = ""): int|false {} function grapheme_strrpos(string $haystack, string $needle, int $offset = 0): int|false {} -function grapheme_strripos(string $haystack, string $needle, int $offset = 0, ?string $locale = null): int|false {} +function grapheme_strripos(string $haystack, string $needle, int $offset = 0, string 
$locale = ""): int|false {} function grapheme_substr(string $string, int $offset, ?int $length = null): string|false {} function grapheme_strstr(string $haystack, string $needle, bool $beforeNeedle = false): string|false {} -function grapheme_stristr(string $haystack, string $needle, bool $beforeNeedle = false, ?string $locale = null): string|false {} +function grapheme_stristr(string $haystack, string $needle, bool $beforeNeedle = false, string $locale = ""): string|false {} function grapheme_str_split(string $string, int $length = 1): array|false {} -function grapheme_levenshtein(string $string1, string $string2, int $insertion_cost = 1, int $replacement_cost = 1, int $deletion_cost = 1, ?string $locale = null): int|false {} +function grapheme_levenshtein(string $string1, string $string2, int $insertion_cost = 1, int $replacement_cost = 1, int $deletion_cost = 1, string $locale = ""): int|false {} /** @param int $next */ function grapheme_extract(string $haystack, int $size, int $type = GRAPHEME_EXTR_COUNT, int $offset = 0, &$next = null): string|false {} From f72e59ffa69672a8b898b11660c9932791165098 Mon Sep 17 00:00:00 2001 From: Yuya Hamada Date: Tue, 1 Jul 2025 05:30:45 +0900 Subject: [PATCH 05/11] minor changes --- ext/intl/php_intl_arginfo.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ext/intl/php_intl_arginfo.h b/ext/intl/php_intl_arginfo.h index e014c54b3aa15..8cbf7ef7afec6 100644 --- a/ext/intl/php_intl_arginfo.h +++ b/ext/intl/php_intl_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. 
- * Stub hash: b063534d9cc4b624fe742672858c30828db6a9b9 */ + * Stub hash: 5ffc04c8baf5d95d97372fc4fa46c2808fee225e */ ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_intlcal_create_instance, 0, 0, IntlCalendar, 1) ZEND_ARG_INFO_WITH_DEFAULT_VALUE(0, timezone, "null") @@ -468,7 +468,7 @@ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_grapheme_stripos, 0, 2, MAY_BE_L ZEND_ARG_TYPE_INFO(0, haystack, IS_STRING, 0) ZEND_ARG_TYPE_INFO(0, needle, IS_STRING, 0) ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, offset, IS_LONG, 0, "0") - ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, locale, IS_STRING, 1, "null") + ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, locale, IS_STRING, 0, "\"\"") ZEND_END_ARG_INFO() #define arginfo_grapheme_strrpos arginfo_grapheme_strpos @@ -491,7 +491,7 @@ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_grapheme_stristr, 0, 2, MAY_BE_S ZEND_ARG_TYPE_INFO(0, haystack, IS_STRING, 0) ZEND_ARG_TYPE_INFO(0, needle, IS_STRING, 0) ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, beforeNeedle, _IS_BOOL, 0, "false") - ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, locale, IS_STRING, 1, "null") + ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, locale, IS_STRING, 0, "\"\"") ZEND_END_ARG_INFO() ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_grapheme_str_split, 0, 1, MAY_BE_ARRAY|MAY_BE_FALSE) @@ -505,7 +505,7 @@ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_grapheme_levenshtein, 0, 2, MAY_ ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, insertion_cost, IS_LONG, 0, "1") ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, replacement_cost, IS_LONG, 0, "1") ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, deletion_cost, IS_LONG, 0, "1") - ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, locale, IS_STRING, 1, "null") + ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, locale, IS_STRING, 0, "\"\"") ZEND_END_ARG_INFO() ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_grapheme_extract, 0, 2, MAY_BE_STRING|MAY_BE_FALSE) From abc304bc8cc3c1317a0cd744edcb1d5849630693 Mon Sep 17 00:00:00 2001 From: Yuya Hamada Date: Sat, 5 Jul 2025 15:46:12 +0900 
Subject: [PATCH 06/11] Remove locale for grapheme_levenshtein --- ext/intl/grapheme/grapheme_string.c | 5 +---- ext/intl/php_intl.stub.php | 2 +- ext/intl/php_intl_arginfo.h | 3 +-- ext/intl/tests/grapheme_levenshtein.phpt | 6 ------ 4 files changed, 3 insertions(+), 13 deletions(-) diff --git a/ext/intl/grapheme/grapheme_string.c b/ext/intl/grapheme/grapheme_string.c index ba1075ea4f634..00c2207d07403 100644 --- a/ext/intl/grapheme/grapheme_string.c +++ b/ext/intl/grapheme/grapheme_string.c @@ -932,8 +932,6 @@ PHP_FUNCTION(grapheme_levenshtein) zend_long cost_ins = 1; zend_long cost_rep = 1; zend_long cost_del = 1; - char *locale = ""; - size_t locale_len = 0; ZEND_PARSE_PARAMETERS_START(2, 5) Z_PARAM_STR(string1) @@ -942,7 +940,6 @@ PHP_FUNCTION(grapheme_levenshtein) Z_PARAM_LONG(cost_ins) Z_PARAM_LONG(cost_rep) Z_PARAM_LONG(cost_del) - Z_PARAM_STRING(locale, locale_len) ZEND_PARSE_PARAMETERS_END(); if (cost_ins <= 0 || cost_ins > UINT_MAX / 4) { @@ -1059,7 +1056,7 @@ PHP_FUNCTION(grapheme_levenshtein) RETVAL_FALSE; goto out_bi2; } - UCollator *collator = ucol_open(locale, &ustatus); + UCollator *collator = ucol_open("", &ustatus); if (U_FAILURE(ustatus)) { intl_error_set_code(NULL, ustatus); diff --git a/ext/intl/php_intl.stub.php b/ext/intl/php_intl.stub.php index ba9b0ddb566a3..f437bbcfbf797 100644 --- a/ext/intl/php_intl.stub.php +++ b/ext/intl/php_intl.stub.php @@ -447,7 +447,7 @@ function grapheme_stristr(string $haystack, string $needle, bool $beforeNeedle = function grapheme_str_split(string $string, int $length = 1): array|false {} -function grapheme_levenshtein(string $string1, string $string2, int $insertion_cost = 1, int $replacement_cost = 1, int $deletion_cost = 1, string $locale = ""): int|false {} +function grapheme_levenshtein(string $string1, string $string2, int $insertion_cost = 1, int $replacement_cost = 1, int $deletion_cost = 1): int|false {} /** @param int $next */ function grapheme_extract(string $haystack, int $size, int $type = 
GRAPHEME_EXTR_COUNT, int $offset = 0, &$next = null): string|false {} diff --git a/ext/intl/php_intl_arginfo.h b/ext/intl/php_intl_arginfo.h index 8cbf7ef7afec6..04fe96ae00a4d 100644 --- a/ext/intl/php_intl_arginfo.h +++ b/ext/intl/php_intl_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. - * Stub hash: 5ffc04c8baf5d95d97372fc4fa46c2808fee225e */ + * Stub hash: 716311270910321b33dfdc3aebe5f5c574e5b697 */ ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_intlcal_create_instance, 0, 0, IntlCalendar, 1) ZEND_ARG_INFO_WITH_DEFAULT_VALUE(0, timezone, "null") @@ -505,7 +505,6 @@ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_grapheme_levenshtein, 0, 2, MAY_ ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, insertion_cost, IS_LONG, 0, "1") ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, replacement_cost, IS_LONG, 0, "1") ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, deletion_cost, IS_LONG, 0, "1") - ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, locale, IS_STRING, 0, "\"\"") ZEND_END_ARG_INFO() ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_grapheme_extract, 0, 2, MAY_BE_STRING|MAY_BE_FALSE) diff --git a/ext/intl/tests/grapheme_levenshtein.phpt b/ext/intl/tests/grapheme_levenshtein.phpt index 719e177d7afc4..9e2096fb29636 100644 --- a/ext/intl/tests/grapheme_levenshtein.phpt +++ b/ext/intl/tests/grapheme_levenshtein.phpt @@ -81,9 +81,6 @@ try { echo $e->getMessage() . PHP_EOL; } -echo '--- Locale string ---' . 
\PHP_EOL; -var_dump(grapheme_stripos("i", "\u{0130}", 0, "tr_TR")); -var_dump(grapheme_stripos("i", "\u{0130}", 0, "en_US")); ?> --EXPECTF-- --- Equal --- @@ -130,6 +127,3 @@ int(0) grapheme_levenshtein(): Argument #3 ($insertion_cost) must be greater than 0 and less than or equal to %d grapheme_levenshtein(): Argument #4 ($replacement_cost) must be greater than 0 and less than or equal to %d grapheme_levenshtein(): Argument #5 ($deletion_cost) must be greater than 0 and less than or equal to %d ---- Locale string --- -int(0) -bool(false) From 10b43c2c5ea7d150387669280e12bfc2e9a88c19 Mon Sep 17 00:00:00 2001 From: Yuya Hamada Date: Tue, 8 Jul 2025 15:23:28 +0900 Subject: [PATCH 07/11] Add a collator strength and locale --- ext/intl/grapheme/grapheme_string.c | 75 +++++++++++++++++------------ ext/intl/grapheme/grapheme_util.c | 3 +- ext/intl/grapheme/grapheme_util.h | 4 +- ext/intl/php_intl.stub.php | 16 +++--- ext/intl/php_intl_arginfo.h | 19 +++++--- 5 files changed, 68 insertions(+), 49 deletions(-) diff --git a/ext/intl/grapheme/grapheme_string.c b/ext/intl/grapheme/grapheme_string.c index 00c2207d07403..688a778f862e2 100644 --- a/ext/intl/grapheme/grapheme_string.c +++ b/ext/intl/grapheme/grapheme_string.c @@ -81,19 +81,22 @@ PHP_FUNCTION(grapheme_strlen) /* {{{ Find position of first occurrence of a string within another */ PHP_FUNCTION(grapheme_strpos) { - char *haystack, *needle; - size_t haystack_len, needle_len; + char *haystack, *needle, *locale = ""; + size_t haystack_len, needle_len, locale_len; const char *found; zend_long loffset = 0; int32_t offset = 0; + zend_long strength = UCOL_DEFAULT_STRENGTH; size_t noffset = 0; zend_long ret_pos; - ZEND_PARSE_PARAMETERS_START(2, 3) + ZEND_PARSE_PARAMETERS_START(2, 5) Z_PARAM_STRING(haystack, haystack_len) Z_PARAM_STRING(needle, needle_len) Z_PARAM_OPTIONAL Z_PARAM_LONG(loffset) + Z_PARAM_STRING(locale, locale_len) + Z_PARAM_LONG(strength) ZEND_PARSE_PARAMETERS_END(); if ( OUTSIDE_STRING(loffset, haystack_len) 
) { @@ -121,7 +124,7 @@ PHP_FUNCTION(grapheme_strpos) } /* do utf16 part of the strpos */ - ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 0 /* fIgnoreCase */, 0, "" /* last */ ); + ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 0 /* fIgnoreCase */, 0, locale, strength /* last */ ); if ( ret_pos >= 0 ) { RETURN_LONG(ret_pos); @@ -139,15 +142,17 @@ PHP_FUNCTION(grapheme_stripos) const char *found; zend_long loffset = 0; int32_t offset = 0; + zend_long strength = UCOL_DEFAULT_STRENGTH; zend_long ret_pos; int is_ascii; - ZEND_PARSE_PARAMETERS_START(2, 4) + ZEND_PARSE_PARAMETERS_START(2, 5) Z_PARAM_STRING(haystack, haystack_len) Z_PARAM_STRING(needle, needle_len) Z_PARAM_OPTIONAL Z_PARAM_LONG(loffset) Z_PARAM_STRING(locale, locale_len) + Z_PARAM_LONG(strength) ZEND_PARSE_PARAMETERS_END(); if ( OUTSIDE_STRING(loffset, haystack_len) ) { @@ -186,7 +191,7 @@ PHP_FUNCTION(grapheme_stripos) } /* do utf16 part of the strpos */ - ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 1 /* fIgnoreCase */, 0, locale /*last */ ); + ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 1 /* fIgnoreCase */, 0, locale, strength /*last */ ); if ( ret_pos >= 0 ) { RETURN_LONG(ret_pos); @@ -205,14 +210,16 @@ PHP_FUNCTION(grapheme_strrpos) size_t haystack_len, needle_len; zend_long loffset = 0; int32_t offset = 0; + zend_long strength = UCOL_DEFAULT_STRENGTH; zend_long ret_pos; int is_ascii; - ZEND_PARSE_PARAMETERS_START(2, 3) + ZEND_PARSE_PARAMETERS_START(2, 4) Z_PARAM_STRING(haystack, haystack_len) Z_PARAM_STRING(needle, needle_len) Z_PARAM_OPTIONAL Z_PARAM_LONG(loffset) + Z_PARAM_LONG(strength) ZEND_PARSE_PARAMETERS_END(); if ( OUTSIDE_STRING(loffset, haystack_len) ) { @@ -244,7 +251,7 @@ PHP_FUNCTION(grapheme_strrpos) /* else we need to continue via utf16 */ } - ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, 
needle_len, offset, NULL, 0 /* f_ignore_case */, 1, locale /* last */); + ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 0 /* f_ignore_case */, 1, locale, strength /* last */); if ( ret_pos >= 0 ) { RETURN_LONG(ret_pos); @@ -263,15 +270,17 @@ PHP_FUNCTION(grapheme_strripos) size_t haystack_len, needle_len, locale_len = 0; zend_long loffset = 0; int32_t offset = 0; + zend_long strength = UCOL_DEFAULT_STRENGTH; zend_long ret_pos; int is_ascii; - ZEND_PARSE_PARAMETERS_START(2, 4) + ZEND_PARSE_PARAMETERS_START(2, 5) Z_PARAM_STRING(haystack, haystack_len) Z_PARAM_STRING(needle, needle_len) Z_PARAM_OPTIONAL Z_PARAM_LONG(loffset) Z_PARAM_STRING(locale, locale_len) + Z_PARAM_LONG(strength) ZEND_PARSE_PARAMETERS_END(); if ( OUTSIDE_STRING(loffset, haystack_len) ) { @@ -312,7 +321,7 @@ PHP_FUNCTION(grapheme_strripos) /* else we need to continue via utf16 */ } - ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 1 /* f_ignore_case */, 1, locale /*last */); + ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 1 /* f_ignore_case */, 1, locale, strength /*last */); if ( ret_pos >= 0 ) { RETURN_LONG(ret_pos); @@ -327,13 +336,14 @@ PHP_FUNCTION(grapheme_strripos) /* {{{ Returns part of a string */ PHP_FUNCTION(grapheme_substr) { - char *str; + char *str, *locale = ""; zend_string *u8_sub_str; UChar *ustr; - size_t str_len; + size_t str_len, locale_len; int32_t ustr_len; zend_long lstart = 0, length = 0; int32_t start = 0; + zend_long strength = UCOL_DEFAULT; int iter_val; UErrorCode status; unsigned char u_break_iterator_buffer[U_BRK_SAFECLONE_BUFFERSIZE]; @@ -342,11 +352,13 @@ PHP_FUNCTION(grapheme_substr) int32_t (*iter_func)(UBreakIterator *); bool no_length = true; - ZEND_PARSE_PARAMETERS_START(2, 3) + ZEND_PARSE_PARAMETERS_START(2, 5) Z_PARAM_STRING(str, str_len) Z_PARAM_LONG(lstart) Z_PARAM_OPTIONAL Z_PARAM_LONG_OR_NULL(length, no_length) + 
Z_PARAM_STRING(locale, locale_len) + Z_PARAM_LONG(strength) ZEND_PARSE_PARAMETERS_END(); if (lstart < INT32_MIN || lstart > INT32_MAX) { @@ -544,24 +556,17 @@ static void strstr_common_handler(INTERNAL_FUNCTION_PARAMETERS, int f_ignore_cas const char *found; size_t haystack_len, needle_len, locale_len = 0; int32_t ret_pos, uchar_pos; + zend_long strength = UCOL_DEFAULT_STRENGTH; bool part = false; - if (f_ignore_case == 1) { - ZEND_PARSE_PARAMETERS_START(2, 4) - Z_PARAM_STRING(haystack, haystack_len) - Z_PARAM_STRING(needle, needle_len) - Z_PARAM_OPTIONAL - Z_PARAM_BOOL(part) - Z_PARAM_STRING(locale, locale_len) - ZEND_PARSE_PARAMETERS_END(); - } else { - ZEND_PARSE_PARAMETERS_START(2, 3) - Z_PARAM_STRING(haystack, haystack_len) - Z_PARAM_STRING(needle, needle_len) - Z_PARAM_OPTIONAL - Z_PARAM_BOOL(part) - ZEND_PARSE_PARAMETERS_END(); - } + ZEND_PARSE_PARAMETERS_START(2, 5) + Z_PARAM_STRING(haystack, haystack_len) + Z_PARAM_STRING(needle, needle_len) + Z_PARAM_OPTIONAL + Z_PARAM_BOOL(part) + Z_PARAM_STRING(locale, locale_len) + Z_PARAM_LONG(strength) + ZEND_PARSE_PARAMETERS_END(); if ( !f_ignore_case ) { @@ -587,7 +592,7 @@ static void strstr_common_handler(INTERNAL_FUNCTION_PARAMETERS, int f_ignore_cas } /* need to work in utf16 */ - ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, 0, &uchar_pos, f_ignore_case, 0, locale /*last */ ); + ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, 0, &uchar_pos, f_ignore_case, 0, locale, strength /*last */ ); if ( ret_pos < 0 ) { RETURN_FALSE; @@ -932,14 +937,19 @@ PHP_FUNCTION(grapheme_levenshtein) zend_long cost_ins = 1; zend_long cost_rep = 1; zend_long cost_del = 1; + char *locale = ""; + size_t locale_len = 0; + zend_long strength = UCOL_DEFAULT_STRENGTH; - ZEND_PARSE_PARAMETERS_START(2, 5) + ZEND_PARSE_PARAMETERS_START(2, 7) Z_PARAM_STR(string1) Z_PARAM_STR(string2) Z_PARAM_OPTIONAL Z_PARAM_LONG(cost_ins) Z_PARAM_LONG(cost_rep) Z_PARAM_LONG(cost_del) + 
Z_PARAM_STRING(locale, locale_len) + Z_PARAM_LONG(strength) ZEND_PARSE_PARAMETERS_END(); if (cost_ins <= 0 || cost_ins > UINT_MAX / 4) { @@ -1056,7 +1066,7 @@ PHP_FUNCTION(grapheme_levenshtein) RETVAL_FALSE; goto out_bi2; } - UCollator *collator = ucol_open("", &ustatus); + UCollator *collator = ucol_open(locale, &ustatus); if (U_FAILURE(ustatus)) { intl_error_set_code(NULL, ustatus); @@ -1064,6 +1074,7 @@ PHP_FUNCTION(grapheme_levenshtein) RETVAL_FALSE; goto out_collator; } + ucol_setStrength(collator, strength); zend_long *p1, *p2, *tmp; p1 = safe_emalloc((size_t) strlen_2 + 1, sizeof(zend_long), 0); diff --git a/ext/intl/grapheme/grapheme_util.c b/ext/intl/grapheme/grapheme_util.c index 87facb9c35bbb..86b06fd4bbbbe 100644 --- a/ext/intl/grapheme/grapheme_util.c +++ b/ext/intl/grapheme/grapheme_util.c @@ -94,7 +94,7 @@ void grapheme_substr_ascii(char *str, size_t str_len, int32_t f, int32_t l, char /* {{{ grapheme_strpos_utf16 - strrpos using utf16*/ -int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle, size_t needle_len, int32_t offset, int32_t *puchar_pos, int f_ignore_case, int last, char* locale) +int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle, size_t needle_len, int32_t offset, int32_t *puchar_pos, int f_ignore_case, int last, char* locale, int32_t strength) { UChar *uhaystack = NULL, *uneedle = NULL; int32_t uhaystack_len = 0, uneedle_len = 0, char_pos, ret_pos, offset_pos = 0; @@ -141,6 +141,7 @@ int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle, if(f_ignore_case) { UCollator *coll = usearch_getCollator(src); + ucol_setStrength(coll, strength); status = U_ZERO_ERROR; ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_SECONDARY, &status); STRPOS_CHECK_STATUS(status, "Error setting collation strength"); diff --git a/ext/intl/grapheme/grapheme_util.h b/ext/intl/grapheme/grapheme_util.h index 00ef90cdffef0..42ee9eb64911d 100644 --- a/ext/intl/grapheme/grapheme_util.h +++ 
b/ext/intl/grapheme/grapheme_util.h @@ -25,8 +25,8 @@ zend_long grapheme_ascii_check(const unsigned char *day, size_t len); void grapheme_substr_ascii(char *str, size_t str_len, int32_t f, int32_t l, char **sub_str, int32_t *sub_str_len); zend_long grapheme_strrpos_ascii(char *haystack, size_t haystack_len, char *needle, size_t needle_len, int32_t offset); -int32_t grapheme_strrpos_utf16(char *haystack, size_t haystack_len, char *needle, size_t needle_len, int32_t offset, int f_ignore_case); -int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle, size_t needle_len, int32_t offset, int *puchar_pos, int f_ignore_case, int last, char* locale); +int32_t grapheme_strrpos_utf16(char *haystack, size_t haystack_len, char *needle, size_t needle_len, int32_t offset, int f_ignore_case, char* locale, int32_t strength); +int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle, size_t needle_len, int32_t offset, int *puchar_pos, int f_ignore_case, int last, char* locale, int32_t strength); int32_t grapheme_split_string(const UChar *text, int32_t text_length, int boundary_array[], int boundary_array_len ); diff --git a/ext/intl/php_intl.stub.php b/ext/intl/php_intl.stub.php index f437bbcfbf797..d37723ffb10fb 100644 --- a/ext/intl/php_intl.stub.php +++ b/ext/intl/php_intl.stub.php @@ -431,23 +431,23 @@ function numfmt_get_error_message(NumberFormatter $formatter): string {} function grapheme_strlen(string $string): int|false|null {} -function grapheme_strpos(string $haystack, string $needle, int $offset = 0): int|false {} +function grapheme_strpos(string $haystack, string $needle, int $offset = 0, string $locale = "", int $strength = UCOL_DEFAULT_STRENGTH): int|false {} -function grapheme_stripos(string $haystack, string $needle, int $offset = 0, string $locale = ""): int|false {} +function grapheme_stripos(string $haystack, string $needle, int $offset = 0, string $locale = "", int $strength = UCOL_DEFAULT_STRENGTH): int|false {} 
-function grapheme_strrpos(string $haystack, string $needle, int $offset = 0): int|false {} +function grapheme_strrpos(string $haystack, string $needle, int $offset = 0, int $strength = UCOL_DEFAULT_STRENGTH): int|false {} -function grapheme_strripos(string $haystack, string $needle, int $offset = 0, string $locale = ""): int|false {} +function grapheme_strripos(string $haystack, string $needle, int $offset = 0, string $locale = "", int $strength = UCOL_DEFAULT_STRENGTH): int|false {} -function grapheme_substr(string $string, int $offset, ?int $length = null): string|false {} +function grapheme_substr(string $string, int $offset, ?int $length = null, int $strength = UCOL_DEFAULT_STRENGTH): string|false {} -function grapheme_strstr(string $haystack, string $needle, bool $beforeNeedle = false): string|false {} +function grapheme_strstr(string $haystack, string $needle, bool $beforeNeedle = false, int $strength = UCOL_DEFAULT_STRENGTH): string|false {} -function grapheme_stristr(string $haystack, string $needle, bool $beforeNeedle = false, string $locale = ""): string|false {} +function grapheme_stristr(string $haystack, string $needle, bool $beforeNeedle = false, string $locale = "", int $strength = UCOL_DEFAULT_STRENGTH): string|false {} function grapheme_str_split(string $string, int $length = 1): array|false {} -function grapheme_levenshtein(string $string1, string $string2, int $insertion_cost = 1, int $replacement_cost = 1, int $deletion_cost = 1): int|false {} +function grapheme_levenshtein(string $string1, string $string2, int $insertion_cost = 1, int $replacement_cost = 1, int $deletion_cost = 1, string $locale = "", int $strength = UCOL_DEFAULT_STRENGTH): int|false {} /** @param int $next */ function grapheme_extract(string $haystack, int $size, int $type = GRAPHEME_EXTR_COUNT, int $offset = 0, &$next = null): string|false {} diff --git a/ext/intl/php_intl_arginfo.h b/ext/intl/php_intl_arginfo.h index 04fe96ae00a4d..c16e461f793c4 100644 --- 
a/ext/intl/php_intl_arginfo.h +++ b/ext/intl/php_intl_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. - * Stub hash: 716311270910321b33dfdc3aebe5f5c574e5b697 */ + * Stub hash: 145ee08cacd0ee630fe4f7468c2dd58302a6f3d7 */ ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_intlcal_create_instance, 0, 0, IntlCalendar, 1) ZEND_ARG_INFO_WITH_DEFAULT_VALUE(0, timezone, "null") @@ -462,29 +462,33 @@ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_grapheme_strpos, 0, 2, MAY_BE_LO ZEND_ARG_TYPE_INFO(0, haystack, IS_STRING, 0) ZEND_ARG_TYPE_INFO(0, needle, IS_STRING, 0) ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, offset, IS_LONG, 0, "0") + ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, locale, IS_STRING, 0, "\"\"") + ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, strength, IS_LONG, 0, "UCOL_DEFAULT_STRENGTH") ZEND_END_ARG_INFO() -ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_grapheme_stripos, 0, 2, MAY_BE_LONG|MAY_BE_FALSE) +#define arginfo_grapheme_stripos arginfo_grapheme_strpos + +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_grapheme_strrpos, 0, 2, MAY_BE_LONG|MAY_BE_FALSE) ZEND_ARG_TYPE_INFO(0, haystack, IS_STRING, 0) ZEND_ARG_TYPE_INFO(0, needle, IS_STRING, 0) ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, offset, IS_LONG, 0, "0") - ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, locale, IS_STRING, 0, "\"\"") + ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, strength, IS_LONG, 0, "UCOL_DEFAULT_STRENGTH") ZEND_END_ARG_INFO() -#define arginfo_grapheme_strrpos arginfo_grapheme_strpos - -#define arginfo_grapheme_strripos arginfo_grapheme_stripos +#define arginfo_grapheme_strripos arginfo_grapheme_strpos ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_grapheme_substr, 0, 2, MAY_BE_STRING|MAY_BE_FALSE) ZEND_ARG_TYPE_INFO(0, string, IS_STRING, 0) ZEND_ARG_TYPE_INFO(0, offset, IS_LONG, 0) ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, length, IS_LONG, 1, "null") + ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, strength, IS_LONG, 0, "UCOL_DEFAULT_STRENGTH") ZEND_END_ARG_INFO() 
ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_grapheme_strstr, 0, 2, MAY_BE_STRING|MAY_BE_FALSE) ZEND_ARG_TYPE_INFO(0, haystack, IS_STRING, 0) ZEND_ARG_TYPE_INFO(0, needle, IS_STRING, 0) ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, beforeNeedle, _IS_BOOL, 0, "false") + ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, strength, IS_LONG, 0, "UCOL_DEFAULT_STRENGTH") ZEND_END_ARG_INFO() ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_grapheme_stristr, 0, 2, MAY_BE_STRING|MAY_BE_FALSE) @@ -492,6 +496,7 @@ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_grapheme_stristr, 0, 2, MAY_BE_S ZEND_ARG_TYPE_INFO(0, needle, IS_STRING, 0) ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, beforeNeedle, _IS_BOOL, 0, "false") ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, locale, IS_STRING, 0, "\"\"") + ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, strength, IS_LONG, 0, "UCOL_DEFAULT_STRENGTH") ZEND_END_ARG_INFO() ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_grapheme_str_split, 0, 1, MAY_BE_ARRAY|MAY_BE_FALSE) @@ -505,6 +510,8 @@ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_grapheme_levenshtein, 0, 2, MAY_ ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, insertion_cost, IS_LONG, 0, "1") ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, replacement_cost, IS_LONG, 0, "1") ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, deletion_cost, IS_LONG, 0, "1") + ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, locale, IS_STRING, 0, "\"\"") + ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, strength, IS_LONG, 0, "UCOL_DEFAULT_STRENGTH") ZEND_END_ARG_INFO() ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_grapheme_extract, 0, 2, MAY_BE_STRING|MAY_BE_FALSE) From 6656fabf3bd11ee4dc109d81e356502882288ff7 Mon Sep 17 00:00:00 2001 From: Yuya Hamada Date: Tue, 8 Jul 2025 19:51:41 +0900 Subject: [PATCH 08/11] Fix php_intl.stub.php --- ext/intl/php_intl.stub.php | 5 +++++ ext/intl/php_intl_arginfo.h | 3 ++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/ext/intl/php_intl.stub.php b/ext/intl/php_intl.stub.php index d37723ffb10fb..eb6e1341dec27 100644 --- 
a/ext/intl/php_intl.stub.php +++ b/ext/intl/php_intl.stub.php @@ -166,6 +166,11 @@ * @cvalue UIDNA_ERROR_CONTEXTJ */ const IDNA_ERROR_CONTEXTJ = UNKNOWN; +/** + * @var int + * @cvalue UCOL_DEFAULT_STRENGTH + */ +const UCOL_DEFAULT_STRENGTH = UNKNOWN; class IntlException extends Exception { diff --git a/ext/intl/php_intl_arginfo.h b/ext/intl/php_intl_arginfo.h index c16e461f793c4..79c384731f5d0 100644 --- a/ext/intl/php_intl_arginfo.h +++ b/ext/intl/php_intl_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. - * Stub hash: 145ee08cacd0ee630fe4f7468c2dd58302a6f3d7 */ + * Stub hash: a1f0967fdff81576f00093766d0830cd056792c6 */ ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_intlcal_create_instance, 0, 0, IntlCalendar, 1) ZEND_ARG_INFO_WITH_DEFAULT_VALUE(0, timezone, "null") @@ -1237,6 +1237,7 @@ static void register_php_intl_symbols(int module_number) REGISTER_LONG_CONSTANT("IDNA_ERROR_INVALID_ACE_LABEL", UIDNA_ERROR_INVALID_ACE_LABEL, CONST_PERSISTENT); REGISTER_LONG_CONSTANT("IDNA_ERROR_BIDI", UIDNA_ERROR_BIDI, CONST_PERSISTENT); REGISTER_LONG_CONSTANT("IDNA_ERROR_CONTEXTJ", UIDNA_ERROR_CONTEXTJ, CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("UCOL_DEFAULT_STRENGTH", UCOL_DEFAULT_STRENGTH, CONST_PERSISTENT); zend_attribute *attribute_Deprecated_func_intlcal_set_0 = zend_add_function_attribute(zend_hash_str_find_ptr(CG(function_table), "intlcal_set", sizeof("intlcal_set") - 1), ZSTR_KNOWN(ZEND_STR_DEPRECATED_CAPITALIZED), 2); From e452334a86d96df2813bdede36a909a2a234999e Mon Sep 17 00:00:00 2001 From: Yuya Hamada Date: Tue, 8 Jul 2025 20:36:36 +0900 Subject: [PATCH 09/11] Fix zpp mismatch --- ext/intl/grapheme/grapheme_string.c | 5 +++-- ext/intl/php_intl.stub.php | 6 +++--- ext/intl/php_intl_arginfo.h | 19 +++++-------------- 3 files changed, 11 insertions(+), 19 deletions(-) diff --git a/ext/intl/grapheme/grapheme_string.c b/ext/intl/grapheme/grapheme_string.c index 688a778f862e2..36f653edeba32 100644 --- 
a/ext/intl/grapheme/grapheme_string.c +++ b/ext/intl/grapheme/grapheme_string.c @@ -207,18 +207,19 @@ PHP_FUNCTION(grapheme_strrpos) { char *haystack, *needle; char *locale = ""; - size_t haystack_len, needle_len; + size_t haystack_len, needle_len, locale_len; zend_long loffset = 0; int32_t offset = 0; zend_long strength = UCOL_DEFAULT_STRENGTH; zend_long ret_pos; int is_ascii; - ZEND_PARSE_PARAMETERS_START(2, 4) + ZEND_PARSE_PARAMETERS_START(2, 5) Z_PARAM_STRING(haystack, haystack_len) Z_PARAM_STRING(needle, needle_len) Z_PARAM_OPTIONAL Z_PARAM_LONG(loffset) + Z_PARAM_STRING(locale, locale_len) Z_PARAM_LONG(strength) ZEND_PARSE_PARAMETERS_END(); diff --git a/ext/intl/php_intl.stub.php b/ext/intl/php_intl.stub.php index eb6e1341dec27..fbecbcbc638ac 100644 --- a/ext/intl/php_intl.stub.php +++ b/ext/intl/php_intl.stub.php @@ -440,13 +440,13 @@ function grapheme_strpos(string $haystack, string $needle, int $offset = 0, stri function grapheme_stripos(string $haystack, string $needle, int $offset = 0, string $locale = "", int $strength = UCOL_DEFAULT_STRENGTH): int|false {} -function grapheme_strrpos(string $haystack, string $needle, int $offset = 0, int $strength = UCOL_DEFAULT_STRENGTH): int|false {} +function grapheme_strrpos(string $haystack, string $needle, int $offset = 0, string $locale = "", int $strength = UCOL_DEFAULT_STRENGTH): int|false {} function grapheme_strripos(string $haystack, string $needle, int $offset = 0, string $locale = "", int $strength = UCOL_DEFAULT_STRENGTH): int|false {} -function grapheme_substr(string $string, int $offset, ?int $length = null, int $strength = UCOL_DEFAULT_STRENGTH): string|false {} +function grapheme_substr(string $string, int $offset, ?int $length = null, string $locale = "", int $strength = UCOL_DEFAULT_STRENGTH): string|false {} -function grapheme_strstr(string $haystack, string $needle, bool $beforeNeedle = false, int $strength = UCOL_DEFAULT_STRENGTH): string|false {} +function grapheme_strstr(string $haystack, 
string $needle, bool $beforeNeedle = false, string $locale = "", int $strength = UCOL_DEFAULT_STRENGTH): string|false {} function grapheme_stristr(string $haystack, string $needle, bool $beforeNeedle = false, string $locale = "", int $strength = UCOL_DEFAULT_STRENGTH): string|false {} diff --git a/ext/intl/php_intl_arginfo.h b/ext/intl/php_intl_arginfo.h index 79c384731f5d0..4cae7f54db418 100644 --- a/ext/intl/php_intl_arginfo.h +++ b/ext/intl/php_intl_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. - * Stub hash: a1f0967fdff81576f00093766d0830cd056792c6 */ + * Stub hash: 8e7c9f22a29f3110de50a66491fc2cb36a600fd8 */ ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_intlcal_create_instance, 0, 0, IntlCalendar, 1) ZEND_ARG_INFO_WITH_DEFAULT_VALUE(0, timezone, "null") @@ -468,12 +468,7 @@ ZEND_END_ARG_INFO() #define arginfo_grapheme_stripos arginfo_grapheme_strpos -ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_grapheme_strrpos, 0, 2, MAY_BE_LONG|MAY_BE_FALSE) - ZEND_ARG_TYPE_INFO(0, haystack, IS_STRING, 0) - ZEND_ARG_TYPE_INFO(0, needle, IS_STRING, 0) - ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, offset, IS_LONG, 0, "0") - ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, strength, IS_LONG, 0, "UCOL_DEFAULT_STRENGTH") -ZEND_END_ARG_INFO() +#define arginfo_grapheme_strrpos arginfo_grapheme_strpos #define arginfo_grapheme_strripos arginfo_grapheme_strpos @@ -481,17 +476,11 @@ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_grapheme_substr, 0, 2, MAY_BE_ST ZEND_ARG_TYPE_INFO(0, string, IS_STRING, 0) ZEND_ARG_TYPE_INFO(0, offset, IS_LONG, 0) ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, length, IS_LONG, 1, "null") + ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, locale, IS_STRING, 0, "\"\"") ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, strength, IS_LONG, 0, "UCOL_DEFAULT_STRENGTH") ZEND_END_ARG_INFO() ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_grapheme_strstr, 0, 2, MAY_BE_STRING|MAY_BE_FALSE) - ZEND_ARG_TYPE_INFO(0, haystack, IS_STRING, 0) - 
ZEND_ARG_TYPE_INFO(0, needle, IS_STRING, 0) - ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, beforeNeedle, _IS_BOOL, 0, "false") - ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, strength, IS_LONG, 0, "UCOL_DEFAULT_STRENGTH") -ZEND_END_ARG_INFO() - -ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_grapheme_stristr, 0, 2, MAY_BE_STRING|MAY_BE_FALSE) ZEND_ARG_TYPE_INFO(0, haystack, IS_STRING, 0) ZEND_ARG_TYPE_INFO(0, needle, IS_STRING, 0) ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, beforeNeedle, _IS_BOOL, 0, "false") @@ -499,6 +488,8 @@ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_grapheme_stristr, 0, 2, MAY_BE_S ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, strength, IS_LONG, 0, "UCOL_DEFAULT_STRENGTH") ZEND_END_ARG_INFO() +#define arginfo_grapheme_stristr arginfo_grapheme_strstr + ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_grapheme_str_split, 0, 1, MAY_BE_ARRAY|MAY_BE_FALSE) ZEND_ARG_TYPE_INFO(0, string, IS_STRING, 0) ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, length, IS_LONG, 0, "1") From 3431fd7aa68dc53c3bd2ffe0b944409ea4293c96 Mon Sep 17 00:00:00 2001 From: Yuya Hamada Date: Tue, 8 Jul 2025 23:48:16 +0900 Subject: [PATCH 10/11] Add test for strength from Japanese nabe --- ext/intl/tests/grapheme_levenshtein.phpt | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/ext/intl/tests/grapheme_levenshtein.phpt b/ext/intl/tests/grapheme_levenshtein.phpt index 9e2096fb29636..7cfb489ea1c3c 100644 --- a/ext/intl/tests/grapheme_levenshtein.phpt +++ b/ext/intl/tests/grapheme_levenshtein.phpt @@ -58,6 +58,14 @@ $nabe = '邊'; $nabe_E0100 = "邊󠄀"; var_dump(grapheme_levenshtein($nabe, $nabe_E0100)); +// $nabe and $nabe_E0101 differ because $nabe_E0101 is an IVS (Ideographic Variation Sequence). +// $nabe_E0101 is U+908A followed by the variation selector U+E0101. +// grapheme_levenshtein can detect the difference only when the collation strength is Collator::IDENTICAL. +// So the result should be 1.
+$nabe = '邊'; +$nabe_E0101 = "\u{908A}\u{E0101}"; +var_dump(grapheme_levenshtein($nabe, $nabe_E0101, strength: Collator::IDENTICAL)); + // combining character var_dump(grapheme_levenshtein("\u{0065}\u{0301}", "\u{00e9}")); @@ -122,6 +130,7 @@ int(2) --- Variable selector --- int(1) int(0) +int(1) int(0) --- Corner case --- grapheme_levenshtein(): Argument #3 ($insertion_cost) must be greater than 0 and less than or equal to %d From a993d00c4479ef30502fb469ba861450f8a62bc1 Mon Sep 17 00:00:00 2001 From: Yuya Hamada Date: Wed, 9 Jul 2025 11:45:11 +0900 Subject: [PATCH 11/11] Modify default value for grapheme_stri* functions --- ext/intl/grapheme/grapheme_string.c | 12 +++++++++--- ext/intl/grapheme/grapheme_util.c | 13 +++++-------- ext/intl/php_intl.stub.php | 11 ++++++++--- ext/intl/php_intl_arginfo.h | 21 +++++++++++++++++---- 4 files changed, 39 insertions(+), 18 deletions(-) diff --git a/ext/intl/grapheme/grapheme_string.c b/ext/intl/grapheme/grapheme_string.c index 36f653edeba32..b71daa0dddf57 100644 --- a/ext/intl/grapheme/grapheme_string.c +++ b/ext/intl/grapheme/grapheme_string.c @@ -142,7 +142,7 @@ PHP_FUNCTION(grapheme_stripos) const char *found; zend_long loffset = 0; int32_t offset = 0; - zend_long strength = UCOL_DEFAULT_STRENGTH; + zend_long strength = UCOL_SECONDARY; zend_long ret_pos; int is_ascii; @@ -271,7 +271,7 @@ PHP_FUNCTION(grapheme_strripos) size_t haystack_len, needle_len, locale_len = 0; zend_long loffset = 0; int32_t offset = 0; - zend_long strength = UCOL_DEFAULT_STRENGTH; + zend_long strength = UCOL_SECONDARY; zend_long ret_pos; int is_ascii; @@ -557,9 +557,15 @@ static void strstr_common_handler(INTERNAL_FUNCTION_PARAMETERS, int f_ignore_cas const char *found; size_t haystack_len, needle_len, locale_len = 0; int32_t ret_pos, uchar_pos; - zend_long strength = UCOL_DEFAULT_STRENGTH; + zend_long strength; bool part = false; + if (f_ignore_case) { + strength = UCOL_SECONDARY; + } else { + strength = UCOL_DEFAULT_STRENGTH; + } + 
ZEND_PARSE_PARAMETERS_START(2, 5) Z_PARAM_STRING(haystack, haystack_len) Z_PARAM_STRING(needle, needle_len) diff --git a/ext/intl/grapheme/grapheme_util.c b/ext/intl/grapheme/grapheme_util.c index 86b06fd4bbbbe..7ba52605f9d19 100644 --- a/ext/intl/grapheme/grapheme_util.c +++ b/ext/intl/grapheme/grapheme_util.c @@ -139,14 +139,11 @@ int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle, src = usearch_open(uneedle, uneedle_len, uhaystack, uhaystack_len, locale, bi, &status); STRPOS_CHECK_STATUS(status, "Error creating search object"); - if(f_ignore_case) { - UCollator *coll = usearch_getCollator(src); - ucol_setStrength(coll, strength); - status = U_ZERO_ERROR; - ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_SECONDARY, &status); - STRPOS_CHECK_STATUS(status, "Error setting collation strength"); - usearch_reset(src); - } + UCollator *coll = usearch_getCollator(src); + ucol_setAttribute(coll, UCOL_STRENGTH, strength, &status); + STRPOS_CHECK_STATUS(status, "Error setting collation strength"); + status = U_ZERO_ERROR; + usearch_reset(src); if(offset != 0) { offset_pos = grapheme_get_haystack_offset(bi, offset); diff --git a/ext/intl/php_intl.stub.php b/ext/intl/php_intl.stub.php index fbecbcbc638ac..5599d0c2844af 100644 --- a/ext/intl/php_intl.stub.php +++ b/ext/intl/php_intl.stub.php @@ -171,6 +171,11 @@ * @cvalue UCOL_DEFAULT_STRENGTH */ const UCOL_DEFAULT_STRENGTH = UNKNOWN; +/** + * @var int + * @cvalue UCOL_SECONDARY + */ +const UCOL_SECONDARY = UNKNOWN; class IntlException extends Exception { @@ -438,17 +443,17 @@ function grapheme_strlen(string $string): int|false|null {} function grapheme_strpos(string $haystack, string $needle, int $offset = 0, string $locale = "", int $strength = UCOL_DEFAULT_STRENGTH): int|false {} -function grapheme_stripos(string $haystack, string $needle, int $offset = 0, string $locale = "", int $strength = UCOL_DEFAULT_STRENGTH): int|false {} +function grapheme_stripos(string $haystack, string $needle, int 
$offset = 0, string $locale = "", int $strength = UCOL_SECONDARY): int|false {} function grapheme_strrpos(string $haystack, string $needle, int $offset = 0, string $locale = "", int $strength = UCOL_DEFAULT_STRENGTH): int|false {} -function grapheme_strripos(string $haystack, string $needle, int $offset = 0, string $locale = "", int $strength = UCOL_DEFAULT_STRENGTH): int|false {} +function grapheme_strripos(string $haystack, string $needle, int $offset = 0, string $locale = "", int $strength = UCOL_SECONDARY): int|false {} function grapheme_substr(string $string, int $offset, ?int $length = null, string $locale = "", int $strength = UCOL_DEFAULT_STRENGTH): string|false {} function grapheme_strstr(string $haystack, string $needle, bool $beforeNeedle = false, string $locale = "", int $strength = UCOL_DEFAULT_STRENGTH): string|false {} -function grapheme_stristr(string $haystack, string $needle, bool $beforeNeedle = false, string $locale = "", int $strength = UCOL_DEFAULT_STRENGTH): string|false {} +function grapheme_stristr(string $haystack, string $needle, bool $beforeNeedle = false, string $locale = "", int $strength = UCOL_SECONDARY): string|false {} function grapheme_str_split(string $string, int $length = 1): array|false {} diff --git a/ext/intl/php_intl_arginfo.h b/ext/intl/php_intl_arginfo.h index 4cae7f54db418..8ac55cb2161ba 100644 --- a/ext/intl/php_intl_arginfo.h +++ b/ext/intl/php_intl_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. 
- * Stub hash: 8e7c9f22a29f3110de50a66491fc2cb36a600fd8 */ + * Stub hash: 148713dbf0ac8ed207c03cd3f996c97565d85db6 */ ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_intlcal_create_instance, 0, 0, IntlCalendar, 1) ZEND_ARG_INFO_WITH_DEFAULT_VALUE(0, timezone, "null") @@ -466,11 +466,17 @@ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_grapheme_strpos, 0, 2, MAY_BE_LO ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, strength, IS_LONG, 0, "UCOL_DEFAULT_STRENGTH") ZEND_END_ARG_INFO() -#define arginfo_grapheme_stripos arginfo_grapheme_strpos +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_grapheme_stripos, 0, 2, MAY_BE_LONG|MAY_BE_FALSE) + ZEND_ARG_TYPE_INFO(0, haystack, IS_STRING, 0) + ZEND_ARG_TYPE_INFO(0, needle, IS_STRING, 0) + ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, offset, IS_LONG, 0, "0") + ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, locale, IS_STRING, 0, "\"\"") + ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, strength, IS_LONG, 0, "UCOL_SECONDARY") +ZEND_END_ARG_INFO() #define arginfo_grapheme_strrpos arginfo_grapheme_strpos -#define arginfo_grapheme_strripos arginfo_grapheme_strpos +#define arginfo_grapheme_strripos arginfo_grapheme_stripos ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_grapheme_substr, 0, 2, MAY_BE_STRING|MAY_BE_FALSE) ZEND_ARG_TYPE_INFO(0, string, IS_STRING, 0) @@ -488,7 +494,13 @@ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_grapheme_strstr, 0, 2, MAY_BE_ST ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, strength, IS_LONG, 0, "UCOL_DEFAULT_STRENGTH") ZEND_END_ARG_INFO() -#define arginfo_grapheme_stristr arginfo_grapheme_strstr +ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_grapheme_stristr, 0, 2, MAY_BE_STRING|MAY_BE_FALSE) + ZEND_ARG_TYPE_INFO(0, haystack, IS_STRING, 0) + ZEND_ARG_TYPE_INFO(0, needle, IS_STRING, 0) + ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, beforeNeedle, _IS_BOOL, 0, "false") + ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, locale, IS_STRING, 0, "\"\"") + ZEND_ARG_TYPE_INFO_WITH_DEFAULT_VALUE(0, strength, IS_LONG, 0, "UCOL_SECONDARY") 
+ZEND_END_ARG_INFO() ZEND_BEGIN_ARG_WITH_RETURN_TYPE_MASK_EX(arginfo_grapheme_str_split, 0, 1, MAY_BE_ARRAY|MAY_BE_FALSE) ZEND_ARG_TYPE_INFO(0, string, IS_STRING, 0) @@ -1229,6 +1241,7 @@ static void register_php_intl_symbols(int module_number) REGISTER_LONG_CONSTANT("IDNA_ERROR_BIDI", UIDNA_ERROR_BIDI, CONST_PERSISTENT); REGISTER_LONG_CONSTANT("IDNA_ERROR_CONTEXTJ", UIDNA_ERROR_CONTEXTJ, CONST_PERSISTENT); REGISTER_LONG_CONSTANT("UCOL_DEFAULT_STRENGTH", UCOL_DEFAULT_STRENGTH, CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("UCOL_SECONDARY", UCOL_SECONDARY, CONST_PERSISTENT); zend_attribute *attribute_Deprecated_func_intlcal_set_0 = zend_add_function_attribute(zend_hash_str_find_ptr(CG(function_table), "intlcal_set", sizeof("intlcal_set") - 1), ZSTR_KNOWN(ZEND_STR_DEPRECATED_CAPITALIZED), 2);