openjdk · xuemingshen-oracle · Aug 18, 2025 · Oct 3, 2025
diff --git a/make/jdk/src/classes/build/tools/generatecharacter/CaseFolding.java b/make/jdk/src/classes/build/tools/generatecharacter/CaseFolding.java
@@ -22,15 +22,14 @@
  * or visit www.oracle.com if you need additional information or have any
  * questions.
  */
-
 package build.tools.generatecharacter;
 
-import java.io.IOException;
 import java.nio.file.Files;
 import java.nio.file.Paths;
 import java.nio.file.StandardOpenOption;
+import java.util.Arrays;
 import java.util.stream.Collectors;
-import java.util.stream.Stream;
+import java.util.stream.IntStream;
 
 public class CaseFolding {
 
@@ -42,32 +41,58 @@ public static void main(String[] args) throws Throwable {
         var templateFile = Paths.get(args[0]);
         var caseFoldingTxt = Paths.get(args[1]);
         var genSrcFile = Paths.get(args[2]);
-        var supportedTypes = "^.*; [CTS]; .*$";
+
+        // java.lang
+        var supportedTypes = "^.*; [CF]; .*$";  // full/1:M case folding
         var caseFoldingEntries = Files.lines(caseFoldingTxt)
-            .filter(line -> !line.startsWith("#") && line.matches(supportedTypes))
-            .map(line -> {
-                String[] cols = line.split("; ");
-                return new String[] {cols[0], cols[1], cols[2]};
-            })
-            .filter(cols -> {
-                //  the folding case doesn't map back to the original char.
-                var cp1 = Integer.parseInt(cols[0], 16);
-                var cp2 = Integer.parseInt(cols[2], 16);
-                return Character.toUpperCase(cp2) != cp1 && Character.toLowerCase(cp2) != cp1;
-            })
-            .map(cols -> String.format("        entry(0x%s, 0x%s)", cols[0], cols[2]))
-            .collect(Collectors.joining(",\n", "", ""));
+                .filter(line -> !line.startsWith("#") && line.matches(supportedTypes))
+                .map(line -> {
+                    var fields = line.split("; ");
+                    var cp = Integer.parseInt(fields[0], 16);
+                    fields = fields[2].trim().split(" ");
+                    var folding = new int[fields.length];
+                    for (int i = 0; i < folding.length; i++) {
+                        folding[i] = Integer.parseInt(fields[i], 16);
+                    }
+                    var foldingChars = Arrays.stream(folding)
+                            .mapToObj(Character::toChars)
+                            .flatMapToInt(chars -> IntStream.range(0, chars.length).map(i -> (int) chars[i]))
+                            .toArray();
+                    return String.format("\t\tnew CaseFoldingEntry(0x%04x, %s)",
+                            cp,
+                            Arrays.stream(foldingChars)
+                                    .mapToObj(c -> String.format("0x%04x", c))
+                                    .collect(Collectors.joining(", ", "new char[] {", "}"))
+                    );
+                })
+                .collect(Collectors.joining(",\n", "", ""));
+        // util.regex
+        var expandedSupportedTypes = "^.*; [CTS]; .*$";
+        var expanded_caseFoldingEntries = Files.lines(caseFoldingTxt)
+                .filter(line -> !line.startsWith("#") && line.matches(expandedSupportedTypes))
+                .map(line -> {
+                    String[] cols = line.split("; ");
+                    return new String[]{cols[0], cols[1], cols[2]};
+                })
+                .filter(cols -> {
+                    // the folding case doesn't map back to the original char.
+                    var cp1 = Integer.parseInt(cols[0], 16);
+                    var cp2 = Integer.parseInt(cols[2], 16);
+                    return Character.toUpperCase(cp2) != cp1 && Character.toLowerCase(cp2) != cp1;
+                })
+                .map(cols -> String.format("        entry(0x%s, 0x%s)", cols[0], cols[2]))
+                .collect(Collectors.joining(",\n", "", ""));
 
         // hack, hack, hack! the logic does not pick 0131. just add manually to support 'I's.
         // 0049; T; 0131; # LATIN CAPITAL LETTER I
         final String T_0x0131_0x49 = String.format("        entry(0x%04x, 0x%04x),\n", 0x0131, 0x49);
 
-        // Generate .java file
         Files.write(
-            genSrcFile,
-            Files.lines(templateFile)
-                .map(line -> line.contains("%%%Entries") ? T_0x0131_0x49 + caseFoldingEntries : line)
-                .collect(Collectors.toList()),
-            StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING);
+                genSrcFile,
+                Files.lines(templateFile)
+                        .map(line -> line.contains("%%%Entries") ? caseFoldingEntries : line)
+                        .map(line -> line.contains("%%%Expanded_Case_Map_Entries") ? T_0x0131_0x49 + expanded_caseFoldingEntries : line)
+                        .collect(Collectors.toList()),
+                StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING);
     }
 }
diff --git a/make/modules/java.base/gensrc/GensrcCharacterData.gmk b/make/modules/java.base/gensrc/GensrcCharacterData.gmk
@@ -72,5 +72,22 @@ TARGETS += $(GENSRC_CHARACTERDATA)
 
 ################################################################################
 
+
+GENSRC_STRINGCASEFOLDING := $(SUPPORT_OUTPUTDIR)/gensrc/java.base/jdk/internal/java/lang/CaseFolding.java
+# NOTE(review): output dir is jdk/internal/java/lang but the template dir is jdk/internal/lang -- confirm both.
+STRINGCASEFOLDING_TEMPLATE := $(MODULE_SRC)/share/classes/jdk/internal/lang/CaseFolding.java.template
+CASEFOLDINGTXT := $(MODULE_SRC)/share/data/unicodedata/CaseFolding.txt
+# Regenerate CaseFolding.java when the build tools, the template or CaseFolding.txt change.
+$(GENSRC_STRINGCASEFOLDING): $(BUILD_TOOLS_JDK) $(STRINGCASEFOLDING_TEMPLATE) $(CASEFOLDINGTXT)
+	$(call LogInfo, Generating $@)
+	$(call MakeTargetDir)
+	$(TOOL_GENERATECASEFOLDING) \
+	    $(STRINGCASEFOLDING_TEMPLATE) \
+	    $(CASEFOLDINGTXT) \
+	    $(GENSRC_STRINGCASEFOLDING)
+
+TARGETS += $(GENSRC_STRINGCASEFOLDING)
+
+
 endif # include guard
 include MakeIncludeEnd.gmk
diff --git a/make/modules/java.base/gensrc/GensrcRegex.gmk b/make/modules/java.base/gensrc/GensrcRegex.gmk
@@ -50,22 +50,5 @@ TARGETS += $(GENSRC_INDICCONJUNCTBREAK)
 
 ################################################################################
 
-GENSRC_CASEFOLDING := $(SUPPORT_OUTPUTDIR)/gensrc/java.base/jdk/internal/util/regex/CaseFolding.java
-
-CASEFOLDINGTEMP := $(MODULE_SRC)/share/classes/jdk/internal/util/regex/CaseFolding.java.template
-CASEFOLDINGTXT := $(MODULE_SRC)/share/data/unicodedata/CaseFolding.txt
-
-$(GENSRC_CASEFOLDING): $(BUILD_TOOLS_JDK) $(CASEFOLDINGTEMP) $(CASEFOLDINGTXT)
-	$(call LogInfo, Generating $@)
-	$(call MakeTargetDir)
-	$(TOOL_GENERATECASEFOLDING) \
-	    $(CASEFOLDINGTEMP) \
-	    $(CASEFOLDINGTXT) \
-	    $(GENSRC_CASEFOLDING)
-
-TARGETS += $(GENSRC_CASEFOLDING)
-
-################################################################################
-
 endif # include guard
 include MakeIncludeEnd.gmk
diff --git a/src/java.base/share/classes/java/lang/String.java b/src/java.base/share/classes/java/lang/String.java
@@ -2189,6 +2189,56 @@ public boolean equalsIgnoreCase(String anotherString) {
                 && regionMatches(true, 0, anotherString, 0, length());
     }
 
+    /**
+     * Compares this {@code String} to another {@code String} for equality,
+     * using <em>Unicode case folding</em>. Two strings are considered equal
+     * by this method if their case-folded forms are identical.
+     * <p>
+     * Case folding is defined by the Unicode Standard in
+     * <a href="https://www.unicode.org/Public/UCD/latest/ucd/CaseFolding.txt">CaseFolding.txt</a>,
+     * including 1:M mappings. For example, {@code "Maße".equalsFoldCase("MASSE")}
+     * returns {@code true}, since the character {@code U+00DF} (sharp s) folds
+     * to {@code "ss"}.
+     * <p>
+     * Case folding is locale-independent and language-neutral, unlike
+     * locale-sensitive transformations such as {@link #toLowerCase()} or
+     * {@link #toUpperCase()}. It is intended for caseless matching,
+     * searching, and indexing.
+     *
+     * @apiNote
+     * This method is the Unicode-compliant alternative to
+     * {@link #equalsIgnoreCase(String)}. It implements full case folding as
+     * defined by the Unicode Standard, which may differ from the simpler
+     * per-character mapping performed by {@code equalsIgnoreCase}.
+     * For example:
+     * {@snippet lang=java :
+     * String a = "Maße";
+     * String b = "MASSE";
+     * boolean equalsFoldCase = a.equalsFoldCase(b);       // returns true
+     * boolean equalsIgnoreCase = a.equalsIgnoreCase(b);   // returns false
+     * }
+     *
+     * @param  anotherString
+     *         The {@code String} to compare this {@code String} against
+     *
+     * @return  {@code true} if the argument is not {@code null} and represents
+     *          the same sequence of characters as this string under Unicode case
+     *          folding; {@code false} otherwise.
+     *
+     * @see     #compareToFoldCase(String)
+     * @see     #equalsIgnoreCase(String)
+     * @since   26
+     */
+    public boolean equalsFoldCase(String anotherString) {
+        if (this == anotherString) {
+            return true;
+        }
+        if (anotherString == null) {
+            return false;
+        }
+        return UNICODE_CASEFOLD_ORDER.compare(this, anotherString) == 0;
+    }
+
     /**
      * Compares two strings lexicographically.
      * The comparison is based on the Unicode value of each character in
@@ -2310,6 +2360,76 @@ public int compareToIgnoreCase(String str) {
         return CASE_INSENSITIVE_ORDER.compare(this, str);
     }
 
+    /**
+     * A Comparator that orders {@code String} objects as by
+     * {@link #compareToFoldCase(String) compareToFoldCase()}.
+     *
+     * @see     #compareToFoldCase(String)
+     * @since   26
+     */
+    public static final Comparator<String> UNICODE_CASEFOLD_ORDER
+            = new FoldCaseComparator();
+
+    private static class FoldCaseComparator implements Comparator<String> {
+        // Dispatches to the Latin1/UTF16 folding routine that matches the two strings' coders.
+        @Override
+        public int compare(String s1, String s2) {
+            byte[] v1 = s1.value;
+            byte[] v2 = s2.value;
+            if (s1.coder() == s2.coder()) {
+                return s1.coder() == LATIN1 ? StringLatin1.compareToFC(v1, v2)
+                                            : StringUTF16.compareToFC(v1, v2);
+            }
+            return s1.coder() == LATIN1 ? StringLatin1.compareToFC_UTF16(v1, v2)
+                                        : StringUTF16.compareToFC_Latin1(v1, v2);
+        }
+    }
+
+    /**
+     * Compares two strings lexicographically using <em>Unicode case folding</em>.
+     * This method returns an integer whose sign is that of calling {@code compareTo}
+     * on the Unicode case folded version of the strings. Unicode case folding
+     * eliminates differences in case according to the Unicode Standard, using the
+     * mappings defined in
+     * <a href="https://www.unicode.org/Public/UCD/latest/ucd/CaseFolding.txt">CaseFolding.txt</a>,
+     * including 1:M mappings, such as {@code "ß"} → {@code "ss"}.
+     * <p>
+     * Case folding is a locale-independent, language-neutral form of case mapping,
+     * primarily intended for caseless matching. Unlike {@link #compareToIgnoreCase(String)},
+     * which applies a simpler locale-insensitive uppercase mapping, this method
+     * follows the Unicode <em>full</em> case folding, providing stable and
+     * consistent results across all environments.
+     * <p>
+     * Note that this method does <em>not</em> take locale into account, and may
+     * produce results that differ from locale-sensitive ordering. Use
+     * {@link java.text.Collator} for locale-sensitive comparison.
+     *
+     * @apiNote
+     * This method is the Unicode-compliant alternative to
+     * {@link #compareToIgnoreCase(String)}. It implements the <em>full</em> case folding
+     * as defined by the Unicode Standard, which may differ from the simpler
+     * per-character mapping performed by {@code compareToIgnoreCase}.
+     * For example:
+     * {@snippet lang=java :
+     * String a = "Maße";
+     * String b = "MASSE";
+     * int cmpFoldCase = a.compareToFoldCase(b);     // returns 0
+     * int cmpIgnoreCase = a.compareToIgnoreCase(b); // returns > 0
+     * }
+     *
+     * @param   str   the {@code String} to be compared.
+     * @return  a negative integer, zero, or a positive integer as the specified
+     *          String is greater than, equal to, or less than this String,
+     *          ignoring case considerations by case folding.
+     * @see     #equalsFoldCase(String)
+     * @see     #compareToIgnoreCase(String)
+     * @see     java.text.Collator
+     * @since   26
+     */
+    public int compareToFoldCase(String str) {
+        return UNICODE_CASEFOLD_ORDER.compare(this, str);
+    }
+
     /**
      * Tests if two string regions are equal.
      * <p>

diff --git a/src/java.base/share/classes/java/lang/StringLatin1.java b/src/java.base/share/classes/java/lang/StringLatin1.java
@@ -32,6 +32,8 @@
 import java.util.function.IntConsumer;
 import java.util.stream.Stream;
 import java.util.stream.StreamSupport;
+
+import jdk.internal.java.lang.CaseFolding;
 import jdk.internal.util.ArraysSupport;
 import jdk.internal.vm.annotation.IntrinsicCandidate;
 
@@ -62,6 +64,10 @@ static int length(byte[] value) {
         return value.length;
     }
 
+    static int codePointAt(byte[] value, int index, int end) {
+        return value[index] & 0xff;  // the code point is the byte read as unsigned; 'end' is not used
+    }
+
     static char[] toChars(byte[] value) {
         char[] dst = new char[value.length];
         inflate(value, 0, dst, 0, value.length);
@@ -179,6 +185,100 @@ static int compareToCI_UTF16(byte[] value, byte[] other) {
         return len1 - len2;
     }
 
+    public static int compareToFC(byte[] value, byte[] other) {  // case-folded compare; both arrays read as Latin1
+        int len1 = value.length;
+        int len2 = other.length;
+        char[] folded1 = null;  // folding of the last code point read from value, if any
+        char[] folded2 = null;  // folding of the last code point read from other, if any
+        int k1 = 0, k2 = 0, fk1 = 0, fk2 = 0;  // k*: next input index; fk*: next index into folded*
+        while ((k1 < len1 || folded1 != null && fk1 < folded1.length) &&
+               (k2 < len2 || folded2 != null && fk2 < folded2.length)) {  // both sides still have a char
+            char c1, c2;
+            if (folded1 != null && fk1 < folded1.length) {  // drain a pending multi-char folding first
+                c1 = folded1[fk1++];
+            } else {
+                int cp = codePointAt(value, k1++, len1);  // one byte per code point, never a surrogate
+                folded1 = CaseFolding.foldIfDefined(cp);  // presumably null when cp has no folding
+                fk1 = 0;
+                if (folded1 == null) {
+                    c1 = (char)cp;
+                } else {
+                    c1 = folded1[fk1++];
+                }
+            }
+            if (folded2 != null && fk2 < folded2.length) {  // same steps for the second input
+                c2 = folded2[fk2++];
+            } else {
+                int cp = codePointAt(other, k2++, len2);
+                folded2 = CaseFolding.foldIfDefined(cp);
+                fk2 = 0;
+                if (folded2 == null) {
+                    c2 = (char)cp;
+                } else {
+                    c2 = folded2[fk2++];
+                }
+            }
+            if (c1 != c2) {
+                return c1 - c2;  // the first differing folded char decides the order
+            }
+        }
+        if (k1 < len1 || folded1 != null && fk1 < folded1.length) {  // only value has chars left
+            return 1;
+        }
+        if (k2 < len2 || folded2 != null && fk2 < folded2.length) {  // only other has chars left
+            return -1;
+        }
+        return 0;
+    }
+
+    // Compares Latin1-coded 'value' with UTF16-coded 'other' char by char after case folding;
+    // returns <0, 0 or >0. A folding may expand to several chars, which are drained before reading on.
+    public static int compareToFC_UTF16(byte[] value, byte[] other) {
+        int len1 = value.length;
+        int len2 = StringUTF16.length(other);
+        char[] folded1 = null, folded2 = null;  // chars pending from the last code point read
+        int k1 = 0, k2 = 0, fk1 = 0, fk2 = 0;   // k*: next input index; fk*: next index into folded*
+        while ((k1 < len1 || folded1 != null && fk1 < folded1.length) &&
+               (k2 < len2 || folded2 != null && fk2 < folded2.length)) {
+            char c1, c2;
+            if (folded1 != null && fk1 < folded1.length) {
+                c1 = folded1[fk1++];
+            } else {
+                int cp = codePointAt(value, k1++, len1);
+                folded1 = CaseFolding.foldIfDefined(cp);
+                fk1 = 0;
+                if (folded1 == null) {
+                    c1 = (char)cp;
+                } else {
+                    c1 = folded1[fk1++];
+                }
+            }
+            if (folded2 != null && fk2 < folded2.length) {
+                c2 = folded2[fk2++];
+            } else {
+                int cp = StringUTF16.codePointAt(other, k2, len2);
+                k2 += Character.charCount(cp);
+                folded2 = CaseFolding.foldIfDefined(cp);
+                fk2 = 0;
+                // An unfolded supplementary code point must still yield both surrogates, not (char)cp.
+                if (folded2 == null && !Character.isBmpCodePoint(cp)) {
+                    folded2 = Character.toChars(cp);
+                }
+                c2 = (folded2 == null) ? (char)cp : folded2[fk2++];
+            }
+            if (c1 != c2) {
+                return c1 - c2;
+            }
+        }
+        if (k1 < len1 || folded1 != null && fk1 < folded1.length) {
+            return 1;
+        }
+        if (k2 < len2 || folded2 != null && fk2 < folded2.length) {
+            return -1;
+        }
+        return 0;
+    }
+
     static int hashCode(byte[] value) {
         return ArraysSupport.hashCodeOfUnsigned(value, 0, value.length, 0);
     }