Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RegexDiff X64] [MihaZupan] Use Contains{Any} in Regex source generator #978

Open
MihuBot opened this issue Feb 1, 2025 · 0 comments
Open

Comments

@MihuBot
Copy link
Owner

MihuBot commented Feb 1, 2025

Job completed in 15 minutes 44 seconds (remote runner delay: 1 minute 26 seconds).
dotnet/runtime#112065
Using arguments: regexdiff

118 out of 18857 patterns have generated source code changes.

Examples of GeneratedRegex source diffs
"^[a-f0-9]{32}$" (4920 uses)
[GeneratedRegex("^[a-f0-9]{32}$")]
      return false; // The input didn't match.
  }
  
-   if (slice.Slice(0, 32).IndexOfAnyExcept(Utilities.s_asciiHexDigitsLower) >= 0)
+   if (slice.Slice(0, 32).ContainsAnyExcept(Utilities.s_asciiHexDigitsLower))
  {
      return false; // The input didn't match.
  }
"\"([a-fA-F0-9-\\{\\}]{36})\"" (569 uses)
[GeneratedRegex("\"([a-fA-F0-9-\\{\\}]{36})\"", RegexOptions.CultureInvariant)]
      return false; // The input didn't match.
  }
  
-   if (slice.Slice(0, 36).IndexOfAnyExcept(Utilities.s_ascii_20FF037E0000007E000028) >= 0)
+   if (slice.Slice(0, 36).ContainsAnyExcept(Utilities.s_ascii_20FF037E0000007E000028))
  {
      UncaptureUntil(0);
      return false; // The input didn't match.
"^[a-z0-9]{24}$" (285 uses)
[GeneratedRegex("^[a-z0-9]{24}$", RegexOptions.IgnoreCase)]
      return false; // The input didn't match.
  }
  
-   if (slice.Slice(0, 24).IndexOfAnyExcept(Utilities.s_asciiLettersAndDigitsAndKelvinSign) >= 0)
+   if (slice.Slice(0, 24).ContainsAnyExcept(Utilities.s_asciiLettersAndDigitsAndKelvinSign))
  {
      return false; // The input didn't match.
  }
"^[0-9a-f]{40}$" (202 uses)
[GeneratedRegex("^[0-9a-f]{40}$", RegexOptions.IgnoreCase)]
      return false; // The input didn't match.
  }
  
-   if (slice.Slice(0, 40).IndexOfAnyExcept(Utilities.s_asciiHexDigits) >= 0)
+   if (slice.Slice(0, 40).ContainsAnyExcept(Utilities.s_asciiHexDigits))
  {
      return false; // The input didn't match.
  }
"\\A(?:[A-Z0-9]{17})\\z" (182 uses)
[GeneratedRegex("\\A(?:[A-Z0-9]{17})\\z")]
      return false; // The input didn't match.
  }
  
-   if (slice.Slice(0, 17).IndexOfAnyExcept(Utilities.s_asciiLettersUpperAndDigits) >= 0)
+   if (slice.Slice(0, 17).ContainsAnyExcept(Utilities.s_asciiLettersUpperAndDigits))
  {
      return false; // The input didn't match.
  }
"^\\\\((?<StoreLocation>CurrentUser|LocalMach ..." (167 uses)
[GeneratedRegex("^\\\\((?<StoreLocation>CurrentUser|LocalMachine)(\\\\(?<StoreName>[a-zA-Z]+)(\\\\(?<Thumbprint>[0-9a-f]{40}))?)?)?$")]
      goto LoopIterationNoMatch2;
  }
  
-   if (slice.Slice(0, 40).IndexOfAnyExcept(Utilities.s_asciiHexDigitsLower) >= 0)
+   if (slice.Slice(0, 40).ContainsAnyExcept(Utilities.s_asciiHexDigitsLower))
  {
      goto LoopIterationNoMatch2;
  }
"IR[0-9]{24}" (144 uses)
[GeneratedRegex("IR[0-9]{24}", RegexOptions.IgnoreCase)]
  // Match a character in the set [0-9] exactly 24 times.
  {
-       if (slice.Slice(2, 24).IndexOfAnyExceptInRange('0', '9') >= 0)
+       if (slice.Slice(2, 24).ContainsAnyExceptInRange('0', '9'))
      {
          return false; // The input didn't match.
      }
"^committed\\s+changeset\\s+\\d+:(?<hash>[0-9 ..." (132 uses)
[GeneratedRegex("^committed\\s+changeset\\s+\\d+:(?<hash>[0-9a-f]{40})$", RegexOptions.IgnoreCase)]
      return false; // The input didn't match.
  }
  
-   if (slice.Slice(0, 40).IndexOfAnyExcept(Utilities.s_asciiHexDigits) >= 0)
+   if (slice.Slice(0, 40).ContainsAnyExcept(Utilities.s_asciiHexDigits))
  {
      UncaptureUntil(0);
      return false; // The input didn't match.
"^[A-Fa-f0-9]{32}$|^({|\\()?[A-Fa-f0-9]{8}-([ ..." (130 uses)
[GeneratedRegex("^[A-Fa-f0-9]{32}$|^({|\\()?[A-Fa-f0-9]{8}-([A-Fa-f0-9]{4}-){3}[A-Fa-f0-9]{12}(}|\\))?$|^({)?[0xA-Fa-f0-9]{3,10}(, {0,1}[0xA-Fa-f0-9]{3,6}){2}, {0,1}({)([0xA-Fa-f0-9]{3,4}, {0,1}){7}[0xA-Fa-f0-9]{3,4}(}})$")]
      goto AlternationBranch;
  }
  
-   if (slice.Slice(0, 32).IndexOfAnyExcept(Utilities.s_asciiHexDigits) >= 0)
+   if (slice.Slice(0, 32).ContainsAnyExcept(Utilities.s_asciiHexDigits))
  {
      goto AlternationBranch;
  }
"asmz://(?<guid>[0-9a-fA-F]{32})/(?<size>[0-9 ..." (99 uses)
[GeneratedRegex("asmz://(?<guid>[0-9a-fA-F]{32})/(?<size>[0-9]+)(/(?<flags>[a-zA-Z0-9]*))?", RegexOptions.IgnoreCase | RegexOptions.ExplicitCapture)]
      return false; // The input didn't match.
  }
  
-   if (slice.Slice(0, 32).IndexOfAnyExcept(Utilities.s_asciiHexDigits) >= 0)
+   if (slice.Slice(0, 32).ContainsAnyExcept(Utilities.s_asciiHexDigits))
  {
      UncaptureUntil(0);
      return false; // The input didn't match.

For more diff examples, see https://gist.github.com/MihuBot/47c9e3e7aadcb5bd85d2a8b35243f90d

Total bytes of base: 53924200
Total bytes of diff: 53924088
Total bytes of delta: -112 (-0.00 % of base)
Total relative delta: -0.02
    diff is an improvement.
    relative diff is an improvement.

For a list of JIT diff improvements, see Improvements.md

Sample source code for further analysis
// Local cache of the results file: the archive is downloaded only when this file is missing.
const string JsonPath = "RegexResults-978.json";
if (!File.Exists(JsonPath))
{
    // Own the HttpClient in a using declaration so its handler/connection is released
    // once the archive has been read (disposal runs in reverse order: archive, stream, client).
    using var httpClient = new HttpClient();
    await using var archiveStream = await httpClient.GetStreamAsync("https://mihubot.xyz/r/EogtjSRA");
    using var archive = new ZipArchive(archiveStream, ZipArchiveMode.Read);

    // First(...) throws InvalidOperationException if the archive has no "Results.json" entry.
    archive.Entries.First(e => e.Name == "Results.json").ExtractToFile(JsonPath);
}

using FileStream jsonFileStream = File.OpenRead(JsonPath);

// IncludeFields lets System.Text.Json populate the value-tuple elements used by RegexEntry
// (tuple elements are public fields, which are ignored by default).
// Deserialize returns null for a literal JSON "null"; fail with a clear message instead of
// a NullReferenceException on the next line.
RegexEntry[] entries = JsonSerializer.Deserialize<RegexEntry[]>(jsonFileStream, new JsonSerializerOptions { IncludeFields = true })
    ?? throw new InvalidDataException($"'{JsonPath}' deserialized to null; expected a JSON array of regex entries.");
Console.WriteLine($"Working with {entries.Length} patterns");



/// <summary>
/// A regular expression pattern together with its <see cref="RegexOptions"/> and a count.
/// </summary>
/// <param name="Pattern">The regular expression pattern text.</param>
/// <param name="Options">The <see cref="RegexOptions"/> associated with the pattern.</param>
/// <param name="Count">
/// A count associated with the pattern. NOTE(review): presumably the number of known uses of
/// the pattern (the "N uses" figure in the report) — confirm against the data producer.
/// </param>
record KnownPattern(string Pattern, RegexOptions Options, int Count);

/// <summary>
/// One element of the <c>RegexEntry[]</c> array deserialized from the results JSON file.
/// </summary>
/// <remarks>
/// The two tuple-array properties use value tuples, whose elements are public fields; they are
/// only populated by System.Text.Json when <c>JsonSerializerOptions.IncludeFields</c> is enabled.
/// </remarks>
sealed class RegexEntry
{
    /// <summary>The pattern, options and count this entry belongs to. Must be set on construction.</summary>
    public required KnownPattern Regex { get; set; }

    /// <summary>
    /// Source text for the "main" side of the comparison.
    /// NOTE(review): presumably the generated regex source from the base branch — confirm.
    /// </summary>
    public required string MainSource { get; set; }

    /// <summary>
    /// Source text for the "PR" side of the comparison.
    /// NOTE(review): presumably the generated regex source from the pull-request build — confirm.
    /// </summary>
    public required string PrSource { get; set; }

    /// <summary>
    /// Optional full diff text; may be <see langword="null"/>.
    /// NOTE(review): presumably null when the two sources are identical — confirm.
    /// </summary>
    public string? FullDiff { get; set; }

    /// <summary>
    /// Optional shortened diff text; may be <see langword="null"/>.
    /// NOTE(review): presumably a trimmed excerpt of <see cref="FullDiff"/> — confirm.
    /// </summary>
    public string? ShortDiff { get; set; }

    /// <summary>
    /// Optional list of (name, values) pairs; may be <see langword="null"/>.
    /// NOTE(review): presumably the <c>SearchValues&lt;char&gt;</c> helper fields referenced by
    /// the generated code and the characters each contains — confirm.
    /// </summary>
    public (string Name, string Values)[]? SearchValuesOfChar { get; set; }

    /// <summary>
    /// Optional list of (string values, comparison type) pairs; may be <see langword="null"/>.
    /// NOTE(review): presumably the <c>SearchValues&lt;string&gt;</c> instances used by the
    /// generated code — confirm.
    /// </summary>
    public (string[] Values, StringComparison ComparisonType)[]? SearchValuesOfString { get; set; }
}

Artifacts:

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant