diff --git a/.github/workflows/perf-compare.yml b/.github/workflows/perf-compare.yml index bdb17209..affefdb1 100644 --- a/.github/workflows/perf-compare.yml +++ b/.github/workflows/perf-compare.yml @@ -16,10 +16,13 @@ # # Each metric shows main, PR, a signed Δ (with a 95% CI), and an improvement/ # regression/within-noise status. Further tables follow: a low-mutation skip-floor -# leg, an allocation comparison, the reconciler micro-suite, and a cross-framework -# reference (vanilla WinUI3 (StressPerf.Direct) + the Rust `windows-reactor` port, -# test_reactor_perf from microsoft/windows-rs — both measured live on the same -# runner). See tests/stress_perf/ci/README.md for the authoritative comment layout. +# leg, an allocation comparison, a keyed-list leg (StressPerf.KeyedList — a ~500-row +# stably keyed list whose reorder/insert/remove ticks drive the reconciler's keyed +# LIS diff arm the positional StocksGrid cells never hit), the reconciler micro-suite, +# and a cross-framework reference (vanilla WinUI3 (StressPerf.Direct) + the Rust +# `windows-reactor` port, test_reactor_perf from microsoft/windows-rs — both measured +# live on the same runner). See tests/stress_perf/ci/README.md for the authoritative +# comment layout. # # ── Why `issue_comment` (not a label) ──────────────────────────────────────── # `issue_comment` always runs the workflow from the DEFAULT branch, so the perf @@ -75,8 +78,10 @@ jobs: contains(fromJSON('["OWNER","MEMBER","COLLABORATOR"]'), github.event.comment.author_association)) }} runs-on: windows-latest - # 60 (not 40): the Rust cross-framework leg cold-builds the windows-rs port. - timeout-minutes: 60 + # 75 (not 60): the Rust cross-framework leg cold-builds the windows-rs port, and + # the macro comparison now runs three interleaved A/B legs (headline + skip-floor + # + keyed-list) plus the micro-suite — the keyed leg adds one KeyedList build per tree and Warmup+Reps runs per side. 
+ timeout-minutes: 75 env: GH_REPO: ${{ github.repository }} # Pinned microsoft/windows-rs commit whose `test_reactor_perf` crate backs diff --git a/tests/stress_perf/METHODOLOGY.md b/tests/stress_perf/METHODOLOGY.md index a759f253..26857f53 100644 --- a/tests/stress_perf/METHODOLOGY.md +++ b/tests/stress_perf/METHODOLOGY.md @@ -240,6 +240,28 @@ each leg's delta independently cancels time-correlated drift); it is opt-out via `-IncludeSkipFloor $false`. See [`ci/README.md`](ci/README.md#the-comment). +## Keyed-list workload: the keyed child-diff path StocksGrid never hits + +The StocksGrid macro workload (`StressPerf.ReactorOptimized`) renders a fixed grid +of cells mutated **in place by index**. Its child diff therefore always takes +`ChildReconciler.ReconcilePositional` — the positional re-walk. It never exercises +the reconciler's **keyed** arm, so keyed-diff optimizations (the keyed-list LIS +diff, keyed structural-skip) are invisible to it *by construction* — the same blind +spot that made the original headline-only comparison unable to resolve them. + +So `/perf` runs a **third interleaved A/B leg** on `StressPerf.KeyedList`: a ~500-row +list of **stably keyed** children that are reordered / inserted / removed each tick. +Because every child carries a key, the child reconciler takes its keyed arm +(`ReconcileKeyed` → `ReconcileKeyedMiddle`, the LIS-based minimal-move pass) and runs +a real keyed diff every tick. The workload is deterministic (fixed RNG seed, constant +row count — insertions paired with removals) so `main` and PR compare identical edit +sequences, and its rows' labels are content-stable so a moved row's text never changes +— isolating the **structural** (keyed-diff) signal from per-cell property updates. It +reports the four headline metrics in its own table under the same interleaving, reps, +warm-up, and 95%-CI gating as the headline leg, and is opt-out via +`-IncludeKeyedList $false`. See +[`ci/README.md`](ci/README.md#the-comment). 
+ ## Reconciler micro-benchmarks: ns-resolution Core path Every metric above is measured **across a live WinUI render pipeline**, which is diff --git a/tests/stress_perf/ci/PerfLib.Tests.ps1 b/tests/stress_perf/ci/PerfLib.Tests.ps1 index d2585d70..52b40f04 100644 --- a/tests/stress_perf/ci/PerfLib.Tests.ps1 +++ b/tests/stress_perf/ci/PerfLib.Tests.ps1 @@ -450,6 +450,63 @@ $floorComment1 = Format-PerfComment -Main $main -Pr $pr -WinUI3 $null -Rust $nul Assert-Match $floorComment1 '--percent 1' 'skip-floor heading reflects Context.SkipFloorPercent' +# ── Format-PerfKeyedListSection + Format-PerfComment: keyed-list workload ────── +# 12 paired keyed-list runs exercising ALL FOUR headline metrics by direction AND by +# significance: rps/reconcile/diff move DOWN main->PR, while memory carries a small +# SYMMETRIC per-pair jitter (mean Δ ~0). So the verdicts must split: rps (higher- +# better) DOWN = regression; reconcile/diff (lower-better) DOWN = improvement; memory's +# paired CI straddles 0 = within noise — proving the keyed section reuses Table 1's +# direction-aware paired-CI machinery, not a hard-coded verdict. The small jitter on +# the directional metrics keeps each of their paired CIs off 0. +$keyedMainRuns = @(); $keyedPrRuns = @() +1..12 | ForEach-Object { + $j = ($_ % 4) * 0.05 + $mj = ((($_ % 2) * 2) - 1) * 0.2 # alternating +0.2 / -0.2 so the paired memory Δ straddles 0 + $keyedMainRuns += [pscustomobject]@{ RendersPerSec = 8.0 + $j; AvgReconcileMs = 9.0 + $j; AvgDiffMs = 7.0 + $j; AvgMemoryMB = 250 + $mj; TotalRenders = 80; DurationSeconds = 10 } + $keyedPrRuns += [pscustomobject]@{ RendersPerSec = 7.0 + $j; AvgReconcileMs = 7.0 + $j; AvgDiffMs = 5.0 + $j; AvgMemoryMB = 250 - $mj; TotalRenders = 70; DurationSeconds = 10 } +} +$keyedMain = Measure-PerfRuns -Runs $keyedMainRuns +$keyedPr = Measure-PerfRuns -Runs $keyedPrRuns + +# Direct section renderer: empty when either side is null, populated when both present. 
+Assert-Equal 0 @(Format-PerfKeyedListSection -MainKeyed $null -PrKeyed $keyedPr -Percent 50).Count 'keyed section empty when main keyed null' +Assert-Equal 0 @(Format-PerfKeyedListSection -MainKeyed $keyedMain -PrKeyed $null -Percent 50).Count 'keyed section empty when pr keyed null' +$keyedSection = Format-PerfKeyedListSection -MainKeyed $keyedMain -PrKeyed $keyedPr -Percent 50 +$keyedSectionText = $keyedSection -join "`n" +Assert-Match $keyedSectionText 'Keyed-list workload' 'keyed section has heading' +Assert-Match $keyedSectionText 'StressPerf.KeyedList' 'keyed heading names the workload' +Assert-Match $keyedSectionText 'Avg Reconcile' 'keyed section has reconcile row' +Assert-Match $keyedSectionText 'keyed arm' 'keyed preamble explains the keyed arm' +Assert-Match $keyedSectionText 'LIS' 'keyed preamble cites the LIS minimal-move pass' +# Direction-awareness: rps and reconcile both DECREASE main->PR, yet rps (higher-is- +# better) must read regression while reconcile (lower-is-better) reads improvement. +$keyedRpsRow = ($keyedSection | Where-Object { $_ -match 'Renders/sec' }) -join ' ' +$keyedReconRow = ($keyedSection | Where-Object { $_ -match 'Avg Reconcile' }) -join ' ' +$keyedDiffRow = ($keyedSection | Where-Object { $_ -match 'Avg Diff' }) -join ' ' +$keyedMemRow = ($keyedSection | Where-Object { $_ -match 'Avg Memory' }) -join ' ' +Assert-Match $keyedRpsRow 'regression' 'keyed: rps DOWN reads regression (higher-is-better honored)' +Assert-Match $keyedReconRow 'improvement' 'keyed: reconcile DOWN reads improvement (lower-is-better honored)' +Assert-Match $keyedDiffRow 'improvement' 'keyed: diff DOWN reads improvement (lower-is-better honored)' +Assert-Match $keyedMemRow 'within noise' 'keyed: symmetric memory Δ reads within noise (paired CI straddles 0)' +# -Percent threads into the heading independently of the methodology line. 
+$keyedSection75 = (Format-PerfKeyedListSection -MainKeyed $keyedMain -PrKeyed $keyedPr -Percent 75) -join "`n" +Assert-Match $keyedSection75 'Keyed-list workload*--percent 75' 'keyed heading reflects the -Percent argument' + +# Threaded through Format-PerfComment: present when keyed aggregates present, sitting +# after the regression/skip-floor tables and before the cross-framework table. +$keyedComment = Format-PerfComment -Main $main -Pr $pr -WinUI3 $null -Rust $null -MainFloor $floorMain -PrFloor $floorPr -MainKeyed $keyedMain -PrKeyed $keyedPr -Context $ctx +Assert-Match $keyedComment 'Keyed-list workload' 'comment renders keyed-list table when keyed aggregates present' +$idxRegK = $keyedComment.IndexOf('Regression vs') +$idxFloorK = $keyedComment.IndexOf('Low-mutation skip-floor') +$idxKeyed = $keyedComment.IndexOf('Keyed-list workload') +$idxXfwK = $keyedComment.IndexOf('Cross-framework reference') +Assert-True (($idxRegK -lt $idxKeyed) -and ($idxFloorK -lt $idxKeyed) -and ($idxKeyed -lt $idxXfwK)) 'keyed-list table sits after the regression + skip-floor tables and before cross-framework' + +# Omitted entirely when keyed aggregates are absent (keyed-list leg disabled / build omitted). 
+$noKeyedComment = Format-PerfComment -Main $main -Pr $pr -WinUI3 $null -Rust $null -MainKeyed $null -PrKeyed $null -Context $ctx +Assert-True (-not ($noKeyedComment -like '*Keyed-list workload*')) 'keyed-list table omitted when keyed aggregates null' + + # ── Reconciler micro-suite: Read-MicroBenchResults / comparison / render ────── function New-MicroRow { param([string]$BenchId, [string]$Name, [string]$Variant, [int]$Rep, [double]$MeanNs, [double]$AllocBytes, [string]$Status = 'ok', [int]$Iterations = 1) diff --git a/tests/stress_perf/ci/PerfLib.ps1 b/tests/stress_perf/ci/PerfLib.ps1 index 9b9a27a3..4dd7d4af 100644 --- a/tests/stress_perf/ci/PerfLib.ps1 +++ b/tests/stress_perf/ci/PerfLib.ps1 @@ -742,6 +742,58 @@ function Format-PerfSkipFloorSection { return $lines.ToArray() } +function Format-PerfKeyedListSection { + <# + .SYNOPSIS + Render the keyed-list workload table: the four headline metrics measured on + StressPerf.KeyedList — a ~500-row stably keyed list whose rows are reordered / + inserted / removed each tick. Empty array when there is nothing to show. + .DESCRIPTION + Unlike the positional StocksGrid headline/skip-floor legs (whose cells mutate + in place by index, always taking ChildReconciler.ReconcilePositional), this is + a SEPARATE macro workload that drives the child reconciler's KEYED arm + (ReconcileKeyed → ReconcileKeyedMiddle, the LIS-based minimal-move pass). It is + the sensitive macro measure for keyed-diff optimizations (keyed-list diff, + keyed structural-skip) that the StocksGrid workload can never exercise. Reuses + the same paired-Δ 95% CI machinery (Get-PerfDelta over the index-aligned + per-run samples) as the headline table. Returns an empty array when either + aggregate is $null (keyed-list leg disabled, build omitted, or one side + produced no metrics), so the caller renders nothing. + .PARAMETER MainKeyed Aggregated baseline keyed-list metrics (Measure-PerfRuns), or $null. 
+ .PARAMETER PrKeyed Aggregated PR-head keyed-list metrics, or $null. + .PARAMETER Percent The mutation percent the keyed-list leg ran at (heading / preamble). + #> + param( + [AllowNull()][pscustomobject]$MainKeyed, + [AllowNull()][pscustomobject]$PrKeyed, + [double]$Percent = 50 + ) + if ($null -eq $MainKeyed -or $null -eq $PrKeyed) { return @() } + + $lines = [System.Collections.Generic.List[string]]::new() + $lines.Add("### Keyed-list workload (``StressPerf.KeyedList``, ``--percent $Percent``)") + $lines.Add('') + $lines.Add("A separate macro workload: a ~500-row **stably keyed** list whose rows are reordered / inserted / removed each tick. Because every child carries a key, the child reconciler takes its **keyed arm** (``ReconcileKeyed`` → ``ReconcileKeyedMiddle``, the LIS-based minimal-move pass) instead of the positional re-walk the StocksGrid tables above measure — so this is the sensitive macro signal for **keyed-diff** work the positional cells can never reach. Same interleaved paired-Δ 95% CI as the headline table.") + $lines.Add('') + $lines.Add('| Metric | `main` (baseline) | This PR | Δ (95% CI) | Status |') + $lines.Add('|---|--:|--:|--:|:--|') + foreach ($m in $script:PerfMetricSpec) { + $bVal = $MainKeyed.($m.Key) + $pVal = $PrKeyed.($m.Key) + $spread = [math]::Max([double]$MainKeyed."$($m.Key)Spread", [double]$PrKeyed."$($m.Key)Spread") + $delta = Get-PerfDelta -Baseline $bVal -Candidate $pVal -LowerIsBetter $m.LowerIsBetter -SpreadPct $spread ` + -BaselineSamples $MainKeyed."$($m.Key)Samples" -CandidateSamples $PrKeyed."$($m.Key)Samples" + $lines.Add(('| {0} {1} | {2} | {3} | {4} | {5} |' -f ` + $m.Label, $m.Arrow, ` + (Format-PerfNumber $bVal $m.Digits), ` + (Format-PerfNumber $pVal $m.Digits), ` + (Format-PerfDeltaCell $delta), ` + (Get-PerfStatusGlyph $delta.Status))) + } + $lines.Add('') + return $lines.ToArray() +} + function Format-PerfComment { <# .SYNOPSIS @@ -756,6 +808,8 @@ function Format-PerfComment { (Get-PerfMicroComparison output), 
or $null when not run. .PARAMETER MainFloor Aggregated baseline low-mutation skip-floor metrics, or $null. .PARAMETER PrFloor Aggregated PR-head low-mutation skip-floor metrics, or $null. + .PARAMETER MainKeyed Aggregated baseline keyed-list workload metrics, or $null. + .PARAMETER PrKeyed Aggregated PR-head keyed-list workload metrics, or $null. .PARAMETER Context Hashtable: Percent, Duration, Reps, Warmup, SkipFloorPercent, BaseSha, HeadSha, Runner, Cpu, Cores, MemoryGB, RunUrl, Timestamp, Note. @@ -768,6 +822,8 @@ function Format-PerfComment { [AllowNull()][object[]]$Micro, [AllowNull()][pscustomobject]$MainFloor, [AllowNull()][pscustomobject]$PrFloor, + [AllowNull()][pscustomobject]$MainKeyed, + [AllowNull()][pscustomobject]$PrKeyed, [Parameter(Mandatory)][hashtable]$Context ) @@ -842,6 +898,14 @@ function Format-PerfComment { & $add '' } + # ── Keyed-list workload table (StressPerf.KeyedList) ───────────────────── + # A separate macro workload driving the child reconciler's KEYED arm + # (ReconcileKeyed → ReconcileKeyedMiddle, the LIS minimal-move pass) that the + # positional StocksGrid cells above never reach. The sensitive macro signal for + # keyed-diff optimizations. Rendered only when both keyed aggregates are present. + $keyedPct = if ($Context.ContainsKey('Percent')) { [double]$Context.Percent } else { 50 } + foreach ($kline in (Format-PerfKeyedListSection -MainKeyed $MainKeyed -PrKeyed $PrKeyed -Percent $keyedPct)) { & $add $kline } + # ── Reconciler micro-benchmarks (ns-resolution, WinUI-undiluted) ────────── # Rendered only when the PerfBench.ControlModel micro leg produced results for # both sides. 
Resolves Core/Reconciler time + allocation deltas the macro diff --git a/tests/stress_perf/ci/README.md b/tests/stress_perf/ci/README.md index 3715c2cc..c8413427 100644 --- a/tests/stress_perf/ci/README.md +++ b/tests/stress_perf/ci/README.md @@ -150,7 +150,8 @@ git worktree remove ../main | `-MicroIterations` | `10000` | Inner iterations per repetition inside each micro-bench (amortises timer resolution). | | `-IncludeSkipFloor` | `$true` | Run a **second interleaved A/B leg** at `-SkipFloorPercent` and append a low-mutation skip-floor table (compare mode). Set `$false` to skip it (halves the macro runtime). | | `-SkipFloorPercent` | `0` | Mutation percent for the skip-floor leg. At `0` the workload still mutates one cell/tick (`StockDataSource.Update` clamps the count to `Math.Max(1, …)`), so reconcile/diff isolate the O(n) per-tick child skip-walk floor the 50% leg dilutes. | -| `-Apps` | `ReactorOptimized,Direct` | Single-tree mode only: which harnesses to run. | +| `-IncludeKeyedList` | `$true` | Run a **third interleaved A/B leg** on `StressPerf.KeyedList` — a ~500-row stably keyed list reordered/inserted/removed each tick — and append its own table (compare mode). Drives the child reconciler's **keyed arm** (`ReconcileKeyed` → `ReconcileKeyedMiddle`, the LIS minimal-move pass) the positional StocksGrid cells never reach. Build is best-effort; set `$false` to skip the leg. | +| `-Apps` | `ReactorOptimized,Direct` | Single-tree mode only: which harnesses to run (`ReactorOptimized`, `Direct`, `KeyedList`). | | `-Platform` | host arch | Target architecture (`x64` or `ARM64`). Defaults to your machine's native arch so the WinUI harness runs without emulation. | | `-SelfContained` | `$true` | Build with the bundled WinApp runtime (no machine-wide install). | | `-SkipBuild` | off | Reuse existing binaries (skip `dotnet build`). | @@ -224,6 +225,16 @@ Several tables plus footnotes: `main` vs PR with the same paired-CI band. 
Rendered only when the harness reports the metric (n/a for pre-metric PR heads). This is the table that moves for allocation-reduction PRs. +- **Keyed-list workload (`StressPerf.KeyedList`)** — the four headline metrics + from a **third interleaved A/B leg** on a separate ~500-row **stably keyed** list + whose rows are reordered / inserted / removed each tick. Because every child + carries a key, the child reconciler takes its **keyed arm** (`ReconcileKeyed` → + `ReconcileKeyedMiddle`, the LIS-based minimal-move pass) instead of the positional + re-walk the StocksGrid tables measure — so this is the sensitive macro signal for + **keyed-diff** optimizations (keyed-list diff, keyed structural-skip) that the + positional cells can never exercise. Same paired-CI gating as Table 1; omitted + when `-IncludeKeyedList $false`, the workload build fails, or a side produces no + metrics. - **Reconciler micro-benchmarks** — per-bench `ns/op` and `B/op` from the `PerfBench.ControlModel` micro-suite (M1–M13), `main` vs PR. ns-resolution and WinUI-undiluted, so it resolves Core/Reconciler time and allocation deltas the diff --git a/tests/stress_perf/ci/Run-PerfBenchmark.ps1 b/tests/stress_perf/ci/Run-PerfBenchmark.ps1 index 223eb8a3..9024dfdc 100644 --- a/tests/stress_perf/ci/Run-PerfBenchmark.ps1 +++ b/tests/stress_perf/ci/Run-PerfBenchmark.ps1 @@ -89,10 +89,21 @@ so reconcile/diff isolate the O(n) per-tick child skip-walk floor the 50% leg dilutes — the fixed cost a structural-skip optimization targets. +.PARAMETER IncludeKeyedList + Run a third interleaved A/B leg on StressPerf.KeyedList — a ~500-row stably + keyed list whose rows are REORDERED / inserted / removed each tick — and append + its own PR-vs-main table to the comment (compare mode). 
This drives the child + reconciler's KEYED arm (ReconcileKeyed → ReconcileKeyedMiddle, the LIS-based + minimal-move pass) that the positional StocksGrid cells can never reach, so it + is the sensitive macro measure for keyed-diff optimizations. Default $true; + build is best-effort (a KeyedList build failure just omits the table). Disable + with -IncludeKeyedList:$false to skip the extra leg. + .PARAMETER Apps - Which harnesses to run in single-tree mode: ReactorOptimized, Direct. + Which harnesses to run in single-tree mode: ReactorOptimized, Direct, KeyedList. Ignored in compare mode (which always does ReactorOptimized both sides + - Direct once for the WinUI3 column). + Direct once for the WinUI3 column, and — unless -IncludeKeyedList:$false — + KeyedList both sides). .PARAMETER OutDir Where logs, comment.md and result.json land. Defaults to ci\out next to this @@ -163,7 +174,8 @@ param( [bool]$IncludeMicro = $true, [double]$SkipFloorPercent = 0, [bool]$IncludeSkipFloor = $true, - [ValidateSet('ReactorOptimized', 'Direct')] + [bool]$IncludeKeyedList = $true, + [ValidateSet('ReactorOptimized', 'Direct', 'KeyedList')] [string[]]$Apps = @('ReactorOptimized', 'Direct'), [string]$OutDir, [switch]$SkipBuild, @@ -195,6 +207,7 @@ $tfmGuess = 'net10.0-windows10.0.22621.0' $AppRegistry = @{ ReactorOptimized = @{ AppName = 'StressPerf.ReactorOptimized'; ProjectRel = 'tests\stress_perf\StressPerf.ReactorOptimized\StressPerf.ReactorOptimized.csproj' } Direct = @{ AppName = 'StressPerf.Direct'; ProjectRel = 'tests\stress_perf\StressPerf.Direct\StressPerf.Direct.csproj' } + KeyedList = @{ AppName = 'StressPerf.KeyedList'; ProjectRel = 'tests\stress_perf\StressPerf.KeyedList\StressPerf.KeyedList.csproj' } MicroControlModel = @{ AppName = 'PerfBench.ControlModel'; ProjectRel = 'tests\perf_bench\PerfBench.ControlModel\PerfBench.ControlModel.csproj' } } @@ -716,7 +729,18 @@ if ($DefenderExclude) { $runner = Get-RunnerInfo Write-Log ("runner: {0} | {1} cores | {2} GB | {3}" -f 
$runner.Cpu, $runner.Cores, $runner.MemoryGB, ($runner.Runner ?? 'local')) 'Cyan' -Write-Log ("mode: {0} | platform={1} | percent={2} duration={3} reps={4} warmup={5} | skip-floor={6}" -f ($(if ($Compare) { 'COMPARE' } else { 'LOCAL' })), $Platform, $Percent, $Duration, $Reps, $Warmup, $(if ($IncludeSkipFloor) { "on (--percent $SkipFloorPercent)" } else { 'off' })) 'Cyan' +$modeSuffix = if ($Compare) { + # COMPARE mode runs the interleaved A/B legs, so the skip-floor / keyed-list + # opt-out switches are what actually decide which legs run. + "skip-floor={0} | keyed-list={1}" -f ` + $(if ($IncludeSkipFloor) { "on (--percent $SkipFloorPercent)" } else { 'off' }), ` + $(if ($IncludeKeyedList) { 'on' } else { 'off' }) +} else { + # LOCAL mode ignores the interleaved-leg switches entirely; the workload set is + # whatever -Apps selects, so report that instead of a misleading on/off. + "apps={0}" -f ($Apps -join ',') +} +Write-Log ("mode: {0} | platform={1} | percent={2} duration={3} reps={4} warmup={5} | {6}" -f ($(if ($Compare) { 'COMPARE' } else { 'LOCAL' })), $Platform, $Percent, $Duration, $Reps, $Warmup, $modeSuffix) 'Cyan' $exit = 0 try { @@ -724,6 +748,7 @@ try { # ---- Compare mode: interleaved ReactorOptimized A/B + WinUI3 once ----- $ro = $AppRegistry.ReactorOptimized $direct = $AppRegistry.Direct + $keyed = $AppRegistry.KeyedList $microMeta = $AppRegistry.MicroControlModel if (-not $SkipBuild) { @@ -740,6 +765,15 @@ try { $IncludeMicro = $false } } + if ($IncludeKeyedList -and -not $SkipBuild) { + try { + Build-Harness -TreeRoot $BaselineRoot -AppMeta $keyed + Build-Harness -TreeRoot $Root -AppMeta $keyed + } catch { + Write-Log "keyed-list workload build failed ($_) — omitting the keyed-list table" 'Yellow' + $IncludeKeyedList = $false + } + } $mainExe = Resolve-HarnessExe -TreeRoot $BaselineRoot -AppMeta $ro $prExe = Resolve-HarnessExe -TreeRoot $Root -AppMeta $ro $directExe = Resolve-HarnessExe -TreeRoot $Root -AppMeta $direct @@ -778,6 +812,35 @@ try { } 
} + # Third interleaved A/B leg: the keyed-list workload. StressPerf.KeyedList + # renders a ~500-row stably KEYED list and reorders/inserts/removes rows each + # tick, driving the child reconciler's keyed arm (ReconcileKeyed → + # ReconcileKeyedMiddle, the LIS minimal-move pass) that StocksGrid's positional + # cells never reach. Same paired interleaving + drop-both alignment as above. + # Best-effort: if either exe is missing (build omitted/failed) the leg is + # skipped and the keyed-list table is omitted — the StocksGrid comparison is + # unaffected. + $mainKeyedRuns = @(); $prKeyedRuns = @() + if ($IncludeKeyedList) { + $mainKeyedExe = Resolve-HarnessExe -TreeRoot $BaselineRoot -AppMeta $keyed + $prKeyedExe = Resolve-HarnessExe -TreeRoot $Root -AppMeta $keyed + if (-not $mainKeyedExe -or -not $prKeyedExe) { + Write-Log "keyed-list exe not found (main=$([bool]$mainKeyedExe) pr=$([bool]$prKeyedExe)) — omitting the keyed-list table" 'Yellow' + } else { + Write-Log "interleaving main/PR keyed-list (--percent $Percent; $($Warmup) warmup + $($Reps) measured each)" 'Green' + for ($i = 1; $i -le ($Warmup + $Reps); $i++) { + $mm = Invoke-OneRun -Exe $mainKeyedExe -AppMeta $keyed -Index $i -Tag 'main-keyed' + $pm = Invoke-OneRun -Exe $prKeyedExe -AppMeta $keyed -Index $i -Tag 'pr-keyed' + if ($i -le $Warmup) { Write-Log " (keyed warmup pair #$i discarded)" 'DarkGray'; continue } + if ($mm -and $pm) { $mainKeyedRuns += $mm; $prKeyedRuns += $pm } + elseif ($mm -or $pm) { Write-Log " keyed pair #$i incomplete (main=$([bool]$mm) pr=$([bool]$pm)) — dropped to keep the paired CI aligned" 'Yellow' } + } + if ($mainKeyedRuns.Count -lt $Reps -or $prKeyedRuns.Count -lt $Reps) { + Write-Log " keyed-list leg short (main $($mainKeyedRuns.Count)/$Reps, PR $($prKeyedRuns.Count)/$Reps) — its paired CI uses fewer samples" 'Yellow' + } + } + } + $winRuns = @() if ($directExe) { Write-Log "vanilla WinUI3 (StressPerf.Direct)" 'Green' @@ -819,6 +882,8 @@ try { $winui3 = if ($winRuns.Count) 
{ Measure-PerfRuns -Runs $winRuns } else { $null } $mainFloor = if ($mainFloorRuns.Count) { Measure-PerfRuns -Runs $mainFloorRuns } else { $null } $prFloor = if ($prFloorRuns.Count) { Measure-PerfRuns -Runs $prFloorRuns } else { $null } + $mainKeyed = if ($mainKeyedRuns.Count) { Measure-PerfRuns -Runs $mainKeyedRuns } else { $null } + $prKeyed = if ($prKeyedRuns.Count) { Measure-PerfRuns -Runs $prKeyedRuns } else { $null } $note = $null if ($prRuns.Count -eq 0 -or $mainRuns.Count -eq 0) { @@ -841,17 +906,18 @@ try { Platform = $Platform MainSamples = $mainRuns.Count; PrSamples = $prRuns.Count MainFloorSamples = $mainFloorRuns.Count; PrFloorSamples = $prFloorRuns.Count + MainKeyedSamples = $mainKeyedRuns.Count; PrKeyedSamples = $prKeyedRuns.Count BaseSha = $(if ($BaseSha) { $BaseSha.Substring(0, [Math]::Min(7, $BaseSha.Length)) } else { '' }) HeadSha = $(if ($HeadSha) { $HeadSha.Substring(0, [Math]::Min(7, $HeadSha.Length)) } else { '' }) Runner = $runner.Runner; Cpu = $runner.Cpu; Cores = $runner.Cores; MemoryGB = $runner.MemoryGB RunUrl = $RunUrl; Timestamp = (Get-Date).ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ssZ'); Note = $note } - $comment = Format-PerfComment -Main $main -Pr $pr -WinUI3 $winui3 -Rust $rust -Micro $micro -MainFloor $mainFloor -PrFloor $prFloor -Context $ctx + $comment = Format-PerfComment -Main $main -Pr $pr -WinUI3 $winui3 -Rust $rust -Micro $micro -MainFloor $mainFloor -PrFloor $prFloor -MainKeyed $mainKeyed -PrKeyed $prKeyed -Context $ctx $commentPath = Join-Path $OutDir 'comment.md' Set-Content -LiteralPath $commentPath -Value $comment -Encoding UTF8 Write-Log "comment.md written -> $commentPath" 'Green' - $result = [pscustomobject]@{ main = $main; pr = $pr; winui3 = $winui3; mainFloor = $mainFloor; prFloor = $prFloor; rust = $rust; micro = $micro; runner = $runner; context = $ctx } + $result = [pscustomobject]@{ main = $main; pr = $pr; winui3 = $winui3; mainFloor = $mainFloor; prFloor = $prFloor; mainKeyed = $mainKeyed; prKeyed = 
$prKeyed; rust = $rust; micro = $micro; runner = $runner; context = $ctx } $result | ConvertTo-Json -Depth 6 | Set-Content -LiteralPath (Join-Path $OutDir 'result.json') -Encoding UTF8 Write-Host "`n----- comment.md -----" -ForegroundColor DarkGray diff --git a/tests/stress_perf/ci/RunPerfBenchmark.Tests.ps1 b/tests/stress_perf/ci/RunPerfBenchmark.Tests.ps1 index 7e1ec9f2..b836836f 100644 --- a/tests/stress_perf/ci/RunPerfBenchmark.Tests.ps1 +++ b/tests/stress_perf/ci/RunPerfBenchmark.Tests.ps1 @@ -321,6 +321,35 @@ finally { Remove-Item function:Format-PerfNumber -ErrorAction SilentlyContinue } +# =========================================================================== +# Keyed-list leg — static wiring contract (param + registry + leg + comment) +# =========================================================================== +# The keyed-list leg lives in the orchestrator's main run flow (not a dot-sourceable +# function), exactly like the headline + skip-floor legs, so — as with those — its +# Invoke-OneRun threading is covered by the -RunPercent test above (the keyed leg omits +# -RunPercent, so it inherits $Percent). What is NEW and worth locking here is the +# static wiring: the opt-out switch defaults on, the registry resolves the right +# exe/csproj, the interleave runs both sides, and the aggregates reach the renderer. 
+$kp = $ast.ParamBlock.Parameters | Where-Object { $_.Name.VariablePath.UserPath -eq 'IncludeKeyedList' } | Select-Object -First 1 +Assert-True ($null -ne $kp) '[keyed] -IncludeKeyedList parameter exists' +Assert-True ($kp -and $kp.DefaultValue -and $kp.DefaultValue.Extent.Text -eq '$true') '[keyed] -IncludeKeyedList defaults to $true (on unless opted out)' +Assert-True ($src -match "KeyedList\s*=\s*@\{\s*AppName\s*=\s*'StressPerf\.KeyedList';\s*ProjectRel\s*=\s*'tests\\stress_perf\\StressPerf\.KeyedList") '[keyed] AppRegistry maps KeyedList -> StressPerf.KeyedList exe + csproj' +Assert-True ($src -match "-Tag 'main-keyed'") '[keyed] leg interleaves the main side (main-keyed)' +Assert-True ($src -match "-Tag 'pr-keyed'") '[keyed] leg interleaves the PR side (pr-keyed)' +Assert-True ($src -match '-MainKeyed \$mainKeyed') '[keyed] Format-PerfComment receives the main keyed aggregate' +Assert-True ($src -match '-PrKeyed \$prKeyed') '[keyed] Format-PerfComment receives the PR keyed aggregate' + +# Opt-out + best-effort build fallback: the keyed build is guarded by the switch, and a +# build failure flips the switch off (omit the table, never throw) so the leg is skipped. +Assert-True ($src -match 'if \(\$IncludeKeyedList -and -not \$SkipBuild\)') '[keyed] build is guarded by -IncludeKeyedList (and -SkipBuild)' +Assert-True ($src -match '(?s)keyed-list workload build failed.*?\$IncludeKeyedList = \$false') '[keyed] a build failure flips -IncludeKeyedList off (best-effort: omit table, never throw)' +Assert-True ($src -match '\$mainKeyedRuns = @\(\); \$prKeyedRuns = @\(\)\s*\r?\n\s*if \(\$IncludeKeyedList\)') '[keyed] the run leg is skipped unless -IncludeKeyedList is on' + +# Paired drop-both alignment: a complete keyed pair appends BOTH sides; a one-sided +# failure drops BOTH halves so the paired CI's main[i]/pr[i] zip stays index-aligned. 
+Assert-True ($src -match 'if \(\$mm -and \$pm\) \{ \$mainKeyedRuns \+= \$mm; \$prKeyedRuns \+= \$pm \}') '[keyed] a complete pair appends both main + pr samples' +Assert-True ($src -match 'elseif \(\$mm -or \$pm\) \{ Write-Log " keyed pair #\$i incomplete') '[keyed] a one-sided keyed run drops both halves (paired CI stays aligned)' + # cleanup foreach ($d in @($baseTree, $prTree, $OutDir, $exeDir)) { if (Test-Path $d) { Remove-Item $d -Recurse -Force -ErrorAction SilentlyContinue }