diff --git a/Reactor.slnx b/Reactor.slnx
index 5cea5a46..ec358f5c 100644
--- a/Reactor.slnx
+++ b/Reactor.slnx
@@ -396,6 +396,10 @@
+
+
+
+
diff --git a/tests/stress_perf/StressPerf.Flex/FlexSceneSource.cs b/tests/stress_perf/StressPerf.Flex/FlexSceneSource.cs
new file mode 100644
index 00000000..c882f063
--- /dev/null
+++ b/tests/stress_perf/StressPerf.Flex/FlexSceneSource.cs
@@ -0,0 +1,135 @@
+using System.Globalization;
+
+namespace StressPerf.Flex;
+
+/// <summary>
+/// One leaf cell of the deep flex tree. <paramref name="Grow"/>, <paramref name="Basis"/> and
+/// <paramref name="Width"/> are the per-child flex inputs pushed onto the Yoga node every
+/// render (via <c>.Flex(grow, basis, ...)</c> + <c>.Width(...)</c>); mutating a
+/// fraction of them each tick forces a real Yoga relayout. <paramref name="Label"/> is the
+/// display text, computed once at construction so the per-render hot path only pays
+/// for the element-record allocation + the layout pass under measurement — never
+/// per-render string formatting (mirrors how KeyedRow pre-bakes its label).
+/// </summary>
+public readonly record struct FlexLeaf(int Id, string Label, double Grow, double Basis, double Width);
+
+/// <summary>
+/// Deterministic deep-flex-tree workload data source for the /perf macro benchmark.
+/// <para>
+/// Where <c>StockDataSource</c> mutates a flat positional grid and the keyed-list
+/// source reorders keyed rows, this source backs a DEEP NESTED flex tree
+/// (sections → rows → leaf cells) whose leaves' flex inputs (grow / basis / width)
+/// are mutated each tick. When the rendered tree pushes those inputs onto the Yoga
+/// nodes, a real Yoga measure/layout pass runs every frame — the FlexPanel/Yoga
+/// LAYOUT engine the positional StocksGrid and keyed-list workloads never exercise.
+/// </para>
+/// <para>
+/// The tree SHAPE (section / row / column counts) is fixed for the whole run, so the
+/// child reconciler takes its cheap positional arm and the cost under measurement is
+/// the layout-engine work, not child diffing. Deterministic (fixed RNG seed) so
+/// main-vs-PR /perf runs replay identical mutation sequences; leaf count is held
+/// constant so working-set and render-count stay stable across the run.
+/// </para>
+/// </summary>
+public sealed class FlexSceneSource
+{
+    // Tree shape. The single knob below — DefaultLeafTarget — drives the whole scale; bump
+    // it (and nothing else) if a smoke run shows the alloc-bytes/render or the memory delta
+    // is too small to clear harness noise. The inner shape (rows × cols per section) is
+    // fixed so the tree stays a deep three-level nest (root-column → section-column → row →
+    // leaf); the number of SECTIONS is derived to reach the leaf target. ~2000 leaves at
+    // meaningful depth so per-node inline-array memory (#142/#143) and per-frame list/line
+    // pooling (#141/#144) are at a scale that survives the noisy Avg-Memory-MB metric —
+    // node count is exactly what makes the inline-per-node-storage win visible.
+    public const int DefaultLeafTarget = 2000;
+    public const int RowsPerSection = 10;
+    public const int ColsPerRow = 10;
+
+    public int Sections { get; }
+    public int Rows { get; }
+    public int Cols { get; }
+
+    private readonly FlexLeaf[] _leaves;
+    private readonly Random _rng = new(42); // deterministic seed (matches StockDataSource / KeyedListSource)
+
+    /// <summary>Builds the fixed-shape scene, seeding every leaf from the RNG.</summary>
+    /// <param name="leafTarget">Minimum leaf count (floored at 1); rounded up to whole sections.</param>
+    public FlexSceneSource(int leafTarget = DefaultLeafTarget)
+    {
+        leafTarget = Math.Max(1, leafTarget);
+        Rows = RowsPerSection;
+        Cols = ColsPerRow;
+        int perSection = Rows * Cols;
+        // Ceiling division, so realized leaves >= target (e.g. 2000 → 20 sections → 2000).
+        Sections = Math.Max(1, (leafTarget + perSection - 1) / perSection);
+        _leaves = new FlexLeaf[Sections * Rows * Cols];
+        for (int i = 0; i < _leaves.Length; i++)
+            _leaves[i] = MakeLeaf(i);
+    }
+
+    /// <summary>Total leaf-cell count (held constant across ticks).</summary>
+    public int Count => _leaves.Length;
+
+    /// <summary>
+    /// Re-roll the flex inputs (grow / basis / width) on a percentage of the leaf cells
+    /// for one tick.
+    /// <para>
+    /// <paramref name="percent"/> sets the layout-churn budget
+    /// <c>k = round(N * percent / 100)</c>, clamped to <c>[0, N]</c>. Up to <c>k</c>
+    /// leaves (indices may repeat) get re-rolled grow/basis/width values, so their Yoga
+    /// nodes are re-dirtied and a real measure/layout pass runs. Every other leaf keeps its
+    /// EXACT current values — re-pushing those unchanged inputs each render is precisely
+    /// the path #670's YogaNode setter equality guards optimize (unchanged setter → no
+    /// re-dirty → the frame-level layout cache HITS). So <c>percent == 0</c> is the
+    /// ALL-UNCHANGED FLOOR (every node cache-eligible) and the win grows visible as the
+    /// churn fraction varies. <see cref="Snapshot"/> still allocates a fresh array each
+    /// tick, so a full render (and layout pass) runs regardless.
+    /// </para>
+    /// </summary>
+    /// <param name="percent">Percentage of leaves to re-roll; clamped to 0–100, NaN counts as 0.</param>
+    /// <returns>The layout-churn budget actually applied (for logging parity).</returns>
+    public int Update(double percent)
+    {
+        int n = _leaves.Length;
+        // Clamp while the value is still a double: narrowing a NaN or out-of-range double
+        // to int is unspecified (or throws when checked), so a huge percent could yield 0.
+        double budget = Math.Round(n * percent / 100.0, MidpointRounding.AwayFromZero);
+        int k = double.IsNaN(budget) ? 0 : (int)Math.Clamp(budget, 0.0, n);
+
+        // Draw k indices WITH replacement: a repeated index just re-rolls the same leaf
+        // twice, which is harmless and keeps the per-tick work O(k) with no allocation.
+        for (int i = 0; i < k; i++)
+        {
+            int idx = _rng.Next(n);
+            _leaves[idx] = _leaves[idx] with
+            {
+                Grow = _rng.Next(0, 3), // 0..2
+                Basis = 40 + _rng.Next(0, 80), // 40..119
+                Width = 40 + _rng.Next(0, 80), // 40..119
+            };
+        }
+
+        return k;
+    }
+
+    /// <summary>
+    /// Independent copy of the current leaf inputs (a fresh array on every call). The
+    /// harness rebuilds its full nested flex child tree from this each render (no
+    /// positional memo fast-path), so a real Yoga layout pass runs every tick.
+    /// </summary>
+    public FlexLeaf[] Snapshot() => (FlexLeaf[])_leaves.Clone();
+
+    /// <summary>Creates leaf <paramref name="id"/>: identity-derived label, RNG-drawn flex inputs.</summary>
+    private FlexLeaf MakeLeaf(int id)
+    {
+        // Deterministic, content-stable label derived from identity, so a leaf's text
+        // never changes — isolating the LAYOUT-engine signal from per-cell text updates.
+        string label = string.Create(CultureInfo.InvariantCulture, $"L{id} · {id % 97:000}");
+        // Deterministic initial flex inputs from the seeded RNG so main and PR start from
+        // the identical scene before any mutation.
+        double grow = _rng.Next(0, 3);
+        double basis = 40 + _rng.Next(0, 80);
+        double width = 40 + _rng.Next(0, 80);
+        return new FlexLeaf(id, label, grow, basis, width);
+    }
+}
diff --git a/tests/stress_perf/StressPerf.Flex/Program.cs b/tests/stress_perf/StressPerf.Flex/Program.cs
new file mode 100644
index 00000000..a04116ce
--- /dev/null
+++ b/tests/stress_perf/StressPerf.Flex/Program.cs
@@ -0,0 +1,264 @@
+// StressPerf.Flex — a /perf macro workload that exercises Reactor's FlexPanel / Yoga
+// LAYOUT engine (the Flex/ + Yoga/ subsystems #670 optimizes) that NO existing /perf
+// workload reaches.
+//
+// The StocksGrid workload (StressPerf.ReactorOptimized) mutates a fixed positional
+// Grid in place; the keyed-list workload (StressPerf.KeyedList) reorders keyed rows.
+// Neither drives a real Yoga measure/layout pass each frame, so #670's layout-cache
+// guards (#138), inline per-node arrays (#142/#143), attached-DP push caching (#147)
+// and per-frame list/line pooling (#141/#144) cannot be measured. The existing
+// FlexPanel-heavy StressPerf.VirtualList is vsync-capped AND virtualized, which is
+// exactly why it can't surface them either.
+//
+// This harness renders a DEEP NESTED, fully-realized (non-virtualized) flex tree
+// (sections → rows → leaf cells, ~2000 leaves) and, every tick, re-rolls the flex
+// inputs (grow / basis / width) on a `--percent` fraction of the leaves — forcing a
+// real Yoga relayout each frame. The remaining leaves re-push their UNCHANGED inputs,
+// which is precisely the YogaNode setter-equality-guard (cache-hit) path #670 targets.
+// The win shows up as lower per-frame ALLOCATION + Gen0 on the deep tree and lower
+// inline-per-node MEMORY — captured by the shared PerfTracker.
+//
+// MEASUREMENT CAVEAT (for maintainers + whoever measures #670 against this leg): the
+// PerfTracker phase hook (OnRenderComplete, wired below) fires at the END of Reactor's
+// render/reconcile, which is BEFORE WinUI runs layout. The real Yoga Measure/Arrange
+// work (#138 cache guards, #141/#144 list/line pooling, the layout-side of #147) runs
+// LATER, in FlexPanel.MeasureOverride/ArrangeOverride — so it is NOT reflected in the
+// `avgReconcileMs` / `avgDiffMs` numbers. It IS captured by `allocBytesPerRender` /
+// `gen0` (PerfTracker reads process-wide GC counters across the whole run, layout pass
+// included) and largely by `rendersPerSec`. So judge layout-engine wins on the
+// allocation table + Renders/sec, NOT the reconcile/diff ms rows; `avgMemoryMB` is too
+// coarse to resolve the inline-per-node-array gain at this node count. A post-layout
+// timing hook is deliberately NOT added here — that would touch the shared PerfTracker
+// (used by the other legs); add it as a follow-up only if #670's /perf actually needs a
+// layout-time signal (measure-then-escalate).
+//
+// The harness contract is mirrored byte-for-byte from StressPerf.KeyedList: the same
+// CLI flags (--headless / --percent / --duration / --json via the shared CliOptions),
+// the same shutdown emission (report.txt + metrics.json + the REACTOR_PERF_JSON stdout
+// line) via the shared PerfTracker, and the same OnRenderComplete phase-capture wiring.
+// Only the SCENE and its per-tick mutation differ, so Run-PerfBenchmark.ps1 /
+// PerfLib.ps1 can drive it identically.
+
+using Microsoft.UI.Reactor;
+using Microsoft.UI.Reactor.Core;
+using Microsoft.UI.Reactor.Hooks;
+using Microsoft.UI.Dispatching;
+using Microsoft.UI.Xaml;
+using Microsoft.UI.Xaml.Media;
+using StressPerf.Flex;
+using StressPerf.Shared;
+using static Microsoft.UI.Reactor.Factories;
+
+// Command-line options are parsed up front, before WinUI is started.
+var options = CliOptions.Parse(args);
+if (options.Headless)
+{
+    ConsoleHelper.EnsureConsole();
+}
+FlexApp.CliOpts = options;
+
+// The child tree is built with direct record initializers (`new TextBlockElement(...)`)
+// to keep factory overhead out of the hot path; that also skips the factories' lazy
+// handler registration. Registering the full built-in catalog once at startup gives
+// every built-in element record a handler before the first reconcile — the one-line
+// prelude for the direct-record idiom (spec-048 §3.4), identical to StressPerf.KeyedList.
+ReactorApp.RegisterAllBuiltIns();
+
+ReactorApp.Run("StressPerf.Flex", fullScreen: true);
+
+// ---------------------------------------------------------------------------
+
+class FlexApp : Component
+{
+ private const string AppName = "StressPerf.Flex";
+
+ public static CliOptions CliOpts { get; set; } = new();
+
+ public override Element Render()
+ {
+ var sourceRef = UseRef(null);
+ if (sourceRef.Current == null)
+ sourceRef.Current = new FlexSceneSource();
+ var source = sourceRef.Current;
+
+ // The full leaf snapshot drives a complete child-tree rebuild every render
+ // (deliberately NO positional memo fast-path), so a real Yoga layout pass runs
+ // each tick.
+ var (data, setData) = UseState(source.Snapshot());
+
+ var (percent, setPercent) = UseState(CliOpts.Percent);
+ var (running, setRunning) = UseState(false);
+ var (fps, setFps) = UseState("FPS: --");
+ var (updateMs, setUpdateMs) = UseState("Update: -- ms");
+ var (mem, setMem) = UseState("Mem: -- MB");
+
+ var perfRef = UseRef(null);
+ var timerRef = UseRef(null);
+ var shutdownRef = UseRef(null);
+ var benchmarkUpdatePending = UseRef(false);
+ var shapeVerifiedRef = UseRef(false);
+
+ if (perfRef.Current == null)
+ {
+ perfRef.Current = new PerfTracker();
+ var perf = perfRef.Current;
+ var pending = benchmarkUpdatePending;
+ ReactorApp.PrimaryWindow!.Host.OnRenderComplete = (treeMs, reconcileMs, effectsMs) =>
+ {
+ if (pending.Current)
+ {
+ pending.Current = false;
+ perf.RecordPhases(treeMs, reconcileMs, effectsMs);
+ }
+ };
+ }
+
+ var renderHooked = UseRef(false);
+ if (!renderHooked.Current)
+ {
+ renderHooked.Current = true;
+ var perf = perfRef.Current;
+ CompositionTarget.Rendering += (_, _) => perf.FrameRendered();
+ }
+
+ UseEffect(() =>
+ {
+ if (running)
+ {
+ var src = sourceRef.Current!;
+ var timer = new DispatcherTimer { Interval = TimeSpan.FromMilliseconds(33) };
+ timer.Tick += (_, _) =>
+ {
+ var perf = perfRef.Current!;
+ perf.BeginUpdate();
+
+ src.Update(percent);
+ benchmarkUpdatePending.Current = true;
+ setData(src.Snapshot());
+
+ perf.EndUpdate();
+
+ setFps($"FPS: {perf.CurrentFps:F0}");
+ setUpdateMs($"Update: {perf.LastUpdateMs:F1} ms");
+ setMem($"Mem: {perf.CurrentMemoryMB} MB");
+ };
+ timer.Start();
+ timerRef.Current = timer;
+ }
+ else
+ {
+ timerRef.Current?.Stop();
+ timerRef.Current = null;
+ }
+
+ return () =>
+ {
+ timerRef.Current?.Stop();
+ timerRef.Current = null;
+ };
+ }, running, percent);
+
+ UseEffect(() =>
+ {
+ if (!CliOpts.Headless) return;
+ setPercent(CliOpts.Percent);
+ setRunning(true);
+
+ var shutdownTimer = new DispatcherTimer { Interval = TimeSpan.FromSeconds(CliOpts.DurationSeconds) };
+ shutdownTimer.Tick += (_, _) =>
+ {
+ setRunning(false);
+ shutdownTimer.Stop();
+ var perf = perfRef.Current!;
+ perf.WriteReportFile(AppName, CliOpts.Percent);
+ if (CliOpts.Json)
+ {
+ perf.WriteMetricsJsonFile(AppName, CliOpts.Percent);
+ // Echo a single marked line so log scrapers have a fallback to the
+ // {AppName}.metrics.json file written next to the exe.
+ Console.WriteLine("REACTOR_PERF_JSON " + perf.GetMetricsJson(AppName, CliOpts.Percent));
+ }
+ Application.Current.Exit();
+ };
+ shutdownTimer.Start();
+ shutdownRef.Current = shutdownTimer;
+ }, Array.Empty