jtn0123 · jtn0123 · Apr 28, 2026 · Apr 28, 2026 · Apr 28, 2026 · Apr 28, 2026
diff --git a/.github/workflows/memory-diff.yml b/.github/workflows/memory-diff.yml
@@ -77,17 +77,17 @@ jobs:
 
       - name: Checkout base branch into side worktree
         run: |
-          # Stash the memory_diff helpers so the worktree still has them if
-          # the base branch predates this commit. The helpers are standalone
-          # scripts so this is safe.
+          # Stash the memory_diff helpers from the PR and use the same helper
+          # version for both measurements. The helper is measurement tooling;
+          # using the base branch's older helper can make the diff compare two
+          # different sampling methods (for example base top-20 vs PR top-500).
           mkdir -p /tmp/memdiff-scripts
           cp scripts/memory_diff.py /tmp/memdiff-scripts/
           cp scripts/format_memory_diff.py /tmp/memdiff-scripts/
           git worktree add --detach /tmp/base-tree "origin/${{ github.base_ref }}"
-          if [ ! -f /tmp/base-tree/scripts/memory_diff.py ]; then
-            echo "Base branch does not have memory_diff.py yet — copying from PR"
-            cp /tmp/memdiff-scripts/memory_diff.py /tmp/base-tree/scripts/
-          fi
+          mkdir -p /tmp/base-tree/scripts
+          cp /tmp/memdiff-scripts/memory_diff.py /tmp/base-tree/scripts/
+          cp /tmp/memdiff-scripts/format_memory_diff.py /tmp/base-tree/scripts/
 
       - name: Measure base branch startup
         env:

diff --git a/pytest.ini b/pytest.ini
@@ -7,6 +7,7 @@ addopts =
 filterwarnings =
     ignore::DeprecationWarning
 markers =
+    flaky: rerun integration tests that are timing-sensitive in browser or device-adjacent flows
     integration: mark integration tests that may require browser automation or slower flows
     plugin_sweep: click-sweep parametrized over every registered plugin (JTN-698). Runs a bounded set of clicks against /plugin/<id> for each plugin to catch handler regressions. CI may route this to a dedicated job if runtime grows.
     journey: multi-step user-journey tests (JTN-719 epic). Each test drives a full end-to-end flow (e.g. first-run setup, edit settings, recover from error) with step-level assertions, going beyond the click-sweep's "handlers fire without error" guarantee. Gated by SKIP_BROWSER/SKIP_UI.
diff --git a/scripts/format_memory_diff.py b/scripts/format_memory_diff.py
@@ -27,10 +27,11 @@
 # workflow that looks for it.
 STICKY_MARKER = "<!-- memory-diff:jtn-610 -->"
 
-# Delta (in bytes) above which we flag a row with a warning emoji. 1 MiB is a
-# reasonable "noticeable but not alarming" threshold on the Pi Zero 2 W
-# envelope (total budget is 200 MB idle).
-WARN_DELTA_BYTES = 1 * 1024 * 1024
+# Delta (in bytes) above which we flag a row with a warning emoji. Keep this
+# high enough that allocator arena churn does not turn the whole table yellow.
+WARN_DELTA_BYTES = 5 * 1024 * 1024
+DETAIL_DELTA_FLOOR_BYTES = 256 * 1024
+GROUP_TOP_N = 10
 
 
 def _fmt_bytes(n: int) -> str:
@@ -74,6 +75,45 @@ def _short_location(loc: str, max_len: int = 60) -> str:
     return tail.replace("|", "\\|")
 
 
+def _location_group(loc: str) -> str:
+    """Return a reviewer-friendly bucket for a source allocator location."""
+    if not loc:
+        return "<unknown>"
+    normalized = loc.replace("\\", "/")
+    if normalized.startswith("<string>"):
+        return "profile harness"
+    if normalized.startswith("<frozen importlib"):
+        return "python import system"
+    if normalized.startswith("<frozen"):
+        return "python runtime"
+
+    parts = [part for part in normalized.split("/") if part]
+    if "site-packages" in parts:
+        idx = parts.index("site-packages")
+        if idx + 1 < len(parts):
+            package = parts[idx + 1]
+            if package.endswith(".py"):
+                package = package[:-3]
+            return package.replace("|", "\\|")
+    if "src" in parts:
+        idx = parts.index("src")
+        if idx + 1 < len(parts):
+            package = parts[idx + 1]
+            if package.endswith(".py"):
+                package = package[:-3]
+            return f"inkypi:{package}".replace("|", "\\|")
+    for idx, part in enumerate(parts):
+        if part.startswith("python") and idx + 1 < len(parts):
+            return "python stdlib"
+    return _short_location(loc, max_len=40)
+
+
+def _is_displayable_allocator(entry: dict) -> bool:
+    """Drop profiler-wrapper rows that are not actionable app attribution."""
+    loc = str(entry.get("location", ""))
+    return not loc.startswith("<string>")
+
+
 def _load(path: Path) -> dict:
     """Load a JSON summary, returning an empty shell if the file is missing.
 
@@ -135,6 +175,31 @@ def _merge_allocators(base: list[dict], pr: list[dict]) -> list[dict]:
     return rows
 
 
+def _merge_allocator_groups(base: list[dict], pr: list[dict]) -> list[dict]:
+    """Aggregate allocator rows into package/module buckets before diffing."""
+    index: dict[str, dict] = {}
+    for side, entries in (("base_bytes", base), ("pr_bytes", pr)):
+        for entry in entries:
+            group = _location_group(str(entry.get("location", "<unknown>")))
+            bucket = index.setdefault(
+                group,
+                {"location": group, "base_bytes": 0, "pr_bytes": 0},
+            )
+            bucket[side] += int(entry.get("bytes", 0))
+    rows = list(index.values())
+    for row in rows:
+        row["delta"] = row["pr_bytes"] - row["base_bytes"]
+    rows.sort(key=lambda r: abs(r["delta"]), reverse=True)
+    return rows
+
+
+def _significant_rows(rows: list[dict], top: int) -> list[dict]:
+    """Return rows worth showing, suppressing zero-delta allocator noise."""
+    return [
+        row for row in rows if abs(int(row.get("delta", 0))) >= DETAIL_DELTA_FLOOR_BYTES
+    ][:top]
+
+
 def format_comment(base: dict, pr: dict, top: int = 20) -> str:
     """Build the full Markdown comment body.
 
@@ -157,10 +222,31 @@ def format_comment(base: dict, pr: dict, top: int = 20) -> str:
     pr_mods = int(pr.get("module_count", 0) or 0)
     mod_delta = pr_mods - base_mods
 
-    rows = _merge_allocators(
-        list(base.get("allocators", [])),
-        list(pr.get("allocators", [])),
-    )[:top]
+    base_allocators = [
+        entry
+        for entry in list(base.get("allocators", []))
+        if _is_displayable_allocator(entry)
+    ]
+    pr_allocators = [
+        entry
+        for entry in list(pr.get("allocators", []))
+        if _is_displayable_allocator(entry)
+    ]
+    rows = _significant_rows(_merge_allocators(base_allocators, pr_allocators), top)
+    group_rows = _significant_rows(
+        _merge_allocator_groups(base_allocators, pr_allocators),
+        GROUP_TOP_N,
+    )
+    base_sample_count = int(
+        base.get("allocator_sample_limit")
+        or base.get("sampled_allocator_count")
+        or len(base_allocators)
+    )
+    pr_sample_count = int(
+        pr.get("allocator_sample_limit")
+        or pr.get("sampled_allocator_count")
+        or len(pr_allocators)
+    )
 
     lines: list[str] = []
     lines.append(STICKY_MARKER)
@@ -190,15 +276,41 @@ def format_comment(base: dict, pr: dict, top: int = 20) -> str:
     )
     lines.append("")
 
+    lines.append("### Largest grouped allocator deltas")
+    lines.append("")
+    lines.append("| Group | Base | PR | Delta |")
+    lines.append("|---|---|---|---|")
+    if not group_rows:
+        lines.append("| _(no significant grouped allocator deltas)_ | — | — | — |")
+    else:
+        lines.extend(
+            (
+                f"| `{row['location']}` "
+                f"| {_fmt_bytes(row['base_bytes'])} "
+                f"| {_fmt_bytes(row['pr_bytes'])} "
+                f"| {_fmt_delta(row['delta'])} |"
+            )
+            for row in group_rows
+        )
+    lines.append("")
+
     # Collapse the long allocator table by default so the PR conversation
     # stays skimmable. GitHub renders <details> nicely in comments.
     lines.append("<details>")
-    lines.append(f"<summary>Top {len(rows)} startup allocators</summary>")
+    detail_summary = (
+        f"Source-location detail: top {len(rows)} deltas"
+        if rows
+        else "Source-location detail: no significant deltas"
+    )
+    lines.append(
+        f"<summary>{detail_summary} "
+        f"(sampled base={base_sample_count}, PR={pr_sample_count})</summary>"
+    )
     lines.append("")
     lines.append("| # | Location | Base | PR | Delta |")
     lines.append("|---|---|---|---|---|")
     if not rows:
-        lines.append("| — | _(no allocators sampled)_ | — | — | — |")
+        lines.append("| — | _(no significant source-location deltas)_ | — | — | — |")
     else:
         for i, row in enumerate(rows, 1):
             lines.append(
@@ -213,7 +325,8 @@ def format_comment(base: dict, pr: dict, top: int = 20) -> str:
     lines.append(
         f"<sub>JTN-610 · backend=base:{base_backend}, pr:{pr_backend} · "
         "informational only, does not block merge. Hard RSS budgets are enforced "
-        "separately by JTN-608.</sub>"
+        "separately by JTN-608. Source-location rows are sampled allocator "
+        "attribution, not exact module ownership.</sub>"
     )
     return "\n".join(lines) + "\n"