Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions .github/workflows/memory-diff.yml
Original file line number Diff line number Diff line change
Expand Up @@ -77,17 +77,17 @@ jobs:

- name: Checkout base branch into side worktree
run: |
# Stash the memory_diff helpers so the worktree still has them if
# the base branch predates this commit. The helpers are standalone
# scripts so this is safe.
# Stash the memory_diff helpers from the PR and use the same helper
# version for both measurements. The helper is measurement tooling;
# using the base branch's older helper can make the diff compare two
# different sampling methods (for example base top-20 vs PR top-500).
mkdir -p /tmp/memdiff-scripts
cp scripts/memory_diff.py /tmp/memdiff-scripts/
cp scripts/format_memory_diff.py /tmp/memdiff-scripts/
git worktree add --detach /tmp/base-tree "origin/${{ github.base_ref }}"
if [ ! -f /tmp/base-tree/scripts/memory_diff.py ]; then
echo "Base branch does not have memory_diff.py yet — copying from PR"
cp /tmp/memdiff-scripts/memory_diff.py /tmp/base-tree/scripts/
fi
mkdir -p /tmp/base-tree/scripts
cp /tmp/memdiff-scripts/memory_diff.py /tmp/base-tree/scripts/
cp /tmp/memdiff-scripts/format_memory_diff.py /tmp/base-tree/scripts/

- name: Measure base branch startup
env:
Expand Down
1 change: 1 addition & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ addopts =
filterwarnings =
ignore::DeprecationWarning
markers =
flaky: rerun integration tests that are timing-sensitive in browser or device-adjacent flows
integration: mark integration tests that may require browser automation or slower flows
plugin_sweep: click-sweep parametrized over every registered plugin (JTN-698). Runs a bounded set of clicks against /plugin/<id> for each plugin to catch handler regressions. CI may route this to a dedicated job if runtime grows.
journey: multi-step user-journey tests (JTN-719 epic). Each test drives a full end-to-end flow (e.g. first-run setup, edit settings, recover from error) with step-level assertions, going beyond the click-sweep's "handlers fire without error" guarantee. Gated by SKIP_BROWSER/SKIP_UI.
135 changes: 124 additions & 11 deletions scripts/format_memory_diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,11 @@
# workflow that looks for it.
STICKY_MARKER = "<!-- memory-diff:jtn-610 -->"

# Delta (in bytes) above which we flag a row with a warning emoji. 1 MiB is a
# reasonable "noticeable but not alarming" threshold on the Pi Zero 2 W
# envelope (total budget is 200 MB idle).
WARN_DELTA_BYTES = 1 * 1024 * 1024
# Delta (in bytes) above which we flag a row with a warning emoji. Keep this
# high enough that allocator arena churn does not turn the whole table yellow.
WARN_DELTA_BYTES = 5 * 1024 * 1024
DETAIL_DELTA_FLOOR_BYTES = 256 * 1024
GROUP_TOP_N = 10


def _fmt_bytes(n: int) -> str:
Expand Down Expand Up @@ -74,6 +75,45 @@ def _short_location(loc: str, max_len: int = 60) -> str:
return tail.replace("|", "\\|")


def _location_group(loc: str) -> str:
"""Return a reviewer-friendly bucket for a source allocator location."""
if not loc:
return "<unknown>"
normalized = loc.replace("\\", "/")
if normalized.startswith("<string>"):
return "profile harness"
if normalized.startswith("<frozen importlib"):
return "python import system"
if normalized.startswith("<frozen"):
return "python runtime"

parts = [part for part in normalized.split("/") if part]
if "site-packages" in parts:
idx = parts.index("site-packages")
if idx + 1 < len(parts):
package = parts[idx + 1]
if package.endswith(".py"):
package = package[:-3]
return package.replace("|", "\\|")
if "src" in parts:
idx = parts.index("src")
if idx + 1 < len(parts):
package = parts[idx + 1]
if package.endswith(".py"):
package = package[:-3]
return f"inkypi:{package}".replace("|", "\\|")
for idx, part in enumerate(parts):
if part.startswith("python") and idx + 1 < len(parts):
return "python stdlib"
return _short_location(loc, max_len=40)


def _is_displayable_allocator(entry: dict) -> bool:
"""Drop profiler-wrapper rows that are not actionable app attribution."""
loc = str(entry.get("location", ""))
return not loc.startswith("<string>")


def _load(path: Path) -> dict:
"""Load a JSON summary, returning an empty shell if the file is missing.

Expand Down Expand Up @@ -135,6 +175,31 @@ def _merge_allocators(base: list[dict], pr: list[dict]) -> list[dict]:
return rows


def _merge_allocator_groups(base: list[dict], pr: list[dict]) -> list[dict]:
"""Aggregate allocator rows into package/module buckets before diffing."""
index: dict[str, dict] = {}
for side, entries in (("base_bytes", base), ("pr_bytes", pr)):
for entry in entries:
group = _location_group(str(entry.get("location", "<unknown>")))
bucket = index.setdefault(
group,
{"location": group, "base_bytes": 0, "pr_bytes": 0},
)
bucket[side] += int(entry.get("bytes", 0))
rows = list(index.values())
for row in rows:
row["delta"] = row["pr_bytes"] - row["base_bytes"]
rows.sort(key=lambda r: abs(r["delta"]), reverse=True)
return rows


def _significant_rows(rows: list[dict], top: int) -> list[dict]:
"""Return rows worth showing, suppressing zero-delta allocator noise."""
return [
row for row in rows if abs(int(row.get("delta", 0))) >= DETAIL_DELTA_FLOOR_BYTES
][:top]


def format_comment(base: dict, pr: dict, top: int = 20) -> str:
"""Build the full Markdown comment body.

Expand All @@ -157,10 +222,31 @@ def format_comment(base: dict, pr: dict, top: int = 20) -> str:
pr_mods = int(pr.get("module_count", 0) or 0)
mod_delta = pr_mods - base_mods

rows = _merge_allocators(
list(base.get("allocators", [])),
list(pr.get("allocators", [])),
)[:top]
base_allocators = [
entry
for entry in list(base.get("allocators", []))
if _is_displayable_allocator(entry)
]
pr_allocators = [
entry
for entry in list(pr.get("allocators", []))
if _is_displayable_allocator(entry)
]
rows = _significant_rows(_merge_allocators(base_allocators, pr_allocators), top)
group_rows = _significant_rows(
_merge_allocator_groups(base_allocators, pr_allocators),
GROUP_TOP_N,
)
base_sample_count = int(
base.get("allocator_sample_limit")
or base.get("sampled_allocator_count")
or len(base_allocators)
)
pr_sample_count = int(
pr.get("allocator_sample_limit")
or pr.get("sampled_allocator_count")
or len(pr_allocators)
)

lines: list[str] = []
lines.append(STICKY_MARKER)
Expand Down Expand Up @@ -190,15 +276,41 @@ def format_comment(base: dict, pr: dict, top: int = 20) -> str:
)
lines.append("")

lines.append("### Largest grouped allocator deltas")
lines.append("")
lines.append("| Group | Base | PR | Delta |")
lines.append("|---|---|---|---|")
if not group_rows:
lines.append("| _(no significant grouped allocator deltas)_ | — | — | — |")
else:
lines.extend(
(
f"| `{row['location']}` "
f"| {_fmt_bytes(row['base_bytes'])} "
f"| {_fmt_bytes(row['pr_bytes'])} "
f"| {_fmt_delta(row['delta'])} |"
)
for row in group_rows
)
lines.append("")

# Collapse the long allocator table by default so the PR conversation
# stays skimmable. GitHub renders <details> nicely in comments.
lines.append("<details>")
lines.append(f"<summary>Top {len(rows)} startup allocators</summary>")
detail_summary = (
f"Source-location detail: top {len(rows)} deltas"
if rows
else "Source-location detail: no significant deltas"
)
lines.append(
f"<summary>{detail_summary} "
f"(sampled base={base_sample_count}, PR={pr_sample_count})</summary>"
)
lines.append("")
lines.append("| # | Location | Base | PR | Delta |")
lines.append("|---|---|---|---|---|")
if not rows:
lines.append("| — | _(no allocators sampled)_ | — | — | — |")
lines.append("| — | _(no significant source-location deltas)_ | — | — | — |")
else:
for i, row in enumerate(rows, 1):
lines.append(
Expand All @@ -213,7 +325,8 @@ def format_comment(base: dict, pr: dict, top: int = 20) -> str:
lines.append(
f"<sub>JTN-610 · backend=base:{base_backend}, pr:{pr_backend} · "
"informational only, does not block merge. Hard RSS budgets are enforced "
"separately by JTN-608.</sub>"
"separately by JTN-608. Source-location rows are sampled allocator "
"attribution, not exact module ownership.</sub>"
)
return "\n".join(lines) + "\n"

Expand Down
Loading
Loading