From c3fa1965b2f124aed53f555bf299caad66502c57 Mon Sep 17 00:00:00 2001 From: yanyishuai <1093994647@qq.com> Date: Tue, 30 Jun 2026 21:53:54 +0800 Subject: [PATCH] fix(scripts): reject empty report source args (#807) --- scripts/claim_inventory.py | 49 +- scripts/pr_queue_health.py | 937 +++++++++++----------- scripts/proposed_work_triage.py | 1152 ++++++++++++++-------------- tests/test_claim_inventory.py | 1011 +++++++++++++----------- tests/test_pr_queue_health.py | 22 + tests/test_proposed_work_triage.py | 20 + 6 files changed, 1707 insertions(+), 1484 deletions(-) diff --git a/scripts/claim_inventory.py b/scripts/claim_inventory.py index 768e05f2..285b559c 100644 --- a/scripts/claim_inventory.py +++ b/scripts/claim_inventory.py @@ -21,6 +21,9 @@ DEFAULT_API_HOST = "https://api.mrwk.online" GH_TIMEOUT_SECONDS = 30 GH_LIMIT = 200 +GH_PUBLIC_API_SAFETY_CAP = 201 +GH_ISSUE_SAFETY_CAP = 201 +GH_PR_SAFETY_CAP = 201 GITHUB_URL_RE = re.compile( r"https://github\.com/[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+/" r"(?:issues|pull)/\d+(?:#[A-Za-z0-9_.-]+)?" @@ -581,14 +584,25 @@ def _get_json(url: str) -> Any: def load_public_api_state(api_host: str) -> dict[str, Any]: host = api_host.rstrip("/") - bounties = _get_json(f"{host}/api/v1/bounties?limit={GH_LIMIT}") - activity = _get_json(f"{host}/api/v1/activity?limit={GH_LIMIT}") + bounties = _get_json(f"{host}/api/v1/bounties?limit={GH_PUBLIC_API_SAFETY_CAP}") + if isinstance(bounties, list) and len(bounties) >= GH_PUBLIC_API_SAFETY_CAP: + raise RuntimeError( + f"public bounties list reached the {GH_PUBLIC_API_SAFETY_CAP} item safety cap; " + "use an API-paginated collector before trusting this live report" + ) + activity = _get_json(f"{host}/api/v1/activity?limit={GH_PUBLIC_API_SAFETY_CAP}") data: dict[str, Any] = {} if isinstance(bounties, list): data["bounties"] = bounties if isinstance(activity, dict): contributors = activity.get("contributors") if isinstance(contributors, list): + if len(contributors) >= GH_PUBLIC_API_SAFETY_CAP: + raise RuntimeError( + f"public activity contributors list reached the " + f"{GH_PUBLIC_API_SAFETY_CAP} item safety cap; " + "use an API-paginated collector before trusting this live report" + ) data["contributors"] = contributors recent = activity.get("recent") if isinstance(recent, list): @@ -607,11 +621,16 @@ def load_live_inventory(repo: str, api_host: str) -> dict[str, Any]: "--state", "open", "--limit", - str(GH_LIMIT), + str(GH_ISSUE_SAFETY_CAP), "--json", "number,title,url,labels,author", ] ) + if len(issue_list) >= GH_ISSUE_SAFETY_CAP: + raise RuntimeError( + f"gh issue list reached the {GH_ISSUE_SAFETY_CAP} item safety cap; " + "use an API-paginated collector before trusting this live report" + ) issues: list[dict[str, Any]] = [] for issue in issue_list: if ( @@ -643,11 +662,16 @@ def load_live_inventory(repo: str, api_host: str) -> dict[str, Any]: "--state", "open", "--limit", - str(GH_LIMIT), + str(GH_PR_SAFETY_CAP), "--json", "number,title,url,body,author,labels", ] ) + if len(prs) >= GH_PR_SAFETY_CAP: + raise RuntimeError( + f"gh pr list reached the {GH_PR_SAFETY_CAP} item safety cap; " + "use an API-paginated collector before trusting this live report" + ) pull_requests: list[dict[str, Any]] = [] for pr in prs: if not isinstance(pr, dict) or not isinstance(pr.get("number"), int): @@ -680,6 +704,15 @@ def _load_input(path: str) -> dict[str, Any]: return data +def _require_non_empty_arg(parser: argparse.ArgumentParser, option_name: str, value: str) -> str: + stripped = value.strip() + if not stripped: + parser.error(f"{option_name} must be a non-empty value") + if stripped != value: + parser.error(f"{option_name} must not include leading or trailing whitespace") + return value + + def main(argv: list[str] | None = None) -> int: parser = argparse.ArgumentParser( description="Inventory public MergeWork claim surfaces and payout status." @@ -691,7 +724,13 @@ def main(argv: list[str] | None = None) -> int: parser.add_argument("--format", choices=["json", "markdown"], default="markdown") args = parser.parse_args(argv) - data = _load_input(args.input) if args.input else load_live_inventory(args.repo, args.api_host) + if args.input is not None: + data = _load_input(_require_non_empty_arg(parser, "--input", args.input)) + else: + data = load_live_inventory( + _require_non_empty_arg(parser, "--repo", args.repo), + args.api_host, + ) report = analyze_inventory(data, api_host=args.api_host) if args.format == "json": print(json.dumps(report, indent=2, sort_keys=True)) diff --git a/scripts/pr_queue_health.py b/scripts/pr_queue_health.py index cf39a90e..92e81cdb 100644 --- a/scripts/pr_queue_health.py +++ b/scripts/pr_queue_health.py @@ -1,462 +1,475 @@ -from __future__ import annotations - -import argparse -import json -import re -import subprocess -import sys -from collections import defaultdict -from pathlib import Path -from typing import Any - -if __package__ in {None, ""}: - sys.path.insert(0, str(Path(__file__).resolve().parents[1])) - -from scripts.bounty_refs import BOUNTY_REF_RE - -NOISY_TITLE_PREFIX_RE = re.compile(r"^\s*(?:\[[^\]]+\]\s*)+") -UNSTABLE_MERGE_STATES = {"blocked", "conflicting", "dirty", "unknown", "unstable"} -GH_TIMEOUT_SECONDS = 30 -GH_PR_SAFETY_CAP = 201 -GH_ISSUE_SAFETY_CAP = 201 -MAX_BOUNTY_REF = 2**63 - 1 -ISSUE_SECTIONS = ( - ("Closed or exhausted bounty references", "closed_bounty_references"), - ("Non-live bounty references", "non_live_bounty_references"), - ("Missing bounty references", "missing_bounty_references"), - ("Dirty or unstable merge state", "dirty_or_unstable_merge_state"), - ("Needs info", "needs_info"), -) - - -def _labels(raw: dict[str, Any]) -> list[str]: - labels = raw.get("labels", []) - names: list[str] = [] - for label in labels: - if isinstance(label, str): - names.append(label) - elif isinstance(label, dict) and isinstance(label.get("name"), str): - names.append(label["name"]) - return names - - -def _comments(raw: dict[str, Any]) -> list[str]: - comments = raw.get("comments", []) - bodies: list[str] = [] - for comment in comments: - if isinstance(comment, str): - bodies.append(comment) - elif isinstance(comment, dict) and isinstance(comment.get("body"), str): - bodies.append(comment["body"]) - return bodies - - -def _merge_state(raw: dict[str, Any]) -> str: - for key in ("merge_state", "mergeStateStatus", "mergeable", "mergeable_state"): - value = raw.get(key) - if isinstance(value, str) and value: - return value.lower() - return "unknown" - - -def _scope_key(raw: dict[str, Any]) -> str: - explicit = raw.get("scope") - if isinstance(explicit, str) and explicit.strip(): - return " ".join(explicit.lower().split()) - title = str(raw.get("title") or "") - title = NOISY_TITLE_PREFIX_RE.sub("", title) - return " ".join(title.lower().split()) - - -def _bounty_refs(raw: dict[str, Any]) -> list[int]: - explicit = raw.get("bounty_refs") - if isinstance(explicit, list): - refs = [item for item in explicit if isinstance(item, int)] - if refs: - return sorted(set(refs)) - text = "\n".join( - str(raw.get(key) or "") - for key in ("title", "body", "description") - if raw.get(key) is not None - ) - found_refs: set[int] = set() - for match in BOUNTY_REF_RE.findall(text): - try: - ref = int(match) - except ValueError: - continue - if ref <= MAX_BOUNTY_REF: - found_refs.add(ref) - return sorted(found_refs) - - -def _is_open_bounty(raw: dict[str, Any]) -> bool: - state = str(raw.get("state") or "").lower() - remaining = raw.get("awards_remaining", raw.get("awardsRemaining")) - if state and state != "open": - return False - if remaining is not None: - try: - return int(remaining) > 0 - except (TypeError, ValueError): - return False - return state == "open" - - -def _bounty_liveness(raw: dict[str, Any]) -> tuple[bool, str]: - if not _is_open_bounty(raw): - return False, "closed or exhausted" - if "labels" in raw and not any(label.lower() == "mrwk:bounty" for label in _labels(raw)): - return False, "missing mrwk:bounty label" - if "comments" in raw and not any("Reserved on MergeWork:" in body for body in _comments(raw)): - return False, "missing Reserved on MergeWork claims-open comment" - return True, "live" - - -def _issue(pr: dict[str, Any], reason: str, detail: str) -> dict[str, Any]: - return { - "pull_request": pr["number"], - "title": pr["title"], - "url": pr.get("url"), - "reason": reason, - "detail": detail, - } - - -def analyze_queue(data: dict[str, Any]) -> dict[str, Any]: - bounties = { - int(item["number"]): item - for item in data.get("bounties", []) - if isinstance(item, dict) and isinstance(item.get("number"), int) - } - prs = [item for item in data.get("pull_requests", []) if isinstance(item, dict)] - normalized_prs: list[dict[str, Any]] = [] - for pr in prs: - if not isinstance(pr.get("number"), int): - continue - normalized_prs.append( - { - "number": int(pr["number"]), - "title": str(pr.get("title") or ""), - "url": pr.get("url"), - "refs": _bounty_refs(pr), - "labels": _labels(pr), - "merge_state": _merge_state(pr), - "scope": _scope_key(pr), - } - ) - - closed_bounty_references: list[dict[str, Any]] = [] - non_live_bounty_references: list[dict[str, Any]] = [] - missing_bounty_references: list[dict[str, Any]] = [] - dirty_or_unstable_merge_state: list[dict[str, Any]] = [] - needs_info: list[dict[str, Any]] = [] - duplicate_groups: dict[tuple[int, str], list[int]] = defaultdict(list) - - for pr in normalized_prs: - if not pr["refs"]: - missing_bounty_references.append( - _issue( - pr, - "missing_bounty_reference", - "No bounty reference such as Bounty #, Refs #, " - "Fixes #, or /claim # found", - ) - ) - for ref in pr["refs"]: - bounty = bounties.get(ref) - if bounty is None: - closed_bounty_references.append( - _issue( - pr, - "unknown_bounty_reference", - f"Referenced bounty #{ref} was not in input", - ) - ) - elif not _is_open_bounty(bounty): - closed_bounty_references.append( - _issue( - pr, - "closed_or_exhausted_bounty", - f"Referenced bounty #{ref} is not payable", - ) - ) - else: - is_live, reason = _bounty_liveness(bounty) - if not is_live: - non_live_bounty_references.append( - _issue( - pr, - "non_live_bounty_reference", - f"Referenced bounty #{ref} is not live claimable: {reason}", - ) - ) - duplicate_groups[(ref, pr["scope"])].append(pr["number"]) - if pr["merge_state"] in UNSTABLE_MERGE_STATES: - dirty_or_unstable_merge_state.append( - _issue(pr, "dirty_or_unstable_merge_state", f"Merge state is {pr['merge_state']}") - ) - if any(label.lower() == "mrwk:needs-info" for label in pr["labels"]): - needs_info.append(_issue(pr, "mrwk_needs_info", "PR has mrwk:needs-info label")) - - duplicate_scope_groups = [ - {"bounty": bounty, "scope": scope, "pull_requests": sorted(numbers)} - for (bounty, scope), numbers in sorted(duplicate_groups.items()) - if len(numbers) > 1 and scope - ] - closed_or_exhausted_count = sum( - 1 for bounty in bounties.values() if not _is_open_bounty(bounty) - ) - live_bounty_count = sum(1 for bounty in bounties.values() if _bounty_liveness(bounty)[0]) - non_live_bounty_count = sum( - 1 - for bounty in bounties.values() - if _is_open_bounty(bounty) and not _bounty_liveness(bounty)[0] - ) - report = { - "summary": { - "pull_requests": len(normalized_prs), - "open_bounties": len(bounties) - closed_or_exhausted_count, - "live_bounties": live_bounty_count, - "non_live_bounties": non_live_bounty_count, - "closed_or_exhausted_bounties": closed_or_exhausted_count, - "closed_bounty_references": len(closed_bounty_references), - "non_live_bounty_references": len(non_live_bounty_references), - "missing_bounty_references": len(missing_bounty_references), - "dirty_or_unstable_merge_state": len(dirty_or_unstable_merge_state), - "needs_info": len(needs_info), - "duplicate_scope_groups": len(duplicate_scope_groups), - }, - "closed_bounty_references": closed_bounty_references, - "non_live_bounty_references": non_live_bounty_references, - "missing_bounty_references": missing_bounty_references, - "dirty_or_unstable_merge_state": dirty_or_unstable_merge_state, - "needs_info": needs_info, - "duplicate_scope_groups": duplicate_scope_groups, - } - return report - - -def has_queue_issues(report: dict[str, Any]) -> bool: - return any( - report[key] - for key in ( - "closed_bounty_references", - "non_live_bounty_references", - "missing_bounty_references", - "dirty_or_unstable_merge_state", - "needs_info", - "duplicate_scope_groups", - ) - ) - - -def format_text_report(report: dict[str, Any]) -> str: - lines = ["PR queue health summary"] - for key, value in report["summary"].items(): - lines.append(f"- {key.replace('_', ' ')}: {value}") - if not has_queue_issues(report): - lines.append("") - lines.append("No queue-health issues found.") - return "\n".join(lines) - for title, key in ISSUE_SECTIONS: - if report[key]: - lines.append("") - lines.append(title) - for item in report[key]: - lines.append(f"- PR #{item['pull_request']}: {item['title']} ({item['detail']})") - if report["duplicate_scope_groups"]: - lines.append("") - lines.append("Likely duplicate bounty scope") - for item in report["duplicate_scope_groups"]: - prs = ", ".join(f"#{number}" for number in item["pull_requests"]) - lines.append(f"- Bounty #{item['bounty']}: {item['scope']} ({prs})") - return "\n".join(lines) - - -def _single_line(value: Any) -> str: - return " ".join(str(value or "").split()) - - -def _markdown_pr_issue(item: dict[str, Any]) -> str: - pr_label = f"PR #{item['pull_request']}" - url = item.get("url") - if isinstance(url, str) and url: - pr_label = f"[{pr_label}]({url})" - return f"- {pr_label}: {_single_line(item['title'])} ({_single_line(item['detail'])})" - - -def format_markdown_report(report: dict[str, Any]) -> str: - lines = ["## PR Queue Health Summary", ""] - for key, value in report["summary"].items(): - lines.append(f"- **{key.replace('_', ' ')}**: {value}") - if not has_queue_issues(report): - lines.append("") - lines.append("No queue-health issues found.") - return "\n".join(lines) - - for title, key in ISSUE_SECTIONS: - if report[key]: - lines.append("") - lines.append(f"### {title}") - for item in report[key]: - lines.append(_markdown_pr_issue(item)) - if report["duplicate_scope_groups"]: - lines.append("") - lines.append("### Likely duplicate bounty scope") - for item in report["duplicate_scope_groups"]: - prs = ", ".join(f"#{number}" for number in item["pull_requests"]) - lines.append(f"- Bounty #{item['bounty']}: {_single_line(item['scope'])} ({prs})") - return "\n".join(lines) - - -def _run_gh_json(args: list[str]) -> Any: - command = " ".join(args) - try: - completed = subprocess.run( - args, - check=True, - capture_output=True, - text=True, - encoding="utf-8", - errors="replace", - timeout=GH_TIMEOUT_SECONDS, - ) - except subprocess.TimeoutExpired as exc: - raise RuntimeError(f"gh command timed out after {GH_TIMEOUT_SECONDS}s: {command}") from exc - except subprocess.CalledProcessError as exc: - raise RuntimeError( - "gh command failed " - f"(exit {exc.returncode}): {command}\n" - f"stdout:\n{exc.stdout or exc.output or ''}\n" - f"stderr:\n{exc.stderr or ''}" - ) from exc - return json.loads(completed.stdout) - - -def load_live_queue(repo: str) -> dict[str, Any]: - prs = _run_gh_json( - [ - "gh", - "pr", - "list", - "--repo", - repo, - "--state", - "open", - "--limit", - str(GH_PR_SAFETY_CAP), - "--json", - "number,title,url,body,labels,mergeStateStatus", - ] - ) - if len(prs) >= GH_PR_SAFETY_CAP: - raise RuntimeError( - f"gh pr list reached the {GH_PR_SAFETY_CAP} item safety cap; " - "use an API-paginated collector before trusting this live report" - ) - referenced_issues = sorted( - {ref for pr in prs if isinstance(pr, dict) for ref in _bounty_refs(pr)} - ) - referenced_issue_numbers = set(referenced_issues) - issues = _run_gh_json( - [ - "gh", - "issue", - "list", - "--repo", - repo, - "--state", - "all", - "--limit", - str(GH_ISSUE_SAFETY_CAP), - "--json", - "number,title,state,labels", - ] - ) - if len(issues) >= GH_ISSUE_SAFETY_CAP: - raise RuntimeError( - f"gh issue list reached the {GH_ISSUE_SAFETY_CAP} item safety cap; " - "use an API-paginated collector before trusting this live report" - ) - issues_by_number = { - int(issue["number"]): issue - for issue in issues - if isinstance(issue, dict) and isinstance(issue.get("number"), int) - } - bounty_numbers = { - int(issue["number"]) - for issue in issues - if isinstance(issue, dict) - and isinstance(issue.get("number"), int) - and "bounty" in str(issue.get("title", "")).lower() - } | referenced_issue_numbers - bounty_issues = [] - for number in sorted(bounty_numbers): - issue = issues_by_number.get(number, {"number": number}) - viewed_issue = issue - include_comments = number in referenced_issue_numbers - if include_comments: - try: - viewed_issue = _run_gh_json( - [ - "gh", - "issue", - "view", - str(number), - "--repo", - repo, - "--comments", - "--json", - "number,title,state,labels,comments", - ] - ) - except RuntimeError: - if number not in issues_by_number: - continue - bounty_issue = { - "number": int(viewed_issue["number"]), - "title": viewed_issue.get("title"), - "state": viewed_issue.get("state"), - "labels": viewed_issue.get("labels", []), - "awards_remaining": 1 if viewed_issue.get("state") == "OPEN" else 0, - } - if include_comments: - bounty_issue["comments"] = viewed_issue.get("comments", []) - bounty_issues.append(bounty_issue) - return {"pull_requests": prs, "bounties": bounty_issues} - - -def _load_input(path: str) -> dict[str, Any]: - with open(path, encoding="utf-8") as handle: - data = json.load(handle) - if not isinstance(data, dict): - raise ValueError("queue input must be a JSON object") - return data - - -def main(argv: list[str] | None = None) -> int: - parser = argparse.ArgumentParser(description="Summarize MergeWork open PR queue health.") - source = parser.add_mutually_exclusive_group(required=True) - source.add_argument("--input", help="Read queue data from a JSON fixture file.") - source.add_argument( - "--repo", - help="Collect live queue data with gh, for example ramimbo/mergework.", - ) - parser.add_argument("--format", choices=["json", "markdown", "text"], default="text") - parser.add_argument("--fail-on-issues", action="store_true") - args = parser.parse_args(argv) - - data = _load_input(args.input) if args.input else load_live_queue(args.repo) - report = analyze_queue(data) - if args.format == "json": - print(json.dumps(report, indent=2, sort_keys=True)) - elif args.format == "markdown": - print(format_markdown_report(report)) - else: - print(format_text_report(report)) - return 1 if args.fail_on_issues and has_queue_issues(report) else 0 - - -if __name__ == "__main__": - raise SystemExit(main(sys.argv[1:])) +from __future__ import annotations + +import argparse +import json +import re +import subprocess +import sys +from collections import defaultdict +from pathlib import Path +from typing import Any + +if __package__ in {None, ""}: + sys.path.insert(0, str(Path(__file__).resolve().parents[1])) + +from scripts.bounty_refs import BOUNTY_REF_RE + +GH_TIMEOUT_SECONDS = 30 + +NOISY_TITLE_PREFIX_RE = re.compile(r"^\s*(?:\[[^\]]+\]\s*)+") +UNSTABLE_MERGE_STATES = {"blocked", "conflicting", "dirty", "unknown", "unstable"} +GH_PR_SAFETY_CAP = 201 +GH_ISSUE_SAFETY_CAP = 201 +MAX_BOUNTY_REF = 2**63 - 1 +ISSUE_SECTIONS = ( + ("Closed or exhausted bounty references", "closed_bounty_references"), + ("Non-live bounty references", "non_live_bounty_references"), + ("Missing bounty references", "missing_bounty_references"), + ("Dirty or unstable merge state", "dirty_or_unstable_merge_state"), + ("Needs info", "needs_info"), +) + + +def _labels(raw: dict[str, Any]) -> list[str]: + labels = raw.get("labels", []) + names: list[str] = [] + for label in labels: + if isinstance(label, str): + names.append(label) + elif isinstance(label, dict) and isinstance(label.get("name"), str): + names.append(label["name"]) + return names + + +def _comments(raw: dict[str, Any]) -> list[str]: + comments = raw.get("comments", []) + bodies: list[str] = [] + for comment in comments: + if isinstance(comment, str): + bodies.append(comment) + elif isinstance(comment, dict) and isinstance(comment.get("body"), str): + bodies.append(comment["body"]) + return bodies + + +def _merge_state(raw: dict[str, Any]) -> str: + for key in ("merge_state", "mergeStateStatus", "mergeable", "mergeable_state"): + value = raw.get(key) + if isinstance(value, str) and value: + return value.lower() + return "unknown" + + +def _scope_key(raw: dict[str, Any]) -> str: + explicit = raw.get("scope") + if isinstance(explicit, str) and explicit.strip(): + return " ".join(explicit.lower().split()) + title = str(raw.get("title") or "") + title = NOISY_TITLE_PREFIX_RE.sub("", title) + return " ".join(title.lower().split()) + + +def _bounty_refs(raw: dict[str, Any]) -> list[int]: + explicit = raw.get("bounty_refs") + if isinstance(explicit, list): + refs = [item for item in explicit if isinstance(item, int)] + if refs: + return sorted(set(refs)) + text = "\n".join( + str(raw.get(key) or "") + for key in ("title", "body", "description") + if raw.get(key) is not None + ) + found_refs: set[int] = set() + for match in BOUNTY_REF_RE.findall(text): + try: + ref = int(match) + except ValueError: + continue + if ref <= MAX_BOUNTY_REF: + found_refs.add(ref) + return sorted(found_refs) + + +def _is_open_bounty(raw: dict[str, Any]) -> bool: + state = str(raw.get("state") or "").lower() + remaining = raw.get("awards_remaining", raw.get("awardsRemaining")) + if state and state != "open": + return False + if remaining is not None: + try: + return int(remaining) > 0 + except (TypeError, ValueError): + return False + return state == "open" + + +def _bounty_liveness(raw: dict[str, Any]) -> tuple[bool, str]: + if not _is_open_bounty(raw): + return False, "closed or exhausted" + if "labels" in raw and not any(label.lower() == "mrwk:bounty" for label in _labels(raw)): + return False, "missing mrwk:bounty label" + if "comments" in raw and not any("Reserved on MergeWork:" in body for body in _comments(raw)): + return False, "missing Reserved on MergeWork claims-open comment" + return True, "live" + + +def _issue(pr: dict[str, Any], reason: str, detail: str) -> dict[str, Any]: + return { + "pull_request": pr["number"], + "title": pr["title"], + "url": pr.get("url"), + "reason": reason, + "detail": detail, + } + + +def analyze_queue(data: dict[str, Any]) -> dict[str, Any]: + bounties = { + int(item["number"]): item + for item in data.get("bounties", []) + if isinstance(item, dict) and isinstance(item.get("number"), int) + } + prs = [item for item in data.get("pull_requests", []) if isinstance(item, dict)] + normalized_prs: list[dict[str, Any]] = [] + for pr in prs: + if not isinstance(pr.get("number"), int): + continue + normalized_prs.append( + { + "number": int(pr["number"]), + "title": str(pr.get("title") or ""), + "url": pr.get("url"), + "refs": _bounty_refs(pr), + "labels": _labels(pr), + "merge_state": _merge_state(pr), + "scope": _scope_key(pr), + } + ) + + closed_bounty_references: list[dict[str, Any]] = [] + non_live_bounty_references: list[dict[str, Any]] = [] + missing_bounty_references: list[dict[str, Any]] = [] + dirty_or_unstable_merge_state: list[dict[str, Any]] = [] + needs_info: list[dict[str, Any]] = [] + duplicate_groups: dict[tuple[int, str], list[int]] = defaultdict(list) + + for pr in normalized_prs: + if not pr["refs"]: + missing_bounty_references.append( + _issue( + pr, + "missing_bounty_reference", + "No bounty reference such as Bounty #, Refs #, " + "Fixes #, or /claim # found", + ) + ) + for ref in pr["refs"]: + bounty = bounties.get(ref) + if bounty is None: + closed_bounty_references.append( + _issue( + pr, + "unknown_bounty_reference", + f"Referenced bounty #{ref} was not in input", + ) + ) + elif not _is_open_bounty(bounty): + closed_bounty_references.append( + _issue( + pr, + "closed_or_exhausted_bounty", + f"Referenced bounty #{ref} is not payable", + ) + ) + else: + is_live, reason = _bounty_liveness(bounty) + if not is_live: + non_live_bounty_references.append( + _issue( + pr, + "non_live_bounty_reference", + f"Referenced bounty #{ref} is not live claimable: {reason}", + ) + ) + duplicate_groups[(ref, pr["scope"])].append(pr["number"]) + if pr["merge_state"] in UNSTABLE_MERGE_STATES: + dirty_or_unstable_merge_state.append( + _issue(pr, "dirty_or_unstable_merge_state", f"Merge state is {pr['merge_state']}") + ) + if any(label.lower() == "mrwk:needs-info" for label in pr["labels"]): + needs_info.append(_issue(pr, "mrwk_needs_info", "PR has mrwk:needs-info label")) + + duplicate_scope_groups = [ + {"bounty": bounty, "scope": scope, "pull_requests": sorted(numbers)} + for (bounty, scope), numbers in sorted(duplicate_groups.items()) + if len(numbers) > 1 and scope + ] + closed_or_exhausted_count = sum( + 1 for bounty in bounties.values() if not _is_open_bounty(bounty) + ) + live_bounty_count = sum(1 for bounty in bounties.values() if _bounty_liveness(bounty)[0]) + non_live_bounty_count = sum( + 1 + for bounty in bounties.values() + if _is_open_bounty(bounty) and not _bounty_liveness(bounty)[0] + ) + report = { + "summary": { + "pull_requests": len(normalized_prs), + "open_bounties": len(bounties) - closed_or_exhausted_count, + "live_bounties": live_bounty_count, + "non_live_bounties": non_live_bounty_count, + "closed_or_exhausted_bounties": closed_or_exhausted_count, + "closed_bounty_references": len(closed_bounty_references), + "non_live_bounty_references": len(non_live_bounty_references), + "missing_bounty_references": len(missing_bounty_references), + "dirty_or_unstable_merge_state": len(dirty_or_unstable_merge_state), + "needs_info": len(needs_info), + "duplicate_scope_groups": len(duplicate_scope_groups), + }, + "closed_bounty_references": closed_bounty_references, + "non_live_bounty_references": non_live_bounty_references, + "missing_bounty_references": missing_bounty_references, + "dirty_or_unstable_merge_state": dirty_or_unstable_merge_state, + "needs_info": needs_info, + "duplicate_scope_groups": duplicate_scope_groups, + } + return report + + +def has_queue_issues(report: dict[str, Any]) -> bool: + return any( + report[key] + for key in ( + "closed_bounty_references", + "non_live_bounty_references", + "missing_bounty_references", + "dirty_or_unstable_merge_state", + "needs_info", + "duplicate_scope_groups", + ) + ) + + +def format_text_report(report: dict[str, Any]) -> str: + lines = ["PR queue health summary"] + for key, value in report["summary"].items(): + lines.append(f"- {key.replace('_', ' ')}: {value}") + if not has_queue_issues(report): + lines.append("") + lines.append("No queue-health issues found.") + return "\n".join(lines) + for title, key in ISSUE_SECTIONS: + if report[key]: + lines.append("") + lines.append(title) + for item in report[key]: + lines.append(f"- PR #{item['pull_request']}: {item['title']} ({item['detail']})") + if report["duplicate_scope_groups"]: + lines.append("") + lines.append("Likely duplicate bounty scope") + for item in report["duplicate_scope_groups"]: + prs = ", ".join(f"#{number}" for number in item["pull_requests"]) + lines.append(f"- Bounty #{item['bounty']}: {item['scope']} ({prs})") + return "\n".join(lines) + + +def _single_line(value: Any) -> str: + return " ".join(str(value or "").split()) + + +def _markdown_pr_issue(item: dict[str, Any]) -> str: + pr_label = f"PR #{item['pull_request']}" + url = item.get("url") + if isinstance(url, str) and url: + pr_label = f"[{pr_label}]({url})" + return f"- {pr_label}: {_single_line(item['title'])} ({_single_line(item['detail'])})" + + +def format_markdown_report(report: dict[str, Any]) -> str: + lines = ["## PR Queue Health Summary", ""] + for key, value in report["summary"].items(): + lines.append(f"- **{key.replace('_', ' ')}**: {value}") + if not has_queue_issues(report): + lines.append("") + lines.append("No queue-health issues found.") + return "\n".join(lines) + + for title, key in ISSUE_SECTIONS: + if report[key]: + lines.append("") + lines.append(f"### {title}") + for item in report[key]: + lines.append(_markdown_pr_issue(item)) + if report["duplicate_scope_groups"]: + lines.append("") + lines.append("### Likely duplicate bounty scope") + for item in report["duplicate_scope_groups"]: + prs = ", ".join(f"#{number}" for number in item["pull_requests"]) + lines.append(f"- Bounty #{item['bounty']}: {_single_line(item['scope'])} ({prs})") + return "\n".join(lines) + + +def _run_gh_json(args: list[str]) -> Any: + command = " ".join(args) + try: + completed = subprocess.run( + args, + check=True, + capture_output=True, + text=True, + encoding="utf-8", + errors="replace", + timeout=GH_TIMEOUT_SECONDS, + ) + except subprocess.TimeoutExpired as exc: + raise RuntimeError(f"gh command timed out after {GH_TIMEOUT_SECONDS}s: {command}") from exc + except subprocess.CalledProcessError as exc: + raise RuntimeError( + "gh command failed " + f"(exit {exc.returncode}): {command}\n" + f"stdout:\n{exc.stdout or exc.output or ''}\n" + f"stderr:\n{exc.stderr or ''}" + ) from exc + return json.loads(completed.stdout) + + +def load_live_queue(repo: str) -> dict[str, Any]: + prs = _run_gh_json( + [ + "gh", + "pr", + "list", + "--repo", + repo, + "--state", + "open", + "--limit", + str(GH_PR_SAFETY_CAP), + "--json", + "number,title,url,body,labels,mergeStateStatus", + ] + ) + if len(prs) >= GH_PR_SAFETY_CAP: + raise RuntimeError( + f"gh pr list reached the {GH_PR_SAFETY_CAP} item safety cap; " + "use an API-paginated collector before trusting this live report" + ) + referenced_issues = sorted( + {ref for pr in prs if isinstance(pr, dict) for ref in _bounty_refs(pr)} + ) + referenced_issue_numbers = set(referenced_issues) + issues = _run_gh_json( + [ + "gh", + "issue", + "list", + "--repo", + repo, + "--state", + "all", + "--limit", + str(GH_ISSUE_SAFETY_CAP), + "--json", + "number,title,state,labels", + ] + ) + if len(issues) >= GH_ISSUE_SAFETY_CAP: + raise RuntimeError( + f"gh issue list reached the {GH_ISSUE_SAFETY_CAP} item safety cap; " + "use an API-paginated collector before trusting this live report" + ) + issues_by_number = { + int(issue["number"]): issue + for issue in issues + if isinstance(issue, dict) and isinstance(issue.get("number"), int) + } + bounty_numbers = { + int(issue["number"]) + for issue in issues + if isinstance(issue, dict) + and isinstance(issue.get("number"), int) + and "bounty" in str(issue.get("title", "")).lower() + } | referenced_issue_numbers + bounty_issues = [] + for number in sorted(bounty_numbers): + issue = issues_by_number.get(number, {"number": number}) + viewed_issue = issue + include_comments = number in referenced_issue_numbers + if include_comments: + try: + viewed_issue = _run_gh_json( + [ + "gh", + "issue", + "view", + str(number), + "--repo", + repo, + "--comments", + "--json", + "number,title,state,labels,comments", + ] + ) + except RuntimeError: + if number not in issues_by_number: + continue + bounty_issue = { + "number": int(viewed_issue["number"]), + "title": viewed_issue.get("title"), + "state": viewed_issue.get("state"), + "labels": viewed_issue.get("labels", []), + "awards_remaining": 1 if viewed_issue.get("state") == "OPEN" else 0, + } + if include_comments: + bounty_issue["comments"] = viewed_issue.get("comments", []) + bounty_issues.append(bounty_issue) + return {"pull_requests": prs, "bounties": bounty_issues} + + +def _load_input(path: str) -> dict[str, Any]: + with open(path, encoding="utf-8") as handle: + data = json.load(handle) + if not isinstance(data, dict): + raise ValueError("queue input must be a JSON object") + return data + + +def _require_non_empty_arg(parser: argparse.ArgumentParser, option_name: str, value: str) -> str: + stripped = value.strip() + if not stripped: + parser.error(f"{option_name} must be a non-empty value") + if stripped != value: + parser.error(f"{option_name} must not include leading or trailing whitespace") + return value + + +def main(argv: list[str] | None = None) -> int: + parser = argparse.ArgumentParser(description="Summarize MergeWork open PR queue health.") + source = parser.add_mutually_exclusive_group(required=True) + source.add_argument("--input", help="Read queue data from a JSON fixture file.") + source.add_argument( + "--repo", + help="Collect live queue data with gh, for example ramimbo/mergework.", + ) + parser.add_argument("--format", choices=["json", "markdown", "text"], default="text") + parser.add_argument("--fail-on-issues", action="store_true") + args = parser.parse_args(argv) + + if args.input is not None: + data = _load_input(_require_non_empty_arg(parser, "--input", args.input)) + else: + data = load_live_queue(_require_non_empty_arg(parser, "--repo", args.repo)) + report = analyze_queue(data) + if args.format == "json": + print(json.dumps(report, indent=2, sort_keys=True)) + elif args.format == "markdown": + print(format_markdown_report(report)) + else: + print(format_text_report(report)) + return 1 if args.fail_on_issues and has_queue_issues(report) else 0 + + +if __name__ == "__main__": + raise SystemExit(main(sys.argv[1:])) diff --git a/scripts/proposed_work_triage.py b/scripts/proposed_work_triage.py index c33100b0..b64d3cfc 100644 --- a/scripts/proposed_work_triage.py +++ b/scripts/proposed_work_triage.py @@ -1,566 +1,586 @@ -from __future__ import annotations - -import argparse -import json -import re -import subprocess -import sys -import urllib.error -import urllib.parse -import urllib.request -from collections import defaultdict -from pathlib import Path -from typing import Any, cast - -if __package__ in {None, ""}: - sys.path.insert(0, str(Path(__file__).resolve().parents[1])) - -from scripts.api_host_args import public_api_host - - -def _positive_int(value: str) -> int: - """Argparse type that rejects --limit values below 1. - - A non-positive limit silently changes result size via Python slice - semantics (e.g. ``[:0]`` returns nothing, ``[:-1]`` drops the last row), - producing a successful but misleading report instead of a clear error. - """ - try: - parsed = int(value) - except (TypeError, ValueError): - raise argparse.ArgumentTypeError(f"expected an integer, got {value!r}") from None - if parsed < 1: - raise argparse.ArgumentTypeError(f"must be >= 1, got {parsed}") - return parsed - - -REQUIRED_TEMPLATE_SECTIONS = { - "problem": ("problem", "current problem"), - "evidence": ("evidence", "current evidence"), - "proposed_work": ("proposed work", "proposal"), - "value": ("value", "expected value"), - "acceptance": ("acceptance", "acceptance criteria", "possible acceptance criteria"), - "tests": ( - "tests", - "evidence or tests required", - "tests required", - "test notes", - "verification", - ), - "duplicate_search": ("duplicate search", "duplicates"), - "out_of_scope": ("out of scope",), -} -ROUTED_RE = re.compile( - r"\b(accepted by|accepted and|routed|created bounty|create_bounty|" - r"treasury proposal|reserved on mergework)\b", - re.IGNORECASE, -) -REJECTED_RE = re.compile( - r"\b(rejected|declined|not accepted|outside accepted scope)\b", re.IGNORECASE -) -NON_LIVE_CONFUSION_RE = re.compile( - r"(claimable now|already paid|guaranteed payout|cash[- ]?out|off[- ]?ramp)", - re.IGNORECASE, -) -BOUNTY_TITLE_RE = re.compile(r"^\s*MRWK bounty\s*:", re.IGNORECASE) -BOUNTY_TEMPLATE_HEADING_RE = re.compile(r"(^|\n)\s*#+\s*MRWK bounty\b", re.IGNORECASE) -WORD_RE = re.compile(r"[a-z0-9]+") -STOPWORDS = { - "add", - "and", - "bounty", - "for", - "from", - "issue", - "mrwk", - "proposed", - "request", - "the", - "to", - "work", -} -GH_TIMEOUT_SECONDS = 30 -HTTP_TIMEOUT_SECONDS = 30 -DEFAULT_API_HOST = "https://api.mrwk.online" -DEFAULT_PAYMENT_BOUNTY_ISSUE_NUMBERS = (649, 722) -LIVE_ISSUE_SEARCHES = ( - "label:proposed-work", - '"proposed work"', -) -READ_ONLY_GH_COMMANDS = { - ("issue", "list"), - ("issue", "view"), -} - - -def _labels(raw: dict[str, Any]) -> list[str]: - labels = raw.get("labels", []) - names: list[str] = [] - for label in labels: - if isinstance(label, str): - names.append(label) - elif isinstance(label, dict) and isinstance(label.get("name"), str): - names.append(label["name"]) - return names - - -def _comments(raw: dict[str, Any]) -> list[str]: - comments = raw.get("comments", []) - bodies: list[str] = [] - for comment in comments: - if isinstance(comment, str): - bodies.append(comment) - elif isinstance(comment, dict) and isinstance(comment.get("body"), str): - bodies.append(comment["body"]) - return bodies - - -def _combined_text(issue: dict[str, Any]) -> str: - parts = [str(issue.get("title") or ""), str(issue.get("body") or "")] - parts.extend(_comments(issue)) - return "\n".join(parts) - - -def _is_non_intake_bounty_issue(issue: dict[str, Any]) -> bool: - labels = {label.lower() for label in _labels(issue)} - if "mrwk:bounty" in labels: - return True - title = str(issue.get("title") or "") - if BOUNTY_TITLE_RE.search(title): - return True - body = str(issue.get("body") or "") - return bool(BOUNTY_TEMPLATE_HEADING_RE.search(body)) - - -def _has_section(body: str, aliases: tuple[str, ...]) -> bool: - lowered = body.lower() - for alias in aliases: - if re.search(rf"(^|\n)\s*#+\s*{re.escape(alias)}\b", lowered): - return True - if re.search(rf"(^|\n)\s*(?:-\s*)?\*\*{re.escape(alias)}\*\*", lowered): - return True - return False - - -def _missing_sections(body: str) -> list[str]: - return [ - key - for key, aliases in REQUIRED_TEMPLATE_SECTIONS.items() - if not _has_section(body, aliases) - ] - - -def _token_set(issue: dict[str, Any]) -> set[str]: - text = str(issue.get("title") or "").lower() - return {word for word in WORD_RE.findall(text) if len(word) > 3 and word not in STOPWORDS} - - -def _is_vague(body: str, missing_sections: list[str]) -> bool: - return len(body.split()) < 45 or len(missing_sections) >= 4 - - -def _has_non_live_confusion(text: str) -> bool: - for line in text.splitlines(): - lowered = line.lower() - is_guardrail = ( - "do not" in lowered or "don't" in lowered or lowered.lstrip().startswith("- no ") - ) - if "/claim" in lowered and not is_guardrail: - return True - if NON_LIVE_CONFUSION_RE.search(lowered) and not is_guardrail: - return True - return False - - -def _payment_index(data: dict[str, Any]) -> dict[str, dict[str, Any]]: - index: dict[str, dict[str, Any]] = {} - for item in data.get("payments", []): - if not isinstance(item, dict): - continue - submission_url = item.get("submission_url") - if isinstance(submission_url, str) and submission_url: - index[submission_url.rstrip("/")] = item - for bounty in data.get("bounties", []): - if not isinstance(bounty, dict): - continue - for proposal in bounty.get("pending_payout_proposals", []) or []: - if not isinstance(proposal, dict): - continue - submission_url = proposal.get("submission_url") - if isinstance(submission_url, str) and submission_url: - index[submission_url.rstrip("/")] = { - "state": "pending", - "source": "pending_payout_proposal", - "proposal_id": proposal.get("proposal_id"), - "accepted_by": proposal.get("accepted_by"), - "executes_after": proposal.get("executes_after"), - } - awards: list[Any] = [] - for award_key in ("awards", "accepted_awards"): - raw_awards = bounty.get(award_key) or [] - if isinstance(raw_awards, list): - awards.extend(raw_awards) - for award in awards: - if not isinstance(award, dict): - continue - submission_url = award.get("submission_url") - if isinstance(submission_url, str) and submission_url: - index[submission_url.rstrip("/")] = { - "state": "paid", - "source": "proof_backed_award", - "proof_url": award.get("proof_url"), - "ledger_sequence": award.get("ledger_sequence"), - } - return index - - -def _payment_status(issue: dict[str, Any], payments: dict[str, dict[str, Any]]) -> dict[str, Any]: - issue_url = str(issue.get("url") or "").rstrip("/") - payment = payments.get(issue_url) - if payment: - return payment - for comment in _comments(issue): - for url in re.findall(r"https://github\.com/[^\s)]+", comment): - payment = payments.get(url.rstrip("/")) - if payment: - return payment - return {"state": "none"} - - -def _normalize_issue( - raw: dict[str, Any], payments: dict[str, dict[str, Any]] -) -> dict[str, Any] | None: - number = raw.get("number") - if not isinstance(number, int): - return None - body = str(raw.get("body") or "") - labels = _labels(raw) - missing = _missing_sections(body) - text = _combined_text(raw) - warnings: list[str] = [] - if "proposed-work" not in {label.lower() for label in labels}: - warnings.append("missing_proposed_work_label") - if missing: - warnings.append("missing_template_sections") - if _is_vague(body, missing): - warnings.append("vague_or_under_specified") - if ROUTED_RE.search(text): - warnings.append("already_routed_or_accepted") - if REJECTED_RE.search(text): - warnings.append("rejected_or_out_of_scope") - if _has_non_live_confusion(text): - warnings.append("non_live_bounty_confusion") - payment = _payment_status(raw, payments) - if payment.get("state") == "pending": - warnings.append("accepted_pending_payout") - elif payment.get("state") == "paid": - warnings.append("proof_backed_paid") - return { - "number": number, - "title": str(raw.get("title") or ""), - "url": raw.get("url"), - "state": str(raw.get("state") or ""), - "labels": labels, - "missing_sections": missing, - "warnings": warnings, - "payment_status": payment, - "tokens": sorted(_token_set(raw)), - } - - -def _related_groups(proposals: list[dict[str, Any]]) -> list[dict[str, Any]]: - grouped: dict[tuple[str, ...], set[int]] = defaultdict(set) - for index, left in enumerate(proposals): - left_tokens = set(left["tokens"]) - if len(left_tokens) < 3: - continue - for right in proposals[index + 1 :]: - right_tokens = set(right["tokens"]) - common = left_tokens & right_tokens - if len(common) < 3: - continue - if len(common) / min(len(left_tokens), len(right_tokens)) < 0.6: - continue - grouped[tuple(sorted(common))].update({left["number"], right["number"]}) - groups: list[dict[str, Any]] = [] - for tokens, numbers in grouped.items(): - if len(numbers) < 2: - continue - groups.append( - { - "issues": sorted(numbers), - "evidence_tokens": list(tokens), - "suggested_scope": " / ".join(tokens[:6]), - } - ) - return sorted(groups, key=lambda item: (-len(item["issues"]), item["issues"])) - - -def _mark_duplicate_warnings( - proposals: list[dict[str, Any]], related_groups: list[dict[str, Any]] -) -> None: - grouped_numbers = { - number - for group in related_groups - for number in group.get("issues", []) - if isinstance(number, int) - } - for proposal in proposals: - if ( - proposal["number"] in grouped_numbers - and "duplicate_looking_related_proposal" not in proposal["warnings"] - ): - proposal["warnings"].append("duplicate_looking_related_proposal") - - -def analyze_proposed_work(data: dict[str, Any]) -> dict[str, Any]: - payments = _payment_index(data) - data_warnings = [ - warning for warning in data.get("data_warnings", []) if isinstance(warning, str) - ] - proposals = [ - proposal - for raw in data.get("issues", []) - if isinstance(raw, dict) and not _is_non_intake_bounty_issue(raw) - for proposal in [_normalize_issue(raw, payments)] - if proposal is not None - ] - related_groups = _related_groups(proposals) - _mark_duplicate_warnings(proposals, related_groups) - warning_counts: dict[str, int] = defaultdict(int) - payment_counts: dict[str, int] = defaultdict(int) - for proposal in proposals: - payment_counts[str(proposal["payment_status"].get("state") or "none")] += 1 - for warning in proposal["warnings"]: - warning_counts[warning] += 1 - return { - "summary": { - "proposed_work_issues": len(proposals), - "warning_counts": dict(sorted(warning_counts.items())), - "payment_counts": dict(sorted(payment_counts.items())), - "related_groups": len(related_groups), - "data_warnings": data_warnings, - }, - "proposals": proposals, - "related_groups": related_groups, - } - - -def format_markdown(report: dict[str, Any]) -> str: - lines = ["# Proposed Work Triage", ""] - summary = report["summary"] - lines.append(f"- proposed work issues: {summary['proposed_work_issues']}") - lines.append(f"- related groups: {summary['related_groups']}") - for state, count in summary["payment_counts"].items(): - lines.append(f"- {state} payment status: {count}") - for warning in summary.get("data_warnings", []): - lines.append(f"- data warning: {warning}") - lines.append("") - lines.append("## Issues") - for item in report["proposals"]: - warnings = ", ".join(item["warnings"]) if item["warnings"] else "none" - missing = ", ".join(item["missing_sections"]) if item["missing_sections"] else "none" - payment = item["payment_status"].get("state", "none") - lines.append(f"- #{item['number']} {item['title']} ({payment})") - lines.append(f" - warnings: {warnings}") - lines.append(f" - missing sections: {missing}") - if item.get("url"): - lines.append(f" - url: {item['url']}") - if report["related_groups"]: - lines.append("") - lines.append("## Related Groups") - for group in report["related_groups"]: - issues = ", ".join(f"#{number}" for number in group["issues"]) - evidence = ", ".join(group["evidence_tokens"]) - lines.append(f"- {issues}: {group['suggested_scope']} ({evidence})") - return "\n".join(lines) - - -def _run_gh(args: list[str]) -> Any: - if len(args) < 2 or tuple(args[:2]) not in READ_ONLY_GH_COMMANDS: - allowed = ", ".join(" ".join(command) for command in sorted(READ_ONLY_GH_COMMANDS)) - raise RuntimeError(f"live mode only permits read-only gh commands: {allowed}") - try: - result = subprocess.run( - ["gh", *args], - capture_output=True, - text=True, - timeout=GH_TIMEOUT_SECONDS, - check=False, - ) - except subprocess.TimeoutExpired as exc: - raise RuntimeError(f"gh command timed out after {GH_TIMEOUT_SECONDS}s") from exc - if result.returncode != 0: - raise RuntimeError(result.stderr.strip() or result.stdout.strip()) - try: - return json.loads(result.stdout) - except json.JSONDecodeError as exc: - excerpt = result.stdout[:200].strip() - raise RuntimeError(f"gh returned invalid JSON: {excerpt}") from exc - - -def _gh_issue_search(repo: str, query: str, limit: int) -> list[dict[str, Any]]: - rows = _run_gh( - [ - "issue", - "list", - "--repo", - repo, - "--state", - "all", - "--limit", - str(limit), - "--search", - query, - "--json", - "number", - ] - ) - if not isinstance(rows, list): - raise RuntimeError("gh issue list returned non-list JSON") - return [cast(dict[str, Any], row) for row in rows if isinstance(row, dict)] - - -def _load_public_json(url: str) -> Any: - request = urllib.request.Request(url, headers={"User-Agent": "mergework-proposed-work-triage"}) - with urllib.request.urlopen(request, timeout=HTTP_TIMEOUT_SECONDS) as response: - return json.load(response) - - -def _load_public_bounty_issue( - api_host: str, issue_number: int -) -> tuple[list[dict[str, Any]], list[str]]: - query = urllib.parse.urlencode({"issue_number": str(issue_number), "limit": "5"}) - warnings: list[str] = [] - try: - rows = _load_public_json(f"{api_host.rstrip('/')}/api/v1/bounties?{query}") - except (OSError, urllib.error.URLError, json.JSONDecodeError) as exc: - warnings.append( - "payment_state_incomplete: failed to load public bounty list " - f"for issue #{issue_number} ({type(exc).__name__})" - ) - return [], warnings - bounties: list[dict[str, Any]] = [] - for row in rows if isinstance(rows, list) else []: - if not isinstance(row, dict): - continue - bounty_id = row.get("id") - if not isinstance(bounty_id, int): - continue - try: - detail = _load_public_json(f"{api_host.rstrip('/')}/api/v1/bounties/{bounty_id}") - except (OSError, urllib.error.URLError, json.JSONDecodeError) as exc: - warnings.append( - "payment_state_incomplete: failed to load public bounty " - f"detail for bounty {bounty_id}; using list row only ({type(exc).__name__})" - ) - detail = row - if isinstance(detail, dict): - bounties.append(detail) - return bounties, warnings - - -def _load_public_bounty_issues( - api_host: str, issue_numbers: list[int] | tuple[int, ...] -) -> tuple[list[dict[str, Any]], list[str]]: - bounties: list[dict[str, Any]] = [] - warnings: list[str] = [] - for issue_number in issue_numbers: - issue_bounties, issue_warnings = _load_public_bounty_issue(api_host, issue_number) - bounties.extend(issue_bounties) - warnings.extend(issue_warnings) - return bounties, warnings - - -def _positive_issue_number(value: str) -> int: - try: - issue_number = int(value) - except ValueError as exc: - raise argparse.ArgumentTypeError("payment bounty issue must be an integer") from exc - if issue_number <= 0: - raise argparse.ArgumentTypeError("payment bounty issue must be positive") - return issue_number - - -def load_live_issues( - repo: str, - limit: int, - api_host: str = DEFAULT_API_HOST, - payment_bounty_issue_numbers: list[int] | tuple[int, ...] = ( - DEFAULT_PAYMENT_BOUNTY_ISSUE_NUMBERS - ), -) -> dict[str, Any]: - rows_by_number: dict[int, dict[str, Any]] = {} - per_search_limit = max(1, limit) - for query in LIVE_ISSUE_SEARCHES: - for row in _gh_issue_search(repo, query, per_search_limit): - number = row.get("number") - if isinstance(number, int): - rows_by_number[number] = row - issues: list[dict[str, Any]] = [] - for number in sorted(rows_by_number, reverse=True)[:limit]: - issue = _run_gh( - [ - "issue", - "view", - str(number), - "--repo", - repo, - "--comments", - "--json", - "number,title,url,body,labels,state,comments,author,createdAt,updatedAt", - ] - ) - issues.append(issue) - bounties, data_warnings = _load_public_bounty_issues(api_host, payment_bounty_issue_numbers) - return { - "issues": issues, - "bounties": bounties, - "data_warnings": data_warnings, - } - - -def main(argv: list[str] | None = None) -> int: - parser = argparse.ArgumentParser(description="Read-only proposed-work intake triage report") - source = parser.add_mutually_exclusive_group(required=True) - source.add_argument("--input", type=Path, help="Offline JSON fixture with issues/payments") - source.add_argument("--repo", help="GitHub repo for read-only gh live mode") - parser.add_argument("--limit", type=_positive_int, default=50) - parser.add_argument("--format", choices=("json", "markdown"), default="markdown") - parser.add_argument("--api-host", default=DEFAULT_API_HOST, type=public_api_host) - parser.add_argument( - "--payment-bounty-issue", - action="append", - default=None, - type=_positive_issue_number, - help=( - "GitHub issue number for an accepted-proposed-work bounty whose public " - "payment state should be loaded. Repeat during round transitions." - ), - ) - args = parser.parse_args(argv) - if args.input and args.payment_bounty_issue: - parser.error("--payment-bounty-issue is only valid in live --repo mode") - - data = ( - json.loads(args.input.read_text(encoding="utf-8")) - if args.input - else load_live_issues( - args.repo, - args.limit, - api_host=args.api_host, - payment_bounty_issue_numbers=( - args.payment_bounty_issue or list(DEFAULT_PAYMENT_BOUNTY_ISSUE_NUMBERS) - ), - ) - ) - report = analyze_proposed_work(data) - if args.format == "json": - print(json.dumps(report, indent=2, sort_keys=True)) - else: - print(format_markdown(report)) - return 0 - - -if __name__ == "__main__": - raise SystemExit(main()) +from __future__ import annotations + +import argparse +import json +import re +import subprocess +import sys +import urllib.error +import urllib.parse +import urllib.request +from collections import defaultdict +from pathlib import Path +from typing import Any, cast + +if __package__ in {None, ""}: + sys.path.insert(0, str(Path(__file__).resolve().parents[1])) + +from scripts.api_host_args import public_api_host + +GH_TIMEOUT_SECONDS = 30 + + +def _positive_int(value: str) -> int: + """Argparse type that rejects --limit values below 1. + + A non-positive limit silently changes result size via Python slice + semantics (e.g. ``[:0]`` returns nothing, ``[:-1]`` drops the last row), + producing a successful but misleading report instead of a clear error. + """ + try: + parsed = int(value) + except (TypeError, ValueError): + raise argparse.ArgumentTypeError(f"expected an integer, got {value!r}") from None + if parsed < 1: + raise argparse.ArgumentTypeError(f"must be >= 1, got {parsed}") + return parsed + + +REQUIRED_TEMPLATE_SECTIONS = { + "problem": ("problem", "current problem"), + "evidence": ("evidence", "current evidence"), + "proposed_work": ("proposed work", "proposal"), + "value": ("value", "expected value"), + "acceptance": ("acceptance", "acceptance criteria", "possible acceptance criteria"), + "tests": ( + "tests", + "evidence or tests required", + "tests required", + "test notes", + "verification", + ), + "duplicate_search": ("duplicate search", "duplicates"), + "out_of_scope": ("out of scope",), +} +ROUTED_RE = re.compile( + r"\b(accepted by|accepted and|routed|created bounty|create_bounty|" + r"treasury proposal|reserved on mergework)\b", + re.IGNORECASE, +) +REJECTED_RE = re.compile( + r"\b(rejected|declined|not accepted|outside accepted scope)\b", re.IGNORECASE +) +NON_LIVE_CONFUSION_RE = re.compile( + r"(claimable now|already paid|guaranteed payout|cash[- ]?out|off[- ]?ramp)", + re.IGNORECASE, +) +BOUNTY_TITLE_RE = re.compile(r"^\s*MRWK bounty\s*:", re.IGNORECASE) +BOUNTY_TEMPLATE_HEADING_RE = re.compile(r"(^|\n)\s*#+\s*MRWK bounty\b", re.IGNORECASE) +WORD_RE = re.compile(r"[a-z0-9]+") +STOPWORDS = { + "add", + "and", + "bounty", + "for", + "from", + "issue", + "mrwk", + "proposed", + "request", + "the", + "to", + "work", +} +HTTP_TIMEOUT_SECONDS = 30 +DEFAULT_API_HOST = "https://api.mrwk.online" +DEFAULT_PAYMENT_BOUNTY_ISSUE_NUMBERS = (649, 722) +LIVE_ISSUE_SEARCHES = ( + "label:proposed-work", + '"proposed work"', +) +READ_ONLY_GH_COMMANDS = { + ("issue", "list"), + ("issue", "view"), +} + + +def _labels(raw: dict[str, Any]) -> list[str]: + labels = raw.get("labels", []) + names: list[str] = [] + for label in labels: + if isinstance(label, str): + names.append(label) + elif isinstance(label, dict) and isinstance(label.get("name"), str): + names.append(label["name"]) + return names + + +def _comments(raw: dict[str, Any]) -> list[str]: + comments = raw.get("comments", []) + bodies: list[str] = [] + for comment in comments: + if isinstance(comment, str): + bodies.append(comment) + elif isinstance(comment, dict) and isinstance(comment.get("body"), str): + bodies.append(comment["body"]) + return bodies + + +def _combined_text(issue: dict[str, Any]) -> str: + parts = [str(issue.get("title") or ""), str(issue.get("body") or "")] + parts.extend(_comments(issue)) + return "\n".join(parts) + + +def _is_non_intake_bounty_issue(issue: dict[str, Any]) -> bool: + labels = {label.lower() for label in _labels(issue)} + if "mrwk:bounty" in labels: + return True + title = str(issue.get("title") or "") + if BOUNTY_TITLE_RE.search(title): + return True + body = str(issue.get("body") or "") + return bool(BOUNTY_TEMPLATE_HEADING_RE.search(body)) + + +def _has_section(body: str, aliases: tuple[str, ...]) -> bool: + lowered = body.lower() + for alias in aliases: + if re.search(rf"(^|\n)\s*#+\s*{re.escape(alias)}\b", lowered): + return True + if re.search(rf"(^|\n)\s*(?:-\s*)?\*\*{re.escape(alias)}\*\*", lowered): + return True + return False + + +def _missing_sections(body: str) -> list[str]: + return [ + key + for key, aliases in REQUIRED_TEMPLATE_SECTIONS.items() + if not _has_section(body, aliases) + ] + + +def _token_set(issue: dict[str, Any]) -> set[str]: + text = str(issue.get("title") or "").lower() + return {word for word in WORD_RE.findall(text) if len(word) > 3 and word not in STOPWORDS} + + +def _is_vague(body: str, missing_sections: list[str]) -> bool: + return len(body.split()) < 45 or len(missing_sections) >= 4 + + +def _has_non_live_confusion(text: str) -> bool: + for line in text.splitlines(): + lowered = line.lower() + is_guardrail = ( + "do not" in lowered or "don't" in lowered or lowered.lstrip().startswith("- no ") + ) + if "/claim" in lowered and not is_guardrail: + return True + if NON_LIVE_CONFUSION_RE.search(lowered) and not is_guardrail: + return True + return False + + +def _payment_index(data: dict[str, Any]) -> dict[str, dict[str, Any]]: + index: dict[str, dict[str, Any]] = {} + for item in data.get("payments", []): + if not isinstance(item, dict): + continue + submission_url = item.get("submission_url") + if isinstance(submission_url, str) and submission_url: + index[submission_url.rstrip("/")] = item + for bounty in data.get("bounties", []): + if not isinstance(bounty, dict): + continue + for proposal in bounty.get("pending_payout_proposals", []) or []: + if not isinstance(proposal, dict): + continue + submission_url = proposal.get("submission_url") + if isinstance(submission_url, str) and submission_url: + index[submission_url.rstrip("/")] = { + "state": "pending", + "source": "pending_payout_proposal", + "proposal_id": proposal.get("proposal_id"), + "accepted_by": proposal.get("accepted_by"), + "executes_after": proposal.get("executes_after"), + } + awards: list[Any] = [] + for award_key in ("awards", "accepted_awards"): + raw_awards = bounty.get(award_key) or [] + if isinstance(raw_awards, list): + awards.extend(raw_awards) + for award in awards: + if not isinstance(award, dict): + continue + submission_url = award.get("submission_url") + if isinstance(submission_url, str) and submission_url: + index[submission_url.rstrip("/")] = { + "state": "paid", + "source": "proof_backed_award", + "proof_url": award.get("proof_url"), + "ledger_sequence": award.get("ledger_sequence"), + } + return index + + +def _payment_status(issue: dict[str, Any], payments: dict[str, dict[str, Any]]) -> dict[str, Any]: + issue_url = str(issue.get("url") or "").rstrip("/") + payment = payments.get(issue_url) + if payment: + return payment + for comment in _comments(issue): + for url in re.findall(r"https://github\.com/[^\s)]+", comment): + payment = payments.get(url.rstrip("/")) + if payment: + return payment + return {"state": "none"} + + +def _normalize_issue( + raw: dict[str, Any], payments: dict[str, dict[str, Any]] +) -> dict[str, Any] | None: + number = raw.get("number") + if not isinstance(number, int): + return None + body = str(raw.get("body") or "") + labels = _labels(raw) + missing = _missing_sections(body) + text = _combined_text(raw) + warnings: list[str] = [] + if "proposed-work" not in {label.lower() for label in labels}: + warnings.append("missing_proposed_work_label") + if missing: + warnings.append("missing_template_sections") + if _is_vague(body, missing): + warnings.append("vague_or_under_specified") + if ROUTED_RE.search(text): + warnings.append("already_routed_or_accepted") + if REJECTED_RE.search(text): + warnings.append("rejected_or_out_of_scope") + if _has_non_live_confusion(text): + warnings.append("non_live_bounty_confusion") + payment = _payment_status(raw, payments) + if payment.get("state") == "pending": + warnings.append("accepted_pending_payout") + elif payment.get("state") == "paid": + warnings.append("proof_backed_paid") + return { + "number": number, + "title": str(raw.get("title") or ""), + "url": raw.get("url"), + "state": str(raw.get("state") or ""), + "labels": labels, + "missing_sections": missing, + "warnings": warnings, + "payment_status": payment, + "tokens": sorted(_token_set(raw)), + } + + +def _related_groups(proposals: list[dict[str, Any]]) -> list[dict[str, Any]]: + grouped: dict[tuple[str, ...], set[int]] = defaultdict(set) + for index, left in enumerate(proposals): + left_tokens = set(left["tokens"]) + if len(left_tokens) < 3: + continue + for right in proposals[index + 1 :]: + right_tokens = set(right["tokens"]) + common = left_tokens & right_tokens + if len(common) < 3: + continue + if len(common) / min(len(left_tokens), len(right_tokens)) < 0.6: + continue + grouped[tuple(sorted(common))].update({left["number"], right["number"]}) + groups: list[dict[str, Any]] = [] + for tokens, numbers in grouped.items(): + if len(numbers) < 2: + continue + groups.append( + { + "issues": sorted(numbers), + "evidence_tokens": list(tokens), + "suggested_scope": " / ".join(tokens[:6]), + } + ) + return sorted(groups, key=lambda item: (-len(item["issues"]), item["issues"])) + + +def _mark_duplicate_warnings( + proposals: list[dict[str, Any]], related_groups: list[dict[str, Any]] +) -> None: + grouped_numbers = { + number + for group in related_groups + for number in group.get("issues", []) + if isinstance(number, int) + } + for proposal in proposals: + if ( + proposal["number"] in grouped_numbers + and "duplicate_looking_related_proposal" not in proposal["warnings"] + ): + proposal["warnings"].append("duplicate_looking_related_proposal") + + +def analyze_proposed_work(data: dict[str, Any]) -> dict[str, Any]: + payments = _payment_index(data) + data_warnings = [ + warning for warning in data.get("data_warnings", []) if isinstance(warning, str) + ] + proposals = [ + proposal + for raw in data.get("issues", []) + if isinstance(raw, dict) and not _is_non_intake_bounty_issue(raw) + for proposal in [_normalize_issue(raw, payments)] + if proposal is not None + ] + related_groups = _related_groups(proposals) + _mark_duplicate_warnings(proposals, related_groups) + warning_counts: dict[str, int] = defaultdict(int) + payment_counts: dict[str, int] = defaultdict(int) + for proposal in proposals: + payment_counts[str(proposal["payment_status"].get("state") or "none")] += 1 + for warning in proposal["warnings"]: + warning_counts[warning] += 1 + return { + "summary": { + "proposed_work_issues": len(proposals), + "warning_counts": dict(sorted(warning_counts.items())), + "payment_counts": dict(sorted(payment_counts.items())), + "related_groups": len(related_groups), + "data_warnings": data_warnings, + }, + "proposals": proposals, + "related_groups": related_groups, + } + + +def format_markdown(report: dict[str, Any]) -> str: + lines = ["# Proposed Work Triage", ""] + summary = report["summary"] + lines.append(f"- proposed work issues: {summary['proposed_work_issues']}") + lines.append(f"- related groups: {summary['related_groups']}") + for state, count in summary["payment_counts"].items(): + lines.append(f"- {state} payment status: {count}") + for warning in summary.get("data_warnings", []): + lines.append(f"- data warning: {warning}") + lines.append("") + lines.append("## Issues") + for item in report["proposals"]: + warnings = ", ".join(item["warnings"]) if item["warnings"] else "none" + missing = ", ".join(item["missing_sections"]) if item["missing_sections"] else "none" + payment = item["payment_status"].get("state", "none") + lines.append(f"- #{item['number']} {item['title']} ({payment})") + lines.append(f" - warnings: {warnings}") + lines.append(f" - missing sections: {missing}") + if item.get("url"): + lines.append(f" - url: {item['url']}") + if report["related_groups"]: + lines.append("") + lines.append("## Related Groups") + for group in report["related_groups"]: + issues = ", ".join(f"#{number}" for number in group["issues"]) + evidence = ", ".join(group["evidence_tokens"]) + lines.append(f"- {issues}: {group['suggested_scope']} ({evidence})") + return "\n".join(lines) + + +def _run_gh(args: list[str]) -> Any: + if len(args) < 2 or tuple(args[:2]) not in READ_ONLY_GH_COMMANDS: + allowed = ", ".join(" ".join(command) for command in sorted(READ_ONLY_GH_COMMANDS)) + raise RuntimeError(f"live mode only permits read-only gh commands: {allowed}") + try: + result = subprocess.run( + ["gh", *args], + capture_output=True, + text=True, + timeout=GH_TIMEOUT_SECONDS, + check=False, + ) + except subprocess.TimeoutExpired as exc: + raise RuntimeError(f"gh command timed out after {GH_TIMEOUT_SECONDS}s") from exc + if result.returncode != 0: + raise RuntimeError(result.stderr.strip() or result.stdout.strip()) + try: + return json.loads(result.stdout) + except json.JSONDecodeError as exc: + excerpt = result.stdout[:200].strip() + raise RuntimeError(f"gh returned invalid JSON: {excerpt}") from exc + + +def _gh_issue_search(repo: str, query: str, limit: int) -> list[dict[str, Any]]: + rows = _run_gh( + [ + "issue", + "list", + "--repo", + repo, + "--state", + "all", + "--limit", + str(limit), + "--search", + query, + "--json", + "number", + ] + ) + if not isinstance(rows, list): + raise RuntimeError("gh issue list returned non-list JSON") + return [cast(dict[str, Any], row) for row in rows if isinstance(row, dict)] + + +def _load_public_json(url: str) -> Any: + request = urllib.request.Request(url, headers={"User-Agent": "mergework-proposed-work-triage"}) + with urllib.request.urlopen(request, timeout=HTTP_TIMEOUT_SECONDS) as response: + return json.load(response) + + +def _load_public_bounty_issue( + api_host: str, issue_number: int +) -> tuple[list[dict[str, Any]], list[str]]: + query = urllib.parse.urlencode({"issue_number": str(issue_number), "limit": "5"}) + warnings: list[str] = [] + try: + rows = _load_public_json(f"{api_host.rstrip('/')}/api/v1/bounties?{query}") + except (OSError, urllib.error.URLError, json.JSONDecodeError) as exc: + warnings.append( + "payment_state_incomplete: failed to load public bounty list " + f"for issue #{issue_number} ({type(exc).__name__})" + ) + return [], warnings + bounties: list[dict[str, Any]] = [] + for row in rows if isinstance(rows, list) else []: + if not isinstance(row, dict): + continue + bounty_id = row.get("id") + if not isinstance(bounty_id, int): + continue + try: + detail = _load_public_json(f"{api_host.rstrip('/')}/api/v1/bounties/{bounty_id}") + except (OSError, urllib.error.URLError, json.JSONDecodeError) as exc: + warnings.append( + "payment_state_incomplete: failed to load public bounty " + f"detail for bounty {bounty_id}; using list row only ({type(exc).__name__})" + ) + detail = row + if isinstance(detail, dict): + bounties.append(detail) + return bounties, warnings + + +def _load_public_bounty_issues( + api_host: str, issue_numbers: list[int] | tuple[int, ...] +) -> tuple[list[dict[str, Any]], list[str]]: + bounties: list[dict[str, Any]] = [] + warnings: list[str] = [] + for issue_number in issue_numbers: + issue_bounties, issue_warnings = _load_public_bounty_issue(api_host, issue_number) + bounties.extend(issue_bounties) + warnings.extend(issue_warnings) + return bounties, warnings + + +def _positive_issue_number(value: str) -> int: + try: + issue_number = int(value) + except ValueError as exc: + raise argparse.ArgumentTypeError("payment bounty issue must be an integer") from exc + if issue_number <= 0: + raise argparse.ArgumentTypeError("payment bounty issue must be positive") + return issue_number + + +def load_live_issues( + repo: str, + limit: int, + api_host: str = DEFAULT_API_HOST, + payment_bounty_issue_numbers: list[int] | tuple[int, ...] = ( + DEFAULT_PAYMENT_BOUNTY_ISSUE_NUMBERS + ), +) -> dict[str, Any]: + rows_by_number: dict[int, dict[str, Any]] = {} + per_search_limit = max(1, limit) + for query in LIVE_ISSUE_SEARCHES: + for row in _gh_issue_search(repo, query, per_search_limit): + number = row.get("number") + if isinstance(number, int): + rows_by_number[number] = row + issues: list[dict[str, Any]] = [] + for number in sorted(rows_by_number, reverse=True)[:limit]: + issue = _run_gh( + [ + "issue", + "view", + str(number), + "--repo", + repo, + "--comments", + "--json", + "number,title,url,body,labels,state,comments,author,createdAt,updatedAt", + ] + ) + issues.append(issue) + bounties, data_warnings = _load_public_bounty_issues(api_host, payment_bounty_issue_numbers) + return { + "issues": issues, + "bounties": bounties, + "data_warnings": data_warnings, + } + + +def _require_non_empty_arg(parser: argparse.ArgumentParser, option_name: str, value: str) -> str: + stripped = value.strip() + if not stripped: + parser.error(f"{option_name} must be a non-empty value") + if stripped != value: + parser.error(f"{option_name} must not include leading or trailing whitespace") + return value + + +def _require_input_path(parser: argparse.ArgumentParser, path: Path) -> Path: + raw = str(path) + if not raw.strip(): + parser.error("--input must be a non-empty path") + if raw != raw.strip(): + parser.error("--input must not include leading or trailing whitespace") + return path + + +def main(argv: list[str] | None = None) -> int: + parser = argparse.ArgumentParser(description="Read-only proposed-work intake triage report") + source = parser.add_mutually_exclusive_group(required=True) + source.add_argument("--input", type=Path, help="Offline JSON fixture with issues/payments") + source.add_argument("--repo", help="GitHub repo for read-only gh live mode") + parser.add_argument("--limit", type=_positive_int, default=50) + parser.add_argument("--format", choices=("json", "markdown"), default="markdown") + parser.add_argument("--api-host", default=DEFAULT_API_HOST, type=public_api_host) + parser.add_argument( + "--payment-bounty-issue", + action="append", + default=None, + type=_positive_issue_number, + help=( + "GitHub issue number for an accepted-proposed-work bounty whose public " + "payment state should be loaded. Repeat during round transitions." + ), + ) + args = parser.parse_args(argv) + if args.input and args.payment_bounty_issue: + parser.error("--payment-bounty-issue is only valid in live --repo mode") + + if args.input is not None: + input_path = _require_input_path(parser, args.input) + data = json.loads(input_path.read_text(encoding="utf-8")) + else: + repo = _require_non_empty_arg(parser, "--repo", args.repo) + data = load_live_issues( + repo, + args.limit, + api_host=args.api_host, + payment_bounty_issue_numbers=( + args.payment_bounty_issue or list(DEFAULT_PAYMENT_BOUNTY_ISSUE_NUMBERS) + ), + ) + report = analyze_proposed_work(data) + if args.format == "json": + print(json.dumps(report, indent=2, sort_keys=True)) + else: + print(format_markdown(report)) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/tests/test_claim_inventory.py b/tests/test_claim_inventory.py index e0d4595f..31c44453 100644 --- a/tests/test_claim_inventory.py +++ b/tests/test_claim_inventory.py @@ -1,451 +1,560 @@ -from __future__ import annotations - -import json -import subprocess -import sys -from pathlib import Path - -import pytest - -from scripts import claim_inventory -from scripts.claim_inventory import analyze_inventory, format_markdown_report, main - -ROOT = Path(__file__).resolve().parents[1] - - -def test_claim_inventory_uses_canonical_api_host_by_default() -> None: - assert claim_inventory.DEFAULT_API_HOST == "https://api.mrwk.online" - - -def _fixture() -> dict[str, object]: - return { - "bounties": [ - { - "id": 85, - "issue_number": 578, - "status": "open", - "awards_remaining": 30, - "pending_payout_proposals": [ - { - "proposal_id": 67, - "executes_after": "2026-06-01T11:41:45Z", - "to_account": "github:pending-reviewer", - "bounty_id": 85, - "accepted_by": "ramimbo", - "submission_url": ( - "https://github.com/ramimbo/mergework/pull/620#pullrequestreview-67" - ), - } - ], - }, - {"id": 87, "issue_number": 581, "status": "open", "awards_remaining": 1}, - ], - "proofs": [ - { - "source_url": "https://github.com/ramimbo/mergework/pull/452#pullrequestreview-1", - "proof_url": "/proofs/abc123", - }, - { - "source_url": "https://github.com/ramimbo/mergework/pull/581#discussion_r1", - "proof_url": "/proofs/discussion-r1", - }, - ], - "recent": [ - { - "submission_url": "https://github.com/ramimbo/mergework/issues/578#issuecomment-4", - "proof_url": "/proofs/recent-paid", - "bounty_issue_number": 578, - "bounty_id": 85, - "ledger_sequence": 42, - } - ], - "issues": [ - { - "number": 578, - "title": "MRWK bounty: review open MergeWork PRs with evidence", - "url": "https://github.com/ramimbo/mergework/issues/578", - "labels": ["mrwk:bounty"], - "author": {"login": "ramimbo"}, - "comments": [ - { - "url": "https://github.com/ramimbo/mergework/issues/578#issuecomment-1", - "author": {"login": "eliasx45"}, - "body": ( - "/claim " - "https://github.com/ramimbo/mergework/pull/452#pullrequestreview-1\n" - "Reviewed PR #452 with tests." - ), - }, - { - "url": "https://github.com/ramimbo/mergework/issues/578#issuecomment-2", - "author": {"login": "other-reviewer"}, - "body": ( - "/claim " - "https://github.com/ramimbo/mergework/pull/533#issuecomment-2\n" - "Duplicate review claim." - ), - }, - { - "url": "https://github.com/ramimbo/mergework/issues/578#issuecomment-3", - "author": {"login": "smoke-checker"}, - "body": ( - "Smoke-check claim: " - "https://github.com/ramimbo/mergework/pull/533#issuecomment-2 " - "works on the public activity page." - ), - }, - { - "url": "https://github.com/ramimbo/mergework/issues/578#issuecomment-4", - "author": {"login": "recent-winner"}, - "body": "/claim https://github.com/ramimbo/mergework/pull/700", - }, - { - "url": "https://github.com/ramimbo/mergework/issues/578#issuecomment-5", - "author": {"login": "pending-reviewer"}, - "body": ( - "/claim " - "https://github.com/ramimbo/mergework/" - "pull/620#pullrequestreview-67\n" - "Reviewed current head while payout is pending." - ), - }, - { - "url": "https://github.com/ramimbo/mergework/issues/578#issuecomment-6", - "author": {"login": "pending-reviewer"}, - "body": ( - "Accepted review evidence: " - "https://github.com/ramimbo/mergework/" - "pull/620#pullrequestreview-67" - ), - }, - ], - } - ], - "pull_requests": [ - { - "number": 581, - "title": "Refs #581: Add claim inventory report", - "url": "https://github.com/ramimbo/mergework/pull/581", - "author": {"login": "jakerated-r"}, - "body": "Refs #581\n\nAdds scripts/claim_inventory.py.", - "comments": [ - { - "url": "https://github.com/ramimbo/mergework/pull/581#issuecomment-1", - "author": {"login": "reviewer"}, - "body": "Looks good after docs smoke.", - } - ], - "reviews": [ - { - "url": "https://github.com/ramimbo/mergework/pull/581#pullrequestreview-9", - "author": {"login": "reviewer"}, - "body": "Reviewed the fixture mode and markdown output.", - } - ], - "review_comments": [ - { - "url": "https://github.com/ramimbo/mergework/pull/581#discussion_r1", - "author": {"login": "inline-reviewer"}, - "body": "The markdown claim row still looks traceable.", - } - ], - }, - { - "number": 582, - "title": "Refs #581: Add another inventory report", - "url": "https://github.com/ramimbo/mergework/pull/582", - "author": {"login": "jakerated-r"}, - "body": "Claiming another read-only report.", - "comments": [ - { - "author": {"login": "maintainer"}, - "body": "Looks fine.", - } - ], - }, - { - "number": 999, - "title": "Small cleanup with no bounty link", - "url": "https://github.com/ramimbo/mergework/pull/999", - "author": {"login": "unknown"}, - "body": "Claiming this small cleanup, but no bounty reference is included.", - }, - { - "number": 1000, - "title": "Refs #9999: unknown bounty", - "url": "https://github.com/ramimbo/mergework/pull/1000", - "author": {"login": "unknown"}, - "body": "Refs #9999\n\nValidation: pytest passed.", - }, - ], - } - - -def test_claim_inventory_classifies_required_statuses(tmp_path, capsys) -> None: - report = analyze_inventory(_fixture(), api_host="https://api.example.test") - - rows = {row["source_url"]: row for row in report["rows"]} - assert ( - rows["https://github.com/ramimbo/mergework/issues/578#issuecomment-1"]["likely_status"] - == "already_paid" - ) - assert ( - rows["https://github.com/ramimbo/mergework/issues/578#issuecomment-4"]["likely_status"] - == "already_paid" - ) - assert ( - rows["https://github.com/ramimbo/mergework/issues/578#issuecomment-4"]["proof_url"] - == "https://api.example.test/proofs/recent-paid" - ) - assert ( - rows["https://github.com/ramimbo/mergework/issues/578#issuecomment-2"]["likely_status"] - == "duplicate_candidate" - ) - assert ( - rows["https://github.com/ramimbo/mergework/pull/999"]["likely_status"] - == "missing_bounty_ref" - ) - assert ( - rows["https://github.com/ramimbo/mergework/pull/1000"]["likely_status"] == "unknown_bounty" - ) - assert rows["https://github.com/ramimbo/mergework/pull/581"]["bounty_id"] == 87 - assert ( - rows["https://github.com/ramimbo/mergework/pull/581#discussion_r1"]["source_type"] - == "pull_request_review_comment" - ) - assert ( - rows["https://github.com/ramimbo/mergework/pull/581#discussion_r1"]["proof_url"] - == "https://api.example.test/proofs/discussion-r1" - ) - assert ( - rows["https://github.com/ramimbo/mergework/pull/582"]["likely_status"] == "unpaid_candidate" - ) - assert rows["https://github.com/ramimbo/mergework/pull/582"]["pending_proposal_id"] is None - assert ( - rows["https://github.com/ramimbo/mergework/issues/578#issuecomment-3"]["source_type"] - == "bounty_issue_comment" - ) - assert ( - rows["https://github.com/ramimbo/mergework/issues/578#issuecomment-5"]["likely_status"] - == "pending_payout" - ) - assert ( - rows["https://github.com/ramimbo/mergework/issues/578#issuecomment-6"]["likely_status"] - == "pending_payout" - ) - assert ( - rows["https://github.com/ramimbo/mergework/issues/578#issuecomment-5"][ - "pending_proposal_id" - ] - == 67 - ) - assert ( - rows["https://github.com/ramimbo/mergework/issues/578#issuecomment-5"][ - "pending_proposal_url" - ] - == "https://api.example.test/api/v1/treasury/proposals/67" - ) - assert ( - rows["https://github.com/ramimbo/mergework/issues/578#issuecomment-5"][ - "pending_executes_after" - ] - == "2026-06-01T11:41:45Z" - ) - assert ( - rows["https://github.com/ramimbo/mergework/issues/578#issuecomment-5"]["pending_to_account"] - == "github:pending-reviewer" - ) - assert ( - rows["https://github.com/ramimbo/mergework/issues/578#issuecomment-5"]["pending_bounty_id"] - == 85 - ) - assert ( - rows["https://github.com/ramimbo/mergework/issues/578#issuecomment-5"][ - "pending_accepted_by" - ] - == "ramimbo" - ) - assert ( - rows["https://github.com/ramimbo/mergework/issues/578#issuecomment-5"][ - "pending_submission_url" - ] - == "https://github.com/ramimbo/mergework/pull/620#pullrequestreview-67" - ) - assert report["summary"]["pending_payout"] == 2 - assert set(report["likely_status_enum"]) >= { - "already_paid", - "pending_payout", - "unpaid_candidate", - "duplicate_candidate", - "missing_bounty_ref", - "unknown_bounty", - "ignored_or_unclear", - } - - input_path = tmp_path / "claims.json" - input_path.write_text(json.dumps(_fixture()), encoding="utf-8") - assert main(["--input", str(input_path), "--format", "json"]) == 0 - output = json.loads(capsys.readouterr().out) - assert output["summary"]["already_paid"] == 3 - - -def test_claim_inventory_markdown_report_is_pasteable() -> None: - markdown = format_markdown_report(analyze_inventory(_fixture())) - - assert "## Claim Inventory" in markdown - assert "| Status | Bounty | Claimant | Type | Source | Proof/Pending |" in markdown - assert "`already_paid`" in markdown - assert "`pending_payout`" in markdown - assert "https://api.mrwk.online/proofs/abc123" in markdown - assert "https://api.mrwk.online/api/v1/treasury/proposals/67" in markdown - - -def test_claim_inventory_paid_proof_overrides_pending_payout() -> None: - fixture = _fixture() - proofs = fixture["proofs"] - assert isinstance(proofs, list) - proofs.append( - { - "source_url": "https://github.com/ramimbo/mergework/pull/620#pullrequestreview-67", - "proof_url": "/proofs/pending-later-paid", - } - ) - - report = analyze_inventory(fixture, api_host="https://api.example.test") - rows = {row["source_url"]: row for row in report["rows"]} - - assert ( - rows["https://github.com/ramimbo/mergework/issues/578#issuecomment-5"]["likely_status"] - == "already_paid" - ) - assert ( - rows["https://github.com/ramimbo/mergework/issues/578#issuecomment-5"]["proof_url"] - == "https://api.example.test/proofs/pending-later-paid" - ) - - -def test_claim_inventory_live_mode_uses_read_only_calls(monkeypatch) -> None: - calls: list[list[str]] = [] - - def fake_run_gh_json(args: list[str]) -> object: - calls.append(args) - if args[:3] == ["gh", "issue", "list"]: - return [ - { - "number": 581, - "title": "MRWK bounty: claim inventory", - "url": "https://github.com/ramimbo/mergework/issues/581", - "labels": [{"name": "mrwk:bounty"}], - "author": {"login": "ramimbo"}, - } - ] - if args[:3] == ["gh", "issue", "view"]: - return { - "number": 581, - "title": "MRWK bounty: claim inventory", - "url": "https://github.com/ramimbo/mergework/issues/581", - "body": "Reward: 500 MRWK", - "labels": [{"name": "mrwk:bounty"}], - "author": {"login": "ramimbo"}, - "comments": [], - } - if args[:3] == ["gh", "pr", "list"]: - return [ - { - "number": 582, - "title": "Refs #581: Add inventory", - "url": "https://github.com/ramimbo/mergework/pull/582", - "body": "Refs #581", - "author": {"login": "bot"}, - "labels": [], - } - ] - if args[:3] == ["gh", "pr", "view"]: - return { - "number": 582, - "title": "Refs #581: Add inventory", - "url": "https://github.com/ramimbo/mergework/pull/582", - "body": "Refs #581", - "author": {"login": "bot"}, - "labels": [], - "comments": [], - "reviews": [], - } - if args[:2] == ["gh", "api"]: - return [ - { - "html_url": "https://github.com/ramimbo/mergework/pull/582#discussion_r123", - "user": {"login": "reviewer"}, - "body": "Inline review claim evidence for #581.", - } - ] - raise AssertionError(args) - - monkeypatch.setattr(claim_inventory, "_run_gh_json", fake_run_gh_json) - monkeypatch.setattr( - claim_inventory, - "load_public_api_state", - lambda api_host: { - "bounties": [{"id": 87, "issue_number": 581, "status": "open", "awards_remaining": 1}], - "proofs": [], - "recent": [], - }, - ) - - data = claim_inventory.load_live_inventory("ramimbo/mergework", "https://api.example.test") - - assert data["bounties"][0]["id"] == 87 - assert data["pull_requests"][0]["review_comments"] == [ - { - "url": "https://github.com/ramimbo/mergework/pull/582#discussion_r123", - "author": {"login": "reviewer"}, - "body": "Inline review claim evidence for #581.", - } - ] - allowed_prefixes = { - ("gh", "issue", "list"), - ("gh", "issue", "view"), - ("gh", "pr", "list"), - ("gh", "pr", "view"), - ("gh", "api"), - } - assert calls, "expected at least one gh invocation" - assert all( - tuple(call[:3]) in allowed_prefixes or tuple(call[:2]) in allowed_prefixes for call in calls - ), calls - - -def test_run_gh_json_reports_missing_gh(monkeypatch) -> None: - def missing_gh(*args, **kwargs): - raise FileNotFoundError("gh") - - monkeypatch.setattr(claim_inventory.subprocess, "run", missing_gh) - - try: - claim_inventory._run_gh_json(["gh", "issue", "list"]) - except RuntimeError as exc: - assert "GitHub CLI executable 'gh' was not found" in str(exc) - assert "live --repo mode" in str(exc) - else: - raise AssertionError("expected missing gh RuntimeError") - - -def test_claim_inventory_script_entrypoint_loads_shared_parser() -> None: - result = subprocess.run( - [sys.executable, "scripts/claim_inventory.py", "--help"], - cwd=ROOT, - capture_output=True, - text=True, - check=False, - ) - - assert result.returncode == 0 - assert "usage:" in result.stdout - - -def test_claim_inventory_rejects_invalid_api_host(capsys) -> None: - for bad in ("", " ", "/relative", "ftp://api.example.test"): - with pytest.raises(SystemExit) as excinfo: - main(["--repo", "ramimbo/mergework", "--api-host", bad]) - assert excinfo.value.code == 2 - assert "api host must" in capsys.readouterr().err +from __future__ import annotations + +import json +import subprocess +import sys +from pathlib import Path + +import pytest + +from scripts import claim_inventory +from scripts.claim_inventory import analyze_inventory, format_markdown_report, main + +ROOT = Path(__file__).resolve().parents[1] + + +def test_claim_inventory_uses_canonical_api_host_by_default() -> None: + assert claim_inventory.DEFAULT_API_HOST == "https://api.mrwk.online" + + +def _fixture() -> dict[str, object]: + return { + "bounties": [ + { + "id": 85, + "issue_number": 578, + "status": "open", + "awards_remaining": 30, + "pending_payout_proposals": [ + { + "proposal_id": 67, + "executes_after": "2026-06-01T11:41:45Z", + "to_account": "github:pending-reviewer", + "bounty_id": 85, + "accepted_by": "ramimbo", + "submission_url": ( + "https://github.com/ramimbo/mergework/pull/620#pullrequestreview-67" + ), + } + ], + }, + {"id": 87, "issue_number": 581, "status": "open", "awards_remaining": 1}, + ], + "proofs": [ + { + "source_url": "https://github.com/ramimbo/mergework/pull/452#pullrequestreview-1", + "proof_url": "/proofs/abc123", + }, + { + "source_url": "https://github.com/ramimbo/mergework/pull/581#discussion_r1", + "proof_url": "/proofs/discussion-r1", + }, + ], + "recent": [ + { + "submission_url": "https://github.com/ramimbo/mergework/issues/578#issuecomment-4", + "proof_url": "/proofs/recent-paid", + "bounty_issue_number": 578, + "bounty_id": 85, + "ledger_sequence": 42, + } + ], + "issues": [ + { + "number": 578, + "title": "MRWK bounty: review open MergeWork PRs with evidence", + "url": "https://github.com/ramimbo/mergework/issues/578", + "labels": ["mrwk:bounty"], + "author": {"login": "ramimbo"}, + "comments": [ + { + "url": "https://github.com/ramimbo/mergework/issues/578#issuecomment-1", + "author": {"login": "eliasx45"}, + "body": ( + "/claim " + "https://github.com/ramimbo/mergework/pull/452#pullrequestreview-1\n" + "Reviewed PR #452 with tests." + ), + }, + { + "url": "https://github.com/ramimbo/mergework/issues/578#issuecomment-2", + "author": {"login": "other-reviewer"}, + "body": ( + "/claim " + "https://github.com/ramimbo/mergework/pull/533#issuecomment-2\n" + "Duplicate review claim." + ), + }, + { + "url": "https://github.com/ramimbo/mergework/issues/578#issuecomment-3", + "author": {"login": "smoke-checker"}, + "body": ( + "Smoke-check claim: " + "https://github.com/ramimbo/mergework/pull/533#issuecomment-2 " + "works on the public activity page." + ), + }, + { + "url": "https://github.com/ramimbo/mergework/issues/578#issuecomment-4", + "author": {"login": "recent-winner"}, + "body": "/claim https://github.com/ramimbo/mergework/pull/700", + }, + { + "url": "https://github.com/ramimbo/mergework/issues/578#issuecomment-5", + "author": {"login": "pending-reviewer"}, + "body": ( + "/claim " + "https://github.com/ramimbo/mergework/" + "pull/620#pullrequestreview-67\n" + "Reviewed current head while payout is pending." + ), + }, + { + "url": "https://github.com/ramimbo/mergework/issues/578#issuecomment-6", + "author": {"login": "pending-reviewer"}, + "body": ( + "Accepted review evidence: " + "https://github.com/ramimbo/mergework/" + "pull/620#pullrequestreview-67" + ), + }, + ], + } + ], + "pull_requests": [ + { + "number": 581, + "title": "Refs #581: Add claim inventory report", + "url": "https://github.com/ramimbo/mergework/pull/581", + "author": {"login": "jakerated-r"}, + "body": "Refs #581\n\nAdds scripts/claim_inventory.py.", + "comments": [ + { + "url": "https://github.com/ramimbo/mergework/pull/581#issuecomment-1", + "author": {"login": "reviewer"}, + "body": "Looks good after docs smoke.", + } + ], + "reviews": [ + { + "url": "https://github.com/ramimbo/mergework/pull/581#pullrequestreview-9", + "author": {"login": "reviewer"}, + "body": "Reviewed the fixture mode and markdown output.", + } + ], + "review_comments": [ + { + "url": "https://github.com/ramimbo/mergework/pull/581#discussion_r1", + "author": {"login": "inline-reviewer"}, + "body": "The markdown claim row still looks traceable.", + } + ], + }, + { + "number": 582, + "title": "Refs #581: Add another inventory report", + "url": "https://github.com/ramimbo/mergework/pull/582", + "author": {"login": "jakerated-r"}, + "body": "Claiming another read-only report.", + "comments": [ + { + "author": {"login": "maintainer"}, + "body": "Looks fine.", + } + ], + }, + { + "number": 999, + "title": "Small cleanup with no bounty link", + "url": "https://github.com/ramimbo/mergework/pull/999", + "author": {"login": "unknown"}, + "body": "Claiming this small cleanup, but no bounty reference is included.", + }, + { + "number": 1000, + "title": "Refs #9999: unknown bounty", + "url": "https://github.com/ramimbo/mergework/pull/1000", + "author": {"login": "unknown"}, + "body": "Refs #9999\n\nValidation: pytest passed.", + }, + ], + } + + +def test_claim_inventory_classifies_required_statuses(tmp_path, capsys) -> None: + report = analyze_inventory(_fixture(), api_host="https://api.example.test") + + rows = {row["source_url"]: row for row in report["rows"]} + assert ( + rows["https://github.com/ramimbo/mergework/issues/578#issuecomment-1"]["likely_status"] + == "already_paid" + ) + assert ( + rows["https://github.com/ramimbo/mergework/issues/578#issuecomment-4"]["likely_status"] + == "already_paid" + ) + assert ( + rows["https://github.com/ramimbo/mergework/issues/578#issuecomment-4"]["proof_url"] + == "https://api.example.test/proofs/recent-paid" + ) + assert ( + rows["https://github.com/ramimbo/mergework/issues/578#issuecomment-2"]["likely_status"] + == "duplicate_candidate" + ) + assert ( + rows["https://github.com/ramimbo/mergework/pull/999"]["likely_status"] + == "missing_bounty_ref" + ) + assert ( + rows["https://github.com/ramimbo/mergework/pull/1000"]["likely_status"] == "unknown_bounty" + ) + assert rows["https://github.com/ramimbo/mergework/pull/581"]["bounty_id"] == 87 + assert ( + rows["https://github.com/ramimbo/mergework/pull/581#discussion_r1"]["source_type"] + == "pull_request_review_comment" + ) + assert ( + rows["https://github.com/ramimbo/mergework/pull/581#discussion_r1"]["proof_url"] + == "https://api.example.test/proofs/discussion-r1" + ) + assert ( + rows["https://github.com/ramimbo/mergework/pull/582"]["likely_status"] == "unpaid_candidate" + ) + assert rows["https://github.com/ramimbo/mergework/pull/582"]["pending_proposal_id"] is None + assert ( + rows["https://github.com/ramimbo/mergework/issues/578#issuecomment-3"]["source_type"] + == "bounty_issue_comment" + ) + assert ( + rows["https://github.com/ramimbo/mergework/issues/578#issuecomment-5"]["likely_status"] + == "pending_payout" + ) + assert ( + rows["https://github.com/ramimbo/mergework/issues/578#issuecomment-6"]["likely_status"] + == "pending_payout" + ) + assert ( + rows["https://github.com/ramimbo/mergework/issues/578#issuecomment-5"][ + "pending_proposal_id" + ] + == 67 + ) + assert ( + rows["https://github.com/ramimbo/mergework/issues/578#issuecomment-5"][ + "pending_proposal_url" + ] + == "https://api.example.test/api/v1/treasury/proposals/67" + ) + assert ( + rows["https://github.com/ramimbo/mergework/issues/578#issuecomment-5"][ + "pending_executes_after" + ] + == "2026-06-01T11:41:45Z" + ) + assert ( + rows["https://github.com/ramimbo/mergework/issues/578#issuecomment-5"]["pending_to_account"] + == "github:pending-reviewer" + ) + assert ( + rows["https://github.com/ramimbo/mergework/issues/578#issuecomment-5"]["pending_bounty_id"] + == 85 + ) + assert ( + rows["https://github.com/ramimbo/mergework/issues/578#issuecomment-5"][ + "pending_accepted_by" + ] + == "ramimbo" + ) + assert ( + rows["https://github.com/ramimbo/mergework/issues/578#issuecomment-5"][ + "pending_submission_url" + ] + == "https://github.com/ramimbo/mergework/pull/620#pullrequestreview-67" + ) + assert report["summary"]["pending_payout"] == 2 + assert set(report["likely_status_enum"]) >= { + "already_paid", + "pending_payout", + "unpaid_candidate", + "duplicate_candidate", + "missing_bounty_ref", + "unknown_bounty", + "ignored_or_unclear", + } + + input_path = tmp_path / "claims.json" + input_path.write_text(json.dumps(_fixture()), encoding="utf-8") + assert main(["--input", str(input_path), "--format", "json"]) == 0 + output = json.loads(capsys.readouterr().out) + assert output["summary"]["already_paid"] == 3 + + +def test_claim_inventory_markdown_report_is_pasteable() -> None: + markdown = format_markdown_report(analyze_inventory(_fixture())) + + assert "## Claim Inventory" in markdown + assert "| Status | Bounty | Claimant | Type | Source | Proof/Pending |" in markdown + assert "`already_paid`" in markdown + assert "`pending_payout`" in markdown + assert "https://api.mrwk.online/proofs/abc123" in markdown + assert "https://api.mrwk.online/api/v1/treasury/proposals/67" in markdown + + +def test_claim_inventory_paid_proof_overrides_pending_payout() -> None: + fixture = _fixture() + proofs = fixture["proofs"] + assert isinstance(proofs, list) + proofs.append( + { + "source_url": "https://github.com/ramimbo/mergework/pull/620#pullrequestreview-67", + "proof_url": "/proofs/pending-later-paid", + } + ) + + report = analyze_inventory(fixture, api_host="https://api.example.test") + rows = {row["source_url"]: row for row in report["rows"]} + + assert ( + rows["https://github.com/ramimbo/mergework/issues/578#issuecomment-5"]["likely_status"] + == "already_paid" + ) + assert ( + rows["https://github.com/ramimbo/mergework/issues/578#issuecomment-5"]["proof_url"] + == "https://api.example.test/proofs/pending-later-paid" + ) + + +def test_claim_inventory_live_mode_uses_read_only_calls(monkeypatch) -> None: + calls: list[list[str]] = [] + + def fake_run_gh_json(args: list[str]) -> object: + calls.append(args) + if args[:3] == ["gh", "issue", "list"]: + return [ + { + "number": 581, + "title": "MRWK bounty: claim inventory", + "url": "https://github.com/ramimbo/mergework/issues/581", + "labels": [{"name": "mrwk:bounty"}], + "author": {"login": "ramimbo"}, + } + ] + if args[:3] == ["gh", "issue", "view"]: + return { + "number": 581, + "title": "MRWK bounty: claim inventory", + "url": "https://github.com/ramimbo/mergework/issues/581", + "body": "Reward: 500 MRWK", + "labels": [{"name": "mrwk:bounty"}], + "author": {"login": "ramimbo"}, + "comments": [], + } + if args[:3] == ["gh", "pr", "list"]: + return [ + { + "number": 582, + "title": "Refs #581: Add inventory", + "url": "https://github.com/ramimbo/mergework/pull/582", + "body": "Refs #581", + "author": {"login": "bot"}, + "labels": [], + } + ] + if args[:3] == ["gh", "pr", "view"]: + return { + "number": 582, + "title": "Refs #581: Add inventory", + "url": "https://github.com/ramimbo/mergework/pull/582", + "body": "Refs #581", + "author": {"login": "bot"}, + "labels": [], + "comments": [], + "reviews": [], + } + if args[:2] == ["gh", "api"]: + return [ + { + "html_url": "https://github.com/ramimbo/mergework/pull/582#discussion_r123", + "user": {"login": "reviewer"}, + "body": "Inline review claim evidence for #581.", + } + ] + raise AssertionError(args) + + monkeypatch.setattr(claim_inventory, "_run_gh_json", fake_run_gh_json) + monkeypatch.setattr( + claim_inventory, + "load_public_api_state", + lambda api_host: { + "bounties": [{"id": 87, "issue_number": 581, "status": "open", "awards_remaining": 1}], + "proofs": [], + "recent": [], + }, + ) + + data = claim_inventory.load_live_inventory("ramimbo/mergework", "https://api.example.test") + + assert data["bounties"][0]["id"] == 87 + assert data["pull_requests"][0]["review_comments"] == [ + { + "url": "https://github.com/ramimbo/mergework/pull/582#discussion_r123", + "author": {"login": "reviewer"}, + "body": "Inline review claim evidence for #581.", + } + ] + allowed_prefixes = { + ("gh", "issue", "list"), + ("gh", "issue", "view"), + ("gh", "pr", "list"), + ("gh", "pr", "view"), + ("gh", "api"), + } + assert calls, "expected at least one gh invocation" + assert all( + tuple(call[:3]) in allowed_prefixes or tuple(call[:2]) in allowed_prefixes for call in calls + ), calls + + +def test_run_gh_json_reports_missing_gh(monkeypatch) -> None: + def missing_gh(*args, **kwargs): + raise FileNotFoundError("gh") + + monkeypatch.setattr(claim_inventory.subprocess, "run", missing_gh) + + try: + claim_inventory._run_gh_json(["gh", "issue", "list"]) + except RuntimeError as exc: + assert "GitHub CLI executable 'gh' was not found" in str(exc) + assert "live --repo mode" in str(exc) + else: + raise AssertionError("expected missing gh RuntimeError") + + +def test_claim_inventory_script_entrypoint_loads_shared_parser() -> None: + result = subprocess.run( + [sys.executable, "scripts/claim_inventory.py", "--help"], + cwd=ROOT, + capture_output=True, + text=True, + check=False, + ) + + assert result.returncode == 0 + assert "usage:" in result.stdout + + +def test_claim_inventory_rejects_invalid_api_host(capsys) -> None: + for bad in ("", " ", "/relative", "ftp://api.example.test"): + with pytest.raises(SystemExit) as excinfo: + main(["--repo", "ramimbo/mergework", "--api-host", bad]) + assert excinfo.value.code == 2 + assert "api host must" in capsys.readouterr().err + + +def test_claim_inventory_exposes_safety_caps() -> None: + assert claim_inventory.GH_PR_SAFETY_CAP > claim_inventory.GH_LIMIT + assert claim_inventory.GH_ISSUE_SAFETY_CAP > claim_inventory.GH_LIMIT + assert claim_inventory.GH_PUBLIC_API_SAFETY_CAP > claim_inventory.GH_LIMIT + + +def test_claim_inventory_public_api_fails_fast_on_bounty_safety_cap(monkeypatch) -> None: + cap = claim_inventory.GH_PUBLIC_API_SAFETY_CAP + bounties = [{"id": i, "title": f"bounty-{i}"} for i in range(cap)] + activity = {"contributors": [{"login": "x"}], "recent": []} + + def fake_get_json(url: str) -> object: + if "bounties" in url: + return bounties + return activity + + monkeypatch.setattr(claim_inventory, "_get_json", fake_get_json) + + with pytest.raises(RuntimeError) as excinfo: + claim_inventory.load_public_api_state("https://api.mrwk.online") + assert "bounties" in str(excinfo.value) + assert "safety cap" in str(excinfo.value) + + +def test_claim_inventory_public_api_fails_fast_on_activity_safety_cap(monkeypatch) -> None: + cap = claim_inventory.GH_PUBLIC_API_SAFETY_CAP + bounties = [{"id": 1, "title": "bounty-1"}] + activity = { + "contributors": [{"login": f"c{i}"} for i in range(cap)], + "recent": [], + } + + def fake_get_json(url: str) -> object: + if "bounties" in url: + return bounties + return activity + + monkeypatch.setattr(claim_inventory, "_get_json", fake_get_json) + + with pytest.raises(RuntimeError) as excinfo: + claim_inventory.load_public_api_state("https://api.mrwk.online") + assert "contributors" in str(excinfo.value) + assert "safety cap" in str(excinfo.value) + + +def test_claim_inventory_live_mode_fails_fast_on_issue_safety_cap(monkeypatch) -> None: + cap = claim_inventory.GH_ISSUE_SAFETY_CAP + issue_list = [{"number": i} for i in range(cap)] + prs: list[dict] = [] + + def fake_run_gh_json(cmd: list[str]) -> object: + if cmd[:2] == ["gh", "issue"] and "list" in cmd: + return issue_list + if cmd[:2] == ["gh", "pr"] and "list" in cmd: + return prs + return [] + + monkeypatch.setattr(claim_inventory, "_run_gh_json", fake_run_gh_json) + + with pytest.raises(RuntimeError) as excinfo: + claim_inventory.load_live_inventory("ramimbo/mergework", "https://api.mrwk.online") + assert "issue list" in str(excinfo.value) + assert "safety cap" in str(excinfo.value) + + +def test_claim_inventory_live_mode_fails_fast_on_pr_safety_cap(monkeypatch) -> None: + issue_cap = claim_inventory.GH_ISSUE_SAFETY_CAP + pr_cap = claim_inventory.GH_PR_SAFETY_CAP + issue_list = [{"number": i} for i in range(issue_cap - 1)] + prs = [{"number": i} for i in range(pr_cap)] + + def fake_run_gh_json(cmd: list[str]) -> object: + if cmd[:2] == ["gh", "issue"] and "list" in cmd: + return issue_list + if cmd[:2] == ["gh", "pr"] and "list" in cmd: + return prs + return [] + + monkeypatch.setattr(claim_inventory, "_run_gh_json", fake_run_gh_json) + monkeypatch.setattr(claim_inventory, "_get_json", lambda _url: []) + + with pytest.raises(RuntimeError) as excinfo: + claim_inventory.load_live_inventory("ramimbo/mergework", "https://api.mrwk.online") + assert "pr list" in str(excinfo.value) + assert "safety cap" in str(excinfo.value) + + +@pytest.mark.parametrize( + ("source_args", "expected_message"), + ( + (["--input", ""], "--input must be a non-empty value"), + (["--input", " "], "--input must be a non-empty value"), + (["--repo", ""], "--repo must be a non-empty value"), + (["--repo", " "], "--repo must be a non-empty value"), + (["--repo", " ramimbo/mergework "], "--repo must not include"), + ), +) +def test_claim_inventory_rejects_empty_source_args( + source_args: list[str], + expected_message: str, + capsys, +) -> None: + with pytest.raises(SystemExit) as excinfo: + main([*source_args, "--format", "json"]) + + assert excinfo.value.code == 2 + assert expected_message in capsys.readouterr().err diff --git a/tests/test_pr_queue_health.py b/tests/test_pr_queue_health.py index e965c750..22e42866 100644 --- a/tests/test_pr_queue_health.py +++ b/tests/test_pr_queue_health.py @@ -546,3 +546,25 @@ def fake_run(args, **kwargs): with pytest.raises(RuntimeError, match="pr list reached the 201 item safety cap"): pr_queue_health.load_live_queue("ramimbo/mergework") + + +@pytest.mark.parametrize( + ("source_args", "expected_message"), + ( + (["--input", ""], "--input must be a non-empty value"), + (["--input", " "], "--input must be a non-empty value"), + (["--repo", ""], "--repo must be a non-empty value"), + (["--repo", " "], "--repo must be a non-empty value"), + (["--repo", " ramimbo/mergework "], "--repo must not include"), + ), +) +def test_pr_queue_health_rejects_empty_source_args( + source_args: list[str], + expected_message: str, + capsys, +) -> None: + with pytest.raises(SystemExit) as excinfo: + main([*source_args, "--format", "json"]) + + assert excinfo.value.code == 2 + assert expected_message in capsys.readouterr().err diff --git a/tests/test_proposed_work_triage.py b/tests/test_proposed_work_triage.py index 9001665b..4ef4668b 100644 --- a/tests/test_proposed_work_triage.py +++ b/tests/test_proposed_work_triage.py @@ -780,3 +780,23 @@ def test_proposed_work_triage_rejects_invalid_api_host(capsys) -> None: main(["--repo", "ramimbo/mergework", "--api-host", bad]) assert excinfo.value.code == 2 assert "api host must" in capsys.readouterr().err + + +@pytest.mark.parametrize( + ("source_args", "expected_message"), + ( + (["--repo", ""], "--repo must be a non-empty value"), + (["--repo", " "], "--repo must be a non-empty value"), + (["--repo", " ramimbo/mergework "], "--repo must not include"), + ), +) +def test_proposed_work_triage_rejects_empty_repo_source( + source_args: list[str], + expected_message: str, + capsys, +) -> None: + with pytest.raises(SystemExit) as excinfo: + main([*source_args, "--format", "json"]) + + assert excinfo.value.code == 2 + assert expected_message in capsys.readouterr().err