Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 89 additions & 0 deletions scripts/check_pr_payment_language.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
from __future__ import annotations

import argparse
import json
import os
import sys
import urllib.error
import urllib.request
from pathlib import Path
from typing import Any

if __package__ in {None, ""}:
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))

from scripts.public_payment_language import (
find_payment_language_violations,
format_violation_report,
)

# Timeout, in seconds, passed to urllib.request.urlopen when fetching a pull request.
DEFAULT_TIMEOUT_SECONDS = 30


def _github_token() -> str:
    """Return the first non-blank GitHub token found in the environment.

    ``GH_TOKEN`` is consulted before ``GITHUB_TOKEN``. Surrounding whitespace
    is stripped, and a value that is blank after stripping counts as unset.

    Raises:
        RuntimeError: If neither variable holds a non-blank value.
    """
    candidates = (
        os.environ.get(variable, "").strip() for variable in ("GH_TOKEN", "GITHUB_TOKEN")
    )
    token = next((candidate for candidate in candidates if candidate), None)
    if token is None:
        raise RuntimeError("GitHub token required; set GH_TOKEN or GITHUB_TOKEN")
    return token


def _load_pull_request(repo: str, number: int) -> dict[str, Any]:
    """Fetch one pull request object from the GitHub REST API.

    Args:
        repo: Repository in ``owner/name`` form.
        number: Pull request number.

    Returns:
        The decoded JSON object returned by the API.

    Raises:
        ValueError: If ``repo`` is not in ``owner/name`` form.
        RuntimeError: If no token is configured, the request fails or times
            out, or the response is not a UTF-8 encoded JSON object.
    """
    owner, separator, name = repo.partition("/")
    if not (separator and owner and name):
        # Fail before any network call with a message that names the problem.
        # ValueError matches what the bare tuple-unpack raised for a missing "/".
        raise ValueError(f"repository must be in 'owner/name' form, got {repo!r}")

    url = f"https://api.github.com/repos/{owner}/{name}/pulls/{number}"
    request = urllib.request.Request(
        url,
        headers={
            "Authorization": f"Bearer {_github_token()}",
            "Accept": "application/vnd.github+json",
            "User-Agent": "mergework-pr-payment-language-check",
        },
    )
    try:
        with urllib.request.urlopen(request, timeout=DEFAULT_TIMEOUT_SECONDS) as response:
            payload = json.loads(response.read().decode("utf-8"))
    except (
        urllib.error.URLError,  # also covers HTTPError, which subclasses it
        TimeoutError,
        UnicodeDecodeError,  # body was not valid UTF-8
        json.JSONDecodeError,
    ) as exc:
        raise RuntimeError(f"failed to fetch PR #{number} from GitHub API: {exc}") from exc
    if not isinstance(payload, dict):
        raise RuntimeError(f"GitHub API returned non-object JSON for PR #{number}")
    return payload


def main(argv: list[str] | None = None) -> int:
parser = argparse.ArgumentParser(
description="Fail when a PR body uses premature payment/status wording."
)
source = parser.add_mutually_exclusive_group(required=True)
source.add_argument("--text-file", help="Read submission/PR body text from a file.")
source.add_argument("--repo", help="GitHub repository, for example ramimbo/mergework.")
parser.add_argument("--pr", type=int, help="Pull request number (required with --repo).")
parser.add_argument("--format", choices=["json", "text"], default="text")
parser.add_argument("--fail-on-issues", action="store_true")
args = parser.parse_args(argv)

if args.repo and args.pr is None:
parser.error("--pr is required when using --repo")
if args.text_file:
text = Path(args.text_file).read_text(encoding="utf-8")
context = {"source": "text_file", "pull_request": None}
else:
assert args.repo is not None and args.pr is not None
pr = _load_pull_request(args.repo, args.pr)
text = "\n".join(str(pr.get(key) or "") for key in ("title", "body"))
Comment on lines +71 to +72

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🎯 Functional Correctness | 🟠 Major | ⚡ Quick win

Check only the PR body in GitHub mode.

This CLI is described as a PR-body checker, but this path concatenates the title and body. A compliant PR body will still fail if the title says something like ban "paid" wording, which changes the gate's contract from the one described in this PR.

Suggested fix
-        text = "\n".join(str(pr.get(key) or "") for key in ("title", "body"))
+        text = str(pr.get("body") or "")
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
pr = _load_pull_request(args.repo, args.pr)
text = "\n".join(str(pr.get(key) or "") for key in ("title", "body"))
pr = _load_pull_request(args.repo, args.pr)
text = str(pr.get("body") or "")

context = {
"source": "github_api",
"pull_request": args.pr,
"url": pr.get("html_url"),
}

violations = find_payment_language_violations(text)
report = {"context": context, "violations": violations}
if args.format == "json":
print(json.dumps(report, indent=2, sort_keys=True))
else:
print(format_violation_report(violations))
return 1 if args.fail_on_issues and violations else 0


if __name__ == "__main__":
    # Run the CLI with the process arguments and exit with main()'s return code.
    sys.exit(main(sys.argv[1:]))
98 changes: 98 additions & 0 deletions scripts/public_payment_language.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
"""Detect premature payment/status wording in public submission text."""

from __future__ import annotations

import re

# Guidance text shown alongside violations to steer authors toward neutral wording.
SUGGESTED_REPLACEMENT = (
    "Use a neutral 'Submission status' section "
    "and note that acceptance and any later proof or ledger outcome "
    "are tracked by maintainers through the bounty issue and public rows."
)

# Matches the "Payout boundary" wording, case-insensitively, with any whitespace
# run between the two words.
_PAYOUT_BOUNDARY_RE = re.compile(
    r"payout\s+boundary",
    flags=re.IGNORECASE,
)
# Matches "not confirmed or withdrawable" / "not earned or withdrawable",
# case-insensitively.
_LEGACY_WITHDRAWABLE_RE = re.compile(
    r"not\s+(?:confirmed|earned)\s+or\s+withdrawable", flags=re.IGNORECASE
)

# Case-insensitive phrases used to allowlist a line (consumed by
# _line_is_allowlisted).
_ALLOWLIST_LINE_RES = tuple(
    re.compile(phrase, re.IGNORECASE)
    for phrase in (
        r"no payout execution",
        r"payment lifecycle",
        r"pay_bounty proposal",
        r"proof-backed",
        r"does not (?:create|execute|trigger|mutate)",
        r"pending payout",
        r"accepted for payout review",
        r"reserve(?:s|d)? words",
        r"do not (?:write|describe|claim)",
    )
)

# Case-insensitive patterns for reserved status words (paid / settled / received /
# withdrawable) in three shapes: after a verb or qualifier, before a
# claim/status/reward/payout noun, and after "<claim|submission|work> is/was".
_RESERVED_STATUS_ASSERTION_RES = tuple(
    re.compile(expression, re.IGNORECASE)
    for expression in (
        r"\b(?:is|was|are|were|already|marked as|considered)"
        r"\s+(?:paid|settled|received|withdrawable)\b",
        r"\b(?:paid|settled|received|withdrawable)"
        r"\s+(?:claim|status|reward|payout)\b",
        r"\b(?:claim|submission|work)\s+(?:is|was)"
        r"\s+(?:paid|settled|received|withdrawable)\b",
    )
)


def _line_is_allowlisted(line: str) -> bool:
    """Return True when any pattern in ``_ALLOWLIST_LINE_RES`` matches ``line``."""
    for allowed in _ALLOWLIST_LINE_RES:
        if allowed.search(line):
            return True
    return False


def find_payment_language_violations(text: str) -> list[str]:
"""Return human-readable violations for premature payment/status wording."""
if not text or not text.strip():
return []

violations: list[str] = []
if _PAYOUT_BOUNDARY_RE.search(text):
violations.append(
"deprecated 'Payout boundary' heading found; prefer neutral 'Submission status' wording"
)
if _LEGACY_WITHDRAWABLE_RE.search(text):
violations.append(
"legacy 'not confirmed or withdrawable' phrasing found; "
"use neutral submission status language"
)

for line in text.splitlines():
stripped = line.strip()
if not stripped or stripped.startswith("#"):
continue
if _line_is_allowlisted(line):
continue
for pattern in _RESERVED_STATUS_ASSERTION_RES:
if pattern.search(line):
violations.append(
f"reserved payment/status wording used as a claim assertion: {stripped[:120]}"
)
break
Comment on lines +52 to +79

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🎯 Functional Correctness | 🟠 Major | ⚡ Quick win

Run all payment-language rules through the same per-line classifier.

The initial whole-text search bypasses _line_is_allowlisted, so instructional lines like Do not write "not confirmed or withdrawable" still fail. Then stripped.startswith("#") skips headings entirely, so ## Claim is paid or ## Withdrawable status never reaches the reserved-assertion regexes. That gives this detector both false positives and false negatives.

Suggested fix
 def find_payment_language_violations(text: str) -> list[str]:
     """Return human-readable violations for premature payment/status wording."""
     if not text or not text.strip():
         return []

     violations: list[str] = []
-    if _PAYOUT_BOUNDARY_RE.search(text):
-        violations.append(
-            "deprecated 'Payout boundary' heading found; prefer neutral 'Submission status' wording"
-        )
-    if _LEGACY_WITHDRAWABLE_RE.search(text):
-        violations.append(
-            "legacy 'not confirmed or withdrawable' phrasing found; use neutral submission status language"
-        )

     for line in text.splitlines():
         stripped = line.strip()
-        if not stripped or stripped.startswith("#"):
+        if not stripped:
             continue
         if _line_is_allowlisted(line):
             continue
+        if _PAYOUT_BOUNDARY_RE.search(line):
+            violations.append(
+                "deprecated 'Payout boundary' heading found; prefer neutral 'Submission status' wording"
+            )
+        if _LEGACY_WITHDRAWABLE_RE.search(line):
+            violations.append(
+                "legacy 'not confirmed or withdrawable' phrasing found; use neutral submission status language"
+            )
         for pattern in _RESERVED_STATUS_ASSERTION_RES:
             if pattern.search(line):
                 violations.append(
                     "reserved payment/status wording used as a claim assertion: "
                     f"{stripped[:120]}"
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
def find_payment_language_violations(text: str) -> list[str]:
"""Return human-readable violations for premature payment/status wording."""
if not text or not text.strip():
return []
violations: list[str] = []
if _PAYOUT_BOUNDARY_RE.search(text):
violations.append(
"deprecated 'Payout boundary' heading found; prefer neutral 'Submission status' wording"
)
if _LEGACY_WITHDRAWABLE_RE.search(text):
violations.append(
"legacy 'not confirmed or withdrawable' phrasing found; use neutral submission status language"
)
for line in text.splitlines():
stripped = line.strip()
if not stripped or stripped.startswith("#"):
continue
if _line_is_allowlisted(line):
continue
for pattern in _RESERVED_STATUS_ASSERTION_RES:
if pattern.search(line):
violations.append(
"reserved payment/status wording used as a claim assertion: "
f"{stripped[:120]}"
)
break
def find_payment_language_violations(text: str) -> list[str]:
"""Return human-readable violations for premature payment/status wording."""
if not text or not text.strip():
return []
violations: list[str] = []
for line in text.splitlines():
stripped = line.strip()
if not stripped:
continue
if _line_is_allowlisted(line):
continue
if _PAYOUT_BOUNDARY_RE.search(line):
violations.append(
"deprecated 'Payout boundary' heading found; prefer neutral 'Submission status' wording"
)
if _LEGACY_WITHDRAWABLE_RE.search(line):
violations.append(
"legacy 'not confirmed or withdrawable' phrasing found; use neutral submission status language"
)
for pattern in _RESERVED_STATUS_ASSERTION_RES:
if pattern.search(line):
violations.append(
"reserved payment/status wording used as a claim assertion: "
f"{stripped[:120]}"
)
break


# Preserve order while deduplicating identical messages.
seen: set[str] = set()
unique: list[str] = []
for item in violations:
if item in seen:
continue
seen.add(item)
unique.append(item)
return unique


def format_violation_report(violations: list[str]) -> str:
    """Render violations as a plain-text report.

    Args:
        violations: Violation messages to list, in display order.

    Returns:
        A fixed "nothing found" sentence when ``violations`` is empty.
        Otherwise a header line, one ``- `` bullet per violation, and a
        closing ``Suggestion:`` line, joined with newlines.
    """
    if violations:
        bullets = [f"- {entry}" for entry in violations]
        return "\n".join(
            [
                "Premature payment/status wording:",
                *bullets,
                f"Suggestion: {SUGGESTED_REPLACEMENT}",
            ]
        )
    return "No premature payment/status wording found."
25 changes: 25 additions & 0 deletions scripts/submission_quality_gate.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@

from scripts.api_host_args import public_api_host
from scripts.bounty_refs import BOUNTY_REF_RE, GITHUB_LINKED_ISSUE_RE, LEADING_BOUNTY_REF_RE
from scripts.public_payment_language import (
SUGGESTED_REPLACEMENT,
find_payment_language_violations,
)


def _non_negative_int(value: str) -> int:
Expand Down Expand Up @@ -481,6 +485,27 @@ def evaluate_submission(data: dict[str, Any]) -> dict[str, Any]:
)
)

payment_violations = find_payment_language_violations(text)
if payment_violations:
preview = "; ".join(payment_violations[:2])
if len(payment_violations) > 2:
preview += f"; +{len(payment_violations) - 2} more"
checks.append(
_check(
"payment_status_language",
"fail",
f"{preview}. {SUGGESTED_REPLACEMENT}",
)
)
else:
checks.append(
_check(
"payment_status_language",
"pass",
"no premature payment/status wording found",
)
)

similar = _similar_open_prs(pull_requests, bounty_ref, _title_from_submission(text))
if similar:
checks.append(
Expand Down
37 changes: 37 additions & 0 deletions tests/test_public_payment_language.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from __future__ import annotations

from scripts.public_payment_language import find_payment_language_violations


def test_payout_boundary_heading_is_flagged() -> None:
    """Some reported violation mentions the heading and some the legacy phrasing."""
    body = "\n".join(
        [
            "## Payout boundary",
            "This work is not confirmed or withdrawable.",
        ]
    )
    found = find_payment_language_violations(body)
    for fragment in ("Payout boundary", "not confirmed or withdrawable"):
        assert any(fragment in message for message in found)


def test_neutral_submission_status_passes() -> None:
    """A neutral 'Submission status' section yields no violations."""
    body = (
        "\n"
        "## Submission status\n"
        "Maintainer acceptance and any later proof or ledger outcome are tracked separately.\n"
    )
    found = find_payment_language_violations(body)
    assert found == []


def test_technical_scope_without_payout_execution_passes() -> None:
    """A scope note saying 'No payout execution changes' yields no violations."""
    found = find_payment_language_violations(
        "No payout execution changes. Read-only maintenance scripts only."
    )
    assert found == []


def test_reserved_paid_status_assertion_is_flagged() -> None:
    """Stating that a submission 'is paid' yields at least one violation."""
    claim = "This submission is paid and withdrawable once merged."
    assert find_payment_language_violations(claim)


def test_lifecycle_docs_wording_passes() -> None:
    """Instructional wording that quotes the reserved words yields no violations."""
    sentences = [
        'Do not write "paid", "settled", "received", or "withdrawable" in intake updates.',
        "Pending payout proposals are accepted for payout review, not proof-backed payment.",
    ]
    found = find_payment_language_violations(" ".join(sentences))
    assert found == []
Comment thread
coderabbitai[bot] marked this conversation as resolved.
60 changes: 60 additions & 0 deletions tests/test_submission_quality_gate.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ def test_submission_quality_gate_passes_open_bounty_with_evidence(capsys, tmp_pa
"summary_present": "pass",
"evidence_present": "pass",
"similar_open_pr": "pass",
"payment_status_language": "pass",
}

input_path = tmp_path / "submission.json"
Expand Down Expand Up @@ -481,6 +482,65 @@ def test_submission_quality_gate_warns_for_missing_evidence() -> None:
} in result["checks"]


def test_submission_quality_gate_fails_payout_boundary_wording() -> None:
    """A submission using the 'Payout boundary' heading fails the gate."""
    submission_text = "\n".join(
        [
            "",
            "Summary: tighten validation",
            "Refs #319",
            "## Payout boundary",
            "This work is not confirmed or withdrawable.",
            "Validation: pytest passed.",
            "",
        ]
    )
    payload = {
        "submission_text": submission_text,
        "bounties": [{"number": 319, "state": "OPEN", "awards_remaining": 1}],
        "pull_requests": [],
    }

    outcome = evaluate_submission(payload)

    assert outcome["status"] == "fail"
    payment_check = next(
        check for check in outcome["checks"] if check["name"] == "payment_status_language"
    )
    assert payment_check["status"] == "fail"
    assert "Payout boundary" in payment_check["message"]


def test_submission_quality_gate_fails_reserved_paid_status_assertion() -> None:
    """A submission asserting it 'is paid and withdrawable' fails the gate."""
    submission_text = "\n".join(
        [
            "",
            "Summary: docs tweak",
            "Refs #319",
            "This submission is paid and withdrawable once merged.",
            "Validation: pytest passed.",
            "",
        ]
    )
    payload = {
        "submission_text": submission_text,
        "bounties": [{"number": 319, "state": "OPEN", "awards_remaining": 1}],
        "pull_requests": [],
    }

    outcome = evaluate_submission(payload)

    assert outcome["status"] == "fail"
    payment_check = next(
        check for check in outcome["checks"] if check["name"] == "payment_status_language"
    )
    assert payment_check["status"] == "fail"


def test_submission_quality_gate_passes_neutral_submission_status_wording() -> None:
    """A neutral 'Submission status' section passes the payment-language check."""
    submission_text = "\n".join(
        [
            "",
            "Summary: add guard",
            "Refs #319",
            "## Submission status",
            "Maintainer acceptance and any later proof or ledger outcome are tracked separately.",
            "Validation: pytest passed.",
            "",
        ]
    )
    payload = {
        "submission_text": submission_text,
        "bounties": [{"number": 319, "state": "OPEN", "awards_remaining": 1}],
        "pull_requests": [],
    }

    outcome = evaluate_submission(payload)

    payment_check = next(
        check for check in outcome["checks"] if check["name"] == "payment_status_language"
    )
    assert payment_check["status"] == "pass"


def test_submission_quality_gate_warns_for_similar_open_pr() -> None:
result = evaluate_submission(
{
Expand Down
Loading