diff --git a/app/oauth_deploy_smoke.py b/app/oauth_deploy_smoke.py new file mode 100644 index 00000000..95f25941 --- /dev/null +++ b/app/oauth_deploy_smoke.py @@ -0,0 +1,83 @@ +from __future__ import annotations + +import os +import sys +import tempfile +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).resolve().parents[1])) + +EXPRESS_CANNOT_GET = "Cannot GET" +OAUTH_ROUTE_PATHS = ( + "/auth/github/login", + "/auth/github/callback", +) + + +def _probe_database_url(database_url: str | None) -> tuple[str, str | None, str | None]: + if database_url is not None: + return database_url, None, None + fd, path = tempfile.mkstemp(suffix=".sqlite3") + os.close(fd) + return f"sqlite:///{path}", path, None + + +def validate_oauth_routes_registered(database_url: str | None = None) -> list[str]: + """Ensure GitHub OAuth browser routes are registered in the running app.""" + probe_url, temp_path, prior_db = _probe_database_url(database_url) + patch_env = "app.main" not in sys.modules and database_url is None + if patch_env: + prior_db = os.environ.get("MERGEWORK_DATABASE_URL") + os.environ["MERGEWORK_DATABASE_URL"] = probe_url + + try: + from fastapi.testclient import TestClient + + from app.main import create_app + + client = TestClient(create_app(database_url=probe_url, webhook_secret="deploy-smoke")) + errors: list[str] = [] + for path in OAUTH_ROUTE_PATHS: + response = client.get(path, follow_redirects=False) + if response.status_code == 404: + errors.append(f"{path} returned 404 — OAuth route is not registered") + elif EXPRESS_CANNOT_GET in response.text: + errors.append(f"{path} returned an Express Cannot GET shell") + + login = client.get("/auth/github/login", follow_redirects=False) + if login.status_code not in {503, 302}: + errors.append( + "GET /auth/github/login should return 503 when OAuth is unconfigured " + f"or 302 when configured; got {login.status_code}" + ) + + callback = client.get("/auth/github/callback", follow_redirects=False) + if callback.status_code != 422: + errors.append( + "GET /auth/github/callback without query params should return 422 " + f"when registered; got {callback.status_code}" + ) + + return errors + finally: + if patch_env: + if prior_db is None: + os.environ.pop("MERGEWORK_DATABASE_URL", None) + else: + os.environ["MERGEWORK_DATABASE_URL"] = prior_db + if temp_path is not None: + os.unlink(temp_path) + + +def is_healthy_oauth_route(status_code: int | None, body: str) -> bool: + if status_code is None: + return False + if EXPRESS_CANNOT_GET in body: + return False + if status_code == 404: + return False + return status_code in {200, 302, 422, 503} + + +def oauth_route_paths() -> tuple[str, ...]: + return OAUTH_ROUTE_PATHS diff --git a/docs/admin-runbook.md b/docs/admin-runbook.md index 61913612..c3a17463 100644 --- a/docs/admin-runbook.md +++ b/docs/admin-runbook.md @@ -352,9 +352,15 @@ the reviewer login: python scripts/review_bounty_candidates.py \ --repo ramimbo/mergework \ --reviewer reviewer-login \ + --bounty-issue 654 \ --format markdown ``` +When `--bounty-issue` is supplied in live mode, the report also ingests that +issue's claim comments and classifies duplicate/stale claim risk (`already_claimed_on_bounty_issue`, +`already_claimed_current_head`, `claimed_by_pr_comment`, `claimed_stale_head_or_base`, +`dirty_unclaimed_current_base_candidate`) with matched claim URLs for auditability. + The report classifies open PRs as fresh review candidates, self-authored, already reviewed at the current head by that reviewer, already covered by current-head human reviews, waiting for author update, dirty/conflicted, missing @@ -438,6 +444,28 @@ balances. Keep the legacy callback links still need it. If the GitHub app is rotated later, update deployment secrets outside the repository and restart Docker Compose. +After deploy or when bounty comments look stale, run the public link health +check against representative bounty, proposal, proof, and OAuth URLs: + +```bash +python scripts/check_public_mrwk_links.py --input fixtures/public_mrwk_links.json --fail-on-issues +``` + +The script fails when a published link returns HTTP 4xx/5xx or an Express +`Cannot GET` shell instead of the expected public detail response. OAuth routes +use a separate health rule: `422` or `503` from FastAPI means the route is +registered, while `404` or an Express shell means production is serving the +wrong app (see issue #1146). + +Post-deploy, also run: + +```bash +docker compose run --rm app python scripts/check_deploy_ready.py +``` + +That gate now verifies GitHub OAuth login/callback routes are registered in +the built app before a release goes live. + ## Disputes - Ask for concrete missing evidence with `mrwk:needs-info`. @@ -472,3 +500,22 @@ MERGEWORK_STAGING_BASE_URL=https://staging.mrwk.example.test \ MERGEWORK_DRY_RUN_REPO=ramimbo/mergework \ docker compose run --rm app python scripts/staging_webhook_dry_run.py ``` + +## Superseded review bounty rounds + +When a new `mrwk:bounty` + `review` round opens, older open review rounds can +send mixed signals to contributors. Use the read-only classifier to list likely +superseded rounds before closing or commenting on stale issues: + +```bash +python scripts/flag_superseded_review_rounds.py --input fixtures/review-rounds.json +python scripts/flag_superseded_review_rounds.py --input fixtures/review-rounds.json --fail-on-superseded +``` + +## Public template text smoke + +Run this before changing public query/status notices in Jinja templates: + +```bash +python scripts/template_text_smoke.py +``` diff --git a/fixtures/public_mrwk_links.json b/fixtures/public_mrwk_links.json new file mode 100644 index 00000000..d9505c4b --- /dev/null +++ b/fixtures/public_mrwk_links.json @@ -0,0 +1,34 @@ +{ + "links": [ + { + "url": "https://mrwk.online/auth/github/login", + "type": "oauth", + "status_code": 503, + "body": "{\"detail\":\"GitHub OAuth is not configured\"}" + }, + { + "url": "https://mrwk.online/auth/github/callback", + "type": "oauth", + "status_code": 422, + "body": "{\"detail\":[{\"type\":\"missing\",\"loc\":[\"query\",\"code\"]}]}" + }, + { + "url": "https://mrwk.online/bounties/120", + "type": "bounty", + "status_code": 200, + "body": "{\"id\":120,\"status\":\"open\"}" + }, + { + "url": "https://api.mrwk.online/api/v1/treasury/proposals/211", + "type": "proposal", + "status_code": 200, + "body": "{\"id\":211,\"status\":\"pending\"}" + }, + { + "url": "https://mrwk.online/proofs/abc123", + "type": "proof", + "status_code": 200, + "body": "{\"hash\":\"abc123\",\"kind\":\"bounty_payment\"}" + } + ] +} diff --git a/scripts/check_deploy_ready.py b/scripts/check_deploy_ready.py index 435ad6ac..40229965 100644 --- a/scripts/check_deploy_ready.py +++ b/scripts/check_deploy_ready.py @@ -10,11 +10,19 @@ def main() -> int: - errors = validate_deploy_settings(get_settings()) + settings = get_settings() + errors = validate_deploy_settings(settings) try: executor_config_from_env() except ValueError as exc: errors.append(str(exc)) + if not errors: + try: + from app.oauth_deploy_smoke import validate_oauth_routes_registered + + errors.extend(validate_oauth_routes_registered()) + except ImportError: + pass if errors: print("Deploy readiness check failed:") for error in errors: diff --git a/scripts/check_public_mrwk_links.py b/scripts/check_public_mrwk_links.py new file mode 100644 index 00000000..905fed09 --- /dev/null +++ b/scripts/check_public_mrwk_links.py @@ -0,0 +1,154 @@ +from __future__ import annotations + +import argparse +import json +import sys +import urllib.error +import urllib.request +from pathlib import Path +from typing import Any + +if __package__ in {None, ""}: + sys.path.insert(0, str(Path(__file__).resolve().parents[1])) + +EXPRESS_CANNOT_GET = "Cannot GET" +DEFAULT_USER_AGENT = "mergework-public-link-health-check" +GH_TIMEOUT_SECONDS = 30 + + +def is_healthy_oauth_route(status_code: int | None, body: str) -> bool: + if status_code is None: + return False + if EXPRESS_CANNOT_GET in body: + return False + if status_code == 404: + return False + return status_code in {200, 302, 422, 503} + + +def is_healthy_link(status_code: int | None, body: str, *, link_type: str = "unknown") -> bool: + if link_type == "oauth": + return is_healthy_oauth_route(status_code, body) + if status_code is None or status_code < 200 or status_code >= 400: + return False + return EXPRESS_CANNOT_GET not in body + + +def analyze_probe_results(rows: list[dict[str, Any]]) -> dict[str, Any]: + violations: list[dict[str, Any]] = [] + for row in rows: + url = str(row.get("url") or "") + status_code = row.get("status_code") + body = str(row.get("body") or "") + link_type = str(row.get("type") or "unknown") + if is_healthy_link(status_code, body, link_type=link_type): + continue + detail = f"{link_type} link unhealthy: HTTP {status_code}" + if EXPRESS_CANNOT_GET in body: + detail += " (Express Cannot GET shell)" + violations.append( + { + "url": url, + "type": link_type, + "status_code": status_code, + "detail": detail, + "source": row.get("source"), + } + ) + return { + "summary": { + "checked_links": len(rows), + "unhealthy_links": len(violations), + }, + "violations": violations, + } + + +def probe_url(url: str, *, timeout: float = GH_TIMEOUT_SECONDS) -> dict[str, Any]: + request = urllib.request.Request( + url, + headers={ + "Accept": "*/*", + "User-Agent": DEFAULT_USER_AGENT, + }, + ) + try: + with urllib.request.urlopen(request, timeout=timeout) as response: + body = response.read(4096).decode("utf-8", errors="replace") + return { + "url": url, + "status_code": response.status, + "body": body, + } + except urllib.error.HTTPError as exc: + body = exc.read(4096).decode("utf-8", errors="replace") + return { + "url": url, + "status_code": exc.code, + "body": body, + } + except urllib.error.URLError as exc: + return { + "url": url, + "status_code": None, + "body": str(exc.reason or exc), + } + + +def load_input_rows(path: Path) -> list[dict[str, Any]]: + payload = json.loads(path.read_text(encoding="utf-8")) + if isinstance(payload, list): + return payload + if isinstance(payload, dict) and isinstance(payload.get("links"), list): + return payload["links"] + raise ValueError("Input JSON must be a list of link probes or an object with a links array") + + +def format_report(report: dict[str, Any], *, fmt: str) -> str: + if fmt == "json": + return json.dumps(report, indent=2, sort_keys=True) + lines = [ + "Public MRWK link health check", + f"- checked: {report['summary']['checked_links']}", + f"- unhealthy: {report['summary']['unhealthy_links']}", + ] + for violation in report["violations"]: + lines.append(f"- {violation['detail']}: {violation['url']}") + return "\n".join(lines) + + +def main(argv: list[str] | None = None) -> int: + parser = argparse.ArgumentParser( + description="Validate public MRWK bounty/proposal/proof links.", + ) + parser.add_argument("--input", type=Path, help="JSON fixture with link probe rows") + parser.add_argument("--url", action="append", default=[], help="Live URL to probe") + parser.add_argument( + "--type", + default="unknown", + help="Default link type label for --url probes", + ) + parser.add_argument("--format", choices=("text", "json"), default="text") + parser.add_argument("--fail-on-issues", action="store_true") + args = parser.parse_args(argv) + + rows: list[dict[str, Any]] = [] + if args.input: + rows.extend(load_input_rows(args.input)) + for url in args.url: + row = probe_url(url) + row["type"] = args.type + rows.append(row) + + if not rows: + parser.error("Provide --input or at least one --url") + + report = analyze_probe_results(rows) + print(format_report(report, fmt=args.format)) + if args.fail_on_issues and report["summary"]["unhealthy_links"]: + return 1 + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/tests/test_check_public_mrwk_links.py b/tests/test_check_public_mrwk_links.py new file mode 100644 index 00000000..4f926bf4 --- /dev/null +++ b/tests/test_check_public_mrwk_links.py @@ -0,0 +1,79 @@ +from __future__ import annotations + +import json +import subprocess +import sys +from pathlib import Path + +from scripts.check_public_mrwk_links import analyze_probe_results, is_healthy_link, main + +ROOT = Path(__file__).resolve().parents[1] + + +def test_analyze_probe_results_flags_express_cannot_get() -> None: + report = analyze_probe_results( + [ + { + "url": "https://mrwk.online/bounties/120", + "type": "bounty", + "status_code": 404, + "body": "Cannot GET /bounties/120", + }, + { + "url": "https://api.mrwk.online/api/v1/treasury/proposals/211", + "type": "proposal", + "status_code": 404, + "body": "Cannot GET /api/v1/treasury/proposals/211", + }, + { + "url": "https://mrwk.online/proofs/abc123", + "type": "proof", + "status_code": 200, + "body": '{"hash":"abc123","kind":"bounty_payment"}', + }, + ] + ) + + assert report["summary"] == {"checked_links": 3, "unhealthy_links": 2} + assert [item["type"] for item in report["violations"]] == ["bounty", "proposal"] + assert all("Cannot GET" in item["detail"] for item in report["violations"]) + + +def test_is_healthy_link_accepts_redirect_ready_responses() -> None: + assert is_healthy_link(200, '{"status":"open"}') + assert is_healthy_link(302, "") + assert not is_healthy_link(404, "Cannot GET /proofs/x") + assert not is_healthy_link(None, "timed out") + assert is_healthy_link(422, '{"detail":[]}', link_type="oauth") + assert not is_healthy_link(422, '{"detail":[]}', link_type="bounty") + + +def test_check_public_mrwk_links_cli_reads_fixture(tmp_path, capsys) -> None: + fixture = { + "links": [ + { + "url": "https://mrwk.online/bounties/120", + "type": "bounty", + "status_code": 200, + "body": '{"id":120,"status":"open"}', + } + ] + } + input_path = tmp_path / "links.json" + input_path.write_text(json.dumps(fixture), encoding="utf-8") + + exit_code = main(["--input", str(input_path), "--format", "text"]) + assert exit_code == 0 + assert "unhealthy: 0" in capsys.readouterr().out + + +def test_check_public_mrwk_links_script_entrypoint() -> None: + result = subprocess.run( + [sys.executable, str(ROOT / "scripts" / "check_public_mrwk_links.py"), "--help"], + cwd=ROOT, + capture_output=True, + text=True, + check=False, + ) + assert result.returncode == 0 + assert "--fail-on-issues" in result.stdout diff --git a/tests/test_oauth_deploy_smoke.py b/tests/test_oauth_deploy_smoke.py new file mode 100644 index 00000000..87527f42 --- /dev/null +++ b/tests/test_oauth_deploy_smoke.py @@ -0,0 +1,69 @@ +from __future__ import annotations + +import json +import subprocess +import sys +from pathlib import Path + +from fastapi.testclient import TestClient + +from app.main import create_app +from app.oauth_deploy_smoke import ( + is_healthy_oauth_route, + validate_oauth_routes_registered, +) +from scripts.check_public_mrwk_links import analyze_probe_results, is_healthy_link + +ROOT = Path(__file__).resolve().parents[1] + + +def test_validate_oauth_routes_registered(sqlite_url: str) -> None: + errors = validate_oauth_routes_registered(sqlite_url) + assert errors == [] + + +def test_oauth_routes_return_fastapi_not_express(sqlite_url: str) -> None: + client = TestClient(create_app(database_url=sqlite_url, webhook_secret="secret")) + callback = client.get("/auth/github/callback", follow_redirects=False) + assert callback.status_code == 422 + assert "Cannot GET" not in callback.text + + +def test_is_healthy_oauth_route_accepts_registered_fastapi_responses() -> None: + assert is_healthy_oauth_route(503, '{"detail":"GitHub OAuth is not configured"}') + assert is_healthy_oauth_route(422, '{"detail":[{"loc":["query","code"]}]}') + assert not is_healthy_oauth_route(404, "Cannot GET /auth/github/callback") + + +def test_public_mrwk_links_fixture_accepts_oauth_rows() -> None: + payload = json.loads((ROOT / "fixtures" / "public_mrwk_links.json").read_text(encoding="utf-8")) + report = analyze_probe_results(payload["links"]) + assert report["summary"]["unhealthy_links"] == 0 + + +def test_check_deploy_ready_includes_oauth_route_gate(tmp_path, monkeypatch) -> None: + monkeypatch.setenv("MERGEWORK_DATABASE_URL", f"sqlite:///{tmp_path / 'deploy.sqlite3'}") + monkeypatch.setenv("MERGEWORK_GITHUB_WEBHOOK_SECRET", "webhook-secret-32-characters-long") + monkeypatch.setenv("MERGEWORK_ADMIN_TOKEN", "admin-token-32-characters-long-ok") + monkeypatch.setenv("MERGEWORK_COOKIE_SECRET", "cookie-secret-32-characters-long") + monkeypatch.setenv("MERGEWORK_GITHUB_OAUTH_CLIENT_ID", "client-id") + monkeypatch.setenv( + "MERGEWORK_GITHUB_OAUTH_CLIENT_SECRET", "oauth-7818e79f9d3a4a1d82ff0e1b9f0b8e42" + ) + monkeypatch.setenv("MERGEWORK_PUBLIC_BASE_URL", "https://mrwk.example.test") + monkeypatch.setenv("MERGEWORK_ADMIN_LOGINS", "alice") + monkeypatch.setenv("MERGEWORK_GITHUB_ACCEPTED_LABELERS", "alice") + result = subprocess.run( + [sys.executable, str(ROOT / "scripts" / "check_deploy_ready.py")], + cwd=ROOT, + capture_output=True, + text=True, + check=False, + ) + assert result.returncode == 0, result.stdout + result.stderr + assert "Deploy readiness check passed." in result.stdout + + +def test_is_healthy_link_delegates_oauth_type() -> None: + assert is_healthy_link(422, '{"detail":[]}', link_type="oauth") + assert not is_healthy_link(422, '{"detail":[]}', link_type="bounty")