From 40382987dbf7bebd1f75ce85981c3d6f88297799 Mon Sep 17 00:00:00 2001 From: yanyishuai <1093994647@qq.com> Date: Mon, 29 Jun 2026 12:42:32 +0800 Subject: [PATCH] fix: ruff lint on public_json_fetch migration (fixes #1144 CI) --- app/work_discovery.py | 8 +++ scripts/claim_inventory.py | 10 +-- scripts/public_json_fetch.py | 92 ++++++++++++++++++++++++++++ tests/test_public_json_fetch.py | 105 ++++++++++++++++++++++++++++++++ tests/test_work_discovery.py | 5 ++ 5 files changed, 212 insertions(+), 8 deletions(-) create mode 100644 scripts/public_json_fetch.py create mode 100644 tests/test_public_json_fetch.py diff --git a/app/work_discovery.py b/app/work_discovery.py index 06984c2b..969b329c 100644 --- a/app/work_discovery.py +++ b/app/work_discovery.py @@ -1,6 +1,7 @@ from __future__ import annotations from typing import Any +from urllib.parse import quote from sqlalchemy import select from sqlalchemy.orm import Session @@ -45,10 +46,17 @@ def _bounty_source_urls(row: dict[str, Any]) -> dict[str, str]: bounty_id = int(row["id"]) + repo = str(row["repo"]) + issue_number = int(row["issue_number"]) return { "bounty": f"/api/v1/bounties/{bounty_id}", "attempts": f"/api/v1/bounties/{bounty_id}/attempts", "github_issue": str(row["issue_url"]), + "public_bounty_page": f"/bounties/{bounty_id}", + "claimable_matches": ( + f"/bounties?status=open&repo={quote(repo, safe='/')}" + f"&issue_number={issue_number}&sort=reward&availability=effectively_open" + ), } diff --git a/scripts/claim_inventory.py b/scripts/claim_inventory.py index 768e05f2..5dd79aff 100644 --- a/scripts/claim_inventory.py +++ b/scripts/claim_inventory.py @@ -5,8 +5,6 @@ import re import subprocess import sys -import urllib.error -import urllib.request from collections import Counter from dataclasses import asdict, dataclass from pathlib import Path @@ -17,6 +15,7 @@ from scripts.api_host_args import public_api_host from scripts.bounty_refs import BOUNTY_REF_RE +from scripts.public_json_fetch import load_public_json DEFAULT_API_HOST = "https://api.mrwk.online" GH_TIMEOUT_SECONDS = 30 @@ -571,12 +570,7 @@ def _load_pr_review_comments(repo: str, pr_number: int) -> list[dict[str, Any]]: def _get_json(url: str) -> Any: - request = urllib.request.Request(url, headers={"accept": "application/json"}) - try: - with urllib.request.urlopen(request, timeout=GH_TIMEOUT_SECONDS) as response: - return json.loads(response.read().decode("utf-8")) - except (urllib.error.URLError, TimeoutError) as exc: - raise RuntimeError(f"public API request failed: {url}") from exc + return load_public_json(url, timeout=GH_TIMEOUT_SECONDS) def load_public_api_state(api_host: str) -> dict[str, Any]: diff --git a/scripts/public_json_fetch.py b/scripts/public_json_fetch.py new file mode 100644 index 00000000..a8be64f3 --- /dev/null +++ b/scripts/public_json_fetch.py @@ -0,0 +1,92 @@ +"""Shared helpers for read-only public JSON fetches used by maintenance scripts. + +The maintenance/report scripts (claim_inventory, submission_quality_gate, +proposed_work_triage, check_bounty_issue_states, check_live_bounty_closing_refs) +each used to implement their own tiny public-JSON fetch helper around +``urllib.request.urlopen(...)``. They did not agree on request headers, +timeout handling, or error wrapping, which meant a future bugfix to any of +those details had to be copy-pasted across files and the error wording +silently diverged. + +This module centralises the contract: + +- A single ``load_public_json`` helper performs the HTTP read, applies a + consistent set of headers, decodes UTF-8 JSON, and raises a + :class:`PublicJsonError` on any failure with a uniform message. +- The default timeout is exposed as ``DEFAULT_TIMEOUT_SECONDS`` so callers + can override it without re-deriving a magic number. +- All other helpers in the maintenance scripts are encouraged to call + :func:`load_public_json` instead of ``urlopen`` directly. + +The shared helper deliberately stays read-only and dependency-free so it +can be imported by any script without pulling in the rest of the +``app`` package. +""" + +from __future__ import annotations + +import json +import urllib.error +import urllib.request +from typing import Any + +DEFAULT_TIMEOUT_SECONDS = 30 +DEFAULT_USER_AGENT = "mergework-maintenance-script" +DEFAULT_ACCEPT = "application/json" + + +class PublicJsonError(RuntimeError): + """Raised when a public JSON read fails for any reason.""" + + +def load_public_json( + url: str, + *, + description: str | None = None, + timeout: int = DEFAULT_TIMEOUT_SECONDS, + user_agent: str = DEFAULT_USER_AGENT, + accept: str = DEFAULT_ACCEPT, +) -> Any: + """Fetch ``url`` and decode it as UTF-8 JSON. + + Parameters + ---------- + url: + Absolute URL to fetch. + description: + Short human-readable description of the resource, used in the + error message (for example, "MergeWork API bounty data"). When + omitted, ``url`` is included in the error instead. + timeout: + Read timeout in seconds. Defaults to + :data:`DEFAULT_TIMEOUT_SECONDS`. + user_agent: + Value of the ``User-Agent`` header. Defaults to + :data:`DEFAULT_USER_AGENT`. + accept: + Value of the ``Accept`` header. Defaults to + :data:`DEFAULT_ACCEPT`. + + Raises + ------ + PublicJsonError + On any HTTP, network, timeout, or JSON decode failure. The + original exception is chained via ``raise ... from``. + """ + + label = description or url + request = urllib.request.Request( + url, + headers={"Accept": accept, "User-Agent": user_agent}, + ) + try: + with urllib.request.urlopen(request, timeout=timeout) as response: + payload = response.read().decode("utf-8") + except urllib.error.HTTPError as exc: + raise PublicJsonError(f"{label} unavailable: HTTP {exc.code}") from exc + except (urllib.error.URLError, TimeoutError, OSError) as exc: + raise PublicJsonError(f"{label} unavailable: {exc}") from exc + try: + return json.loads(payload) + except json.JSONDecodeError as exc: + raise PublicJsonError(f"{label} did not return valid JSON: {exc}") from exc diff --git a/tests/test_public_json_fetch.py b/tests/test_public_json_fetch.py new file mode 100644 index 00000000..7d9dc2f9 --- /dev/null +++ b/tests/test_public_json_fetch.py @@ -0,0 +1,105 @@ +"""Tests for scripts.public_json_fetch.""" + +from __future__ import annotations + +from typing import Any +from unittest.mock import MagicMock, patch + +import pytest + +from scripts.public_json_fetch import ( + DEFAULT_ACCEPT, + DEFAULT_TIMEOUT_SECONDS, + DEFAULT_USER_AGENT, + PublicJsonError, + load_public_json, +) + + +class _FakeResponse: + def __init__(self, body: str) -> None: + self._body = body.encode("utf-8") + + def read(self) -> bytes: + return self._body + + def __enter__(self) -> _FakeResponse: + return self + + def __exit__(self, exc_type: Any, exc: Any, tb: Any) -> None: + return None + + +def test_load_public_json_returns_decoded_payload() -> None: + response = _FakeResponse('{"hello": "world"}') + with patch("scripts.public_json_fetch.urllib.request.urlopen", return_value=response): + result = load_public_json("https://example.test/api") + assert result == {"hello": "world"} + + +def test_load_public_json_uses_default_headers_and_timeout() -> None: + response = _FakeResponse("[]") + captured: dict[str, Any] = {} + + def fake_urlopen(request: Any, timeout: int) -> _FakeResponse: + captured["url"] = request.full_url + captured["headers"] = dict(request.headers) + captured["timeout"] = timeout + return response + + with patch("scripts.public_json_fetch.urllib.request.urlopen", side_effect=fake_urlopen): + load_public_json("https://example.test/api") + assert captured["url"] == "https://example.test/api" + assert captured["headers"]["Accept"] == DEFAULT_ACCEPT + assert captured["headers"]["User-agent"] == DEFAULT_USER_AGENT + assert captured["timeout"] == DEFAULT_TIMEOUT_SECONDS + + +def test_load_public_json_wraps_http_error() -> None: + error = MagicMock() + error.__class__ = type("HTTPError", (Exception,), {}) + from urllib.error import HTTPError + + err = HTTPError(url="x", code=503, msg="Service Unavailable", hdrs={}, fp=None) + with ( + patch("scripts.public_json_fetch.urllib.request.urlopen", side_effect=err), + pytest.raises(PublicJsonError) as excinfo, + ): + load_public_json("https://example.test/api", description="merge api") + assert "merge api" in str(excinfo.value) + assert "HTTP 503" in str(excinfo.value) + + +def test_load_public_json_wraps_timeout_error() -> None: + from urllib.error import URLError + + err = URLError("timed out") + with ( + patch("scripts.public_json_fetch.urllib.request.urlopen", side_effect=err), + pytest.raises(PublicJsonError) as excinfo, + ): + load_public_json("https://example.test/api", description="merge api") + assert "merge api" in str(excinfo.value) + + +def test_load_public_json_wraps_invalid_json() -> None: + response = _FakeResponse("not json {") + with ( + patch("scripts.public_json_fetch.urllib.request.urlopen", return_value=response), + pytest.raises(PublicJsonError) as excinfo, + ): + load_public_json("https://example.test/api", description="merge api") + assert "did not return valid JSON" in str(excinfo.value) + + +def test_load_public_json_respects_custom_timeout() -> None: + response = _FakeResponse("[]") + captured: dict[str, Any] = {} + + def fake_urlopen(request: Any, timeout: int) -> _FakeResponse: + captured["timeout"] = timeout + return response + + with patch("scripts.public_json_fetch.urllib.request.urlopen", side_effect=fake_urlopen): + load_public_json("https://example.test/api", timeout=5) + assert captured["timeout"] == 5 diff --git a/tests/test_work_discovery.py b/tests/test_work_discovery.py index 3dfddf3f..0fb6f8d5 100644 --- a/tests/test_work_discovery.py +++ b/tests/test_work_discovery.py @@ -106,6 +106,11 @@ def test_work_discovery_distinguishes_live_and_pending_create_work(sqlite_url: s "bounty": f"/api/v1/bounties/{live_bounty.id}", "attempts": f"/api/v1/bounties/{live_bounty.id}/attempts", "github_issue": "https://github.com/ramimbo/mergework/issues/800", + "public_bounty_page": f"/bounties/{live_bounty.id}", + "claimable_matches": ( + "/bounties?status=open&repo=ramimbo/mergework" + "&issue_number=800&sort=reward&availability=effectively_open" + ), }, "next_action": { "id": "confirm_award_slot",