Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions app/work_discovery.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

from typing import Any
from urllib.parse import quote

from sqlalchemy import select
from sqlalchemy.orm import Session
Expand Down Expand Up @@ -45,10 +46,17 @@

def _bounty_source_urls(row: dict[str, Any]) -> dict[str, str]:
bounty_id = int(row["id"])
repo = str(row["repo"])
issue_number = int(row["issue_number"])
return {
"bounty": f"/api/v1/bounties/{bounty_id}",
"attempts": f"/api/v1/bounties/{bounty_id}/attempts",
"github_issue": str(row["issue_url"]),
"public_bounty_page": f"/bounties/{bounty_id}",
"claimable_matches": (
f"/bounties?status=open&repo={quote(repo, safe='/')}"
f"&issue_number={issue_number}&sort=reward&availability=effectively_open"
),
}


Expand Down
10 changes: 2 additions & 8 deletions scripts/claim_inventory.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@
import re
import subprocess
import sys
import urllib.error
import urllib.request
from collections import Counter
from dataclasses import asdict, dataclass
from pathlib import Path
Expand All @@ -17,6 +15,7 @@

from scripts.api_host_args import public_api_host
from scripts.bounty_refs import BOUNTY_REF_RE
from scripts.public_json_fetch import load_public_json

DEFAULT_API_HOST = "https://api.mrwk.online"
GH_TIMEOUT_SECONDS = 30
Expand Down Expand Up @@ -571,12 +570,7 @@ def _load_pr_review_comments(repo: str, pr_number: int) -> list[dict[str, Any]]:


def _get_json(url: str) -> Any:
request = urllib.request.Request(url, headers={"accept": "application/json"})
try:
with urllib.request.urlopen(request, timeout=GH_TIMEOUT_SECONDS) as response:
return json.loads(response.read().decode("utf-8"))
except (urllib.error.URLError, TimeoutError) as exc:
raise RuntimeError(f"public API request failed: {url}") from exc
return load_public_json(url, timeout=GH_TIMEOUT_SECONDS)


def load_public_api_state(api_host: str) -> dict[str, Any]:
Expand Down
92 changes: 92 additions & 0 deletions scripts/public_json_fetch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
"""Shared helpers for read-only public JSON fetches used by maintenance scripts.

The maintenance/report scripts (claim_inventory, submission_quality_gate,
proposed_work_triage, check_bounty_issue_states, check_live_bounty_closing_refs)
each used to implement their own tiny public-JSON fetch helper around
``urllib.request.urlopen(...)``. They did not agree on request headers,
timeout handling, or error wrapping, which meant a future bugfix to any of
those details had to be copy-pasted across files and the error wording
silently diverged.

This module centralises the contract:

- A single ``load_public_json`` helper performs the HTTP read, applies a
consistent set of headers, decodes UTF-8 JSON, and raises a
:class:`PublicJsonError` on any failure with a uniform message.
- The default timeout is exposed as ``DEFAULT_TIMEOUT_SECONDS`` so callers
can override it without re-deriving a magic number.
- All other helpers in the maintenance scripts are encouraged to call
:func:`load_public_json` instead of ``urlopen`` directly.

The shared helper deliberately stays read-only and dependency-free so it
can be imported by any script without pulling in the rest of the
``app`` package.
"""

from __future__ import annotations

import json
import urllib.error
import urllib.request
from typing import Any

DEFAULT_TIMEOUT_SECONDS = 30
DEFAULT_USER_AGENT = "mergework-maintenance-script"
DEFAULT_ACCEPT = "application/json"


class PublicJsonError(RuntimeError):
"""Raised when a public JSON read fails for any reason."""


def load_public_json(
url: str,
*,
description: str | None = None,
timeout: int = DEFAULT_TIMEOUT_SECONDS,
user_agent: str = DEFAULT_USER_AGENT,
accept: str = DEFAULT_ACCEPT,
) -> Any:
"""Fetch ``url`` and decode it as UTF-8 JSON.

Parameters
----------
url:
Absolute URL to fetch.
description:
Short human-readable description of the resource, used in the
error message (for example, "MergeWork API bounty data"). When
omitted, ``url`` is included in the error instead.
timeout:
Read timeout in seconds. Defaults to
:data:`DEFAULT_TIMEOUT_SECONDS`.
user_agent:
Value of the ``User-Agent`` header. Defaults to
:data:`DEFAULT_USER_AGENT`.
accept:
Value of the ``Accept`` header. Defaults to
:data:`DEFAULT_ACCEPT`.

Raises
------
PublicJsonError
On any HTTP, network, timeout, or JSON decode failure. The
original exception is chained via ``raise ... from``.
"""

label = description or url
request = urllib.request.Request(
url,
headers={"Accept": accept, "User-Agent": user_agent},
)
try:
with urllib.request.urlopen(request, timeout=timeout) as response:
payload = response.read().decode("utf-8")
except urllib.error.HTTPError as exc:
raise PublicJsonError(f"{label} unavailable: HTTP {exc.code}") from exc
except (urllib.error.URLError, TimeoutError, OSError) as exc:
raise PublicJsonError(f"{label} unavailable: {exc}") from exc
try:
return json.loads(payload)
except json.JSONDecodeError as exc:
raise PublicJsonError(f"{label} did not return valid JSON: {exc}") from exc
105 changes: 105 additions & 0 deletions tests/test_public_json_fetch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
"""Tests for scripts.public_json_fetch."""

from __future__ import annotations

from typing import Any
from unittest.mock import MagicMock, patch

import pytest

from scripts.public_json_fetch import (
DEFAULT_ACCEPT,
DEFAULT_TIMEOUT_SECONDS,
DEFAULT_USER_AGENT,
PublicJsonError,
load_public_json,
)


class _FakeResponse:
def __init__(self, body: str) -> None:
self._body = body.encode("utf-8")

def read(self) -> bytes:
return self._body

def __enter__(self) -> _FakeResponse:
return self

def __exit__(self, exc_type: Any, exc: Any, tb: Any) -> None:
return None


def test_load_public_json_returns_decoded_payload() -> None:
response = _FakeResponse('{"hello": "world"}')
with patch("scripts.public_json_fetch.urllib.request.urlopen", return_value=response):
result = load_public_json("https://example.test/api")
assert result == {"hello": "world"}


def test_load_public_json_uses_default_headers_and_timeout() -> None:
response = _FakeResponse("[]")
captured: dict[str, Any] = {}

def fake_urlopen(request: Any, timeout: int) -> _FakeResponse:
captured["url"] = request.full_url
captured["headers"] = dict(request.headers)
captured["timeout"] = timeout
return response

with patch("scripts.public_json_fetch.urllib.request.urlopen", side_effect=fake_urlopen):
load_public_json("https://example.test/api")
assert captured["url"] == "https://example.test/api"
assert captured["headers"]["Accept"] == DEFAULT_ACCEPT
assert captured["headers"]["User-agent"] == DEFAULT_USER_AGENT
assert captured["timeout"] == DEFAULT_TIMEOUT_SECONDS


def test_load_public_json_wraps_http_error() -> None:
error = MagicMock()
error.__class__ = type("HTTPError", (Exception,), {})
from urllib.error import HTTPError

err = HTTPError(url="x", code=503, msg="Service Unavailable", hdrs={}, fp=None)
with (
patch("scripts.public_json_fetch.urllib.request.urlopen", side_effect=err),
pytest.raises(PublicJsonError) as excinfo,
):
load_public_json("https://example.test/api", description="merge api")
assert "merge api" in str(excinfo.value)
assert "HTTP 503" in str(excinfo.value)


def test_load_public_json_wraps_timeout_error() -> None:
from urllib.error import URLError

err = URLError("timed out")
with (
patch("scripts.public_json_fetch.urllib.request.urlopen", side_effect=err),
pytest.raises(PublicJsonError) as excinfo,
):
load_public_json("https://example.test/api", description="merge api")
assert "merge api" in str(excinfo.value)


def test_load_public_json_wraps_invalid_json() -> None:
response = _FakeResponse("not json {")
with (
patch("scripts.public_json_fetch.urllib.request.urlopen", return_value=response),
pytest.raises(PublicJsonError) as excinfo,
):
load_public_json("https://example.test/api", description="merge api")
assert "did not return valid JSON" in str(excinfo.value)


def test_load_public_json_respects_custom_timeout() -> None:
response = _FakeResponse("[]")
captured: dict[str, Any] = {}

def fake_urlopen(request: Any, timeout: int) -> _FakeResponse:
captured["timeout"] = timeout
return response

with patch("scripts.public_json_fetch.urllib.request.urlopen", side_effect=fake_urlopen):
load_public_json("https://example.test/api", timeout=5)
assert captured["timeout"] == 5
5 changes: 5 additions & 0 deletions tests/test_work_discovery.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,11 @@ def test_work_discovery_distinguishes_live_and_pending_create_work(sqlite_url: s
"bounty": f"/api/v1/bounties/{live_bounty.id}",
"attempts": f"/api/v1/bounties/{live_bounty.id}/attempts",
"github_issue": "https://github.com/ramimbo/mergework/issues/800",
"public_bounty_page": f"/bounties/{live_bounty.id}",
"claimable_matches": (
"/bounties?status=open&repo=ramimbo/mergework"
"&issue_number=800&sort=reward&availability=effectively_open"
),
},
"next_action": {
"id": "confirm_award_slot",
Expand Down
Loading