Skip to content

Commit c81f7bc

Browse files
Harden token transport and logging
Amp-Thread-ID: https://ampcode.com/threads/T-019e72df-3c24-711b-b1ef-f6934d03be4a Co-authored-by: Amp <amp@ampcode.com>
1 parent 12c4408 commit c81f7bc

7 files changed

Lines changed: 121 additions & 21 deletions

File tree

.github/workflows/release.yml

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ on:
1212
type: string
1313

1414
permissions:
15-
contents: write
15+
contents: read
1616
pull-requests: read
1717

1818
concurrency:
@@ -65,8 +65,10 @@ jobs:
6565

6666
- name: Validate release inputs
6767
id: release
68+
env:
69+
RELEASE_TAG: ${{ github.event.inputs.tag || github.ref_name }}
6870
run: |
69-
release_tag="${{ github.event.inputs.tag || github.ref_name }}"
71+
release_tag="${RELEASE_TAG}"
7072
if [[ ! "${release_tag}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
7173
echo "::error title=Invalid release tag::Use a vMAJOR.MINOR.PATCH tag, got '${release_tag}'."
7274
exit 1
@@ -214,6 +216,8 @@ jobs:
214216
name: Publish GitHub release assets
215217
needs: [validate, wheel]
216218
runs-on: ubuntu-24.04
219+
permissions:
220+
contents: write
217221

218222
steps:
219223
- name: Download release assets
@@ -226,8 +230,9 @@ jobs:
226230
env:
227231
GH_TOKEN: ${{ github.token }}
228232
GH_REPO: ${{ github.repository }}
233+
RELEASE_TAG: ${{ github.event.inputs.tag || github.ref_name }}
229234
run: |
230-
release_tag="${{ github.event.inputs.tag || github.ref_name }}"
235+
release_tag="${RELEASE_TAG}"
231236
notes_path="$(find release-assets -name release-notes.md -print -quit)"
232237
mapfile -t release_assets < <(find release-assets -type f ! -name release-notes.md | sort)
233238

src/src_py_lib/clients/github.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,11 @@ def _normalize_github_url(github_url: str) -> str:
113113
stripped = github_url.strip().rstrip("/")
114114
if "://" not in stripped:
115115
stripped = f"https://{stripped}"
116+
split = urlsplit(stripped)
117+
if split.scheme != "https":
118+
raise ValueError(f"GitHub URL must be an https:// URL (got {split.scheme!r})")
119+
if not split.hostname:
120+
raise ValueError(f"could not parse hostname from GitHub URL {stripped!r}")
116121
return stripped
117122

118123

src/src_py_lib/clients/graphql.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from pathlib import Path
1010
from typing import cast
1111

12-
from src_py_lib.utils.http import HTTPClient, HTTPClientError, HTTPResponse
12+
from src_py_lib.utils.http import HTTPClient, HTTPClientError, HTTPResponse, log_safe_url
1313
from src_py_lib.utils.json_types import JSONDict, JSONValue, json_dict, json_list, json_str
1414
from src_py_lib.utils.logging import event
1515

@@ -249,7 +249,7 @@ def _execute_once(
249249
page_number=page_number,
250250
page_size=_int_variable(variables, first_variable),
251251
cursor_present=variables.get(after_variable) is not None,
252-
url=self.url,
252+
url=log_safe_url(self.url),
253253
variable_names=sorted(variables),
254254
query_bytes=len(query.encode("utf-8")),
255255
) as fields:

src/src_py_lib/clients/sourcegraph.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,9 @@ class SourcegraphClient:
182182
`endpoint` should be the instance base URL, for example
183183
`https://sourcegraph.example.com`.
184184
185+
Plain HTTP endpoints are rejected unless `allow_insecure_http=True` is set
186+
for local development.
187+
185188
Set `trace=True` to ask Sourcegraph to retain traces for each GraphQL
186189
request. Traced requests are available through `drain_traces()` and can be
187190
fetched from the instance's Jaeger/debug endpoint with
@@ -192,12 +195,16 @@ class SourcegraphClient:
192195
token: str
193196
http: HTTPClient = field(default_factory=HTTPClient)
194197
trace: bool = False
198+
allow_insecure_http: bool = False
195199
_traces: queue.Queue[SourcegraphTrace] = field(
196200
default_factory=lambda: queue.Queue[SourcegraphTrace](), init=False, repr=False
197201
)
198202

199203
def __post_init__(self) -> None:
200-
self.endpoint = normalize_sourcegraph_endpoint(self.endpoint)
204+
self.endpoint = normalize_sourcegraph_endpoint(
205+
self.endpoint,
206+
require_https=not self.allow_insecure_http,
207+
)
201208

202209
def graphql(self, query: str, variables: Mapping[str, JSONValue] | None = None) -> JSONDict:
203210
return self._client().execute(query, variables)

src/src_py_lib/utils/http.py

Lines changed: 46 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,19 @@
3333
"secret",
3434
"token",
3535
)
36+
SENSITIVE_URL_QUERY_FRAGMENTS: Final[tuple[str, ...]] = (
37+
"access_token",
38+
"api-key",
39+
"api_key",
40+
"authorization",
41+
"code",
42+
"credential",
43+
"key",
44+
"password",
45+
"secret",
46+
"signature",
47+
"token",
48+
)
3649

3750
logger = logging.getLogger(__name__)
3851

@@ -150,7 +163,7 @@ def response(
150163
"http_request",
151164
level="debug",
152165
method=method,
153-
url=_safe_url(request_url),
166+
url=log_safe_url(request_url),
154167
attempt=attempt,
155168
request_headers=_headers_for_log(request_headers),
156169
request_bytes=len(body or b""),
@@ -179,7 +192,7 @@ def response(
179192
if not self._should_retry(response.status_code, attempt):
180193
raise HTTPClientError(
181194
f"HTTP {response.status_code} for {method} "
182-
f"{_safe_url(request_url)}: {body_text}",
195+
f"{log_safe_url(request_url)}: {body_text}",
183196
status_code=response.status_code,
184197
body=body_text,
185198
headers=dict(response.headers),
@@ -203,7 +216,7 @@ def response(
203216
"timed out" if isinstance(exception, httpx.TimeoutException) else "failed"
204217
)
205218
raise HTTPClientError(
206-
f"HTTP request {failure} for {method} {_safe_url(request_url)}: "
219+
f"HTTP request {failure} for {method} {log_safe_url(request_url)}: "
207220
f"{_exception_message(exception)}"
208221
) from exception
209222
record_http_retry()
@@ -246,7 +259,7 @@ def json_response(
246259
), response
247260
except json.JSONDecodeError as exception:
248261
raise HTTPClientError(
249-
f"Invalid JSON response from {method} {_safe_url(url)}"
262+
f"Invalid JSON response from {method} {log_safe_url(url)}"
250263
) from exception
251264

252265
def _should_retry(self, status_code: int | None, attempt: int) -> bool:
@@ -276,9 +289,31 @@ def _with_query(
276289
return f"{url}{separator}{urllib.parse.urlencode(filtered)}"
277290

278291

279-
def _safe_url(url: str) -> str:
292+
def log_safe_url(url: str) -> str:
293+
"""Return a URL safe to include in logs and exception messages."""
280294
split = urllib.parse.urlsplit(url)
281-
return urllib.parse.urlunsplit((split.scheme, split.netloc, split.path, split.query, ""))
295+
return urllib.parse.urlunsplit(
296+
(split.scheme, _safe_netloc(split.netloc), split.path, _safe_query(split.query), "")
297+
)
298+
299+
300+
def _safe_netloc(netloc: str) -> str:
301+
return netloc.rsplit("@", 1)[-1]
302+
303+
304+
def _safe_query(query: str) -> str:
305+
if not query:
306+
return ""
307+
parts: list[str] = []
308+
for part in query.split("&"):
309+
key, separator, _value = part.partition("=")
310+
if _is_sensitive_query_parameter(urllib.parse.unquote_plus(key)):
311+
parts.append(f"{key}={REDACTED_HEADER_VALUE}")
312+
elif separator:
313+
parts.append(part)
314+
else:
315+
parts.append(key)
316+
return "&".join(parts)
282317

283318

284319
def _headers_for_log(headers: Mapping[str, str] | httpx.Headers) -> dict[str, str | list[str]]:
@@ -311,6 +346,11 @@ def _is_sensitive_header(name: str) -> bool:
311346
return any(fragment in lowered for fragment in SENSITIVE_HEADER_FRAGMENTS)
312347

313348

349+
def _is_sensitive_query_parameter(name: str) -> bool:
350+
lowered = name.lower()
351+
return any(fragment in lowered for fragment in SENSITIVE_URL_QUERY_FRAGMENTS)
352+
353+
314354
def _response_http_version(response: httpx.Response) -> str | None:
315355
version = response.extensions.get("http_version")
316356
if isinstance(version, bytes):

src/src_py_lib/utils/logging.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,9 @@
4343
TRACE_ID_BYTES: Final[int] = 16
4444
SPAN_ID_BYTES: Final[int] = 8
4545
MEBIBYTE: Final[int] = 1024 * 1024
46+
REDACTED_LOG_VALUE: Final[str] = "[redacted]"
4647
SECRET_FIELD_FRAGMENTS: Final[tuple[str, ...]] = (
48+
"api-key",
4749
"api_key",
4850
"authorization",
4951
"cookie",
@@ -768,7 +770,7 @@ def sanitized_config_snapshot(config: object) -> dict[str, Any]:
768770
if callable(value):
769771
continue
770772
key_text = str(key)
771-
if any(fragment in key_text.lower() for fragment in SECRET_FIELD_FRAGMENTS):
773+
if _is_sensitive_log_field(key_text):
772774
snapshot[key_text] = _secret_state(value)
773775
elif isinstance(value, Path):
774776
snapshot[key_text] = str(value)
@@ -937,13 +939,14 @@ def _http_headers(raw_headers: object) -> dict[str, str | list[str]]:
937939
if name is None or value is None:
938940
continue
939941
key = name.lower()
942+
logged_value = REDACTED_LOG_VALUE if _is_sensitive_log_field(key) else value
940943
existing = headers.get(key)
941944
if existing is None:
942-
headers[key] = value
945+
headers[key] = logged_value
943946
elif isinstance(existing, list):
944-
existing.append(value)
947+
existing.append(logged_value)
945948
else:
946-
headers[key] = [existing, value]
949+
headers[key] = [existing, logged_value]
947950
return {key: headers[key] for key in sorted(headers)}
948951

949952

@@ -971,6 +974,11 @@ def _is_hex_identifier(value: str, length: int) -> bool:
971974
)
972975

973976

977+
def _is_sensitive_log_field(name: str) -> bool:
978+
lowered = name.lower()
979+
return any(fragment in lowered for fragment in SECRET_FIELD_FRAGMENTS)
980+
981+
974982
def _secret_state(value: object) -> str:
975983
if value is None or value == "":
976984
return "missing"

tests/test_logging_http_clients.py

Lines changed: 40 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1209,7 +1209,8 @@ def test_httpcore_response_headers_are_structured(self) -> None:
12091209
"receive_response_headers.complete "
12101210
"return_value=(b'HTTP/1.1', 200, b'OK', "
12111211
"[(b'Zed', b'last'), (b'Content-Type', b'application/json'), "
1212-
"(b'Alpha', b'first')])"
1212+
"(b'Set-Cookie', b'session=secret'), "
1213+
"(b'X-Api-Key', b'secret'), (b'Alpha', b'first')])"
12131214
)
12141215
finally:
12151216
logger = logging.getLogger(logger_name)
@@ -1226,12 +1227,17 @@ def test_httpcore_response_headers_are_structured(self) -> None:
12261227
self.assertEqual(response_headers["http_version"], "HTTP/1.1")
12271228
self.assertEqual(response_headers["status_code"], 200)
12281229
self.assertEqual(response_headers["reason_phrase"], "OK")
1229-
self.assertEqual(list(response_headers["headers"]), ["alpha", "content-type", "zed"])
1230+
self.assertEqual(
1231+
list(response_headers["headers"]),
1232+
["alpha", "content-type", "set-cookie", "x-api-key", "zed"],
1233+
)
12301234
self.assertEqual(
12311235
response_headers["headers"],
12321236
{
12331237
"alpha": "first",
12341238
"content-type": "application/json",
1239+
"set-cookie": "[redacted]",
1240+
"x-api-key": "[redacted]",
12351241
"zed": "last",
12361242
},
12371243
)
@@ -1311,8 +1317,9 @@ def handler(_request: httpx.Request) -> httpx.Response:
13111317
client = HTTPClient(max_attempts=1, transport=httpx.MockTransport(handler))
13121318
payload = client.json(
13131319
"POST",
1314-
"https://example.com/api",
1320+
"https://user:pass@example.com/api?code=oauth",
13151321
headers={"Authorization": "Bearer token"},
1322+
query={"limit": 10, "access_token": "secret", "signature": "signed"},
13161323
json_body={"hello": "world"},
13171324
)
13181325
finally:
@@ -1332,6 +1339,11 @@ def handler(_request: httpx.Request) -> httpx.Response:
13321339
self.assertFalse(any(row.get("logger") in {"httpx", "httpcore"} for row in rows))
13331340
self.assertEqual(http_request["status_code"], 200)
13341341
self.assertEqual(http_request["reason_phrase"], "OK")
1342+
self.assertEqual(
1343+
http_request["url"],
1344+
"https://example.com/api?code=[redacted]&limit=10"
1345+
"&access_token=[redacted]&signature=[redacted]",
1346+
)
13351347
self.assertEqual(http_request["request_bytes"], len(b'{"hello": "world"}'))
13361348
self.assertEqual(http_request["request_headers"]["authorization"], "[redacted]")
13371349
self.assertEqual(
@@ -1364,10 +1376,13 @@ def handler(_request: httpx.Request) -> httpx.Response:
13641376
)
13651377

13661378
with self.assertRaisesRegex(HTTPClientError, "rate limited") as raised:
1367-
client.json("GET", "https://example.com/api")
1379+
client.json("GET", "https://user:pass@example.com/api?access_token=secret")
13681380

13691381
self.assertEqual(raised.exception.status_code, 429)
13701382
self.assertEqual(raised.exception.body, "rate limited")
1383+
self.assertIn("https://example.com/api?access_token=[redacted]", str(raised.exception))
1384+
self.assertNotIn("user:pass", str(raised.exception))
1385+
self.assertNotIn("secret", str(raised.exception))
13711386

13721387

13731388
class ClientTest(unittest.TestCase):
@@ -1409,6 +1424,13 @@ def test_sourcegraph_client_builds_graphql_request(self) -> None:
14091424
self.assertEqual(http.calls[0]["url"], "https://sourcegraph.example.com/.api/graphql")
14101425
self.assertEqual(http.calls[0]["headers"], {"Authorization": "token token"})
14111426

1427+
def test_sourcegraph_client_rejects_http_endpoint_by_default(self) -> None:
1428+
with self.assertRaisesRegex(ValueError, "https:// URL"):
1429+
SourcegraphClient("http://sourcegraph.example.com", "token")
1430+
1431+
client = SourcegraphClient("http://localhost:3080", "token", allow_insecure_http=True)
1432+
self.assertEqual(client.endpoint, "http://localhost:3080")
1433+
14121434
def test_sourcegraph_client_streams_connection_nodes(self) -> None:
14131435
http = RecordingHTTP(
14141436
[
@@ -1780,7 +1802,12 @@ def test_graphql_client_emits_query_debug_events(self) -> None:
17801802
},
17811803
]
17821804
)
1783-
client = GraphQLClient("https://example.com/graphql", {}, "Example", http=http)
1805+
client = GraphQLClient(
1806+
"https://user:pass@example.com/graphql?access_token=secret&query=ok",
1807+
{},
1808+
"Example",
1809+
http=http,
1810+
)
17841811
query = """
17851812
query Items($first: Int!, $after: String, $userId: ID!) {
17861813
viewer { items { nodes { id } pageInfo { hasNextPage endCursor } } }
@@ -1822,6 +1849,10 @@ def test_graphql_client_emits_query_debug_events(self) -> None:
18221849
self.assertEqual([row["page_size"] for row in starts], [2, 2])
18231850
self.assertEqual([row["cursor_present"] for row in starts], [False, True])
18241851
self.assertEqual(starts[0]["graphql_client"], "Example")
1852+
self.assertEqual(
1853+
starts[0]["url"],
1854+
"https://example.com/graphql?access_token=[redacted]&query=ok",
1855+
)
18251856
self.assertEqual(starts[0]["variable_names"], ["after", "first", "userId"])
18261857
self.assertEqual(ends[0]["response_fields"], ["viewer"])
18271858

@@ -1946,6 +1977,10 @@ def test_github_client_can_target_github_enterprise(self) -> None:
19461977
graphql_api_url("github.example.com"), "https://github.example.com/api/graphql"
19471978
)
19481979

1980+
def test_github_client_rejects_http_enterprise_url(self) -> None:
1981+
with self.assertRaisesRegex(ValueError, "https:// URL"):
1982+
graphql_api_url("http://github.example.com")
1983+
19491984
def test_github_client_validate_queries_viewer(self) -> None:
19501985
http = RecordingHTTP([{"data": {"viewer": {"login": "alice"}}}])
19511986
client = GitHubClient("token", http=http)

0 commit comments

Comments
 (0)