Skip to content

Commit 79dd0a6

Browse files
committed
Add streaming GraphQL pagination
Add config validation for numeric comparisons and regex patterns Add log level aliases Change default log level from debug to info
1 parent 7ffd805 commit 79dd0a6

11 files changed

Lines changed: 1510 additions & 108 deletions

File tree

README.md

Lines changed: 25 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,9 @@ uv add git+https://github.com/sourcegraph/src-py-lib.git
3434
- `src_py_lib.clients.graphql` — shared GraphQL execution with automatic cursor
3535
pagination, batched alias lookups, and schema introspection export.
3636
- `src_py_lib.clients.sourcegraph` — Sourcegraph GraphQL client with token
37-
validation and shared config fields for `SRC_ENDPOINT` (default:
38-
`https://sourcegraph.com`) and `SRC_ACCESS_TOKEN`.
37+
validation, endpoint normalization, connection streaming, and shared config
38+
fields for `SRC_ENDPOINT` (default: `https://sourcegraph.com`) and
39+
`SRC_ACCESS_TOKEN`.
3940
- `src_py_lib.clients.linear` — Linear GraphQL client with automatic cursor
4041
handling, token validation, shared config fields, and injectable HTTP policy.
4142
- `src_py_lib.clients.slack` — Slack Web API client with token validation,
@@ -70,7 +71,7 @@ import src_py_lib as src
7071

7172
class LinearExportConfig(src.LinearClientConfig):
7273
output_dir: Path = src.config_field(
73-
Path("."),
74+
default=Path("."),
7475
env_var="LINEAR_EXPORT_OUTPUT_DIR",
7576
cli_flag="--output-dir",
7677
metavar="PATH",
@@ -85,32 +86,45 @@ print(f"Writing files under {config.output_dir}")
8586
Config precedence is: code defaults, `.env`, shell environment, then CLI
8687
overrides. API client modules can provide shared Config base classes such as
8788
`LinearClientConfig`, and `parse_args` resolves `op://...` references by
88-
default. Pass a custom `argparse.ArgumentParser` to `parse_args` when a
89-
CLI also has non-Config flags. Mark sensitive fields with `secret=True` so
90-
snapshots do not expose resolved values.
89+
default. `config_field(default=...)` supports aliases, store-true /
90+
store-false command flags, optional values, numeric bounds, and string patterns
91+
for simple CLIs. Pass a custom `argparse.ArgumentParser` to `parse_args` only when you
92+
need parsing beyond Config fields. Help text preserves description and
93+
argument-help newlines, and reserves enough option-column width for long config
94+
flags. Mark sensitive fields with `secret=True` so snapshots do not expose
95+
resolved values.
9196

9297
## Logging example
9398

9499
Configure logging once at process startup. Prefer configuring the root logger
95100
(`logger_name=""`, the default) so project modules and shared `src_py_lib` modules
96101
such as `src_py_lib.utils.http` are captured by the same terminal and JSONL handlers.
97102
Use `logging()` in CLIs to configure logging, add the command field to all
98-
structured events, and emit standard startup metadata.
103+
structured events, and emit standard run/startup/run-end metadata.
99104
Use `debug()`, `info()`, `warning()`, `error()`, and `critical()` for one-off
100105
structured events. Use `event()` blocks around timed work; they emit `trace`,
101-
`span`, and nested `parent_span` fields.
106+
`span`, and nested `parent_span` fields. Use `start_level="debug"` to hide
107+
noisy start events while keeping end timing visible, and
108+
`omit_success_status=True` for very high-volume success events. Use `stage()`
109+
for workflow context such as `stage="apply"`.
102110
When the root logger is configured, noisy `httpx`/`httpcore` records are suppressed;
103111
`HTTPClient` emits structured `http_request` events instead.
104-
Set `SRC_LOG_LEVEL=INFO` for a run to omit DEBUG events from the log file.
112+
Run-end events include HTTP attempt/byte/status/retry counters. Set
113+
`LoggingSettings.resource_sample_interval_seconds` to emit DEBUG
114+
`resource_sample` events and include process resource totals on run end. Set
115+
`SRC_LOG_LEVEL=INFO` for a run to omit DEBUG events from the log file.
116+
`LoggingConfig` includes `--verbose/-v`, `--quiet/-q`, and `--silent/-s`
117+
shortcuts (also available as `SRC_LOG_VERBOSE`, `SRC_LOG_QUIET`, and
118+
`SRC_LOG_SILENT`). Use `logging_settings_from_config()` to build
119+
`LoggingSettings` from those conventions.
105120

106121
```python
107122
import src_py_lib as src
108-
from src_py_lib.clients.sourcegraph import SourcegraphClient
109123

110124
with src.logging({"src_token": "provided"}):
111125
src.info("sync_started", repository_count=3)
112126

113-
client = SourcegraphClient("https://sourcegraph.example.com", "token")
127+
client = src.SourcegraphClient("https://sourcegraph.example.com", "token")
114128
data = client.graphql("query Viewer { currentUser { username } }")
115129
```
116130

src/src_py_lib/__init__.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,10 @@
33
from __future__ import annotations
44

55
import sys
6+
from collections.abc import Callable, Mapping
67
from contextlib import AbstractContextManager
78
from pathlib import Path
9+
from typing import Any
810

911
from src_py_lib.clients.github import GitHubClient, PullRequest, gh_cli_token, pr_ref_from_url
1012
from src_py_lib.clients.google_sheets import (
@@ -18,6 +20,7 @@
1820
GraphQLError,
1921
aliased_batched_query,
2022
introspect_schema,
23+
stream_connection_nodes,
2124
)
2225
from src_py_lib.clients.linear import (
2326
LinearClient,
@@ -31,6 +34,12 @@
3134
SlackPacer,
3235
slack_client_from_config,
3336
)
37+
from src_py_lib.clients.sourcegraph import (
38+
SourcegraphClient,
39+
SourcegraphClientConfig,
40+
normalize_sourcegraph_endpoint,
41+
sourcegraph_client_from_config,
42+
)
3443
from src_py_lib.utils.config import (
3544
Config,
3645
ConfigError,
@@ -63,7 +72,11 @@
6372
log,
6473
log_context,
6574
logging_context,
75+
logging_settings_from_config,
76+
resolve_log_level_name,
77+
stage,
6678
startup_event,
79+
submit_with_log_context,
6780
warning,
6881
)
6982
from src_py_lib.utils.tsv import write_tsv
@@ -75,13 +88,17 @@ def logging(
7588
command: str | None = None,
7689
git_cwd: Path | str | None = None,
7790
logging_config: LoggingSettings | None = None,
91+
run_fields: Mapping[str, Any] | None = None,
92+
run_summary: Callable[[], Mapping[str, Any]] | None = None,
7893
) -> AbstractContextManager[Path | None]:
7994
"""Configure standard CLI logging and emit startup metadata."""
8095
return logging_context(
8196
command or _script_name(),
8297
config,
8398
git_cwd=git_cwd,
8499
logging_config=logging_config,
100+
run_fields=run_fields,
101+
run_summary=run_summary,
85102
)
86103

87104

@@ -109,6 +126,8 @@ def _script_name() -> str:
109126
"SlackClientConfig",
110127
"SlackError",
111128
"SlackPacer",
129+
"SourcegraphClient",
130+
"SourcegraphClientConfig",
112131
"aliased_batched_query",
113132
"config_field",
114133
"config_snapshot",
@@ -131,14 +150,21 @@ def _script_name() -> str:
131150
"load_json_cache",
132151
"load_json_subset",
133152
"logging",
153+
"logging_settings_from_config",
134154
"log",
135155
"log_context",
156+
"normalize_sourcegraph_endpoint",
136157
"parse_args",
137158
"pr_ref_from_url",
138159
"quota_project_from_adc",
160+
"resolve_log_level_name",
139161
"save_json_cache",
140162
"slack_client_from_config",
163+
"sourcegraph_client_from_config",
164+
"stage",
141165
"startup_event",
166+
"stream_connection_nodes",
167+
"submit_with_log_context",
142168
"warning",
143169
"write_tsv",
144170
]

src/src_py_lib/clients/graphql.py

Lines changed: 125 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
import json
66
import re
7-
from collections.abc import Callable, Mapping
7+
from collections.abc import Callable, Iterator, Mapping, Sequence
88
from dataclasses import dataclass, field
99
from pathlib import Path
1010
from typing import cast
@@ -113,6 +113,17 @@
113113
class GraphQLError(RuntimeError):
114114
"""Raised for GraphQL transport or application errors."""
115115

116+
def __init__(
117+
self,
118+
message: str,
119+
*,
120+
status_code: int | None = None,
121+
is_application_error: bool = False,
122+
) -> None:
123+
super().__init__(message)
124+
self.status_code = status_code
125+
self.is_application_error = is_application_error
126+
116127

117128
@dataclass
118129
class GraphQLClient:
@@ -174,6 +185,49 @@ def execute_next_page(next_variables: JSONDict) -> JSONDict:
174185
)
175186
return data
176187

188+
def stream_connection_nodes(
189+
self,
190+
query: str,
191+
variables: Mapping[str, JSONValue] | None = None,
192+
*,
193+
connection_path: Sequence[str],
194+
page_size: int | None = None,
195+
first_variable: str = "first",
196+
after_variable: str = "after",
197+
) -> Iterator[JSONDict]:
198+
"""Stream one GraphQL connection's nodes page by page.
199+
200+
`connection_path` is the response path to the connection object that
201+
contains `nodes` and `pageInfo`, for example `("viewer", "items")`.
202+
Unlike `execute(..., follow_pages=True)`, this does not accumulate all
203+
nodes in memory before returning.
204+
"""
205+
page_number = 1
206+
207+
def execute_page(
208+
operation: str, page_variables: Mapping[str, JSONValue] | None
209+
) -> JSONDict:
210+
nonlocal page_number
211+
data = self._execute_once(
212+
operation,
213+
dict(page_variables or {}),
214+
page_number=page_number,
215+
first_variable=first_variable,
216+
after_variable=after_variable,
217+
)
218+
page_number += 1
219+
return data
220+
221+
yield from stream_connection_nodes(
222+
execute_page,
223+
query,
224+
variables,
225+
connection_path=connection_path,
226+
page_size=page_size,
227+
first_variable=first_variable,
228+
after_variable=after_variable,
229+
)
230+
177231
def _execute_once(
178232
self,
179233
query: str,
@@ -200,15 +254,19 @@ def _execute_once(
200254
payload = self.http.json("POST", self.url, headers=self.headers, json_body=body)
201255
except HTTPClientError as exception:
202256
raise GraphQLError(
203-
f"{self.label} GraphQL request failed: {exception}"
257+
f"{self.label} GraphQL request failed: {exception}",
258+
status_code=exception.status_code,
204259
) from exception
205260
errors = payload.get("errors")
206261
data = json_dict(payload.get("data"))
207262
fields["response_fields"] = sorted(data)
208263
if errors:
209264
fields["graphql_errors"] = len(errors) if isinstance(errors, list) else 1
210265
if errors and not (self.tolerate_partial_errors and data):
211-
raise GraphQLError(f"{self.label} GraphQL errors: {errors}")
266+
raise GraphQLError(
267+
f"{self.label} GraphQL errors: {errors}",
268+
is_application_error=True,
269+
)
212270
return data
213271

214272

@@ -218,6 +276,49 @@ def operation_name(query: str) -> str:
218276
return match.group(1) if match else "anonymous"
219277

220278

279+
def stream_connection_nodes(
280+
execute: Callable[[str, Mapping[str, JSONValue] | None], JSONDict],
281+
query: str,
282+
variables: Mapping[str, JSONValue] | None = None,
283+
*,
284+
connection_path: Sequence[str],
285+
page_size: int | None = None,
286+
first_variable: str = "first",
287+
after_variable: str = "after",
288+
) -> Iterator[JSONDict]:
289+
"""Stream one GraphQL connection's nodes through any execute callable."""
290+
page_variables: JSONDict = dict(variables) if variables is not None else {}
291+
if page_size is not None:
292+
page_variables[first_variable] = page_size
293+
query_uses_after_variable = _query_uses_variable(query, after_variable)
294+
if query_uses_after_variable and after_variable not in page_variables:
295+
page_variables[after_variable] = None
296+
297+
path = tuple(connection_path)
298+
current_cursor = page_variables.get(after_variable)
299+
while True:
300+
data = execute(query, dict(page_variables))
301+
page = _node_page_at_path(data, path)
302+
for node in json_list(page.get("nodes")):
303+
yield json_dict(node)
304+
305+
page_info = json_dict(page.get("pageInfo"))
306+
has_next_page = page_info.get("hasNextPage")
307+
if not isinstance(has_next_page, bool):
308+
raise GraphQLError(
309+
f"GraphQL pagination path {_path_label(path)} missing pageInfo.hasNextPage"
310+
)
311+
if not has_next_page:
312+
return
313+
if not query_uses_after_variable:
314+
raise GraphQLError(
315+
f"GraphQL query returned more pages but does not use ${after_variable}"
316+
)
317+
next_cursor = _next_page_cursor(page_info, path, current_cursor)
318+
page_variables[after_variable] = next_cursor
319+
current_cursor = next_cursor
320+
321+
221322
def _int_variable(variables: JSONDict, name: str) -> int | None:
222323
value = variables.get(name)
223324
return value if isinstance(value, int) else None
@@ -301,9 +402,7 @@ def _fetch_remaining_pages(
301402
target_page = _node_page_at_path(data, path)
302403
target_nodes = json_list(target_page.get("nodes"))
303404
page_info = json_dict(target_page.get("pageInfo"))
304-
after = json_str(page_info, "endCursor")
305-
if not after:
306-
raise GraphQLError(f"GraphQL pagination path {'.'.join(path)} missing pageInfo.endCursor")
405+
after = _next_page_cursor(page_info, path, variables.get(after_variable))
307406

308407
while after:
309408
page_variables = dict(variables)
@@ -322,11 +421,7 @@ def _fetch_remaining_pages(
322421
)
323422
if not has_next_page:
324423
return
325-
after = json_str(next_page_info, "endCursor")
326-
if not after:
327-
raise GraphQLError(
328-
f"GraphQL pagination path {'.'.join(path)} missing pageInfo.endCursor"
329-
)
424+
after = _next_page_cursor(next_page_info, path, after)
330425

331426

332427
def _next_page_paths(data: JSONDict) -> list[tuple[str, ...]]:
@@ -355,10 +450,27 @@ def _node_page_at_path(data: JSONDict, path: tuple[str, ...]) -> JSONDict:
355450
current = json_dict(current).get(key)
356451
page = json_dict(current)
357452
if not page:
358-
label = ".".join(path) or "<root>"
359-
raise GraphQLError(f"GraphQL response did not include pagination path {label}")
453+
raise GraphQLError(f"GraphQL response did not include pagination path {_path_label(path)}")
360454
return page
361455

362456

457+
def _next_page_cursor(page_info: JSONDict, path: tuple[str, ...], current_cursor: object) -> str:
458+
next_cursor = json_str(page_info, "endCursor")
459+
if not next_cursor:
460+
raise GraphQLError(
461+
f"GraphQL pagination path {_path_label(path)} missing pageInfo.endCursor"
462+
)
463+
if isinstance(current_cursor, str) and next_cursor == current_cursor:
464+
raise GraphQLError(
465+
f"GraphQL pagination path {_path_label(path)} stalled: "
466+
f"pageInfo.endCursor did not advance from {current_cursor!r}"
467+
)
468+
return next_cursor
469+
470+
471+
def _path_label(path: tuple[str, ...]) -> str:
472+
return ".".join(path) or "<root>"
473+
474+
363475
def _query_uses_variable(query: str, variable: str) -> bool:
364476
return re.search(rf"\${re.escape(variable)}\b", query) is not None

src/src_py_lib/clients/linear.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,11 +49,11 @@ class LinearClientConfig(Config):
4949
"""Config fields needed to build a Linear API client."""
5050

5151
linear_api_token: str = config_field(
52-
"",
52+
default="",
5353
env_var="LINEAR_API_TOKEN",
5454
cli_flag="--linear-api-token",
5555
metavar="TOKEN",
56-
help="Linear API token or op:// secret reference.",
56+
help="Linear API token or op:// secret reference",
5757
secret=True,
5858
required=True,
5959
)

src/src_py_lib/clients/slack.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,11 @@ class SlackClientConfig(Config):
2727
"""Config fields needed to build a Slack API client."""
2828

2929
slack_bot_token: str = config_field(
30-
"",
30+
default="",
3131
env_var="SLACK_BOT_TOKEN",
3232
cli_flag="--slack-bot-token",
3333
metavar="TOKEN",
34-
help="Slack bot token or op:// secret reference.",
34+
help="Slack bot token or op:// secret reference",
3535
secret=True,
3636
required=True,
3737
)

0 commit comments

Comments
 (0)