Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 25 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,9 @@ uv add git+https://github.com/sourcegraph/src-py-lib.git
- `src_py_lib.clients.graphql` — shared GraphQL execution with automatic cursor
pagination, batched alias lookups, and schema introspection export.
- `src_py_lib.clients.sourcegraph` — Sourcegraph GraphQL client with token
validation and shared config fields for `SRC_ENDPOINT` (default:
`https://sourcegraph.com`) and `SRC_ACCESS_TOKEN`.
validation, endpoint normalization, connection streaming, and shared config
fields for `SRC_ENDPOINT` (default: `https://sourcegraph.com`) and
`SRC_ACCESS_TOKEN`.
- `src_py_lib.clients.linear` — Linear GraphQL client with automatic cursor
handling, token validation, shared config fields, and injectable HTTP policy.
- `src_py_lib.clients.slack` — Slack Web API client with token validation,
Expand Down Expand Up @@ -70,7 +71,7 @@ import src_py_lib as src

class LinearExportConfig(src.LinearClientConfig):
output_dir: Path = src.config_field(
Path("."),
default=Path("."),
env_var="LINEAR_EXPORT_OUTPUT_DIR",
cli_flag="--output-dir",
metavar="PATH",
Expand All @@ -85,32 +86,45 @@ print(f"Writing files under {config.output_dir}")
Config precedence, from lowest to highest, is: code defaults, `.env`, shell
environment, then CLI overrides. API client modules can provide shared Config base classes such as
`LinearClientConfig`, and `parse_args` resolves `op://...` references by
default. Pass a custom `argparse.ArgumentParser` to `parse_args` when a
CLI also has non-Config flags. Mark sensitive fields with `secret=True` so
snapshots do not expose resolved values.
default. `config_field(default=...)` supports aliases, store-true /
store-false command flags, optional values, numeric bounds, and string patterns
for simple CLIs. Pass a custom `argparse.ArgumentParser` to `parse_args` only when you
need parsing beyond Config fields. Help text preserves description and
argument-help newlines, and reserves enough option-column width for long config
flags. Mark sensitive fields with `secret=True` so snapshots do not expose
resolved values.

## Logging example

Configure logging once at process startup. Prefer configuring the root logger
(`logger_name=""`, the default) so project modules and shared `src_py_lib` modules
such as `src_py_lib.utils.http` are captured by the same terminal and JSONL handlers.
Use `logging()` in CLIs to configure logging, add the command field to all
structured events, and emit standard startup metadata.
structured events, and emit standard run/startup/run-end metadata.
Use `debug()`, `info()`, `warning()`, `error()`, and `critical()` for one-off
structured events. Use `event()` blocks around timed work; they emit `trace`,
`span`, and nested `parent_span` fields.
`span`, and nested `parent_span` fields. Use `start_level="debug"` to hide
noisy start events while keeping end timing visible, and
`omit_success_status=True` for very high-volume success events. Use `stage()`
for workflow context such as `stage="apply"`.
When the root logger is configured, noisy `httpx`/`httpcore` records are suppressed;
`HTTPClient` emits structured `http_request` events instead.
Set `SRC_LOG_LEVEL=INFO` for a run to omit DEBUG events from the log file.
Run-end events include HTTP attempt/byte/status/retry counters. Set
`LoggingSettings.resource_sample_interval_seconds` to emit DEBUG
`resource_sample` events and include process resource totals on run end. Set
`SRC_LOG_LEVEL=INFO` for a run to omit DEBUG events from the log file.
`LoggingConfig` includes `--verbose/-v`, `--quiet/-q`, and `--silent/-s`
shortcuts (also available as `SRC_LOG_VERBOSE`, `SRC_LOG_QUIET`, and
`SRC_LOG_SILENT`). Use `logging_settings_from_config()` to build
`LoggingSettings` from those conventions.

```python
import src_py_lib as src
from src_py_lib.clients.sourcegraph import SourcegraphClient

with src.logging({"src_token": "provided"}):
src.info("sync_started", repository_count=3)

client = SourcegraphClient("https://sourcegraph.example.com", "token")
client = src.SourcegraphClient("https://sourcegraph.example.com", "token")
data = client.graphql("query Viewer { currentUser { username } }")
```

Expand Down
26 changes: 26 additions & 0 deletions src/src_py_lib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@
from __future__ import annotations

import sys
from collections.abc import Callable, Mapping
from contextlib import AbstractContextManager
from pathlib import Path
from typing import Any

from src_py_lib.clients.github import GitHubClient, PullRequest, gh_cli_token, pr_ref_from_url
from src_py_lib.clients.google_sheets import (
Expand All @@ -18,6 +20,7 @@
GraphQLError,
aliased_batched_query,
introspect_schema,
stream_connection_nodes,
)
from src_py_lib.clients.linear import (
LinearClient,
Expand All @@ -31,6 +34,12 @@
SlackPacer,
slack_client_from_config,
)
from src_py_lib.clients.sourcegraph import (
SourcegraphClient,
SourcegraphClientConfig,
normalize_sourcegraph_endpoint,
sourcegraph_client_from_config,
)
from src_py_lib.utils.config import (
Config,
ConfigError,
Expand Down Expand Up @@ -63,7 +72,11 @@
log,
log_context,
logging_context,
logging_settings_from_config,
resolve_log_level_name,
stage,
startup_event,
submit_with_log_context,
warning,
)
from src_py_lib.utils.tsv import write_tsv
Expand All @@ -75,13 +88,17 @@ def logging(
command: str | None = None,
git_cwd: Path | str | None = None,
logging_config: LoggingSettings | None = None,
run_fields: Mapping[str, Any] | None = None,
run_summary: Callable[[], Mapping[str, Any]] | None = None,
) -> AbstractContextManager[Path | None]:
"""Configure standard CLI logging and emit startup metadata."""
return logging_context(
command or _script_name(),
config,
git_cwd=git_cwd,
logging_config=logging_config,
run_fields=run_fields,
run_summary=run_summary,
)


Expand Down Expand Up @@ -109,6 +126,8 @@ def _script_name() -> str:
"SlackClientConfig",
"SlackError",
"SlackPacer",
"SourcegraphClient",
"SourcegraphClientConfig",
"aliased_batched_query",
"config_field",
"config_snapshot",
Expand All @@ -131,14 +150,21 @@ def _script_name() -> str:
"load_json_cache",
"load_json_subset",
"logging",
"logging_settings_from_config",
"log",
"log_context",
"normalize_sourcegraph_endpoint",
"parse_args",
"pr_ref_from_url",
"quota_project_from_adc",
"resolve_log_level_name",
"save_json_cache",
"slack_client_from_config",
"sourcegraph_client_from_config",
"stage",
"startup_event",
"stream_connection_nodes",
"submit_with_log_context",
"warning",
"write_tsv",
]
138 changes: 125 additions & 13 deletions src/src_py_lib/clients/graphql.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import json
import re
from collections.abc import Callable, Mapping
from collections.abc import Callable, Iterator, Mapping, Sequence
from dataclasses import dataclass, field
from pathlib import Path
from typing import cast
Expand Down Expand Up @@ -113,6 +113,17 @@
class GraphQLError(RuntimeError):
    """Error raised when a GraphQL request fails in transport or in the API.

    Attributes:
        status_code: The status code supplied by the raiser, or `None` when
            none was given.
        is_application_error: The flag supplied by the raiser; `False` unless
            explicitly set.
    """

    def __init__(
        self,
        message: str,
        *,
        status_code: int | None = None,
        is_application_error: bool = False,
    ) -> None:
        # The message alone becomes the exception's args / str() value; the
        # extra details are kept as plain attributes for callers to inspect.
        super().__init__(message)
        self.is_application_error = is_application_error
        self.status_code = status_code


@dataclass
class GraphQLClient:
Expand Down Expand Up @@ -174,6 +185,49 @@ def execute_next_page(next_variables: JSONDict) -> JSONDict:
)
return data

def stream_connection_nodes(
    self,
    query: str,
    variables: Mapping[str, JSONValue] | None = None,
    *,
    connection_path: Sequence[str],
    page_size: int | None = None,
    first_variable: str = "first",
    after_variable: str = "after",
) -> Iterator[JSONDict]:
    """Yield the nodes of a single GraphQL connection, one page at a time.

    `connection_path` names the keys leading from the response data to the
    connection object holding `nodes` and `pageInfo`, for example
    `("viewer", "items")`. In contrast to `execute(..., follow_pages=True)`,
    nodes are yielded as pages arrive instead of being accumulated in memory
    before returning.
    """
    pages_completed = 0

    def fetch_page(
        operation: str, page_variables: Mapping[str, JSONValue] | None
    ) -> JSONDict:
        nonlocal pages_completed
        request_variables = dict(page_variables) if page_variables else {}
        response = self._execute_once(
            operation,
            request_variables,
            page_number=pages_completed + 1,
            first_variable=first_variable,
            after_variable=after_variable,
        )
        # Only count the page once the request has come back without raising.
        pages_completed += 1
        return response

    # Delegates to the module-level function of the same name; inside this
    # method body the bare name resolves to the module global, not the method.
    node_stream = stream_connection_nodes(
        fetch_page,
        query,
        variables,
        connection_path=connection_path,
        page_size=page_size,
        first_variable=first_variable,
        after_variable=after_variable,
    )
    yield from node_stream

def _execute_once(
self,
query: str,
Expand All @@ -200,15 +254,19 @@ def _execute_once(
payload = self.http.json("POST", self.url, headers=self.headers, json_body=body)
except HTTPClientError as exception:
raise GraphQLError(
f"{self.label} GraphQL request failed: {exception}"
f"{self.label} GraphQL request failed: {exception}",
status_code=exception.status_code,
) from exception
errors = payload.get("errors")
data = json_dict(payload.get("data"))
fields["response_fields"] = sorted(data)
if errors:
fields["graphql_errors"] = len(errors) if isinstance(errors, list) else 1
if errors and not (self.tolerate_partial_errors and data):
raise GraphQLError(f"{self.label} GraphQL errors: {errors}")
raise GraphQLError(
f"{self.label} GraphQL errors: {errors}",
is_application_error=True,
)
return data


Expand All @@ -218,6 +276,49 @@ def operation_name(query: str) -> str:
return match.group(1) if match else "anonymous"


def stream_connection_nodes(
    execute: Callable[[str, Mapping[str, JSONValue] | None], JSONDict],
    query: str,
    variables: Mapping[str, JSONValue] | None = None,
    *,
    connection_path: Sequence[str],
    page_size: int | None = None,
    first_variable: str = "first",
    after_variable: str = "after",
) -> Iterator[JSONDict]:
    """Lazily yield a GraphQL connection's nodes using any execute callable.

    `execute` is called once per page with the query and a fresh copy of the
    current variables. Iteration ends when `pageInfo.hasNextPage` is `False`.

    Raises:
        GraphQLError: if `pageInfo.hasNextPage` is not a boolean, or if more
            pages are reported but the query never references
            `$<after_variable>`. Errors raised by the helpers that resolve
            the connection path and the next cursor propagate unchanged.
    """
    request_variables: JSONDict = {} if variables is None else dict(variables)
    if page_size is not None:
        request_variables[first_variable] = page_size

    can_paginate = _query_uses_variable(query, after_variable)
    if can_paginate:
        # Make sure the cursor variable is always sent, starting as null
        # unless the caller already supplied a starting cursor.
        request_variables.setdefault(after_variable, None)

    path = tuple(connection_path)
    while True:
        # Hand out a copy so the callable cannot alter our paging state.
        response = execute(query, dict(request_variables))
        connection = _node_page_at_path(response, path)
        yield from (json_dict(node) for node in json_list(connection.get("nodes")))

        page_info = json_dict(connection.get("pageInfo"))
        more_pages = page_info.get("hasNextPage")
        if more_pages is False:
            return
        if more_pages is not True:
            raise GraphQLError(
                f"GraphQL pagination path {_path_label(path)} missing pageInfo.hasNextPage"
            )
        if not can_paginate:
            raise GraphQLError(
                f"GraphQL query returned more pages but does not use ${after_variable}"
            )
        # The cursor sent with the request just made is the one the next
        # cursor is compared against.
        request_variables[after_variable] = _next_page_cursor(
            page_info, path, request_variables.get(after_variable)
        )


def _int_variable(variables: JSONDict, name: str) -> int | None:
    """Return `variables[name]` when it is an integer, otherwise `None`.

    A missing key, or a value of any other type, yields `None`. Booleans are
    rejected explicitly: `bool` is a subclass of `int`, so a plain
    `isinstance(value, int)` check would pass JSON `true`/`false` through as
    if they were the integers 1/0.
    """
    value = variables.get(name)
    if isinstance(value, bool) or not isinstance(value, int):
        return None
    return value
Expand Down Expand Up @@ -301,9 +402,7 @@ def _fetch_remaining_pages(
target_page = _node_page_at_path(data, path)
target_nodes = json_list(target_page.get("nodes"))
page_info = json_dict(target_page.get("pageInfo"))
after = json_str(page_info, "endCursor")
if not after:
raise GraphQLError(f"GraphQL pagination path {'.'.join(path)} missing pageInfo.endCursor")
after = _next_page_cursor(page_info, path, variables.get(after_variable))

while after:
page_variables = dict(variables)
Expand All @@ -322,11 +421,7 @@ def _fetch_remaining_pages(
)
if not has_next_page:
return
after = json_str(next_page_info, "endCursor")
if not after:
raise GraphQLError(
f"GraphQL pagination path {'.'.join(path)} missing pageInfo.endCursor"
)
after = _next_page_cursor(next_page_info, path, after)


def _next_page_paths(data: JSONDict) -> list[tuple[str, ...]]:
Expand Down Expand Up @@ -355,10 +450,27 @@ def _node_page_at_path(data: JSONDict, path: tuple[str, ...]) -> JSONDict:
current = json_dict(current).get(key)
page = json_dict(current)
if not page:
label = ".".join(path) or "<root>"
raise GraphQLError(f"GraphQL response did not include pagination path {label}")
raise GraphQLError(f"GraphQL response did not include pagination path {_path_label(path)}")
return page


def _next_page_cursor(page_info: JSONDict, path: tuple[str, ...], current_cursor: object) -> str:
    """Return the `endCursor` to request next, rejecting unusable cursors.

    Raises:
        GraphQLError: if `page_info` yields no (or an empty) `endCursor`, or
            if `current_cursor` is a string and `endCursor` equals it, which
            would mean requesting the same page again.
    """
    end_cursor = json_str(page_info, "endCursor")
    if end_cursor:
        stalled = isinstance(current_cursor, str) and end_cursor == current_cursor
        if not stalled:
            return end_cursor
        raise GraphQLError(
            f"GraphQL pagination path {_path_label(path)} stalled: "
            f"pageInfo.endCursor did not advance from {current_cursor!r}"
        )
    raise GraphQLError(
        f"GraphQL pagination path {_path_label(path)} missing pageInfo.endCursor"
    )


def _path_label(path: tuple[str, ...]) -> str:
    """Render a response path as dotted text, or `<root>` when that is empty."""
    dotted = ".".join(path)
    if dotted:
        return dotted
    return "<root>"


def _query_uses_variable(query: str, variable: str) -> bool:
    """Report whether `query` contains a `$<variable>` reference.

    The match is purely textual: a literal `$`, the escaped variable name, and
    then a word boundary so that longer names sharing the prefix do not count.
    """
    reference = re.compile(r"\$" + re.escape(variable) + r"\b")
    return reference.search(query) is not None
4 changes: 2 additions & 2 deletions src/src_py_lib/clients/linear.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,11 @@ class LinearClientConfig(Config):
"""Config fields needed to build a Linear API client."""

linear_api_token: str = config_field(
"",
default="",
env_var="LINEAR_API_TOKEN",
cli_flag="--linear-api-token",
metavar="TOKEN",
help="Linear API token or op:// secret reference.",
help="Linear API token or op:// secret reference",
secret=True,
required=True,
)
Expand Down
4 changes: 2 additions & 2 deletions src/src_py_lib/clients/slack.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,11 @@ class SlackClientConfig(Config):
"""Config fields needed to build a Slack API client."""

slack_bot_token: str = config_field(
"",
default="",
env_var="SLACK_BOT_TOKEN",
cli_flag="--slack-bot-token",
metavar="TOKEN",
help="Slack bot token or op:// secret reference.",
help="Slack bot token or op:// secret reference",
secret=True,
required=True,
)
Expand Down
Loading