From 3c15b6f74f558300ced71e3f2e98451f59b384e4 Mon Sep 17 00:00:00 2001
From: Paul Reinlein
Date: Tue, 5 May 2026 09:21:08 -0400
Subject: [PATCH] Add explain-lading-config skill

---
 .claude/skills/explain-lading-config/SKILL.md | 225 ++++++++++++++++++
 .../scripts/resolve-lading-config.sh          | 207 ++++++++++++++++
 .../scripts/validate-lading-checkout.sh       |  31 +++
 .github/CODEOWNERS                            |   3 +
 4 files changed, 466 insertions(+)
 create mode 100644 .claude/skills/explain-lading-config/SKILL.md
 create mode 100755 .claude/skills/explain-lading-config/scripts/resolve-lading-config.sh
 create mode 100755 .claude/skills/explain-lading-config/scripts/validate-lading-checkout.sh

diff --git a/.claude/skills/explain-lading-config/SKILL.md b/.claude/skills/explain-lading-config/SKILL.md
new file mode 100644
index 000000000000..d387a59b558e
--- /dev/null
+++ b/.claude/skills/explain-lading-config/SKILL.md
@@ -0,0 +1,225 @@
+---
+name: explain-lading-config
+description: Explains a lading.yaml config file from the regression test suite, using the lading Rust source as ground truth for field meanings and defaults.
+user_invocable: true
+argument-hint: "[experiment name]"
+---
+
+# explain-lading-config
+
+Explain what a lading regression test config does, grounded in lading source code.
+
+## Quick Start
+
+```bash
+# 1. Verify the lading checkout exists and is on a known branch
+bash .claude/skills/explain-lading-config/scripts/validate-lading-checkout.sh
+
+# 2. Resolve $ARGUMENTS to a lading.yaml path (exact/substring/glob/path)
+bash .claude/skills/explain-lading-config/scripts/resolve-lading-config.sh "$ARGUMENTS"
+
+# 3. Read the resolved file, then grep source structs in parallel:
+grep -n 'pub struct Config\|pub enum Config\|pub enum\|fn default_\|impl Default for\|#\[serde(default' \
+  ~/dd/lading/lading/src/generator/<type>.rs
+```
+
+Then explain with defaults resolved to concrete values (not function names).
+Full workflow below.
+
+## Step 1: Validate lading checkout
+
+Run `.claude/skills/explain-lading-config/scripts/validate-lading-checkout.sh`.
+
+- Exit 0: script prints the current branch on stdout. If it is not `main`, warn
+  the user that explanations are grounded in a non-main branch, then continue.
+- Exit non-zero: the script prints a suggested `git clone` command on stderr.
+  Relay that to the user and stop.
+
+Override the checkout location with `LADING_DIR` if needed.
+
+## Step 2: Determine target file
+
+Use `.claude/skills/explain-lading-config/scripts/resolve-lading-config.sh` to
+avoid ad-hoc matching. The script enumerates experiments under
+`test/regression/cases/` (active) and `test/regression/x-disabled-cases/`
+(disabled). Each experiment is a `<case>/lading/lading.yaml` addressed by its
+case-directory name; disabled rows are flagged with a trailing `(disabled)`
+column in the listing. `ebpf/cases/` (split-mode) and
+`ebpf/config-only/cases/` are intentionally out of scope; if a user asks about
+one, tell them this skill doesn't cover it yet.
+
+The script handles path-like inputs, substring case names, and shell
+globs (`*`, `?`).
+
+**If `$ARGUMENTS` is provided:** run `resolve-lading-config.sh "$ARGUMENTS"`.
+- Exit 0: stdout is the resolved absolute path; read it.
+- Exit 3 (ambiguous): stderr lists candidates.
+  - **≤ 4 candidates:** use `AskUserQuestion` to pick one, then read that
+    path.
+  - **> 4 candidates** (a broad substring like `i` can match 20+): do not
+    try to force them into `AskUserQuestion`. Print the experiment names
+    as a short bulleted list and ask the user to narrow the query and
+    re-invoke `/explain-lading-config <query>`.
+- Exit 2 (not found): stderr may include "did you mean?" suggestions — if
+  present, offer the suggestions to the user via `AskUserQuestion` (up to
+  4 options) or as a short list; if not, relay the error and stop.
+- Exit 4 (wrong repo): the script is being run from outside the agent repo.
+  Relay the error verbatim and stop — the user needs to `cd` into the repo.
+
+**If the resolved path contains `/x-disabled-cases/`**, flag this explicitly
+in the explanation — the experiment exists on disk but is not currently
+executed by SMP. Otherwise a user may assume it's live.
+
+**Reading very large configs:** multi-sender configs (e.g.
+`uds_dogstatsd_20mb_12k_contexts_20_senders`, ~870 lines) are usually
+block-copies of one template with a few fields varying (typically only
+`seed`). Before a full `Read`, check size and duplication:
+
+```bash
+wc -l <path>                                  # scale check
+grep -c '^ - ' <path>                         # top-level list entries
+yq '.generator | length' <path> 2>/dev/null   # if yq is present
+```
+
+For highly-duplicated configs, `Read` only the first block (plus the
+blackhole/target_metrics sections) and report the generator as
+"N identical copies, seed differs" instead of walking every block. Spot-
+check one later block to confirm uniformity.
+
+**If `$ARGUMENTS` is omitted:** run `resolve-lading-config.sh` with no
+argument. It emits `<name>\t<path>` lines for every discovered config.
+
+Print the experiment names as a plain bulleted list to the user (preserving
+the `(disabled)` markers) and ask them to type the name (or re-invoke the
+skill with `/explain-lading-config <name>`).
+
+## Step 3: Read the lading codebase for context
+
+Before explaining, read the relevant source files from the lading checkout
+to understand config fields. Do NOT rely on embedded knowledge — always
+read the source. Source paths below use `~/dd/lading/` for readability;
+substitute `$LADING_DIR` (from Step 1) if the user overrode the location.
+
+If an expected source file doesn't exist (lading may have renamed or
+restructured), fall back to
+`grep -rln 'pub struct Config' ~/dd/lading/lading/src/generator/`
+(or `blackhole/`, `target_metrics/`) to locate the current file, then proceed
+as normal. Mention the rename in the explanation so the user knows the
+skill's default paths are out of date.
+
+First, parse the config to see which sections are populated (`generator`,
+`blackhole`, `target_metrics`). Only read source files for sections that
+actually exist. In particular: **if `generator: []`, skip the generator
+source reads entirely** — there is nothing to ground.
+
+1. If `generator` has entries: read `~/dd/lading/lading/src/generator.rs` to
+   identify generator types, then for each type used read
+   `~/dd/lading/lading/src/generator/<type>.rs` (config struct, field
+   meanings, defaults).
+2. If a payload variant is referenced (e.g. `dogstatsd`,
+   `opentelemetry_metrics`), find its module. The variant-to-module
+   mapping lives in `~/dd/lading/lading_payload/src/lib.rs` — grep for
+   the variant's PascalCase enum name (e.g. `OpentelemetryMetrics`) and
+   follow the `crate::…` path it points to. Common mappings:
+   - `dogstatsd` → `lading_payload/src/dogstatsd.rs`
+   - `opentelemetry_metrics` → `lading_payload/src/opentelemetry/metric.rs`
+   - `opentelemetry_logs` → `lading_payload/src/opentelemetry/log.rs`
+   - `datadog_logs` → `lading_payload/src/datadog_logs.rs`
+   **Variant serialization forms:**
+   - `variant: "syslog5424"` (plain string) — the enum variant carries no
+     config fields (unit/empty struct). There are no knobs to explain; the
+     module itself encodes all behaviour.
+   - `variant: { opentelemetry_metrics: {} }` (mapping with empty body) —
+     the variant has a `Config` struct and is using `Config::default()`.
+     Follow `impl Default for Config` and any nested `Default` impls.
+   - `variant: { dogstatsd: { contexts: …, kind_weights: … } }` — explicit
+     field overrides; report them alongside the defaults for any omitted
+     sibling fields.
+3. If blackholes are configured, read:
+   - `~/dd/lading/lading/src/blackhole.rs` — blackhole enum
+   - `~/dd/lading/lading/src/blackhole/<type>.rs` — per-blackhole config structs
+4.
If `target_metrics` has entries, read:
+   - `~/dd/lading/lading/src/target_metrics/prometheus.rs` — `uri`, `metrics`, `tags`
+   - `~/dd/lading/lading/src/target_metrics/expvar.rs` — `uri`, `vars`, `tags`
+   (Other scrapers live alongside in `target_metrics/`.)
+
+Read these files in parallel where possible.
+
+### Reading strategy: grep before Read
+
+Lading's source files can be hundreds of lines. To ground defaults without
+reading whole files, use this invariant: every default in lading follows the
+pattern `#[serde(default = "default_foo")]` → `fn default_foo() -> T { ... }`.
+`Default` impls (for payload types like `KindWeights`, `MetricWeights`) are
+adjacent to their struct definitions.
+
+Efficient approach for a generator/blackhole/target_metrics type:
+
+```bash
+# Locate top-level Config + all named defaults + nested enum variants in one pass
+grep -n 'pub struct Config\|pub enum Config\|pub enum\|fn default_\|impl Default for\|#\[serde(default' \
+  ~/dd/lading/lading/src/generator/<type>.rs
+```
+
+Include `pub enum` — some generators' top-level `Config` is an enum
+(e.g. `file_gen::Config` discriminates on `traditional` / `logrotate` /
+`logrotate_fs`), and several structs hold nested enums (`http::Method`,
+`blackhole::datadog::Variant`) that the YAML maps into with nested keys.
+
+Then `Read` only the line ranges that matter (struct/enum body + default fns).
+Reserve full-file reads for cases where the struct body references types
+you still need to understand.
+ +## Step 4: Explain the config + +Using the lading source as ground truth, provide a structured explanation: + +### Generator summary + +For each generator entry, include the fields relevant to that generator type: +- Type and protocol/variant +- Target endpoint (`addr`, `path`, `target_uri`) — if network-based +- Throughput (`bytes_per_second`) — if network-based +- Parallel connections or sender count — if applicable +- Payload characteristics (contexts, tag counts, metric type weights, body sizes, `kind_weights`) — if applicable +- Operation rates (e.g. `open_per_second`, `rename_per_second`) — for filesystem generators +- Container churn rate = `number_of_containers / max_lifetime_seconds` — for container generators (report containers recycled per second, since that's what the agent sees) +- Default values for any omitted fields. **Always resolve the default to a + concrete value**, not just the function name — the user wants to know + what actually runs. Follow `#[serde(default = "default_foo")]` → the body + of `fn default_foo()`, or the `impl Default` block, and report the literal + (e.g. `block_cache_method: Fixed (via lading_payload::block::default_cache_method)`). + If the default is a nested struct with its own defaults, recurse one level; + cite further nested defaults by path rather than expanding the whole tree. +- Cache config (`maximum_prebuild_cache_size_bytes`, `block_cache_method`) — if applicable + +Skip fields that don't exist on the generator type. The Rust `Config` struct is authoritative for which fields exist. + +### Aggregate load + +Summarize total load across all generators. Pick the right unit for the +generator type — don't invent a bytes/s number for non-network load: +- **Network** (`http`, `tcp`, `udp`, `unix_*`, `grpc`, `splunk_hec`): sum + `bytes_per_second` across generators. +- **Filesystem** (`file_tree`, `file_gen`): report operation rates + (`open_per_second`, `rename_per_second`) or the load profile. 
+- **Container** (`container`): report the churn rate
+  (`number_of_containers / max_lifetime_seconds` containers recycled per
+  second); throughput isn't meaningful.
+- **Mixed**: report each dimension separately.
+
+### Blackhole sinks
+
+What endpoints absorb target output, any simulated latency.
+
+### Target metrics
+
+What telemetry is scraped from the target (if configured). Per scraper:
+type (`prometheus`, `expvar`, …), URI, and any tags (e.g. `sub_agent`).
+**Do not enumerate large var lists verbatim** — when `vars:` has many
+entries (common for `expvar`), summarize by category (forwarder,
+serializer, writers, `memstats/*`, etc.) and cite the line range for
+follow-up. A config with no `generator` but heavy `target_metrics` is
+typically an idle-baseline experiment measuring the agent's self-cost.
+
+### Source references
+
+Cite the specific lading source files read, with relative paths from `~/dd/lading/`, so the user can dig deeper.
diff --git a/.claude/skills/explain-lading-config/scripts/resolve-lading-config.sh b/.claude/skills/explain-lading-config/scripts/resolve-lading-config.sh
new file mode 100755
index 000000000000..b710b03583f5
--- /dev/null
+++ b/.claude/skills/explain-lading-config/scripts/resolve-lading-config.sh
@@ -0,0 +1,207 @@
+#!/usr/bin/env bash
+# Resolve the target lading.yaml for the explain-lading-config skill.
+#
+# Scope: experiments under `test/regression/cases/` and
+# `test/regression/x-disabled-cases/`.
+# The skill deliberately does not enumerate `ebpf/cases/` (split-mode)
+# or `ebpf/config-only/cases/` yet. They have different semantics.
+#
+# Usage:
+#   resolve-lading-config.sh [ARG]
+#
+# When ARG is omitted, emits one `<name>\t<path>` line per
+# discovered lading.yaml (tab-separated). Callers list these to the user.
+#
+# When ARG is provided, resolves it to exactly one path and prints that path.
+# Exits non-zero with candidate paths on stderr if the argument is ambiguous,
+# or with "not found" if nothing matches.
# Accepted ARG forms:
#  - absolute or relative path to a lading.yaml
#  - path containing '/' — treated as a file path
#  - substring of an experiment name (plain substring, no shell glob
#    characters required)
#  - glob with '*' or '?' — matched against experiment names, not paths
#
# Experiment name = the case directory name, i.e. the parent of `lading/`
# in `test/regression/cases/<case>/lading/lading.yaml`.

set -euo pipefail

# Repo root when inside a git checkout, otherwise the current directory.
repo_root() {
  git rev-parse --show-toplevel 2>/dev/null || pwd
}

# Exit early with a clear error if we cannot locate the regression suite.
# Otherwise a user running this from the wrong directory (e.g. /tmp) would
# see a silent "no matches" for every query.
require_regression_dir() {
  local root
  root="$(repo_root)"
  if [[ ! -d "$root/test/regression/cases" ]]; then
    # NOTE(review): this heredoc body and the find_configs function below were
    # lost in transit (angle-bracket spans stripped). Reconstructed from the
    # documented contract — SKILL.md's "Exit 4 (wrong repo)" and the
    # NUL-delimited consumer in list_all — confirm against the original patch.
    cat >&2 <<EOF
error: test/regression/cases not found under $root
run this script from inside the agent repository (cd into it first)
EOF
    exit 4
  fi
}

# Emit every experiment lading.yaml (active and disabled), NUL-delimited,
# for consumption by `read -r -d ''` in list_all.
find_configs() {
  local root
  root="$(repo_root)"
  find "$root/test/regression/cases" "$root/test/regression/x-disabled-cases" \
    -type f -path '*/lading/lading.yaml' -print0 2>/dev/null
}

# test/regression/cases/<name>/lading/lading.yaml -> <name>
display_name() {
  local path="$1"
  basename "$(dirname "$(dirname "$path")")"
}

# Emit sorted `<name>\t<path>` rows for every discovered config.
list_all() {
  local path
  while IFS= read -r -d '' path; do
    printf '%s\t%s\n' "$(display_name "$path")" "$path"
  done < <(find_configs) | sort
}

# Render `<name>\t<path>` rows with a trailing `(disabled)` column for
# rows that live under `x-disabled-cases/`. The first two fields stay
# tab-separated so existing parsers still work.
annotate_for_display() {
  local name path
  while IFS=$'\t' read -r name path; do
    [[ -z "$name" ]] && continue
    if [[ "$path" == */x-disabled-cases/* ]]; then
      printf '%s\t%s\t%s\n' "$name" "$path" "(disabled)"
    else
      printf '%s\t%s\n' "$name" "$path"
    fi
  done
}

# Emit up to three "did you mean?" suggestions on stderr.
# Scoring: count of tokens from the query (split on `_`, space, or `-`) that
# appear as substrings of the candidate name. Matching is case-insensitive
# because all experiment names are lowercase. Ties broken alphabetically
# by name (`sort -k1,1rn -k2,2`).
suggest_near_matches() {
  local query="$1" all="$2"
  local lower_query
  lower_query="$(printf '%s' "$query" | tr '[:upper:]' '[:lower:]')"
  local IFS_=$IFS
  # shellcheck disable=SC2206
  IFS=$' \t_-' read -ra tokens <<< "$lower_query"
  IFS=$IFS_
  local scored="" name path score token
  while IFS=$'\t' read -r name path; do
    [[ -z "$name" ]] && continue
    score=0
    for token in "${tokens[@]}"; do
      [[ -n "$token" && "$name" == *"$token"* ]] && score=$((score + 1))
    done
    if [[ "$score" -gt 0 ]]; then
      scored+="$score"$'\t'"$name"$'\n'
    fi
  done <<< "$all"
  if [[ -n "$scored" ]]; then
    echo "did you mean?" >&2
    printf '%s' "$scored" | sort -k1,1rn -k2,2 | head -3 | cut -f2 | sed 's/^/ /' >&2
  fi
}

resolve_one() {
  local arg="$1"

  # 1) Direct path — resolve and return if the file exists AND looks like a
  #    lading config. We reject arbitrary existing files (e.g. /etc/hosts)
  #    to avoid the downstream explainer operating on something unrelated.
  if [[ "$arg" == */* || "$arg" == *.yaml ]]; then
    # Expand a leading `~/` by hand — [[ -f ]] does not tilde-expand a
    # quoted literal, and `${arg#~/}` has subtle tilde-expansion behaviour
    # in the pattern, so just slice off the first two characters.
    case "$arg" in
      "~/"*) arg="$HOME/${arg:2}" ;;
      "~") arg="$HOME" ;;
    esac
    if [[ ! -f "$arg" ]]; then
      echo "not found: $arg" >&2
      return 2
    fi
    if [[ "$(basename "$arg")" != "lading.yaml" ]] \
      && ! head -200 "$arg" 2>/dev/null | grep -qE '^(generator|blackhole|target_metrics)\s*:'; then
      echo "not a lading config: $arg" >&2
      echo "expected a file named 'lading.yaml' or one containing a top-level" >&2
      echo "'generator:', 'blackhole:', or 'target_metrics:' key" >&2
      return 2
    fi
    printf '%s\n' "$(cd "$(dirname "$arg")" && pwd)/$(basename "$arg")"
    return 0
  fi

  # 2) Match against experiment names.
  #
  # Matching precedence:
  #   a. Exact name match wins outright (even if the arg is a substring of
  #      other names — e.g. `uds_dogstatsd_to_api` should not be ambiguous
  #      just because `uds_dogstatsd_to_api_v3` exists).
  #   b. Else if the arg contains glob metachars, shell-glob against names.
  #   c. Else plain substring match (so `security_mean` matches
  #      `quality_gate_security_mean_fs_load`).
  # Matching is case-insensitive — all experiment names are lowercase in
  # the repo, so treating the arg as lowercase adds ergonomics (typing
  # `QUALITY_GATE_IDLE` still works) without ambiguity.
  local all exact matches path name lower_arg lower_name
  all="$(list_all)"
  exact=""
  matches=""
  lower_arg="$(printf '%s' "$arg" | tr '[:upper:]' '[:lower:]')"
  while IFS=$'\t' read -r name path; do
    lower_name="$(printf '%s' "$name" | tr '[:upper:]' '[:lower:]')"
    if [[ "$lower_name" == "$lower_arg" ]]; then
      exact="$name"$'\t'"$path"$'\n'
    fi
    if [[ "$lower_arg" == *[*?[]* ]]; then
      # shellcheck disable=SC2053
      [[ "$lower_name" == $lower_arg ]] && matches="$matches$name"$'\t'"$path"$'\n'
    else
      [[ "$lower_name" == *"$lower_arg"* ]] && matches="$matches$name"$'\t'"$path"$'\n'
    fi
  done <<< "$all"

  if [[ -n "$exact" ]]; then
    printf '%s' "$exact" | cut -f2
    return 0
  fi

  local count
  # `grep -c .` prints "0" but exits 1 when there are no matches; `|| true`
  # keeps set -e / pipefail from aborting while still capturing the "0".
  count="$(printf '%s' "$matches" | grep -c . || true)"
  if [[ "$count" -eq 0 ]]; then
    echo "no lading.yaml matches '$arg'" >&2
    suggest_near_matches "$arg" "$all" >&2
    return 2
  fi
  if [[ "$count" -gt 1 ]]; then
    echo "multiple matches for '$arg':" >&2
    printf '%s' "$matches" | annotate_for_display >&2
    return 3
  fi
  printf '%s' "$matches" | cut -f2
}

require_regression_dir

if [[ $# -eq 0 || -z "${1-}" ]]; then
  list_all | annotate_for_display
else
  resolve_one "$1"
fi
diff --git a/.claude/skills/explain-lading-config/scripts/validate-lading-checkout.sh b/.claude/skills/explain-lading-config/scripts/validate-lading-checkout.sh
new file mode 100755
index 000000000000..4eaff9e5317e
--- /dev/null
+++ b/.claude/skills/explain-lading-config/scripts/validate-lading-checkout.sh
@@ -0,0 +1,31 @@
#!/usr/bin/env bash
# Validate that ~/dd/lading is a usable lading checkout.
#
# Exits 0 with the checkout's current branch printed on stdout when usable.
# Exits 1 with a suggested `git clone` command on stderr when the checkout
# is missing or not a git repo.
#
# Callers should print the stdout line to the user so they know which branch
# explanations will be grounded in, and warn if the branch is not `main`.

set -euo pipefail

LADING_DIR="${LADING_DIR:-$HOME/dd/lading}"

if [[ ! -d "$LADING_DIR" ]]; then
  # NOTE(review): this heredoc body and the git check below were lost in
  # transit (angle-bracket spans stripped). Reconstructed from the header
  # contract above — "Exits 1 with a suggested `git clone` command on
  # stderr" — confirm against the original patch.
  cat >&2 <<EOF
no lading checkout found at $LADING_DIR
clone one with:
  git clone https://github.com/DataDog/lading.git "$LADING_DIR"
or set LADING_DIR to point at an existing checkout.
EOF
  exit 1
fi

if ! git -C "$LADING_DIR" rev-parse --git-dir >/dev/null 2>&1; then
  echo "lading checkout at $LADING_DIR is not a git repo" >&2
  exit 1
fi

# NOTE(review): `branch --show-current` prints an empty string on a detached
# HEAD; callers compare the output against `main`, so an empty line may
# deserve its own warning — confirm intended behaviour.
branch="$(git -C "$LADING_DIR" branch --show-current)"
echo "$branch"
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 654cff49dac6..8d0e8d7207be 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -937,3 +937,6 @@
 /pkg/util/scrubber/go.sum @DataDog/agent-runtimes

 /q_branch/ @DataDog/q-branch
+
+# AI-related files
+/.claude/skills/explain-lading-config @DataDog/single-machine-performance