Skip to content

Commit 7960c90

Browse files
committed
docs: auto generate metrics documentation
1 parent 50d20dd commit 7960c90

File tree

19 files changed

+689
-16
lines changed

19 files changed

+689
-16
lines changed

.github/workflows/rust.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -708,6 +708,11 @@ jobs:
708708
# If you encounter an error, run './dev/update_function_docs.sh' and commit
709709
./dev/update_function_docs.sh
710710
git diff --exit-code
711+
- name: Check if metrics.md has been modified
712+
run: |
713+
# If you encounter an error, run './dev/update_metrics_docs.sh' and commit
714+
./dev/update_metrics_docs.sh
715+
git diff --exit-code
711716
712717
# Verify MSRV for the crates which are directly used by other projects:
713718
# - datafusion

Cargo.lock

Lines changed: 17 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

datafusion/core/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@ datafusion-datasource-parquet = { workspace = true, optional = true }
129129
datafusion-execution = { workspace = true }
130130
datafusion-expr = { workspace = true, default-features = false }
131131
datafusion-expr-common = { workspace = true }
132+
datafusion-doc = { workspace = true }
132133
datafusion-functions = { workspace = true }
133134
datafusion-functions-aggregate = { workspace = true }
134135
datafusion-functions-nested = { workspace = true, default-features = false, optional = true }
@@ -167,7 +168,6 @@ ctor = { workspace = true }
167168
dashmap = "6.1.0"
168169
datafusion-doc = { workspace = true }
169170
datafusion-functions-window-common = { workspace = true }
170-
datafusion-macros = { workspace = true }
171171
datafusion-physical-optimizer = { workspace = true }
172172
doc-comment = { workspace = true }
173173
env_logger = { workspace = true }
Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
//! Generate the metrics documentation collected via `DocumentedMetrics`/`DocumentedExec` and write it to disk.
19+
//! Called from doc generation scripts to refresh `docs/source/user-guide/metrics.md`.
20+
21+
use std::{fs, path::PathBuf};
22+
23+
use datafusion_doc::metric_doc_sections::{
24+
ExecDoc, MetricsDoc, MetricsDocPosition, exec_docs, metrics_docs,
25+
};
26+
use datafusion_execution as _; // Link metrics defined in execution crate.
27+
use datafusion_physical_plan as _; // Link metrics and execs defined in physical plan.
28+
29+
/// Apache License 2.0 notice wrapped in an HTML comment (`<!---` ... `-->`).
/// `main` pushes this as the very first content of the generated page.
const LICENSE_HEADER: &str = "<!---
30+
Licensed to the Apache Software Foundation (ASF) under one
31+
or more contributor license agreements. See the NOTICE file
32+
distributed with this work for additional information
33+
regarding copyright ownership. The ASF licenses this file
34+
to you under the Apache License, Version 2.0 (the
35+
\"License\"); you may not use this file except in compliance
36+
with the License. You may obtain a copy of the License at
37+
38+
http://www.apache.org/licenses/LICENSE-2.0
39+
40+
Unless required by applicable law or agreed to in writing,
41+
software distributed under the License is distributed on an
42+
\"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
43+
KIND, either express or implied. See the License for the
44+
specific language governing permissions and limitations
45+
under the License.
46+
-->";
47+
48+
/// Introductory paragraph that `main` places directly after the `# Metrics` title.
const INTRO: &str = "DataFusion operators expose runtime metrics so you can understand where time is spent and how much data flows through the pipeline. See more in [EXPLAIN ANALYZE](sql/explain.md#explain-analyze).";
49+
50+
fn main() -> std::io::Result<()> {
51+
let mut content = String::new();
52+
content.push_str(LICENSE_HEADER);
53+
content.push_str("\n\n# Metrics\n\n");
54+
content.push_str(INTRO);
55+
content.push_str("\n\n");
56+
57+
let mut metrics: Vec<&MetricsDoc> = metrics_docs().collect();
58+
metrics.sort_by(|a, b| a.name.cmp(b.name));
59+
60+
let mut execs: Vec<&ExecDoc> = exec_docs().collect();
61+
execs.sort_by(|a, b| a.name.cmp(b.name));
62+
63+
let common: Vec<&MetricsDoc> = metrics
64+
.iter()
65+
.copied()
66+
.filter(|m| m.position == MetricsDocPosition::Common)
67+
.collect();
68+
69+
if !common.is_empty() {
70+
content.push_str("## Common Metrics\n\n");
71+
for metric in common {
72+
render_metrics_doc(&mut content, metric, 3);
73+
}
74+
}
75+
76+
if !execs.is_empty() {
77+
content.push_str("## Operator-specific Metrics\n\n");
78+
for exec in execs {
79+
render_exec_doc(&mut content, exec);
80+
}
81+
}
82+
83+
let path = output_path();
84+
if let Some(parent) = path.parent() {
85+
fs::create_dir_all(parent)?;
86+
}
87+
fs::write(path, content)
88+
}
89+
90+
fn render_exec_doc(out: &mut String, exec: &ExecDoc) {
91+
out.push_str(&heading(3, exec.name));
92+
out.push_str("\n\n");
93+
94+
if let Some(doc) = summarize(exec.doc) {
95+
if !doc.is_empty() {
96+
out.push_str(&sanitize(&doc));
97+
out.push_str("\n\n");
98+
}
99+
}
100+
101+
let mut metrics: Vec<&MetricsDoc> = exec
102+
.metrics
103+
.iter()
104+
.copied()
105+
.filter(|metric| metric.position != MetricsDocPosition::Common)
106+
.collect();
107+
metrics.sort_by(|a, b| a.name.cmp(b.name));
108+
109+
if metrics.is_empty() {
110+
out.push_str("_No operator-specific metrics documented._\n\n");
111+
} else {
112+
for metric in metrics {
113+
render_metrics_doc(out, metric, 4);
114+
}
115+
}
116+
}
117+
118+
fn render_metrics_doc(out: &mut String, metric: &MetricsDoc, heading_level: usize) {
119+
out.push_str(&heading(heading_level, metric.name));
120+
out.push_str("\n\n");
121+
122+
if let Some(doc) = summarize(metric.doc) {
123+
if !doc.is_empty() {
124+
out.push_str(&sanitize(&doc));
125+
out.push_str("\n\n");
126+
}
127+
}
128+
129+
if metric.fields.is_empty() {
130+
out.push_str("_No metrics documented._\n\n");
131+
return;
132+
}
133+
134+
out.push_str("| Metric | Description |\n");
135+
out.push_str("| --- | --- |\n");
136+
for field in metric.fields {
137+
out.push_str(&format!("| {} | {} |\n", field.name, sanitize(field.doc)));
138+
}
139+
out.push('\n');
140+
}
141+
142+
/// Returns a Markdown heading line: `level` `#` characters, one space, then
/// `title`. No trailing newline is added.
fn heading(level: usize, title: &str) -> String {
    let mut line = "#".repeat(level);
    line.push(' ');
    line.push_str(title);
    line
}
145+
146+
/// Extracts the first paragraph of `doc` to use as a short summary.
///
/// Returns `None` when `doc` is empty or whitespace-only. Otherwise the result
/// is every line up to (not including) the first blank line, with surrounding
/// whitespace trimmed. A line counts as blank when it holds nothing but
/// whitespace, so a whitespace-only separator line or a CRLF blank line ends
/// the summary exactly like a bare `\n\n` does.
fn summarize(doc: &str) -> Option<String> {
    let trimmed = doc.trim();
    if trimmed.is_empty() {
        return None;
    }

    // `lines()` strips both `\n` and `\r\n` terminators, and the trim check
    // catches separator lines that contain stray spaces or tabs. `trimmed`
    // starts with a non-whitespace character, so the first line is never blank
    // and the summary is never empty.
    let first_paragraph: Vec<&str> = trimmed
        .lines()
        .take_while(|line| !line.trim().is_empty())
        .collect();

    Some(first_paragraph.join("\n").trim().to_string())
}
160+
161+
/// Collapses `doc` onto a single line: every run of whitespace (including
/// newlines) becomes one space, and leading/trailing whitespace is dropped.
fn sanitize(doc: &str) -> String {
    let mut flattened = String::with_capacity(doc.len());
    for word in doc.split_whitespace() {
        // Words are never empty, so a non-empty buffer means a word precedes
        // this one and a separator is needed.
        if !flattened.is_empty() {
            flattened.push(' ');
        }
        flattened.push_str(word);
    }
    flattened
}
164+
165+
fn output_path() -> PathBuf {
166+
let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
167+
manifest_dir
168+
.parent()
169+
.and_then(|p| p.parent())
170+
.unwrap_or(&manifest_dir)
171+
.join("docs/source/user-guide/metrics.md")
172+
}

datafusion/doc/Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@ license = { workspace = true }
2828
authors = { workspace = true }
2929
rust-version = { workspace = true }
3030

31+
[dependencies]
32+
inventory = "0.3.15"
33+
3134
[package.metadata.docs.rs]
3235
all-features = true
3336

datafusion/doc/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,12 @@
2323
)]
2424
#![cfg_attr(docsrs, feature(doc_cfg))]
2525

26+
mod metrics;
2627
mod udaf;
2728
mod udf;
2829
mod udwf;
2930

31+
pub use metrics::metric_doc_sections;
3032
pub use udaf::aggregate_doc_sections;
3133
pub use udf::scalar_doc_sections;
3234
pub use udwf::window_doc_sections;

0 commit comments

Comments
 (0)