Skip to content

Commit 9525f6a

Browse files
committed
feat: converting doc/python/*.md using bin/run_markdown.py
- Add `bin/run_markdown.py` (with help from Claude).
- Runs Python chunks embedded in Markdown, writing the result as Markdown.
- Has an option to embed interactive figures as well as generate PNG.
- Modify `Makefile` to run the script on selected files for testing purposes.
- Commented-out target runs on all.

To do:
- [ ] Figure out why `bin/run_markdown.py` fails with "too many open files" for large numbers of input files.
- [ ] Modify `Makefile` to allow selective re-running as well as batch runs.
- [ ] Modify `bin/run_markdown.py` to use a single Kaleido sub-process to speed up image generation.
1 parent 96af9f6 commit 9525f6a

File tree

23 files changed

+392
-107
lines changed

23 files changed

+392
-107
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ doc/python/raw.githubusercontent.com/
1515

1616
docs/
1717
docs_tmp/
18+
pages/examples/
1819

1920
# Don't ignore dataset files
2021
!*.csv.gz

Makefile

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
RUN = uv run
44
PACKAGE_DIRS = _plotly_utils plotly
55
CODE_DIRS = ${PACKAGE_DIRS} scripts
6+
EXAMPLE_SRC = doc/python/cone-plot.md doc/python/strip-charts.md
7+
# EXAMPLE_SRC = $(wildcard doc/python/*.md)
68

79
## commands: show available commands
810
commands:
@@ -21,6 +23,10 @@ docs-lint:
2123
docs-tmp:
2224
MKDOCS_TEMP_DIR=./docs_tmp ${RUN} mkdocs build
2325

26+
## examples: generate Markdown from doc/python
27+
examples:
28+
${RUN} bin/run_markdown.py --outdir pages/examples --inline --verbose ${EXAMPLE_SRC}
29+
2430
## format: reformat code
2531
format:
2632
${RUN} ruff format ${CODE_DIRS}
@@ -52,6 +58,8 @@ clean:
5258
@rm -rf .pytest_cache
5359
@rm -rf .ruff_cache
5460
@rm -rf dist
61+
@rm -rf docs
62+
@rm -rf pages/examples
5563

5664
## sync: update Python packages
5765
sync:

bin/codegen/datatypes.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -218,8 +218,8 @@ def _subplot_re_match(self, prop):
218218
else:
219219
property_docstring = property_description
220220

221-
# Fix `][`.
222-
property_docstring = property_docstring.replace("][", "]\\[")
221+
# FIXME: replace '][' with ']\[' to avoid confusion with Markdown reference links
222+
# property_docstring = property_docstring.replace("][", "]\\[")
223223

224224
# Write get property
225225
buffer.write(

bin/codegen/utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -163,8 +163,8 @@ def format_description(desc):
163163
# replace {2D arrays} with 2D lists
164164
desc = desc.replace("{2D arrays}", "2D lists")
165165

166-
# replace '][' with ']\[' to avoid confusion with Markdown reference links
167-
desc = desc.replace("][", r"]\\[")
166+
# FIXME: replace '][' with ']\[' to avoid confusion with Markdown reference links
167+
# desc = desc.replace("][", r"]\\[")
168168

169169
return desc
170170

bin/run_markdown.py

Lines changed: 246 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,246 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Process Markdown files with embedded Python code blocks, saving
4+
the output and images.
5+
"""
6+
7+
import argparse
8+
from contextlib import redirect_stdout, redirect_stderr
9+
import io
10+
from pathlib import Path
11+
import plotly.graph_objects as go
12+
import sys
13+
import traceback
14+
15+
16+
def main():
    """Entry point: parse the command line and process each input file in order."""
    options = _parse_args()
    # Convert each name to a Path lazily, right before it is processed.
    for source in map(Path, options.input):
        _do_file(options, source)
20+
21+
22+
def _do_file(args, input_file):
    """Process a single Markdown file and write the augmented copy.

    Reads ``input_file``, runs every Python code block found in it, and
    writes a copy (same file name) into ``args.outdir`` with the captured
    output, errors and generated figures inserted after each block.

    Args:
        args: Parsed command-line namespace; ``outdir``, ``verbose`` and
            (via ``_generate_markdown``) ``inline`` are read.
        input_file: ``Path`` of the Markdown file to process.

    Exits the process with status 1 if the input is missing, the output
    would overwrite the input, or reading/writing fails.
    """

    # Validate input file
    if not input_file.exists():
        print(f"Error: '{input_file}' not found", file=sys.stderr)
        sys.exit(1)

    # Determine output file path etc.
    stem = input_file.stem
    output_file = args.outdir / input_file.name
    if input_file.resolve() == output_file.resolve():
        print(f"Error: output would overwrite input '{input_file}'", file=sys.stderr)
        sys.exit(1)

    # Read input
    try:
        content = input_file.read_text(encoding="utf-8")
    except (OSError, UnicodeError) as e:
        print(f"Error reading input file: {e}", file=sys.stderr)
        sys.exit(1)

    # Parse markdown and extract code blocks
    _report(args.verbose, f"Processing {input_file}...")
    code_blocks = _parse_md(content)
    _report(args.verbose, f"- Found {len(code_blocks)} code blocks")

    # Execute code blocks and collect results
    execution_results = []
    figure_counter = 0
    for i, block in enumerate(code_blocks):
        _report(args.verbose, f"- Executing block {i + 1}/{len(code_blocks)}")
        figure_counter, result = _run_code(block["code"], args.outdir, stem, figure_counter)
        execution_results.append(result)
        # Errors and image counts are reported regardless of --verbose.
        _report(result["error"], f"  - Warning: block {i + 1} had an error")
        _report(result["images"], f"  - Generated {len(result['images'])} image(s)")

    # Generate and save output
    content = _generate_markdown(args, content, code_blocks, execution_results, args.outdir)
    try:
        # The directory is otherwise only created while running a code block,
        # so a file with no Python blocks would fail to write into a fresh
        # output directory.
        args.outdir.mkdir(parents=True, exist_ok=True)
        output_file.write_text(content, encoding="utf-8")
        _report(args.verbose, f"- Output written to {output_file}")
        _report(any(result["images"] for result in execution_results), f"- Images saved to {args.outdir}")
    except (OSError, UnicodeError) as e:
        print(f"Error writing output file: {e}", file=sys.stderr)
        sys.exit(1)
70+
71+
72+
def _capture_plotly_show(fig, counter, result, output_dir, stem):
    """Write a figure to disk as PNG and HTML instead of displaying it.

    The files are named ``{stem}_{counter}.png`` / ``.html`` inside
    ``output_dir``. Their names are appended to ``result["images"]`` and
    ``result["html_files"]``, and an embeddable HTML fragment is appended
    to ``result["html_content"]`` (created on first use).
    """
    base_name = f"{stem}_{counter}"

    # Static image
    png_name = base_name + ".png"
    fig.write_image(output_dir / png_name, width=800, height=600)
    result["images"].append(png_name)

    # Standalone HTML page, plus a fragment suitable for inlining
    html_name = base_name + ".html"
    fig.write_html(output_dir / html_name, include_plotlyjs="cdn")
    fragment = fig.to_html(
        include_plotlyjs="cdn",
        div_id=f"plotly-div-{counter}",
        full_html=False,
    )
    result["html_files"].append(html_name)
    result.setdefault("html_content", []).append(fragment)
87+
88+
89+
def _generate_markdown(args, content, code_blocks, execution_results, output_dir):
    """Return ``content`` with each block's execution results inserted after it.

    For every code block the following are appended, in this order and only
    when present: captured stdout, the error text, captured stderr, one
    image link per generated image, and (when ``args.inline`` is set) the
    HTML fragment of each figure. ``output_dir`` is accepted but not used
    here.
    """
    doc_lines = content.split("\n")

    def fenced(title, text):
        # A blank line, a bold heading, then the text inside a plain fence.
        return ["", title, "```", *text.rstrip().split("\n"), "```"]

    # Work from the bottom of the document upwards so that insertions do not
    # shift the line numbers of blocks still to be handled.
    bottom_up = sorted(
        range(len(code_blocks)),
        key=lambda idx: code_blocks[idx]["start_line"],
        reverse=True,
    )

    for idx in bottom_up:
        outcome = execution_results[idx]
        extra = []

        if outcome["stdout"].strip():
            extra += fenced("**Output:**", outcome["stdout"])
        if outcome["error"]:
            extra += fenced("**Error:**", outcome["error"])
        if outcome["stderr"].strip():
            extra += fenced("**Warnings/Messages:**", outcome["stderr"])

        for image in outcome["images"]:
            extra += ["", f"![Generated Plot](./{image})"]

        if args.inline:
            for fragment in outcome.get("html_content", []):
                extra += ["", "**Interactive Plot:**", "", *fragment.split("\n")]

        if extra:
            # Splice in directly after the closing fence of the block.
            at = code_blocks[idx]["end_line"] + 1
            doc_lines[at:at] = extra

    return "\n".join(doc_lines)
147+
148+
149+
def _parse_args():
    """Parse command-line arguments.

    Returns:
        argparse.Namespace with ``input`` (list of file names), ``inline``
        (bool), ``outdir`` (``Path``) and ``verbose`` (bool).

    ``--outdir`` is mandatory: the output path is built from it, so leaving
    it unset would otherwise surface later as a ``TypeError`` on ``None``.
    """
    parser = argparse.ArgumentParser(description="Process Markdown files with code blocks")
    parser.add_argument("input", nargs="+", help="Input .md file")
    parser.add_argument("--inline", action="store_true", help="Inline HTML in .md")
    parser.add_argument("--outdir", type=Path, required=True, help="Output directory")
    parser.add_argument("--verbose", action="store_true", help="Report progress")
    return parser.parse_args()
157+
158+
159+
def _parse_md(content):
    """Extract the fenced Python code blocks from Markdown text.

    Returns a list of dicts, one per completed block, with the 0-based
    ``start_line`` and ``end_line`` of the opening and closing fences, the
    block's ``code`` as a single string, and ``type`` set to ``"python"``.
    A block that is opened but never closed is not returned.
    """
    found = []
    active = None  # the block currently being collected, if any

    for lineno, text in enumerate(content.split("\n")):
        stripped = text.strip()

        # An opening fence always starts a fresh block.
        if stripped.startswith("```python"):
            active = {
                "start_line": lineno,
                "end_line": None,
                "code": [],
                "type": "python",
            }
            continue

        # Everything outside a Python block is ignored.
        if active is None:
            continue

        if stripped == "```":
            # Closing fence: finalize the block.
            active["end_line"] = lineno
            active["code"] = "\n".join(active["code"])
            found.append(active)
            active = None
        else:
            active["code"].append(text)

    return found
190+
191+
192+
def _report(condition, message):
    """Print ``message`` to standard error when ``condition`` is truthy."""
    if not condition:
        return
    print(message, file=sys.stderr)
196+
197+
198+
def _run_code(code, output_dir, stem, figure_counter):
    """Execute a code block, capturing its output and generated files.

    While the code runs, ``go.Figure.show`` is replaced so that each call
    saves the figure through ``_capture_plotly_show`` instead of displaying
    it. The original method is always restored afterwards, including when
    the code raises.

    Args:
        code: Python source text to execute.
        output_dir: ``Path`` of the directory figures are written to;
            created if missing.
        stem: File-name prefix for generated figures.
        figure_counter: Number of figures saved so far; incremented once
            per ``show()`` call.

    Returns:
        ``(figure_counter, result)`` where ``result`` is a dict with keys
        ``stdout``, ``stderr``, ``error`` (``None`` or message plus
        traceback), ``images`` and ``html_files``.

    NOTE(review): ``exec`` runs the code with full privileges in a fresh
    namespace each call; only use this on trusted documents.
    """
    # Capture stdout and stderr
    stdout_buffer = io.StringIO()
    stderr_buffer = io.StringIO()

    # Track files created during execution
    output_dir.mkdir(parents=True, exist_ok=True)
    files_before = {f.name for f in output_dir.iterdir()}
    result = {"stdout": "", "stderr": "", "error": None, "images": [], "html_files": []}

    def patched_show(self, *args, **kwargs):
        nonlocal figure_counter
        figure_counter += 1
        _capture_plotly_show(self, figure_counter, result, output_dir, stem)

    # Create a namespace for code execution
    exec_globals = {
        "__name__": "__main__",
        "__file__": "<markdown_code>",
    }

    original_show = go.Figure.show
    go.Figure.show = patched_show
    try:
        # Execute the code with output capture
        with redirect_stdout(stdout_buffer), redirect_stderr(stderr_buffer):
            exec(code, exec_globals)
    except Exception as e:
        result["error"] = f"Error executing code: {str(e)}\n{traceback.format_exc()}"
    finally:
        # Undo the monkey-patch even when the block fails; otherwise the
        # patch leaks and later calls would wrap an already-patched method.
        go.Figure.show = original_show

    result["stdout"] = stdout_buffer.getvalue()
    result["stderr"] = stderr_buffer.getvalue()

    # Check for any additional files created; sorted so that the order of
    # image links in the output is deterministic.
    files_after = {f.name for f in output_dir.iterdir()}
    for name in sorted(files_after - files_before):
        if name not in result["images"] and name.lower().endswith(".png"):
            result["images"].append(name)

    return figure_counter, result
243+
244+
245+
if __name__ == "__main__":
246+
main()

notes.txt

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
- Can we get rid of `plotly/api` entirely?
2+
- Can we eliminate `plotly/conftest.py` and fix breaking tests?
3+
- Why the distinction between `graph_objects` and `graph_objs`?
4+
- Historical reasons, but `graph_objs` is widely used.
5+
- Generate code into `graph_objects` and have `graph_objs` point at it
6+
instead of vice versa.
7+
- Switch focus for now to the main documentation in `./doc`.
8+
9+
- Ran this to create a `.ipynb` file:
10+
11+
```
12+
jupytext --to ipynb --execute --output pages/strip-charts.ipynb doc/python/strip-charts.md
13+
```
14+
15+
- Loading the notebook like this, the charts don't show up:
16+
17+
```
18+
jupyter notebook pages/strip-charts.ipynb
19+
```
20+
21+
- Had to add this in a cell at the top:
22+
23+
```
24+
import plotly.io as pio
25+
pio.renderers.default = "notebook"
26+
```
27+
28+
- `mkdocs build` produces many (many) lines like this that did *not* appear
29+
before `mkdocs-jupyter` was added to `mkdocs.yml`:
30+
31+
```
32+
[WARNING] Div at /var/folders/w2/l51fjbjd25n9zbwkz9fw9jp00000gn/T/tmpgvlxh1sq line 3 column 1 unclosed at /var/folders/w2/l51fjbjd25n9zbwkz9fw9jp00000gn/T/tmpgvlxh1sq line 6 column 1, closing implicitly.
33+
```
34+
35+
- But with the `plotly.io` line, the `.ipynb` file is converted to usable HTML.
36+
- Still clearly originated as a notebook, but the chart shows up.

plotly/graph_objs/_figure.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9977,7 +9977,7 @@ def add_image(
99779977
source
99789978
Specifies the data URI of the image to be visualized.
99799979
The URI consists of "data:image/[<media
9980-
subtype>]\\[;base64],<data>"
9980+
subtype>][;base64],<data>"
99819981
stream
99829982
:class:`plotly.graph_objects.image.Stream` instance or
99839983
dict with compatible properties

plotly/graph_objs/_figurewidget.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9979,7 +9979,7 @@ def add_image(
99799979
source
99809980
Specifies the data URI of the image to be visualized.
99819981
The URI consists of "data:image/[<media
9982-
subtype>]\\[;base64],<data>"
9982+
subtype>][;base64],<data>"
99839983
stream
99849984
:class:`plotly.graph_objects.image.Stream` instance or
99859985
dict with compatible properties

plotly/graph_objs/_image.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -521,7 +521,7 @@ def opacity(self, val):
521521
def source(self):
522522
"""
523523
Specifies the data URI of the image to be visualized. The URI
524-
consists of "data:image/[<media subtype>]\\[;base64],<data>"
524+
consists of "data:image/[<media subtype>][;base64],<data>"
525525
526526
The 'source' property is a string and must be specified as:
527527
- A string
@@ -1021,7 +1021,7 @@ def _prop_descriptions(self):
10211021
source
10221022
Specifies the data URI of the image to be visualized.
10231023
The URI consists of "data:image/[<media
1024-
subtype>]\\[;base64],<data>"
1024+
subtype>][;base64],<data>"
10251025
stream
10261026
:class:`plotly.graph_objects.image.Stream` instance or
10271027
dict with compatible properties
@@ -1294,7 +1294,7 @@ def __init__(
12941294
source
12951295
Specifies the data URI of the image to be visualized.
12961296
The URI consists of "data:image/[<media
1297-
subtype>]\\[;base64],<data>"
1297+
subtype>][;base64],<data>"
12981298
stream
12991299
:class:`plotly.graph_objects.image.Stream` instance or
13001300
dict with compatible properties

0 commit comments

Comments
 (0)