|
| 1 | +#!/usr/bin/env python3 |
| 2 | +""" |
| 3 | +Process Markdown files with embedded Python code blocks, saving |
| 4 | +the output and images. |
| 5 | +""" |
| 6 | + |
| 7 | +import argparse |
| 8 | +from contextlib import redirect_stdout, redirect_stderr |
| 9 | +import io |
| 10 | +from pathlib import Path |
| 11 | +import sys |
| 12 | +import traceback |
| 13 | + |
| 14 | + |
def parse_markdown(content):
    """Collect every closed ```python fenced block found in *content*.

    Returns a list of dicts, one per block, in document order. Each dict has:
    ``start_line`` / ``end_line`` (0-based indices of the opening and closing
    fence lines), ``code`` (the lines between the fences joined with
    newlines) and ``type`` (always ``"python"``).

    A block that is opened but never closed is not reported. An opening
    fence seen while a block is already open discards the open block and
    starts a new one.
    """
    found = []
    pending = None  # block currently being collected; None outside a fence

    for line_no, raw in enumerate(content.split("\n")):
        marker = raw.strip()

        # Opening fence (checked first, even while inside a block).
        if marker.startswith("```python"):
            pending = {
                "start_line": line_no,
                "end_line": None,
                "code": [],
                "type": "python",
            }
            continue

        # Anything outside a python fence is irrelevant.
        if pending is None:
            continue

        if marker == "```":
            # Closing fence: finalize the block and leave the fence.
            pending["end_line"] = line_no
            pending["code"] = "\n".join(pending["code"])
            found.append(pending)
            pending = None
        else:
            # Body line: keep the original text, indentation included.
            pending["code"].append(raw)

    return found
| 46 | + |
| 47 | + |
def execute_python_code(code, output_dir, output_figure_stem):
    """Run *code* with ``exec`` and collect its output and generated images.

    Args:
        code: Python source text to execute.
        output_dir: Directory where figures are written; created if missing.
        output_figure_stem: Filename prefix for saved plotly figures; files
            are named ``{output_figure_stem}_{n}.png`` with ``n`` starting
            at 1 for each call.

    Returns:
        A dict with keys ``stdout`` and ``stderr`` (captured text),
        ``error`` (message plus traceback, or ``None``), ``images`` (PNG
        filenames relative to *output_dir*) and ``html_files`` (always an
        empty list here).

    Exceptions derived from ``Exception`` raised by the executed code are
    reported through ``result["error"]`` rather than propagated.

    SECURITY NOTE: *code* is executed in-process with ``exec``; only run
    this on Markdown files you trust.
    """
    stdout_buffer = io.StringIO()
    stderr_buffer = io.StringIO()

    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    # Snapshot of the directory so files created by the code can be detected.
    files_before = {f.name for f in output_path.iterdir()}
    result = {"stdout": "", "stderr": "", "error": None, "images": [], "html_files": []}
    figures = []

    def capture_plotly_show(fig):
        """Save a plotly figure as a PNG instead of displaying it."""
        figures.append(fig)
        # NOTE(review): numbering restarts at 1 on every call, so a later
        # code block using the same stem overwrites an earlier block's PNGs.
        png_filename = f"{output_figure_stem}_{len(figures)}.png"
        fig.write_image(output_path / png_filename, width=800, height=600)
        result["images"].append(png_filename)
        print(f"Plotly figure saved as PNG: {png_filename}")

    def patched_show(self, *args, **kwargs):
        """Replacement for ``Figure.show`` that saves the figure to disk."""
        capture_plotly_show(self)

    # Namespace for the executed code; it runs as if it were a script.
    exec_globals = {
        "__name__": "__main__",
        "__file__": "<markdown_code>",
    }

    go = None
    original_show = None
    try:
        with redirect_stdout(stdout_buffer), redirect_stderr(stderr_buffer):
            # plotly is optional: code that does not plot must still run
            # when plotly is not installed.
            try:
                import plotly.graph_objects as go
            except ImportError:
                go = None
            else:
                original_show = go.Figure.show
                go.Figure.show = patched_show

            exec(code, exec_globals)

            # Save plotly figures left in the namespace that were never
            # shown. Identity is used because figures compare equal by
            # content, which would skip a distinct but identical figure.
            for obj in list(exec_globals.values()):
                if "plotly" in str(type(obj)).lower() and hasattr(obj, "show"):
                    if not any(obj is fig for fig in figures):
                        # NOTE(review): this message lands in the captured
                        # stderr and therefore in the rendered output.
                        print("NOT ALREADY PROCESSED", obj, file=sys.stderr)
                        capture_plotly_show(obj)
    except Exception as e:
        result["error"] = f"Error executing code: {str(e)}\n{traceback.format_exc()}"
    finally:
        # Always undo the monkey patch, including when the code raised.
        if original_show is not None:
            go.Figure.show = original_show

    result["stdout"] = stdout_buffer.getvalue()
    result["stderr"] = stderr_buffer.getvalue()

    # Pick up PNG files the code wrote itself (e.g. via matplotlib savefig).
    # The directory may have been removed by the executed code.
    if output_path.exists():
        files_after = {f.name for f in output_path.iterdir()}
        for name in sorted(files_after - files_before):
            if name not in result["images"] and name.lower().endswith(".png"):
                result["images"].append(name)

    return result
| 130 | + |
| 131 | + |
def _fenced_section(title, text):
    """Return the Markdown lines for a titled, fenced block containing *text*."""
    body = text.rstrip()
    # Lengthen the fence until it cannot collide with backtick runs in the
    # captured text, which would otherwise end the block early.
    fence = "```"
    while fence in body:
        fence += "`"
    return ["", title, fence, *body.split("\n"), fence]


def generate_output_markdown(content, code_blocks, execution_results, output_dir):
    """Return *content* with each code block's results inserted after it.

    Args:
        content: The original Markdown text.
        code_blocks: Block dicts as produced by ``parse_markdown``; only
            ``start_line`` and ``end_line`` are read.
        execution_results: Result dicts, index-aligned with *code_blocks*,
            with keys ``stdout``, ``stderr``, ``error``, ``images`` and
            optionally ``html_files``.
        output_dir: Unused; kept for interface compatibility.

    Returns:
        The Markdown text with, for each block and in this order: captured
        stdout, the error, captured stderr, image links and HTML links.
        Sections that would be empty are omitted.
    """
    lines = content.split("\n")

    # Work bottom-up so earlier insertions do not shift later line indices.
    sorted_blocks = sorted(
        enumerate(code_blocks), key=lambda x: x[1]["start_line"], reverse=True
    )

    for block_idx, block in sorted_blocks:
        result = execution_results[block_idx]
        insert_lines = []

        if result["stdout"].strip():
            insert_lines.extend(_fenced_section("**Output:**", result["stdout"]))

        if result["error"]:
            insert_lines.extend(_fenced_section("**Error:**", result["error"]))

        if result["stderr"].strip():
            insert_lines.extend(
                _fenced_section("**Warnings/Messages:**", result["stderr"])
            )

        # Reference each generated image relative to the output file.
        for image in result["images"]:
            insert_lines.append("")
            insert_lines.append(f"![{image}](./{image})")

        # Link HTML files (for plotly figures).
        for html_file in result.get("html_files", []):
            insert_lines.append("")
            insert_lines.append(f"[Interactive Plot](./{html_file})")

        if insert_lines:
            # Insert right after the closing fence of the code block.
            insertion_point = block["end_line"] + 1
            lines[insertion_point:insertion_point] = insert_lines

    return "\n".join(lines)
| 188 | + |
| 189 | + |
def main():
    """Command-line entry point.

    Reads the input Markdown file, executes each Python code block found in
    it, and writes a copy of the document with the results inserted after
    each block. Images are saved in the output file's directory. Exits with
    status 1 if the input is missing or cannot be read, or if the output
    cannot be written.
    """
    cli = argparse.ArgumentParser(
        description="Process Markdown files with Python code blocks and generate output with results"
    )
    cli.add_argument("input_file", help="Input Markdown file")
    cli.add_argument(
        "-o", "--output", help="Output Markdown file (default: input_output.md)"
    )
    args = cli.parse_args()

    source = Path(args.input_file)
    if not source.exists():
        print(f"Error: Input file '{args.input_file}' not found", file=sys.stderr)
        sys.exit(1)

    # Default output sits next to the input: <stem>_output<suffix>.
    output_file = args.output or str(
        source.parent / f"{source.stem}_output{source.suffix}"
    )
    destination = Path(output_file)

    # Images are written alongside the output Markdown file.
    output_dir = str(destination.parent)

    try:
        content = source.read_text(encoding="utf-8")
    except Exception as e:
        print(f"Error reading input file: {e}", file=sys.stderr)
        sys.exit(1)

    print(f"Processing {args.input_file}...")
    output_figure_stem = destination.stem

    code_blocks = parse_markdown(content)
    total = len(code_blocks)
    print(f"Found {total} Python code blocks")

    # Run the blocks in document order, keeping results index-aligned.
    execution_results = []
    for number, block in enumerate(code_blocks, start=1):
        print(f"Executing code block {number}/{total}...")
        outcome = execute_python_code(block["code"], output_dir, output_figure_stem)
        execution_results.append(outcome)

        if outcome["error"]:
            print(f"  Warning: Code block {number} had an error")
        if outcome["images"]:
            print(f"  Generated {len(outcome['images'])} image(s)")

    output_content = generate_output_markdown(
        content, code_blocks, execution_results, output_dir
    )

    try:
        destination.write_text(output_content, encoding="utf-8")
        print(f"Output written to {output_file}")
        if any(outcome["images"] for outcome in execution_results):
            print(f"Images saved to {output_dir}")
    except Exception as e:
        print(f"Error writing output file: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()
0 commit comments