diff --git a/bench.sh b/bench.sh
index cc2c8915..b80edcf5 100755
--- a/bench.sh
+++ b/bench.sh
@@ -170,7 +170,6 @@ for i in "${!benchmark_names[@]}"; do
     iteration_pass_data=()
     iteration_memory=()
     binary_size=0
-
    for ((iter=1; iter<=ITERATIONS; iter++)); do
        output_binary="$TEMP_DIR/bench_output_$$"

@@ -220,6 +219,11 @@ for i in "${!benchmark_names[@]}"; do
            fi
        fi

+        # Capture binary size from first successful iteration
+        if [[ $binary_size -eq 0 && -f "$output_binary" ]]; then
+            binary_size=$(stat -f%z "$output_binary" 2>/dev/null || stat -c%s "$output_binary" 2>/dev/null || echo 0)
+        fi
+
        rm -f "$output_binary"
    done

@@ -274,36 +278,60 @@ for i in "${!benchmark_names[@]}"; do

    log_info "  $name: time=${mean}ms (±${stddev}), mem=${mem_mean_mb}MB, binary=${binary_size_kb}KB (n=$count)"

-    # Extract and aggregate per-pass timing data
+    # Extract and aggregate per-pass timing data, source metrics, and memory
    # Use Python to parse JSON and compute per-pass means
-    pass_json=$(python3 -c "
+    extra_json=$(python3 -c "
 import json
 import sys

 pass_data = {}
+source_metrics = None
+peak_memory_samples = []
+
 for json_str in sys.argv[1:]:
     try:
         data = json.loads(json_str)
         for p in data.get('passes', []):
-            name = p['name']
+            pname = p['name']
             duration = p['duration_ms']
-            if name not in pass_data:
-                pass_data[name] = []
-            pass_data[name].append(duration)
+            if pname not in pass_data:
+                pass_data[pname] = []
+            pass_data[pname].append(duration)
+        # Get source_metrics from first run (they're constant)
+        if source_metrics is None and 'source_metrics' in data:
+            source_metrics = data['source_metrics']
+        # Collect peak memory samples
+        if 'peak_memory_bytes' in data and data['peak_memory_bytes']:
+            peak_memory_samples.append(data['peak_memory_bytes'])
     except:
         pass

-# Calculate means
-result = {}
-for name, durations in pass_data.items():
+# Calculate means for passes
+passes = {}
+for pname, durations in pass_data.items():
     mean = sum(durations) / len(durations) if durations else 0
-    result[name] = {'mean_ms': round(mean, 3)}
+    passes[pname] = {'mean_ms': round(mean, 3)}
+
+result = {'passes': passes}
+if source_metrics:
+    result['source_metrics'] = source_metrics
+if peak_memory_samples:
+    result['peak_memory_bytes'] = int(sum(peak_memory_samples) / len(peak_memory_samples))

 print(json.dumps(result))
-" "${iteration_pass_data[@]}" 2>/dev/null || echo "{}")
+" "${iteration_pass_data[@]}" 2>/dev/null || echo "{\"passes\":{}}")
+
+    # Extract components from the JSON
+    passes_json=$(echo "$extra_json" | python3 -c "import sys, json; d=json.load(sys.stdin); print(json.dumps(d.get('passes', {})))")
+    source_metrics_json=$(echo "$extra_json" | python3 -c "import sys, json; d=json.load(sys.stdin); sm=d.get('source_metrics'); print(json.dumps(sm) if sm else 'null')")
+
+    # Store result with all data (including memory and binary size from iteration tracking)
+    result_parts=("\"name\":\"$name\"" "\"iterations\":$count" "\"mean_ms\":$mean" "\"std_ms\":$stddev" "\"passes\":$passes_json")
+    [[ "$source_metrics_json" != "null" ]] && result_parts+=("\"source_metrics\":$source_metrics_json")
+    [[ "$mem_mean" -gt 0 ]] && result_parts+=("\"peak_memory_bytes\":$mem_mean")
+    [[ "$binary_size" -gt 0 ]] && result_parts+=("\"binary_size_bytes\":$binary_size")

-    # Store result with pass data and new metrics
-    all_results+=("{\"name\":\"$name\",\"iterations\":$count,\"mean_ms\":$mean,\"std_ms\":$stddev,\"peak_memory_bytes\":$mem_mean,\"memory_std_bytes\":$mem_stddev,\"binary_size_bytes\":$binary_size,\"passes\":$pass_json}")
+    all_results+=("{$(IFS=,; echo "${result_parts[*]}")}")
 done

 # Get metadata
diff --git a/crates/rue/BUCK b/crates/rue/BUCK
index 6c27809d..13223136 100644
--- a/crates/rue/BUCK
+++ b/crates/rue/BUCK
@@ -8,6 +8,7 @@ rust_binary(
        "//crates/rue-compiler:rue-compiler",
        "//crates/rue-rir:rue-rir",
        "//crates/rue-target:rue-target",
+        "//third-party:libc",
        "//third-party:serde",
        "//third-party:serde_json",
        "//third-party:tracing",
@@ -26,6 +27,7 @@ rust_test(
        "//crates/rue-compiler:rue-compiler",
        "//crates/rue-rir:rue-rir",
        "//crates/rue-target:rue-target",
+        "//third-party:libc",
        "//third-party:serde",
        "//third-party:serde_json",
        "//third-party:tracing",
diff --git a/crates/rue/src/main.rs b/crates/rue/src/main.rs
index 833684ab..9e6bee00 100644
--- a/crates/rue/src/main.rs
+++ b/crates/rue/src/main.rs
@@ -586,12 +586,21 @@ fn print_timing_output(
    time_passes: bool,
    benchmark_json: bool,
    target: &Target,
+    source_metrics: Option<timing::SourceMetrics>,
 ) {
    if let Some(timing) = timing_data {
        if benchmark_json {
            // JSON output goes to stdout for easy capture
-            // Include metadata for historical analysis
-            println!("{}", timing.to_json(&target.to_string(), VERSION));
+            // Include metadata and source metrics for historical analysis
+            println!(
+                "{}",
+                timing.to_json_with_metrics(
+                    &target.to_string(),
+                    VERSION,
+                    source_metrics,
+                    get_peak_memory_bytes(),
+                )
+            );
        } else if time_passes {
            // Human-readable output goes to stderr
            eprintln!("{}", timing.report());
@@ -599,6 +608,53 @@
    }
 }

+/// Get peak memory usage in bytes (platform-specific).
+///
+/// Returns None if memory usage cannot be determined.
+fn get_peak_memory_bytes() -> Option<u64> {
+    #[cfg(target_os = "linux")]
+    {
+        // On Linux, read from /proc/self/status
+        if let Ok(status) = fs::read_to_string("/proc/self/status") {
+            for line in status.lines() {
+                if line.starts_with("VmHWM:") {
+                    // VmHWM is "high water mark" - peak resident set size
+                    // Format: "VmHWM: 12345 kB"
+                    let parts: Vec<&str> = line.split_whitespace().collect();
+                    if parts.len() >= 2 {
+                        if let Ok(kb) = parts[1].parse::<u64>() {
+                            return Some(kb * 1024);
+                        }
+                    }
+                }
+            }
+        }
+        None
+    }
+
+    #[cfg(target_os = "macos")]
+    {
+        // On macOS, use rusage
+        use std::mem::MaybeUninit;
+        let mut rusage = MaybeUninit::uninit();
+        // SAFETY: rusage is properly aligned and getrusage is a standard POSIX call
+        let result = unsafe { libc::getrusage(libc::RUSAGE_SELF, rusage.as_mut_ptr()) };
+        if result == 0 {
+            // SAFETY: getrusage succeeded, so rusage is initialized
+            let rusage = unsafe { rusage.assume_init() };
+            // ru_maxrss is in bytes on macOS (unlike Linux where it's in KB)
+            Some(rusage.ru_maxrss as u64)
+        } else {
+            None
+        }
+    }
+
+    #[cfg(not(any(target_os = "linux", target_os = "macos")))]
+    {
+        None
+    }
+}
+
 fn main() {
    let options = match parse_args() {
        Some(opts) => opts,
@@ -624,6 +680,23 @@
    let source_info = SourceInfo::new(&source, &options.source_path);
    let formatter = DiagnosticFormatter::new(&source_info);

+    // Compute source metrics if benchmark JSON is requested
+    let source_metrics = if options.benchmark_json {
+        // We need token count, so do a quick lex
+        let lexer = Lexer::new(&source);
+        let token_count = match lexer.tokenize() {
+            Ok((tokens, _interner)) => tokens.len(),
+            Err(_) => 0, // If lexing fails, we'll get the error during compilation anyway
+        };
+        Some(timing::SourceMetrics {
+            bytes: source.len(),
+            lines: source.lines().count(),
+            tokens: token_count,
+        })
+    } else {
+        None
+    };
+
    // Handle emit modes
    if !options.emit_stages.is_empty() {
        if let Err(()) = handle_emit(&source, &options, &formatter) {
@@ -634,6 +707,7 @@
            options.time_passes,
            options.benchmark_json,
            &options.target,
+            source_metrics,
        );
        return;
    }
@@ -700,6 +774,7 @@
                options.time_passes,
                options.benchmark_json,
                &options.target,
+                source_metrics,
            );
        }
        Err(errors) => {
diff --git a/crates/rue/src/timing.rs b/crates/rue/src/timing.rs
index 7aca5d70..9915db67 100644
--- a/crates/rue/src/timing.rs
+++ b/crates/rue/src/timing.rs
@@ -83,6 +83,23 @@ pub struct BenchmarkTiming {
    pub passes: Vec<PassTiming>,
    /// Total compilation time in milliseconds.
    pub total_ms: f64,
+    /// Source code metrics (lines, bytes, tokens).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub source_metrics: Option<SourceMetrics>,
+    /// Peak memory usage in bytes (if available).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub peak_memory_bytes: Option<u64>,
+}
+
+/// Source code metrics for throughput calculations.
+#[derive(Debug, Clone, Serialize)]
+pub struct SourceMetrics {
+    /// Number of bytes in the source file.
+    pub bytes: usize,
+    /// Number of lines in the source file.
+    pub lines: usize,
+    /// Number of tokens produced by the lexer.
+    pub tokens: usize,
 }

 /// Metadata about a benchmark run for historical analysis.
@@ -196,6 +213,23 @@
    /// * `target` - The target platform string (e.g., "x86_64-linux")
    /// * `version` - The compiler version string
    pub fn to_benchmark_timing(&self, target: &str, version: &str) -> BenchmarkTiming {
+        self.to_benchmark_timing_with_metrics(target, version, None, None)
+    }
+
+    /// Generate structured timing data with optional source metrics and memory usage.
+    ///
+    /// # Arguments
+    /// * `target` - The target platform string (e.g., "x86_64-linux")
+    /// * `version` - The compiler version string
+    /// * `source_metrics` - Optional source code metrics (bytes, lines, tokens)
+    /// * `peak_memory_bytes` - Optional peak memory usage in bytes
+    pub fn to_benchmark_timing_with_metrics(
+        &self,
+        target: &str,
+        version: &str,
+        source_metrics: Option<SourceMetrics>,
+        peak_memory_bytes: Option<u64>,
+    ) -> BenchmarkTiming {
        let inner = self.inner.lock().unwrap();

        let total: Duration = inner.passes.values().sum();
@@ -231,6 +265,8 @@
            metadata,
            passes,
            total_ms,
+            source_metrics,
+            peak_memory_bytes,
        }
    }

@@ -262,6 +298,29 @@
        serde_json::to_string(&timing).unwrap_or_else(|_| "{}".to_string())
    }

+    /// Generate JSON output with additional source metrics.
+    ///
+    /// # Arguments
+    /// * `target` - The target platform string
+    /// * `version` - The compiler version string
+    /// * `source_metrics` - Source code metrics (bytes, lines, tokens)
+    /// * `peak_memory_bytes` - Optional peak memory usage
+    pub fn to_json_with_metrics(
+        &self,
+        target: &str,
+        version: &str,
+        source_metrics: Option<SourceMetrics>,
+        peak_memory_bytes: Option<u64>,
+    ) -> String {
+        let timing = self.to_benchmark_timing_with_metrics(
+            target,
+            version,
+            source_metrics,
+            peak_memory_bytes,
+        );
+        serde_json::to_string(&timing).unwrap_or_else(|_| "{}".to_string())
+    }
+
    /// Generate pretty-printed JSON output for benchmark timing.
    ///
    /// Same as `to_json()` but with indentation for human readability.
diff --git a/scripts/generate-charts.py b/scripts/generate-charts.py
index 10314bbb..4ca03c1b 100755
--- a/scripts/generate-charts.py
+++ b/scripts/generate-charts.py
@@ -261,6 +261,159 @@ def get_benchmark_names(runs: list[dict]) -> list[str]:
    return sorted(names)


+# Colors for different benchmark programs
+BENCHMARK_COLORS = [
+    "#4f6ddb",  # blue
+    "#10b981",  # emerald
+    "#f59e0b",  # amber
+    "#ef4444",  # red
+    "#8b5cf6",  # violet
+    "#06b6d4",  # cyan
+    "#ec4899",  # pink
+]
+
+
+def get_benchmark_time(run: dict, benchmark_name: str) -> float:
+    """Get timing for a specific benchmark from a run."""
+    for bench in run.get("benchmarks", []):
+        if bench.get("name") == benchmark_name:
+            if "mean_ms" in bench:
+                return bench["mean_ms"]
+            if "total_ms" in bench:
+                total = bench["total_ms"]
+                if isinstance(total, dict):
+                    return total.get("mean", 0)
+                return total
+    return 0
+
+
+def generate_multi_timeline_chart(runs: list[dict], benchmark_names: list[str]) -> str:
+    """Generate time-series SVG chart showing each benchmark program as a separate line."""
+    if not runs or not benchmark_names:
+        return generate_empty_chart(TIMELINE_WIDTH, TIMELINE_HEIGHT + 50, "No benchmark data available yet")
+
+    # Extract data points for each benchmark
+    commits = [short_commit(run.get("commit", "")) for run in runs[-20:]]
+    benchmark_data = {}
+
+    for name in benchmark_names:
+        points = []
+        for run in runs[-20:]:
+            time = get_benchmark_time(run, name)
+            points.append(time)
+        benchmark_data[name] = points
+
+    # Check if we have any data
+    all_times = [t for pts in benchmark_data.values() for t in pts]
+    if not all_times or all(t == 0 for t in all_times):
+        return generate_empty_chart(TIMELINE_WIDTH, TIMELINE_HEIGHT + 50, "No timing data in benchmarks")
+
+    # Chart layout (taller to accommodate legend)
+    height = TIMELINE_HEIGHT + 80
+    margin = {"top": 40, "right": 30, "bottom": 60, "left": 70}
+    chart_width = TIMELINE_WIDTH - margin["left"] - margin["right"]
+    chart_height = TIMELINE_HEIGHT - margin["top"] - margin["bottom"]
+
+    # Scale calculations
+    max_time = max(all_times) * 1.1  # 10% padding
+    if max_time == 0:
+        max_time = 1
+
+    def scale_x(i: int) -> float:
+        if len(commits) == 1:
+            return margin["left"] + chart_width / 2
+        return margin["left"] + (i / (len(commits) - 1)) * chart_width
+
+    def scale_y(v: float) -> float:
+        return margin["top"] + chart_height - (v / max_time) * chart_height
+
+    # Build SVG
+    svg_parts = [
+        f'',
+        ''' ''',
+        f' ',
+        f' Compilation Time by Program',
+    ]
+
+    # Y-axis grid lines and labels
+    num_grid_lines = 5
+    for i in range(num_grid_lines + 1):
+        y = margin["top"] + (i / num_grid_lines) * chart_height
+        value = max_time * (1 - i / num_grid_lines)
+        svg_parts.append(
+            f' '
+        )
+        svg_parts.append(
+            f' {value:.1f}ms'
+        )
+
+    # Axes
+    svg_parts.append(
+        f' '
+    )
+    svg_parts.append(
+        f' '
+    )
+
+    # Draw lines and points for each benchmark
+    for idx, name in enumerate(benchmark_names):
+        color = BENCHMARK_COLORS[idx % len(BENCHMARK_COLORS)]
+        points = benchmark_data[name]
+
+        # Draw connecting line
+        if len(points) > 1:
+            line_points = []
+            for i, time in enumerate(points):
+                if time > 0:
+                    line_points.append(f"{scale_x(i)},{scale_y(time)}")
+            if line_points:
+                path_d = "M " + " L ".join(line_points)
+                svg_parts.append(f' ')
+
+        # Draw points
+        for i, time in enumerate(points):
+            if time > 0:
+                x = scale_x(i)
+                y = scale_y(time)
+                svg_parts.append(f' ')
+
+    # X-axis labels (commits)
+    for i, commit in enumerate(commits):
+        x = scale_x(i)
+        label_y = TIMELINE_HEIGHT - margin["bottom"] + 15
+        svg_parts.append(
+            f' {escape_xml(commit)}'
+        )
+
+    # Legend at bottom
+    legend_y = TIMELINE_HEIGHT + 10
+    legend_x_start = margin["left"]
+    for idx, name in enumerate(benchmark_names):
+        color = BENCHMARK_COLORS[idx % len(BENCHMARK_COLORS)]
+        x = legend_x_start + (idx % 3) * 200
+        y = legend_y + (idx // 3) * 20
+        svg_parts.append(f' ')
+        svg_parts.append(
+            f' {escape_xml(name)}'
+        )
+
+    svg_parts.append("</svg>")
+    return "\n".join(svg_parts)
+
+
 def get_pass_times_for_benchmark(run: dict, benchmark_name: str) -> dict[str, float]:
    """Extract pass timing for a specific benchmark from a run."""
    for bench in run.get("benchmarks", []):
@@ -645,13 +798,25 @@ def main():
    # Ensure output directory exists
    output_dir.mkdir(parents=True, exist_ok=True)

-    # Generate timeline chart
+    # Get benchmark names first (needed for multi-timeline)
+    benchmark_names = get_benchmark_names(runs)
+    print(f"Found {len(benchmark_names)} benchmarks: {', '.join(benchmark_names)}")
+
+    # Generate aggregate timeline chart
    timeline_svg = generate_timeline_chart(runs)
    timeline_path = output_dir / "timeline.svg"
    with open(timeline_path, "w") as f:
        f.write(timeline_svg)
    print(f"Generated {timeline_path}")

+    # Generate per-program timeline chart (multi-line)
+    if benchmark_names:
+        multi_timeline_svg = generate_multi_timeline_chart(runs, benchmark_names)
+        multi_timeline_path = output_dir / "timeline_by_program.svg"
+        with open(multi_timeline_path, "w") as f:
+            f.write(multi_timeline_svg)
+        print(f"Generated {multi_timeline_path}")
+
    # Generate aggregate breakdown chart (for backwards compatibility)
    breakdown_svg = generate_breakdown_chart(runs)
    breakdown_path = output_dir / "breakdown.svg"
@@ -674,9 +839,6 @@
        print(f"Generated {binary_path}")

    # Generate per-benchmark breakdown charts
-    benchmark_names = get_benchmark_names(runs)
-    print(f"Found {len(benchmark_names)} benchmarks: {', '.join(benchmark_names)}")
-
    for bench_name in benchmark_names:
        bench_svg = generate_breakdown_chart(runs, bench_name)
        # Use sanitized filename
@@ -689,12 +851,36 @@
    # Generate summary statistics
    summary = generate_summary_data(runs)

-    # Write metadata JSON for the website to consume (includes summary)
+    # Include latest run's metrics for display
+    latest_benchmarks = []
+    if runs:
+        latest_run = runs[-1]
+        for bench in latest_run.get("benchmarks", []):
+            bench_info = {
+                "name": bench.get("name", ""),
+                "mean_ms": bench.get("mean_ms", 0),
+            }
+            if "source_metrics" in bench:
+                sm = bench["source_metrics"]
+                bench_info["source_metrics"] = sm
+                # Calculate throughput metrics
+                if bench_info["mean_ms"] > 0:
+                    seconds = bench_info["mean_ms"] / 1000
+                    bench_info["lines_per_sec"] = int(sm.get("lines", 0) / seconds)
+                    bench_info["tokens_per_sec"] = int(sm.get("tokens", 0) / seconds)
+            if "peak_memory_bytes" in bench:
+                bench_info["peak_memory_mb"] = round(bench["peak_memory_bytes"] / (1024 * 1024), 2)
+            if "binary_size_bytes" in bench:
+                bench_info["binary_size_kb"] = round(bench["binary_size_bytes"] / 1024, 2)
+            latest_benchmarks.append(bench_info)
+
+    # Write metadata JSON for the website to consume (includes summary and detailed metrics)
    metadata = {
        "benchmarks": benchmark_names,
        "run_count": len(runs),
        "latest_commit": short_commit(runs[-1].get("commit", "")) if runs else None,
        "summary": summary,
+        "latest_benchmarks": latest_benchmarks,
    }
    metadata_path = output_dir / "metadata.json"
    with open(metadata_path, "w") as f:
diff --git a/third-party/BUCK b/third-party/BUCK
index 5735cc46..def6591f 100644
--- a/third-party/BUCK
+++ b/third-party/BUCK
@@ -954,6 +954,12 @@ cargo.rust_library(
    visibility = [],
 )

+alias(
+    name = "libc",
+    actual = ":libc-0.2.178",
+    visibility = ["PUBLIC"],
+)
+
 cargo.rust_library(
    name = "libc-0.2.178",
    srcs = [
diff --git a/third-party/Cargo.lock b/third-party/Cargo.lock
index 5ae13221..e2745a6a 100644
--- a/third-party/Cargo.lock
+++ b/third-party/Cargo.lock
@@ -500,6 +500,7 @@ dependencies = [
  "annotate-snippets",
  "anyhow",
  "chumsky",
+ "libc",
  "libtest2-mimic",
  "logos",
  "once_cell",
diff --git a/third-party/Cargo.toml b/third-party/Cargo.toml
index 4c3ccf73..57fd5f47 100644
--- a/third-party/Cargo.toml
+++ b/third-party/Cargo.toml
@@ -15,6 +15,7 @@ path = "/dev/null"
 annotate-snippets = "0.11"
 anyhow = "1.0"
 chumsky = { version = "1.0.0-alpha.7", features = ["pratt"] }
+libc = "0.2"
 libtest2-mimic = "0.0.4"
 logos = "0.14"
 once_cell = "1.19"
diff --git a/website/templates/performance.html b/website/templates/performance.html
index 62287f02..b6fb8282 100644
--- a/website/templates/performance.html
+++ b/website/templates/performance.html
@@ -46,7 +46,7 @@

    {{ page.title }}
-
+
    Compilation Time Trend
@@ -58,6 +58,18 @@
    Compilation Time Trend
+
+
+    Compilation Time by Program
+
+    Each benchmark program shown as a separate line to identify which programs regress.
+
+    {% set timeline_by_program = load_data(path="static/benchmarks/timeline_by_program.svg", format="plain") %}
+    {{ timeline_by_program | safe }}
+
+
+
+
@@ -103,12 +115,24 @@
    Output Binary Size
+
+
+    Detailed Metrics
+
+    Source metrics, throughput, memory usage, and binary size for the latest benchmark run.
+
+
+    Loading metrics...
+
+
+
+
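
For context, with benchmark JSON enabled each compiler invocation prints one JSON object on stdout, which bench.sh captures per iteration and aggregates. The sketch below shows the rough shape implied by `BenchmarkTiming` after this change; the pass names, the version string, and the exact contents of `metadata` are illustrative assumptions, and `source_metrics`/`peak_memory_bytes` are omitted when unavailable (`skip_serializing_if = "Option::is_none"`):

{
  "metadata": { "target": "x86_64-linux", "version": "0.1.0" },
  "passes": [
    { "name": "lex", "duration_ms": 1.2 },
    { "name": "parse", "duration_ms": 3.4 }
  ],
  "total_ms": 4.6,
  "source_metrics": { "bytes": 10240, "lines": 512, "tokens": 4096 },
  "peak_memory_bytes": 14680064
}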