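"""Summarize LLM benchmark results from a YAML report.

Reads a benchmark YAML file, flattens the per-concurrency metric
statistics into a pandas DataFrame, renders latency/throughput plots
into benchmark_plots/, prints a per-concurrency summary, and writes
the flattened data to benchmark_processed_data.csv.
"""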
import yaml
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path


def process_benchmark_yaml(yaml_file):
    """Process the benchmark YAML file and return a DataFrame with the data."""
    with open(yaml_file, 'r') as f:
        data = yaml.safe_load(f)

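    # Assumed YAML layout, inferred from the accesses below (actual
    # reports may carry additional fields):
    #
    # benchmarks:
    #   - args:
    #       profile:
    #         measured_concurrencies: [1.0, 2.0, ...]
    #     metrics:
    #       request_latency:
    #         successful:
    #           count: ...
    #           mean: ...
    #           percentiles: {p95: ..., p99: ...}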
    # Extract concurrency levels from the benchmark configuration
    concurrency_levels = data['benchmarks'][0]['args']['profile']['measured_concurrencies']

    # Process metrics for each concurrency level
    processed_data = []
    for i, benchmark in enumerate(data['benchmarks']):
        if 'metrics' in benchmark:
            metrics = benchmark['metrics']
            concurrency = concurrency_levels[i] if i < len(concurrency_levels) else 1.0

            # Extract successful metrics
            for metric_name, metric_data in metrics.items():
                # Guard against non-dict metric entries before the key lookup
                if isinstance(metric_data, dict) and 'successful' in metric_data:
                    successful = metric_data['successful']
                    processed_data.append({
                        'concurrency': concurrency,
                        'metric': metric_name,
                        'count': successful.get('count', 0),
                        'mean': successful.get('mean', 0),
                        'median': successful.get('median', 0),
                        'min': successful.get('min', 0),
                        'max': successful.get('max', 0),
                        'std_dev': successful.get('std_dev', 0),
                        'p95': successful.get('percentiles', {}).get('p95', 0),
                        'p99': successful.get('percentiles', {}).get('p99', 0)
                    })

    # Convert to DataFrame
    df = pd.DataFrame(processed_data)
    return df


def create_visualizations(df):
    """Create visualizations for the benchmark data."""
    # Create plots directory if it doesn't exist
    plot_dir = Path('benchmark_plots')
    plot_dir.mkdir(exist_ok=True)

    # Set style
    plt.style.use('default')

    # Sort by concurrency for better visualization
    df = df.sort_values('concurrency')

    # Create visualizations for each metric
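    # NOTE: the metric names below are an assumption about the keys
    # present under each benchmark's 'metrics' mapping; adjust them to
    # match the metrics actually recorded in your report.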
    metrics_to_plot = [
        'request_latency',
        'time_to_first_token_ms',
        'tokens_per_second',
        'inter_token_latency_ms'
    ]

    for metric in metrics_to_plot:
        metric_df = df[df['metric'] == metric]
        if not metric_df.empty:
            label = metric.replace('_', ' ').title()

            # Mean vs Median
            plt.figure(figsize=(12, 6))
            plt.plot(metric_df['concurrency'], metric_df['mean'], 'b-', label='Mean')
            plt.plot(metric_df['concurrency'], metric_df['median'], 'r--', label='Median')
            plt.title(f'{label} vs Concurrency')
            plt.xlabel('Concurrency Level')
            plt.ylabel(label)
            plt.legend()
            plt.grid(True)
            plt.tight_layout()
            plt.savefig(plot_dir / f'{metric}_mean_median.png')
            plt.close()

            # Min-Max Range
            plt.figure(figsize=(12, 6))
            plt.fill_between(metric_df['concurrency'],
                             metric_df['min'],
                             metric_df['max'],
                             alpha=0.3,
                             label='Min-Max Range')
            plt.plot(metric_df['concurrency'], metric_df['mean'], 'b-', label='Mean')
            plt.title(f'{label} Range vs Concurrency')
            plt.xlabel('Concurrency Level')
            plt.ylabel(label)
            plt.legend()
            plt.grid(True)
            plt.tight_layout()
            plt.savefig(plot_dir / f'{metric}_range.png')
            plt.close()

            # Percentiles
            plt.figure(figsize=(12, 6))
            plt.plot(metric_df['concurrency'], metric_df['p95'], 'g--', label='95th Percentile')
            plt.plot(metric_df['concurrency'], metric_df['p99'], 'r--', label='99th Percentile')
            plt.plot(metric_df['concurrency'], metric_df['mean'], 'b-', label='Mean')
            plt.title(f'{label} Percentiles vs Concurrency')
            plt.xlabel('Concurrency Level')
            plt.ylabel(label)
            plt.legend()
            plt.grid(True)
            plt.tight_layout()
            plt.savefig(plot_dir / f'{metric}_percentiles.png')
            plt.close()


def main():
    # Process the YAML file
    df = process_benchmark_yaml('llama32-3b.yaml')

    # Create visualizations
    create_visualizations(df)

    # Print summary statistics by concurrency level
    print("\nSummary Statistics by Concurrency Level:")
    for concurrency in sorted(df['concurrency'].unique()):
        print(f"\nConcurrency Level: {concurrency:.2f}")
        subset = df[df['concurrency'] == concurrency]

        for metric in subset['metric'].unique():
            metric_data = subset[subset['metric'] == metric]
            print(f"\n{metric.replace('_', ' ').title()}:")
            print(f"Count: {metric_data['count'].iloc[0]}")
            print(f"Mean: {metric_data['mean'].iloc[0]:.2f}")
            print(f"Median: {metric_data['median'].iloc[0]:.2f}")
            print(f"Min: {metric_data['min'].iloc[0]:.2f}")
            print(f"Max: {metric_data['max'].iloc[0]:.2f}")
            print(f"Std Dev: {metric_data['std_dev'].iloc[0]:.2f}")
            print(f"95th Percentile: {metric_data['p95'].iloc[0]:.2f}")
            print(f"99th Percentile: {metric_data['p99'].iloc[0]:.2f}")

    # Save processed data
    df.to_csv('benchmark_processed_data.csv', index=False)
    print("\nProcessed data saved to benchmark_processed_data.csv")


if __name__ == "__main__":
    main()
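
# Usage: run the script directly, e.g. `python process_benchmarks.py`
# (the script name is illustrative); it expects llama32-3b.yaml in the
# working directory and writes plots to benchmark_plots/ along with
# benchmark_processed_data.csv.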