From 34ee3091aa8e86a7e6e7e1f679bc1b3bd501b238 Mon Sep 17 00:00:00 2001 From: "codegen-sh[bot]" <131295404+codegen-sh[bot]@users.noreply.github.com> Date: Wed, 2 Apr 2025 18:07:45 +0000 Subject: [PATCH 1/2] Add memory profiling and optimization to CLI --- src/codegen/cli/cli.py | 2 + src/codegen/cli/commands/memprof/__init__.py | 0 src/codegen/cli/commands/memprof/main.py | 72 ++++ src/codegen/cli/commands/run/run_local.py | 37 +- src/codegen/cli/utils/memory_profiler.py | 371 +++++++++++++++++++ 5 files changed, 479 insertions(+), 3 deletions(-) create mode 100644 src/codegen/cli/commands/memprof/__init__.py create mode 100644 src/codegen/cli/commands/memprof/main.py create mode 100644 src/codegen/cli/utils/memory_profiler.py diff --git a/src/codegen/cli/cli.py b/src/codegen/cli/cli.py index 8910f615a..7a0eb79d6 100644 --- a/src/codegen/cli/cli.py +++ b/src/codegen/cli/cli.py @@ -11,6 +11,7 @@ from codegen.cli.commands.login.main import login_command from codegen.cli.commands.logout.main import logout_command from codegen.cli.commands.lsp.lsp import lsp_command +from codegen.cli.commands.memprof.main import memprof_command from codegen.cli.commands.notebook.main import notebook_command from codegen.cli.commands.profile.main import profile_command from codegen.cli.commands.reset.main import reset_command @@ -51,6 +52,7 @@ def main(): main.add_command(lsp_command) main.add_command(serve_command) main.add_command(start_command) +main.add_command(memprof_command) # Add the memory profiling command if __name__ == "__main__": diff --git a/src/codegen/cli/commands/memprof/__init__.py b/src/codegen/cli/commands/memprof/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/codegen/cli/commands/memprof/main.py b/src/codegen/cli/commands/memprof/main.py new file mode 100644 index 000000000..f13bf08bd --- /dev/null +++ b/src/codegen/cli/commands/memprof/main.py @@ -0,0 +1,72 @@ +import os +import shutil +from pathlib import Path +from typing import List, Optional + +import rich +import rich_click as click +from rich import box +from rich.panel import Panel + +from codegen.cli.utils.memory_profiler import profile_command + + +@click.command(name="memprof") +@click.argument("command", nargs=-1, required=True) +@click.option( + "--output-dir", + "-o", + type=click.Path(file_okay=False), + help="Directory to save memory profile reports", +) +def memprof_command(command: List[str], output_dir: Optional[str] = None): + """ + Profile memory usage of a Codegen CLI command. + + Example: + codegen memprof run my-codemod --arguments '{"param": "value"}' + """ + if not command: + rich.print("[bold red]Error:[/bold red] No command specified") + return + + # Convert command tuple to list + cmd_args = list(command) + + # Set default output directory if not provided + if not output_dir: + home_dir = os.path.expanduser("~") + output_dir = os.path.join(home_dir, ".codegen", "memory_profiles") + + # Run the profiling + rich.print( + Panel( + f"[cyan]Profiling command:[/cyan] codegen {' '.join(cmd_args)}", + title="🔍 [bold]Memory Profiler[/bold]", + border_style="cyan", + box=box.ROUNDED, + padding=(1, 2), + ) + ) + + try: + report_dir = profile_command(cmd_args, output_dir=output_dir) + rich.print( + Panel( + f"[green]Memory profile saved to:[/green] {report_dir}", + title="✅ [bold]Profiling Complete[/bold]", + border_style="green", + box=box.ROUNDED, + padding=(1, 2), + ) + ) + except Exception as e: + rich.print( + Panel( + f"[red]Error during profiling:[/red] {str(e)}", + title="❌ [bold]Profiling Failed[/bold]", + border_style="red", + box=box.ROUNDED, + padding=(1, 2), + ) + ) diff --git a/src/codegen/cli/commands/run/run_local.py b/src/codegen/cli/commands/run/run_local.py index 4ca737dd1..cf7b7d80d 100644 --- a/src/codegen/cli/commands/run/run_local.py +++ b/src/codegen/cli/commands/run/run_local.py @@ -1,4 +1,9 @@ from pathlib import Path +import gc +import os +import psutil +import time +from typing import Optional import rich from rich.panel import Panel @@ -27,6 +32,9 @@ def parse_codebase( Returns: Parsed Codebase object """ + # Force garbage collection before parsing to free up memory + gc.collect() + codebase = Codebase( projects=[ ProjectConfig( @@ -51,21 +59,36 @@ def run_local( function: The function to run diff_preview: Number of lines of diff to preview (None for all) """ + # Get initial memory usage + process = psutil.Process(os.getpid()) + initial_memory = process.memory_info().rss / (1024 * 1024) # Convert to MB + # Parse codebase and run with Status(f"[bold]Parsing codebase at {session.repo_path} with subdirectories {function.subdirectories or 'ALL'} and language {function.language or 'AUTO'} ...", spinner="dots") as status: + start_time = time.time() codebase = parse_codebase(repo_path=session.repo_path, subdirectories=function.subdirectories, language=function.language) - status.update("[bold green]✓ Parsed codebase") + parse_time = time.time() - start_time + status.update(f"[bold green]✓ Parsed codebase in {parse_time:.2f}s") + # Memory usage after parsing + post_parse_memory = process.memory_info().rss / (1024 * 1024) + status.update("[bold]Running codemod...") + start_time = time.time() function.run(codebase) # Run the function - status.update("[bold green]✓ Completed codemod") + run_time = time.time() - start_time + status.update(f"[bold green]✓ Completed codemod in {run_time:.2f}s") # Get the diff from the codebase result = codebase.get_diff() - + + # Final memory usage + final_memory = process.memory_info().rss / (1024 * 1024) + # Handle no changes case if not result: rich.print("\n[yellow]No changes were produced by this codemod[/yellow]") + rich.print(f"\n[dim]Memory usage: {initial_memory:.2f}MB → {final_memory:.2f}MB (Δ {final_memory - initial_memory:.2f}MB)[/dim]") return # Show diff preview if requested @@ -84,3 +107,11 @@ def run_local( # Apply changes rich.print("") rich.print("[green]✓ Changes have been applied to your local filesystem[/green]") + + # Print memory usage statistics + rich.print(f"\n[dim]Memory usage: {initial_memory:.2f}MB → {final_memory:.2f}MB (Δ {final_memory - initial_memory:.2f}MB)[/dim]") + rich.print(f"[dim]Parsing: {parse_time:.2f}s, Execution: {run_time:.2f}s[/dim]") + + # Clean up to free memory + del codebase + gc.collect() diff --git a/src/codegen/cli/utils/memory_profiler.py b/src/codegen/cli/utils/memory_profiler.py new file mode 100644 index 000000000..8ab034c6b --- /dev/null +++ b/src/codegen/cli/utils/memory_profiler.py @@ -0,0 +1,371 @@ +import os +import time +import psutil +import tracemalloc +from typing import Callable, Dict, List, Optional, Tuple, Union +from dataclasses import dataclass +import functools +import subprocess +import sys +from pathlib import Path +import json +import tempfile +from rich.console import Console +from rich.table import Table +import matplotlib.pyplot as plt + +console = Console() + +@dataclass +class MemorySnapshot: + """A snapshot of memory usage at a point in time.""" + timestamp: float + rss_mb: float # Resident Set Size in MB + vms_mb: float # Virtual Memory Size in MB + tracemalloc_mb: Optional[float] = None # Tracemalloc total in MB + + def to_dict(self) -> Dict: + return { + "timestamp": self.timestamp, + "rss_mb": self.rss_mb, + "vms_mb": self.vms_mb, + "tracemalloc_mb": self.tracemalloc_mb + } + +class MemoryProfiler: + """A memory profiler that tracks memory usage over time.""" + + def __init__(self, interval: float = 0.1, use_tracemalloc: bool = True): + """ + Initialize the memory profiler. + + Args: + interval: The interval in seconds between memory snapshots. + use_tracemalloc: Whether to use tracemalloc for detailed memory tracking. + """ + self.interval = interval + self.use_tracemalloc = use_tracemalloc + self.snapshots: List[MemorySnapshot] = [] + self.process = psutil.Process(os.getpid()) + self.start_time = None + self._running = False + + def start(self): + """Start memory profiling.""" + if self._running: + return + + self.snapshots = [] + self.start_time = time.time() + + if self.use_tracemalloc: + tracemalloc.start() + + self._running = True + self._take_snapshot() + + def stop(self) -> List[MemorySnapshot]: + """Stop memory profiling and return the snapshots.""" + if not self._running: + return self.snapshots + + self._take_snapshot() # Take one final snapshot + + if self.use_tracemalloc: + tracemalloc.stop() + + self._running = False + return self.snapshots + + def _take_snapshot(self): + """Take a snapshot of the current memory usage.""" + mem_info = self.process.memory_info() + + snapshot = MemorySnapshot( + timestamp=time.time() - self.start_time, + rss_mb=mem_info.rss / (1024 * 1024), + vms_mb=mem_info.vms / (1024 * 1024), + ) + + if self.use_tracemalloc: + current, peak = tracemalloc.get_traced_memory() + snapshot.tracemalloc_mb = current / (1024 * 1024) + + self.snapshots.append(snapshot) + + def get_peak_memory(self) -> Tuple[float, float]: + """Get the peak RSS and VMS memory usage in MB.""" + if not self.snapshots: + return 0.0, 0.0 + + peak_rss = max(s.rss_mb for s in self.snapshots) + peak_vms = max(s.vms_mb for s in self.snapshots) + return peak_rss, peak_vms + + def get_tracemalloc_stats(self, top_n: int = 10) -> List: + """Get the top memory allocations from tracemalloc.""" + if not self.use_tracemalloc or not tracemalloc.is_tracing(): + return [] + + snapshot = tracemalloc.take_snapshot() + stats = snapshot.statistics('lineno') + return stats[:top_n] + + def save_report(self, output_dir: Union[str, Path], command_name: str): + """Save a memory profiling report to the specified directory.""" + output_dir = Path(output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + + # Save raw data as JSON + data_file = output_dir / f"{command_name}_memory_profile.json" + with open(data_file, 'w') as f: + json.dump([s.to_dict() for s in self.snapshots], f, indent=2) + + # Generate and save plot + self._generate_plot(output_dir / f"{command_name}_memory_profile.png") + + # Generate text report with tracemalloc stats if available + report_file = output_dir / f"{command_name}_memory_report.txt" + with open(report_file, 'w') as f: + peak_rss, peak_vms = self.get_peak_memory() + f.write(f"Memory Profile for: {command_name}\n") + f.write(f"{'=' * 50}\n") + f.write(f"Duration: {self.snapshots[-1].timestamp:.2f} seconds\n") + f.write(f"Peak RSS: {peak_rss:.2f} MB\n") + f.write(f"Peak VMS: {peak_vms:.2f} MB\n\n") + + if self.use_tracemalloc: + f.write("Top Memory Allocations:\n") + f.write(f"{'-' * 50}\n") + for stat in self.get_tracemalloc_stats(top_n=20): + f.write(f"{stat.size / (1024 * 1024):.2f} MB: {stat.traceback.format()[0]}\n") + + return output_dir + + def _generate_plot(self, output_file: Path): + """Generate a plot of memory usage over time.""" + if not self.snapshots: + return + + timestamps = [s.timestamp for s in self.snapshots] + rss_values = [s.rss_mb for s in self.snapshots] + vms_values = [s.vms_mb for s in self.snapshots] + + plt.figure(figsize=(10, 6)) + plt.plot(timestamps, rss_values, label='RSS (MB)', linewidth=2) + plt.plot(timestamps, vms_values, label='VMS (MB)', linewidth=2) + + if self.use_tracemalloc: + tracemalloc_values = [s.tracemalloc_mb for s in self.snapshots if s.tracemalloc_mb is not None] + if tracemalloc_values: + tracemalloc_timestamps = timestamps[:len(tracemalloc_values)] + plt.plot(tracemalloc_timestamps, tracemalloc_values, label='Tracemalloc (MB)', linewidth=2, linestyle='--') + + plt.xlabel('Time (seconds)') + plt.ylabel('Memory Usage (MB)') + plt.title('Memory Usage Over Time') + plt.grid(True, linestyle='--', alpha=0.7) + plt.legend() + + plt.tight_layout() + plt.savefig(output_file) + plt.close() + +def profile_memory(func=None, *, interval: float = 0.1, use_tracemalloc: bool = True, + output_dir: Optional[Union[str, Path]] = None): + """ + Decorator to profile memory usage of a function. + + Args: + func: The function to profile. + interval: The interval in seconds between memory snapshots. + use_tracemalloc: Whether to use tracemalloc for detailed memory tracking. + output_dir: Directory to save the memory profile report. If None, a temporary directory is used. + """ + def decorator(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + profiler = MemoryProfiler(interval=interval, use_tracemalloc=use_tracemalloc) + profiler.start() + + try: + result = func(*args, **kwargs) + return result + finally: + profiler.stop() + + # Determine output directory + out_dir = output_dir + if out_dir is None: + out_dir = Path(tempfile.gettempdir()) / "codegen_memory_profiles" + + # Save report + func_name = func.__name__ + report_dir = profiler.save_report(out_dir, func_name) + + console.print(f"\n[bold green]Memory profile saved to:[/bold green] {report_dir}") + + # Print summary + peak_rss, peak_vms = profiler.get_peak_memory() + table = Table(title="Memory Usage Summary") + table.add_column("Metric", style="cyan") + table.add_column("Value", style="green") + + table.add_row("Peak RSS", f"{peak_rss:.2f} MB") + table.add_row("Peak VMS", f"{peak_vms:.2f} MB") + table.add_row("Duration", f"{profiler.snapshots[-1].timestamp:.2f} seconds") + + console.print(table) + + return wrapper + + if func is None: + return decorator + return decorator(func) + +def profile_command(cmd_args: List[str], output_dir: Optional[Union[str, Path]] = None) -> Path: + """ + Profile memory usage of a command. + + Args: + cmd_args: The command arguments to profile. + output_dir: Directory to save the memory profile report. If None, a temporary directory is used. + + Returns: + Path to the output directory containing the profile report. + """ + if output_dir is None: + output_dir = Path(tempfile.gettempdir()) / "codegen_memory_profiles" + output_dir = Path(output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + + # Create a unique filename for this run + timestamp = int(time.time()) + cmd_name = "_".join(cmd_args).replace("/", "_")[:50] # Limit length and remove problematic chars + output_file = output_dir / f"{cmd_name}_{timestamp}_memory.json" + + # Run the command with memory profiling + env = os.environ.copy() + env["PYTHONPATH"] = f"{os.getcwd()}:{env.get('PYTHONPATH', '')}" + + # Prepare the profiling script + script = f""" +import sys +import time +import json +import psutil +import tracemalloc +from pathlib import Path + +output_file = "{output_file}" +interval = 0.1 +process = psutil.Process() +snapshots = [] +start_time = time.time() + +# Start tracemalloc +tracemalloc.start() + +# Take snapshots at regular intervals +try: + while True: + mem_info = process.memory_info() + current, peak = tracemalloc.get_traced_memory() + + snapshots.append({{ + "timestamp": time.time() - start_time, + "rss_mb": mem_info.rss / (1024 * 1024), + "vms_mb": mem_info.vms / (1024 * 1024), + "tracemalloc_mb": current / (1024 * 1024) + }}) + + time.sleep(interval) +except KeyboardInterrupt: + pass +finally: + # Save the snapshots + with open(output_file, 'w') as f: + json.dump(snapshots, f, indent=2) + + # Print summary + if snapshots: + peak_rss = max(s["rss_mb"] for s in snapshots) + peak_vms = max(s["vms_mb"] for s in snapshots) + duration = snapshots[-1]["timestamp"] + + print(f"\\nMemory Profile Summary:") + print(f"Peak RSS: {{peak_rss:.2f}} MB") + print(f"Peak VMS: {{peak_vms:.2f}} MB") + print(f"Duration: {{duration:.2f}} seconds") + print(f"Profile saved to: {{output_file}}") + """ + + script_file = output_dir / f"memory_profiler_{timestamp}.py" + with open(script_file, 'w') as f: + f.write(script) + + # Start the profiler in a separate process + profiler_process = subprocess.Popen([sys.executable, str(script_file)], + env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + + # Run the command + cmd_process = subprocess.Popen([sys.executable, "-m", "codegen.cli.cli"] + cmd_args, + env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + + # Wait for the command to finish + stdout, stderr = cmd_process.communicate() + + # Stop the profiler + profiler_process.terminate() + profiler_stdout, profiler_stderr = profiler_process.communicate() + + # Print command output + console.print("[bold]Command Output:[/bold]") + console.print(stdout.decode()) + if stderr: + console.print("[bold red]Command Errors:[/bold red]") + console.print(stderr.decode()) + + # Print profiler output + if profiler_stdout: + console.print(profiler_stdout.decode()) + + # Generate visualization if the profile data exists + if output_file.exists(): + try: + with open(output_file, 'r') as f: + data = json.load(f) + + if data: + # Generate plot + plot_file = output_dir / f"{cmd_name}_{timestamp}_memory.png" + + timestamps = [s["timestamp"] for s in data] + rss_values = [s["rss_mb"] for s in data] + vms_values = [s["vms_mb"] for s in data] + tracemalloc_values = [s["tracemalloc_mb"] for s in data if "tracemalloc_mb" in s] + + plt.figure(figsize=(10, 6)) + plt.plot(timestamps, rss_values, label='RSS (MB)', linewidth=2) + plt.plot(timestamps, vms_values, label='VMS (MB)', linewidth=2) + + if tracemalloc_values: + tracemalloc_timestamps = timestamps[:len(tracemalloc_values)] + plt.plot(tracemalloc_timestamps, tracemalloc_values, + label='Tracemalloc (MB)', linewidth=2, linestyle='--') + + plt.xlabel('Time (seconds)') + plt.ylabel('Memory Usage (MB)') + plt.title(f'Memory Usage: {" ".join(cmd_args)}') + plt.grid(True, linestyle='--', alpha=0.7) + plt.legend() + + plt.tight_layout() + plt.savefig(plot_file) + plt.close() + + console.print(f"[bold green]Memory profile visualization saved to:[/bold green] {plot_file}") + except Exception as e: + console.print(f"[bold red]Error generating visualization:[/bold red] {e}") + + return output_dir From 66f6eea6dac67aff85e98888451690b6043edc03 Mon Sep 17 00:00:00 2001 From: "codegen-sh[bot]" <131295404+codegen-sh[bot]@users.noreply.github.com> Date: Wed, 2 Apr 2025 18:08:56 +0000 Subject: [PATCH 2/2] Automated pre-commit update --- src/codegen/cli/commands/memprof/main.py | 21 +- src/codegen/cli/commands/run/run_local.py | 19 +- src/codegen/cli/utils/memory_profiler.py | 235 +++++++++++----------- 3 files changed, 133 insertions(+), 142 deletions(-) diff --git a/src/codegen/cli/commands/memprof/main.py b/src/codegen/cli/commands/memprof/main.py index f13bf08bd..6b42b1b8f 100644 --- a/src/codegen/cli/commands/memprof/main.py +++ b/src/codegen/cli/commands/memprof/main.py @@ -1,7 +1,5 @@ import os -import shutil -from pathlib import Path -from typing import List, Optional +from typing import Optional import rich import rich_click as click @@ -19,25 +17,24 @@ type=click.Path(file_okay=False), help="Directory to save memory profile reports", ) -def memprof_command(command: List[str], output_dir: Optional[str] = None): - """ - Profile memory usage of a Codegen CLI command. - +def memprof_command(command: list[str], output_dir: Optional[str] = None): + """Profile memory usage of a Codegen CLI command. + Example: codegen memprof run my-codemod --arguments '{"param": "value"}' """ if not command: rich.print("[bold red]Error:[/bold red] No command specified") return - + # Convert command tuple to list cmd_args = list(command) - + # Set default output directory if not provided if not output_dir: home_dir = os.path.expanduser("~") output_dir = os.path.join(home_dir, ".codegen", "memory_profiles") - + # Run the profiling rich.print( Panel( @@ -48,7 +45,7 @@ def memprof_command(command: List[str], output_dir: Optional[str] = None): padding=(1, 2), ) ) - + try: report_dir = profile_command(cmd_args, output_dir=output_dir) rich.print( @@ -63,7 +60,7 @@ def memprof_command(command: List[str], output_dir: Optional[str] = None): except Exception as e: rich.print( Panel( - f"[red]Error during profiling:[/red] {str(e)}", + f"[red]Error during profiling:[/red] {e!s}", title="❌ [bold]Profiling Failed[/bold]", border_style="red", box=box.ROUNDED, diff --git a/src/codegen/cli/commands/run/run_local.py b/src/codegen/cli/commands/run/run_local.py index cf7b7d80d..99d051e7f 100644 --- a/src/codegen/cli/commands/run/run_local.py +++ b/src/codegen/cli/commands/run/run_local.py @@ -1,10 +1,9 @@ -from pathlib import Path import gc import os -import psutil import time -from typing import Optional +from pathlib import Path +import psutil import rich from rich.panel import Panel from rich.status import Status @@ -34,7 +33,7 @@ def parse_codebase( """ # Force garbage collection before parsing to free up memory gc.collect() - + codebase = Codebase( projects=[ ProjectConfig( @@ -62,7 +61,7 @@ def run_local( # Get initial memory usage process = psutil.Process(os.getpid()) initial_memory = process.memory_info().rss / (1024 * 1024) # Convert to MB - + # Parse codebase and run with Status(f"[bold]Parsing codebase at {session.repo_path} with subdirectories {function.subdirectories or 'ALL'} and language {function.language or 'AUTO'} ...", spinner="dots") as status: start_time = time.time() @@ -72,7 +71,7 @@ def run_local( # Memory usage after parsing post_parse_memory = process.memory_info().rss / (1024 * 1024) - + status.update("[bold]Running codemod...") start_time = time.time() function.run(codebase) # Run the function @@ -81,10 +80,10 @@ def run_local( # Get the diff from the codebase result = codebase.get_diff() - + # Final memory usage final_memory = process.memory_info().rss / (1024 * 1024) - + # Handle no changes case if not result: rich.print("\n[yellow]No changes were produced by this codemod[/yellow]") @@ -107,11 +106,11 @@ def run_local( # Apply changes rich.print("") rich.print("[green]✓ Changes have been applied to your local filesystem[/green]") - + # Print memory usage statistics rich.print(f"\n[dim]Memory usage: {initial_memory:.2f}MB → {final_memory:.2f}MB (Δ {final_memory - initial_memory:.2f}MB)[/dim]") rich.print(f"[dim]Parsing: {parse_time:.2f}s, Execution: {run_time:.2f}s[/dim]") - + # Clean up to free memory del codebase gc.collect() diff --git a/src/codegen/cli/utils/memory_profiler.py b/src/codegen/cli/utils/memory_profiler.py index 8ab034c6b..dd4d22aa8 100644 --- a/src/codegen/cli/utils/memory_profiler.py +++ b/src/codegen/cli/utils/memory_profiler.py @@ -1,236 +1,234 @@ +import functools +import json import os +import subprocess +import sys +import tempfile import time -import psutil import tracemalloc -from typing import Callable, Dict, List, Optional, Tuple, Union from dataclasses import dataclass -import functools -import subprocess -import sys from pathlib import Path -import json -import tempfile +from typing import Optional, Union + +import matplotlib.pyplot as plt +import psutil from rich.console import Console from rich.table import Table -import matplotlib.pyplot as plt console = Console() + @dataclass class MemorySnapshot: """A snapshot of memory usage at a point in time.""" + timestamp: float rss_mb: float # Resident Set Size in MB vms_mb: float # Virtual Memory Size in MB tracemalloc_mb: Optional[float] = None # Tracemalloc total in MB - - def to_dict(self) -> Dict: - return { - "timestamp": self.timestamp, - "rss_mb": self.rss_mb, - "vms_mb": self.vms_mb, - "tracemalloc_mb": self.tracemalloc_mb - } + + def to_dict(self) -> dict: + return {"timestamp": self.timestamp, "rss_mb": self.rss_mb, "vms_mb": self.vms_mb, "tracemalloc_mb": self.tracemalloc_mb} + class MemoryProfiler: """A memory profiler that tracks memory usage over time.""" - + def __init__(self, interval: float = 0.1, use_tracemalloc: bool = True): - """ - Initialize the memory profiler. - + """Initialize the memory profiler. + Args: interval: The interval in seconds between memory snapshots. use_tracemalloc: Whether to use tracemalloc for detailed memory tracking. """ self.interval = interval self.use_tracemalloc = use_tracemalloc - self.snapshots: List[MemorySnapshot] = [] + self.snapshots: list[MemorySnapshot] = [] self.process = psutil.Process(os.getpid()) self.start_time = None self._running = False - + def start(self): """Start memory profiling.""" if self._running: return - + self.snapshots = [] self.start_time = time.time() - + if self.use_tracemalloc: tracemalloc.start() - + self._running = True self._take_snapshot() - - def stop(self) -> List[MemorySnapshot]: + + def stop(self) -> list[MemorySnapshot]: """Stop memory profiling and return the snapshots.""" if not self._running: return self.snapshots - + self._take_snapshot() # Take one final snapshot - + if self.use_tracemalloc: tracemalloc.stop() - + self._running = False return self.snapshots - + def _take_snapshot(self): """Take a snapshot of the current memory usage.""" mem_info = self.process.memory_info() - + snapshot = MemorySnapshot( timestamp=time.time() - self.start_time, rss_mb=mem_info.rss / (1024 * 1024), vms_mb=mem_info.vms / (1024 * 1024), ) - + if self.use_tracemalloc: current, peak = tracemalloc.get_traced_memory() snapshot.tracemalloc_mb = current / (1024 * 1024) - + self.snapshots.append(snapshot) - - def get_peak_memory(self) -> Tuple[float, float]: + + def get_peak_memory(self) -> tuple[float, float]: """Get the peak RSS and VMS memory usage in MB.""" if not self.snapshots: return 0.0, 0.0 - + peak_rss = max(s.rss_mb for s in self.snapshots) peak_vms = max(s.vms_mb for s in self.snapshots) return peak_rss, peak_vms - - def get_tracemalloc_stats(self, top_n: int = 10) -> List: + + def get_tracemalloc_stats(self, top_n: int = 10) -> list: """Get the top memory allocations from tracemalloc.""" if not self.use_tracemalloc or not tracemalloc.is_tracing(): return [] - + snapshot = tracemalloc.take_snapshot() - stats = snapshot.statistics('lineno') + stats = snapshot.statistics("lineno") return stats[:top_n] - + def save_report(self, output_dir: Union[str, Path], command_name: str): """Save a memory profiling report to the specified directory.""" output_dir = Path(output_dir) output_dir.mkdir(parents=True, exist_ok=True) - + # Save raw data as JSON data_file = output_dir / f"{command_name}_memory_profile.json" - with open(data_file, 'w') as f: + with open(data_file, "w") as f: json.dump([s.to_dict() for s in self.snapshots], f, indent=2) - + # Generate and save plot self._generate_plot(output_dir / f"{command_name}_memory_profile.png") - + # Generate text report with tracemalloc stats if available report_file = output_dir / f"{command_name}_memory_report.txt" - with open(report_file, 'w') as f: + with open(report_file, "w") as f: peak_rss, peak_vms = self.get_peak_memory() f.write(f"Memory Profile for: {command_name}\n") f.write(f"{'=' * 50}\n") f.write(f"Duration: {self.snapshots[-1].timestamp:.2f} seconds\n") f.write(f"Peak RSS: {peak_rss:.2f} MB\n") f.write(f"Peak VMS: {peak_vms:.2f} MB\n\n") - + if self.use_tracemalloc: f.write("Top Memory Allocations:\n") f.write(f"{'-' * 50}\n") for stat in self.get_tracemalloc_stats(top_n=20): f.write(f"{stat.size / (1024 * 1024):.2f} MB: {stat.traceback.format()[0]}\n") - + return output_dir - + def _generate_plot(self, output_file: Path): """Generate a plot of memory usage over time.""" if not self.snapshots: return - + timestamps = [s.timestamp for s in self.snapshots] rss_values = [s.rss_mb for s in self.snapshots] vms_values = [s.vms_mb for s in self.snapshots] - + plt.figure(figsize=(10, 6)) - plt.plot(timestamps, rss_values, label='RSS (MB)', linewidth=2) - plt.plot(timestamps, vms_values, label='VMS (MB)', linewidth=2) - + plt.plot(timestamps, rss_values, label="RSS (MB)", linewidth=2) + plt.plot(timestamps, vms_values, label="VMS (MB)", linewidth=2) + if self.use_tracemalloc: tracemalloc_values = [s.tracemalloc_mb for s in self.snapshots if s.tracemalloc_mb is not None] if tracemalloc_values: - tracemalloc_timestamps = timestamps[:len(tracemalloc_values)] - plt.plot(tracemalloc_timestamps, tracemalloc_values, label='Tracemalloc (MB)', linewidth=2, linestyle='--') - - plt.xlabel('Time (seconds)') - plt.ylabel('Memory Usage (MB)') - plt.title('Memory Usage Over Time') - plt.grid(True, linestyle='--', alpha=0.7) + tracemalloc_timestamps = timestamps[: len(tracemalloc_values)] + plt.plot(tracemalloc_timestamps, tracemalloc_values, label="Tracemalloc (MB)", linewidth=2, linestyle="--") + + plt.xlabel("Time (seconds)") + plt.ylabel("Memory Usage (MB)") + plt.title("Memory Usage Over Time") + plt.grid(True, linestyle="--", alpha=0.7) plt.legend() - + plt.tight_layout() plt.savefig(output_file) plt.close() -def profile_memory(func=None, *, interval: float = 0.1, use_tracemalloc: bool = True, - output_dir: Optional[Union[str, Path]] = None): - """ - Decorator to profile memory usage of a function. - + +def profile_memory(func=None, *, interval: float = 0.1, use_tracemalloc: bool = True, output_dir: Optional[Union[str, Path]] = None): + """Decorator to profile memory usage of a function. + Args: func: The function to profile. interval: The interval in seconds between memory snapshots. use_tracemalloc: Whether to use tracemalloc for detailed memory tracking. output_dir: Directory to save the memory profile report. If None, a temporary directory is used. """ + def decorator(func): @functools.wraps(func) def wrapper(*args, **kwargs): profiler = MemoryProfiler(interval=interval, use_tracemalloc=use_tracemalloc) profiler.start() - + try: result = func(*args, **kwargs) return result finally: profiler.stop() - + # Determine output directory out_dir = output_dir if out_dir is None: out_dir = Path(tempfile.gettempdir()) / "codegen_memory_profiles" - + # Save report func_name = func.__name__ report_dir = profiler.save_report(out_dir, func_name) - + console.print(f"\n[bold green]Memory profile saved to:[/bold green] {report_dir}") - + # Print summary peak_rss, peak_vms = profiler.get_peak_memory() table = Table(title="Memory Usage Summary") table.add_column("Metric", style="cyan") table.add_column("Value", style="green") - + table.add_row("Peak RSS", f"{peak_rss:.2f} MB") table.add_row("Peak VMS", f"{peak_vms:.2f} MB") table.add_row("Duration", f"{profiler.snapshots[-1].timestamp:.2f} seconds") - + console.print(table) - + return wrapper - + if func is None: return decorator return decorator(func) -def profile_command(cmd_args: List[str], output_dir: Optional[Union[str, Path]] = None) -> Path: - """ - Profile memory usage of a command. - + +def profile_command(cmd_args: list[str], output_dir: Optional[Union[str, Path]] = None) -> Path: + """Profile memory usage of a command. + Args: cmd_args: The command arguments to profile. output_dir: Directory to save the memory profile report. If None, a temporary directory is used. - + Returns: Path to the output directory containing the profile report. """ @@ -238,16 +236,16 @@ def profile_command(cmd_args: List[str], output_dir: Optional[Union[str, Path]] output_dir = Path(tempfile.gettempdir()) / "codegen_memory_profiles" output_dir = Path(output_dir) output_dir.mkdir(parents=True, exist_ok=True) - + # Create a unique filename for this run timestamp = int(time.time()) cmd_name = "_".join(cmd_args).replace("/", "_")[:50] # Limit length and remove problematic chars output_file = output_dir / f"{cmd_name}_{timestamp}_memory.json" - + # Run the command with memory profiling env = os.environ.copy() env["PYTHONPATH"] = f"{os.getcwd()}:{env.get('PYTHONPATH', '')}" - + # Prepare the profiling script script = f""" import sys @@ -271,14 +269,14 @@ def profile_command(cmd_args: List[str], output_dir: Optional[Union[str, Path]] while True: mem_info = process.memory_info() current, peak = tracemalloc.get_traced_memory() - + snapshots.append({{ "timestamp": time.time() - start_time, "rss_mb": mem_info.rss / (1024 * 1024), "vms_mb": mem_info.vms / (1024 * 1024), "tracemalloc_mb": current / (1024 * 1024) }}) - + time.sleep(interval) except KeyboardInterrupt: pass @@ -286,86 +284,83 @@ def profile_command(cmd_args: List[str], output_dir: Optional[Union[str, Path]] # Save the snapshots with open(output_file, 'w') as f: json.dump(snapshots, f, indent=2) - + # Print summary if snapshots: peak_rss = max(s["rss_mb"] for s in snapshots) peak_vms = max(s["vms_mb"] for s in snapshots) duration = snapshots[-1]["timestamp"] - + print(f"\\nMemory Profile Summary:") print(f"Peak RSS: {{peak_rss:.2f}} MB") print(f"Peak VMS: {{peak_vms:.2f}} MB") print(f"Duration: {{duration:.2f}} seconds") print(f"Profile saved to: {{output_file}}") """ - + script_file = output_dir / f"memory_profiler_{timestamp}.py" - with open(script_file, 'w') as f: + with open(script_file, "w") as f: f.write(script) - + # Start the profiler in a separate process - profiler_process = subprocess.Popen([sys.executable, str(script_file)], - env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - + profiler_process = subprocess.Popen([sys.executable, str(script_file)], env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + # Run the command - cmd_process = subprocess.Popen([sys.executable, "-m", "codegen.cli.cli"] + cmd_args, - env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - + cmd_process = subprocess.Popen([sys.executable, "-m", "codegen.cli.cli", *cmd_args], env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + # Wait for the command to finish stdout, stderr = cmd_process.communicate() - + # Stop the profiler profiler_process.terminate() profiler_stdout, profiler_stderr = profiler_process.communicate() - + # Print command output console.print("[bold]Command Output:[/bold]") console.print(stdout.decode()) if stderr: console.print("[bold red]Command Errors:[/bold red]") console.print(stderr.decode()) - + # Print profiler output if profiler_stdout: console.print(profiler_stdout.decode()) - + # Generate visualization if the profile data exists if output_file.exists(): try: - with open(output_file, 'r') as f: + with open(output_file) as f: data = json.load(f) - + if data: # Generate plot plot_file = output_dir / f"{cmd_name}_{timestamp}_memory.png" - + timestamps = [s["timestamp"] for s in data] rss_values = [s["rss_mb"] for s in data] vms_values = [s["vms_mb"] for s in data] tracemalloc_values = [s["tracemalloc_mb"] for s in data if "tracemalloc_mb" in s] - + plt.figure(figsize=(10, 6)) - plt.plot(timestamps, rss_values, label='RSS (MB)', linewidth=2) - plt.plot(timestamps, vms_values, label='VMS (MB)', linewidth=2) - + plt.plot(timestamps, rss_values, label="RSS (MB)", linewidth=2) + plt.plot(timestamps, vms_values, label="VMS (MB)", linewidth=2) + if tracemalloc_values: - tracemalloc_timestamps = timestamps[:len(tracemalloc_values)] - plt.plot(tracemalloc_timestamps, tracemalloc_values, - label='Tracemalloc (MB)', linewidth=2, linestyle='--') - - plt.xlabel('Time (seconds)') - plt.ylabel('Memory Usage (MB)') - plt.title(f'Memory Usage: {" ".join(cmd_args)}') - plt.grid(True, linestyle='--', alpha=0.7) + tracemalloc_timestamps = timestamps[: len(tracemalloc_values)] + plt.plot(tracemalloc_timestamps, tracemalloc_values, label="Tracemalloc (MB)", linewidth=2, linestyle="--") + + plt.xlabel("Time (seconds)") + plt.ylabel("Memory Usage (MB)") + plt.title(f"Memory Usage: {' '.join(cmd_args)}") + plt.grid(True, linestyle="--", alpha=0.7) plt.legend() - + plt.tight_layout() plt.savefig(plot_file) plt.close() - + console.print(f"[bold green]Memory profile visualization saved to:[/bold green] {plot_file}") except Exception as e: console.print(f"[bold red]Error generating visualization:[/bold red] {e}") - + return output_dir