From 34ee3091aa8e86a7e6e7e1f679bc1b3bd501b238 Mon Sep 17 00:00:00 2001
From: "codegen-sh[bot]" <131295404+codegen-sh[bot]@users.noreply.github.com>
Date: Wed, 2 Apr 2025 18:07:45 +0000
Subject: [PATCH 1/2] Add memory profiling and optimization to CLI

---
 src/codegen/cli/cli.py                       |   2 +
 src/codegen/cli/commands/memprof/__init__.py |   0
 src/codegen/cli/commands/memprof/main.py     |  72 ++++
 src/codegen/cli/commands/run/run_local.py    |  37 +-
 src/codegen/cli/utils/memory_profiler.py     | 371 +++++++++++++++++++
 5 files changed, 479 insertions(+), 3 deletions(-)
 create mode 100644 src/codegen/cli/commands/memprof/__init__.py
 create mode 100644 src/codegen/cli/commands/memprof/main.py
 create mode 100644 src/codegen/cli/utils/memory_profiler.py

diff --git a/src/codegen/cli/cli.py b/src/codegen/cli/cli.py
index 8910f615a..7a0eb79d6 100644
--- a/src/codegen/cli/cli.py
+++ b/src/codegen/cli/cli.py
@@ -11,6 +11,7 @@
 from codegen.cli.commands.login.main import login_command
 from codegen.cli.commands.logout.main import logout_command
 from codegen.cli.commands.lsp.lsp import lsp_command
+from codegen.cli.commands.memprof.main import memprof_command
 from codegen.cli.commands.notebook.main import notebook_command
 from codegen.cli.commands.profile.main import profile_command
 from codegen.cli.commands.reset.main import reset_command
@@ -51,6 +52,7 @@ def main():
 main.add_command(lsp_command)
 main.add_command(serve_command)
 main.add_command(start_command)
+main.add_command(memprof_command)  # Add the memory profiling command
 
 
 if __name__ == "__main__":
diff --git a/src/codegen/cli/commands/memprof/__init__.py b/src/codegen/cli/commands/memprof/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/codegen/cli/commands/memprof/main.py b/src/codegen/cli/commands/memprof/main.py
new file mode 100644
index 000000000..f13bf08bd
--- /dev/null
+++ b/src/codegen/cli/commands/memprof/main.py
@@ -0,0 +1,72 @@
+import os
+import shutil
+from pathlib import Path
+from typing import List, Optional
+
+import rich
+import rich_click as click
+from rich import box
+from rich.panel import Panel
+
+from codegen.cli.utils.memory_profiler import profile_command
+
+
+@click.command(name="memprof")
+@click.argument("command", nargs=-1, required=True)
+@click.option(
+    "--output-dir",
+    "-o",
+    type=click.Path(file_okay=False),
+    help="Directory to save memory profile reports",
+)
+def memprof_command(command: List[str], output_dir: Optional[str] = None):
+    """
+    Profile memory usage of a Codegen CLI command.
+    
+    Example:
+        codegen memprof run my-codemod --arguments '{"param": "value"}'
+    """
+    if not command:
+        rich.print("[bold red]Error:[/bold red] No command specified")
+        return
+    
+    # Convert command tuple to list
+    cmd_args = list(command)
+    
+    # Set default output directory if not provided
+    if not output_dir:
+        home_dir = os.path.expanduser("~")
+        output_dir = os.path.join(home_dir, ".codegen", "memory_profiles")
+    
+    # Run the profiling
+    rich.print(
+        Panel(
+            f"[cyan]Profiling command:[/cyan] codegen {' '.join(cmd_args)}",
+            title="🔍 [bold]Memory Profiler[/bold]",
+            border_style="cyan",
+            box=box.ROUNDED,
+            padding=(1, 2),
+        )
+    )
+    
+    try:
+        report_dir = profile_command(cmd_args, output_dir=output_dir)
+        rich.print(
+            Panel(
+                f"[green]Memory profile saved to:[/green] {report_dir}",
+                title="✅ [bold]Profiling Complete[/bold]",
+                border_style="green",
+                box=box.ROUNDED,
+                padding=(1, 2),
+            )
+        )
+    except Exception as e:
+        rich.print(
+            Panel(
+                f"[red]Error during profiling:[/red] {str(e)}",
+                title="❌ [bold]Profiling Failed[/bold]",
+                border_style="red",
+                box=box.ROUNDED,
+                padding=(1, 2),
+            )
+        )
diff --git a/src/codegen/cli/commands/run/run_local.py b/src/codegen/cli/commands/run/run_local.py
index 4ca737dd1..cf7b7d80d 100644
--- a/src/codegen/cli/commands/run/run_local.py
+++ b/src/codegen/cli/commands/run/run_local.py
@@ -1,4 +1,9 @@
 from pathlib import Path
+import gc
+import os
+import psutil
+import time
+from typing import Optional
 
 import rich
 from rich.panel import Panel
@@ -27,6 +32,9 @@ def parse_codebase(
     Returns:
         Parsed Codebase object
     """
+    # Force garbage collection before parsing to free up memory
+    gc.collect()
+    
     codebase = Codebase(
         projects=[
             ProjectConfig(
@@ -51,21 +59,36 @@ def run_local(
         function: The function to run
         diff_preview: Number of lines of diff to preview (None for all)
     """
+    # Get initial memory usage
+    process = psutil.Process(os.getpid())
+    initial_memory = process.memory_info().rss / (1024 * 1024)  # Convert to MB
+    
     # Parse codebase and run
     with Status(f"[bold]Parsing codebase at {session.repo_path} with subdirectories {function.subdirectories or 'ALL'} and language {function.language or 'AUTO'} ...", spinner="dots") as status:
+        start_time = time.time()
         codebase = parse_codebase(repo_path=session.repo_path, subdirectories=function.subdirectories, language=function.language)
-        status.update("[bold green]✓ Parsed codebase")
+        parse_time = time.time() - start_time
+        status.update(f"[bold green]✓ Parsed codebase in {parse_time:.2f}s")
 
+        # Memory usage after parsing
+        post_parse_memory = process.memory_info().rss / (1024 * 1024)
+        
         status.update("[bold]Running codemod...")
+        start_time = time.time()
         function.run(codebase)  # Run the function
-        status.update("[bold green]✓ Completed codemod")
+        run_time = time.time() - start_time
+        status.update(f"[bold green]✓ Completed codemod in {run_time:.2f}s")
 
     # Get the diff from the codebase
     result = codebase.get_diff()
-
+    
+    # Final memory usage
+    final_memory = process.memory_info().rss / (1024 * 1024)
+    
     # Handle no changes case
     if not result:
         rich.print("\n[yellow]No changes were produced by this codemod[/yellow]")
+        rich.print(f"\n[dim]Memory usage: {initial_memory:.2f}MB → {final_memory:.2f}MB (Δ {final_memory - initial_memory:.2f}MB)[/dim]")
         return
 
     # Show diff preview if requested
@@ -84,3 +107,11 @@ def run_local(
     # Apply changes
     rich.print("")
     rich.print("[green]✓ Changes have been applied to your local filesystem[/green]")
+    
+    # Print memory usage statistics
+    rich.print(f"\n[dim]Memory usage: {initial_memory:.2f}MB → {final_memory:.2f}MB (Δ {final_memory - initial_memory:.2f}MB)[/dim]")
+    rich.print(f"[dim]Parsing: {parse_time:.2f}s, Execution: {run_time:.2f}s[/dim]")
+    
+    # Clean up to free memory
+    del codebase
+    gc.collect()
diff --git a/src/codegen/cli/utils/memory_profiler.py b/src/codegen/cli/utils/memory_profiler.py
new file mode 100644
index 000000000..8ab034c6b
--- /dev/null
+++ b/src/codegen/cli/utils/memory_profiler.py
@@ -0,0 +1,371 @@
+import os
+import time
+import psutil
+import tracemalloc
+from typing import Callable, Dict, List, Optional, Tuple, Union
+from dataclasses import dataclass
+import functools
+import subprocess
+import sys
+from pathlib import Path
+import json
+import tempfile
+from rich.console import Console
+from rich.table import Table
+import matplotlib.pyplot as plt
+
+console = Console()
+
+@dataclass
+class MemorySnapshot:
+    """A snapshot of memory usage at a point in time."""
+    timestamp: float
+    rss_mb: float  # Resident Set Size in MB
+    vms_mb: float  # Virtual Memory Size in MB
+    tracemalloc_mb: Optional[float] = None  # Tracemalloc total in MB
+    
+    def to_dict(self) -> Dict:
+        return {
+            "timestamp": self.timestamp,
+            "rss_mb": self.rss_mb,
+            "vms_mb": self.vms_mb,
+            "tracemalloc_mb": self.tracemalloc_mb
+        }
+
+class MemoryProfiler:
+    """A memory profiler that tracks memory usage over time."""
+    
+    def __init__(self, interval: float = 0.1, use_tracemalloc: bool = True):
+        """
+        Initialize the memory profiler.
+        
+        Args:
+            interval: The interval in seconds between memory snapshots.
+            use_tracemalloc: Whether to use tracemalloc for detailed memory tracking.
+        """
+        self.interval = interval
+        self.use_tracemalloc = use_tracemalloc
+        self.snapshots: List[MemorySnapshot] = []
+        self.process = psutil.Process(os.getpid())
+        self.start_time = None
+        self._running = False
+        
+    def start(self):
+        """Start memory profiling."""
+        if self._running:
+            return
+            
+        self.snapshots = []
+        self.start_time = time.time()
+        
+        if self.use_tracemalloc:
+            tracemalloc.start()
+            
+        self._running = True
+        self._take_snapshot()
+    
+    def stop(self) -> List[MemorySnapshot]:
+        """Stop memory profiling and return the snapshots."""
+        if not self._running:
+            return self.snapshots
+            
+        self._take_snapshot()  # Take one final snapshot
+        
+        if self.use_tracemalloc:
+            tracemalloc.stop()
+            
+        self._running = False
+        return self.snapshots
+    
+    def _take_snapshot(self):
+        """Take a snapshot of the current memory usage."""
+        mem_info = self.process.memory_info()
+        
+        snapshot = MemorySnapshot(
+            timestamp=time.time() - self.start_time,
+            rss_mb=mem_info.rss / (1024 * 1024),
+            vms_mb=mem_info.vms / (1024 * 1024),
+        )
+        
+        if self.use_tracemalloc:
+            current, peak = tracemalloc.get_traced_memory()
+            snapshot.tracemalloc_mb = current / (1024 * 1024)
+            
+        self.snapshots.append(snapshot)
+    
+    def get_peak_memory(self) -> Tuple[float, float]:
+        """Get the peak RSS and VMS memory usage in MB."""
+        if not self.snapshots:
+            return 0.0, 0.0
+            
+        peak_rss = max(s.rss_mb for s in self.snapshots)
+        peak_vms = max(s.vms_mb for s in self.snapshots)
+        return peak_rss, peak_vms
+    
+    def get_tracemalloc_stats(self, top_n: int = 10) -> List:
+        """Get the top memory allocations from tracemalloc."""
+        if not self.use_tracemalloc or not tracemalloc.is_tracing():
+            return []
+            
+        snapshot = tracemalloc.take_snapshot()
+        stats = snapshot.statistics('lineno')
+        return stats[:top_n]
+    
+    def save_report(self, output_dir: Union[str, Path], command_name: str):
+        """Save a memory profiling report to the specified directory."""
+        output_dir = Path(output_dir)
+        output_dir.mkdir(parents=True, exist_ok=True)
+        
+        # Save raw data as JSON
+        data_file = output_dir / f"{command_name}_memory_profile.json"
+        with open(data_file, 'w') as f:
+            json.dump([s.to_dict() for s in self.snapshots], f, indent=2)
+        
+        # Generate and save plot
+        self._generate_plot(output_dir / f"{command_name}_memory_profile.png")
+        
+        # Generate text report with tracemalloc stats if available
+        report_file = output_dir / f"{command_name}_memory_report.txt"
+        with open(report_file, 'w') as f:
+            peak_rss, peak_vms = self.get_peak_memory()
+            f.write(f"Memory Profile for: {command_name}\n")
+            f.write(f"{'=' * 50}\n")
+            f.write(f"Duration: {self.snapshots[-1].timestamp:.2f} seconds\n")
+            f.write(f"Peak RSS: {peak_rss:.2f} MB\n")
+            f.write(f"Peak VMS: {peak_vms:.2f} MB\n\n")
+            
+            if self.use_tracemalloc:
+                f.write("Top Memory Allocations:\n")
+                f.write(f"{'-' * 50}\n")
+                for stat in self.get_tracemalloc_stats(top_n=20):
+                    f.write(f"{stat.size / (1024 * 1024):.2f} MB: {stat.traceback.format()[0]}\n")
+        
+        return output_dir
+    
+    def _generate_plot(self, output_file: Path):
+        """Generate a plot of memory usage over time."""
+        if not self.snapshots:
+            return
+            
+        timestamps = [s.timestamp for s in self.snapshots]
+        rss_values = [s.rss_mb for s in self.snapshots]
+        vms_values = [s.vms_mb for s in self.snapshots]
+        
+        plt.figure(figsize=(10, 6))
+        plt.plot(timestamps, rss_values, label='RSS (MB)', linewidth=2)
+        plt.plot(timestamps, vms_values, label='VMS (MB)', linewidth=2)
+        
+        if self.use_tracemalloc:
+            tracemalloc_values = [s.tracemalloc_mb for s in self.snapshots if s.tracemalloc_mb is not None]
+            if tracemalloc_values:
+                tracemalloc_timestamps = timestamps[:len(tracemalloc_values)]
+                plt.plot(tracemalloc_timestamps, tracemalloc_values, label='Tracemalloc (MB)', linewidth=2, linestyle='--')
+        
+        plt.xlabel('Time (seconds)')
+        plt.ylabel('Memory Usage (MB)')
+        plt.title('Memory Usage Over Time')
+        plt.grid(True, linestyle='--', alpha=0.7)
+        plt.legend()
+        
+        plt.tight_layout()
+        plt.savefig(output_file)
+        plt.close()
+
+def profile_memory(func=None, *, interval: float = 0.1, use_tracemalloc: bool = True, 
+                  output_dir: Optional[Union[str, Path]] = None):
+    """
+    Decorator to profile memory usage of a function.
+    
+    Args:
+        func: The function to profile.
+        interval: The interval in seconds between memory snapshots.
+        use_tracemalloc: Whether to use tracemalloc for detailed memory tracking.
+        output_dir: Directory to save the memory profile report. If None, a temporary directory is used.
+    """
+    def decorator(func):
+        @functools.wraps(func)
+        def wrapper(*args, **kwargs):
+            profiler = MemoryProfiler(interval=interval, use_tracemalloc=use_tracemalloc)
+            profiler.start()
+            
+            try:
+                result = func(*args, **kwargs)
+                return result
+            finally:
+                profiler.stop()
+                
+                # Determine output directory
+                out_dir = output_dir
+                if out_dir is None:
+                    out_dir = Path(tempfile.gettempdir()) / "codegen_memory_profiles"
+                
+                # Save report
+                func_name = func.__name__
+                report_dir = profiler.save_report(out_dir, func_name)
+                
+                console.print(f"\n[bold green]Memory profile saved to:[/bold green] {report_dir}")
+                
+                # Print summary
+                peak_rss, peak_vms = profiler.get_peak_memory()
+                table = Table(title="Memory Usage Summary")
+                table.add_column("Metric", style="cyan")
+                table.add_column("Value", style="green")
+                
+                table.add_row("Peak RSS", f"{peak_rss:.2f} MB")
+                table.add_row("Peak VMS", f"{peak_vms:.2f} MB")
+                table.add_row("Duration", f"{profiler.snapshots[-1].timestamp:.2f} seconds")
+                
+                console.print(table)
+        
+        return wrapper
+    
+    if func is None:
+        return decorator
+    return decorator(func)
+
+def profile_command(cmd_args: List[str], output_dir: Optional[Union[str, Path]] = None) -> Path:
+    """
+    Profile memory usage of a command.
+    
+    Args:
+        cmd_args: The command arguments to profile.
+        output_dir: Directory to save the memory profile report. If None, a temporary directory is used.
+        
+    Returns:
+        Path to the output directory containing the profile report.
+    """
+    if output_dir is None:
+        output_dir = Path(tempfile.gettempdir()) / "codegen_memory_profiles"
+    output_dir = Path(output_dir)
+    output_dir.mkdir(parents=True, exist_ok=True)
+    
+    # Create a unique filename for this run
+    timestamp = int(time.time())
+    cmd_name = "_".join(cmd_args).replace("/", "_")[:50]  # Limit length and remove problematic chars
+    output_file = output_dir / f"{cmd_name}_{timestamp}_memory.json"
+    
+    # Run the command with memory profiling
+    env = os.environ.copy()
+    env["PYTHONPATH"] = f"{os.getcwd()}:{env.get('PYTHONPATH', '')}"
+    
+    # Prepare the profiling script
+    script = f"""
+import sys
+import time
+import json
+import psutil
+import tracemalloc
+from pathlib import Path
+
+output_file = "{output_file}"
+interval = 0.1
+process = psutil.Process()
+snapshots = []
+start_time = time.time()
+
+# Start tracemalloc
+tracemalloc.start()
+
+# Take snapshots at regular intervals
+try:
+    while True:
+        mem_info = process.memory_info()
+        current, peak = tracemalloc.get_traced_memory()
+        
+        snapshots.append({{
+            "timestamp": time.time() - start_time,
+            "rss_mb": mem_info.rss / (1024 * 1024),
+            "vms_mb": mem_info.vms / (1024 * 1024),
+            "tracemalloc_mb": current / (1024 * 1024)
+        }})
+        
+        time.sleep(interval)
+except KeyboardInterrupt:
+    pass
+finally:
+    # Save the snapshots
+    with open(output_file, 'w') as f:
+        json.dump(snapshots, f, indent=2)
+    
+    # Print summary
+    if snapshots:
+        peak_rss = max(s["rss_mb"] for s in snapshots)
+        peak_vms = max(s["vms_mb"] for s in snapshots)
+        duration = snapshots[-1]["timestamp"]
+        
+        print(f"\\nMemory Profile Summary:")
+        print(f"Peak RSS: {{peak_rss:.2f}} MB")
+        print(f"Peak VMS: {{peak_vms:.2f}} MB")
+        print(f"Duration: {{duration:.2f}} seconds")
+        print(f"Profile saved to: {{output_file}}")
+    """
+    
+    script_file = output_dir / f"memory_profiler_{timestamp}.py"
+    with open(script_file, 'w') as f:
+        f.write(script)
+    
+    # Start the profiler in a separate process
+    profiler_process = subprocess.Popen([sys.executable, str(script_file)], 
+                                        env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    
+    # Run the command
+    cmd_process = subprocess.Popen([sys.executable, "-m", "codegen.cli.cli"] + cmd_args, 
+                                   env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    
+    # Wait for the command to finish
+    stdout, stderr = cmd_process.communicate()
+    
+    # Stop the profiler
+    profiler_process.terminate()
+    profiler_stdout, profiler_stderr = profiler_process.communicate()
+    
+    # Print command output
+    console.print("[bold]Command Output:[/bold]")
+    console.print(stdout.decode())
+    if stderr:
+        console.print("[bold red]Command Errors:[/bold red]")
+        console.print(stderr.decode())
+    
+    # Print profiler output
+    if profiler_stdout:
+        console.print(profiler_stdout.decode())
+    
+    # Generate visualization if the profile data exists
+    if output_file.exists():
+        try:
+            with open(output_file, 'r') as f:
+                data = json.load(f)
+            
+            if data:
+                # Generate plot
+                plot_file = output_dir / f"{cmd_name}_{timestamp}_memory.png"
+                
+                timestamps = [s["timestamp"] for s in data]
+                rss_values = [s["rss_mb"] for s in data]
+                vms_values = [s["vms_mb"] for s in data]
+                tracemalloc_values = [s["tracemalloc_mb"] for s in data if "tracemalloc_mb" in s]
+                
+                plt.figure(figsize=(10, 6))
+                plt.plot(timestamps, rss_values, label='RSS (MB)', linewidth=2)
+                plt.plot(timestamps, vms_values, label='VMS (MB)', linewidth=2)
+                
+                if tracemalloc_values:
+                    tracemalloc_timestamps = timestamps[:len(tracemalloc_values)]
+                    plt.plot(tracemalloc_timestamps, tracemalloc_values, 
+                             label='Tracemalloc (MB)', linewidth=2, linestyle='--')
+                
+                plt.xlabel('Time (seconds)')
+                plt.ylabel('Memory Usage (MB)')
+                plt.title(f'Memory Usage: {" ".join(cmd_args)}')
+                plt.grid(True, linestyle='--', alpha=0.7)
+                plt.legend()
+                
+                plt.tight_layout()
+                plt.savefig(plot_file)
+                plt.close()
+                
+                console.print(f"[bold green]Memory profile visualization saved to:[/bold green] {plot_file}")
+        except Exception as e:
+            console.print(f"[bold red]Error generating visualization:[/bold red] {e}")
+    
+    return output_dir

From 66f6eea6dac67aff85e98888451690b6043edc03 Mon Sep 17 00:00:00 2001
From: "codegen-sh[bot]" <131295404+codegen-sh[bot]@users.noreply.github.com>
Date: Wed, 2 Apr 2025 18:08:56 +0000
Subject: [PATCH 2/2] Automated pre-commit update

---
 src/codegen/cli/commands/memprof/main.py  |  21 +-
 src/codegen/cli/commands/run/run_local.py |  19 +-
 src/codegen/cli/utils/memory_profiler.py  | 235 +++++++++++-----------
 3 files changed, 133 insertions(+), 142 deletions(-)

diff --git a/src/codegen/cli/commands/memprof/main.py b/src/codegen/cli/commands/memprof/main.py
index f13bf08bd..6b42b1b8f 100644
--- a/src/codegen/cli/commands/memprof/main.py
+++ b/src/codegen/cli/commands/memprof/main.py
@@ -1,7 +1,5 @@
 import os
-import shutil
-from pathlib import Path
-from typing import List, Optional
+from typing import Optional
 
 import rich
 import rich_click as click
@@ -19,25 +17,24 @@
     type=click.Path(file_okay=False),
     help="Directory to save memory profile reports",
 )
-def memprof_command(command: List[str], output_dir: Optional[str] = None):
-    """
-    Profile memory usage of a Codegen CLI command.
-    
+def memprof_command(command: tuple[str, ...], output_dir: Optional[str] = None):
+    """Profile memory usage of a Codegen CLI command.
+
     Example:
         codegen memprof run my-codemod --arguments '{"param": "value"}'
     """
     if not command:
         rich.print("[bold red]Error:[/bold red] No command specified")
         return
-    
+
     # Convert command tuple to list
     cmd_args = list(command)
-    
+
     # Set default output directory if not provided
     if not output_dir:
         home_dir = os.path.expanduser("~")
         output_dir = os.path.join(home_dir, ".codegen", "memory_profiles")
-    
+
     # Run the profiling
     rich.print(
         Panel(
@@ -48,7 +45,7 @@ def memprof_command(command: List[str], output_dir: Optional[str] = None):
             padding=(1, 2),
         )
     )
-    
+
     try:
         report_dir = profile_command(cmd_args, output_dir=output_dir)
         rich.print(
@@ -63,7 +60,7 @@ def memprof_command(command: List[str], output_dir: Optional[str] = None):
     except Exception as e:
         rich.print(
             Panel(
-                f"[red]Error during profiling:[/red] {str(e)}",
+                f"[red]Error during profiling:[/red] {e!s}",
                 title="❌ [bold]Profiling Failed[/bold]",
                 border_style="red",
                 box=box.ROUNDED,
diff --git a/src/codegen/cli/commands/run/run_local.py b/src/codegen/cli/commands/run/run_local.py
index cf7b7d80d..99d051e7f 100644
--- a/src/codegen/cli/commands/run/run_local.py
+++ b/src/codegen/cli/commands/run/run_local.py
@@ -1,10 +1,9 @@
-from pathlib import Path
 import gc
 import os
-import psutil
 import time
-from typing import Optional
+from pathlib import Path
 
+import psutil
 import rich
 from rich.panel import Panel
 from rich.status import Status
@@ -34,7 +33,7 @@ def parse_codebase(
     """
     # Force garbage collection before parsing to free up memory
     gc.collect()
-    
+
     codebase = Codebase(
         projects=[
             ProjectConfig(
@@ -62,7 +61,7 @@ def run_local(
     # Get initial memory usage
     process = psutil.Process(os.getpid())
     initial_memory = process.memory_info().rss / (1024 * 1024)  # Convert to MB
-    
+
     # Parse codebase and run
     with Status(f"[bold]Parsing codebase at {session.repo_path} with subdirectories {function.subdirectories or 'ALL'} and language {function.language or 'AUTO'} ...", spinner="dots") as status:
         start_time = time.time()
@@ -72,7 +71,7 @@ def run_local(
 
         # Memory usage after parsing
         post_parse_memory = process.memory_info().rss / (1024 * 1024)
-        
+
         status.update("[bold]Running codemod...")
         start_time = time.time()
         function.run(codebase)  # Run the function
@@ -81,10 +80,10 @@ def run_local(
 
     # Get the diff from the codebase
     result = codebase.get_diff()
-    
+
     # Final memory usage
     final_memory = process.memory_info().rss / (1024 * 1024)
-    
+
     # Handle no changes case
     if not result:
         rich.print("\n[yellow]No changes were produced by this codemod[/yellow]")
@@ -107,11 +106,11 @@ def run_local(
     # Apply changes
     rich.print("")
     rich.print("[green]✓ Changes have been applied to your local filesystem[/green]")
-    
+
     # Print memory usage statistics
     rich.print(f"\n[dim]Memory usage: {initial_memory:.2f}MB → {final_memory:.2f}MB (Δ {final_memory - initial_memory:.2f}MB)[/dim]")
     rich.print(f"[dim]Parsing: {parse_time:.2f}s, Execution: {run_time:.2f}s[/dim]")
-    
+
     # Clean up to free memory
     del codebase
     gc.collect()
diff --git a/src/codegen/cli/utils/memory_profiler.py b/src/codegen/cli/utils/memory_profiler.py
index 8ab034c6b..dd4d22aa8 100644
--- a/src/codegen/cli/utils/memory_profiler.py
+++ b/src/codegen/cli/utils/memory_profiler.py
@@ -1,236 +1,234 @@
+import functools
+import json
 import os
+import subprocess
+import sys
+import tempfile
 import time
-import psutil
 import tracemalloc
-from typing import Callable, Dict, List, Optional, Tuple, Union
 from dataclasses import dataclass
-import functools
-import subprocess
-import sys
 from pathlib import Path
-import json
-import tempfile
+from typing import Optional, Union
+
+import matplotlib.pyplot as plt
+import psutil
 from rich.console import Console
 from rich.table import Table
-import matplotlib.pyplot as plt
 
 console = Console()
 
+
 @dataclass
 class MemorySnapshot:
     """A snapshot of memory usage at a point in time."""
+
     timestamp: float
     rss_mb: float  # Resident Set Size in MB
     vms_mb: float  # Virtual Memory Size in MB
     tracemalloc_mb: Optional[float] = None  # Tracemalloc total in MB
-    
-    def to_dict(self) -> Dict:
-        return {
-            "timestamp": self.timestamp,
-            "rss_mb": self.rss_mb,
-            "vms_mb": self.vms_mb,
-            "tracemalloc_mb": self.tracemalloc_mb
-        }
+
+    def to_dict(self) -> dict:
+        return {"timestamp": self.timestamp, "rss_mb": self.rss_mb, "vms_mb": self.vms_mb, "tracemalloc_mb": self.tracemalloc_mb}
+
 
 class MemoryProfiler:
     """A memory profiler that tracks memory usage over time."""
-    
+
     def __init__(self, interval: float = 0.1, use_tracemalloc: bool = True):
-        """
-        Initialize the memory profiler.
-        
+        """Initialize the memory profiler.
+
         Args:
             interval: The interval in seconds between memory snapshots.
             use_tracemalloc: Whether to use tracemalloc for detailed memory tracking.
         """
         self.interval = interval
         self.use_tracemalloc = use_tracemalloc
-        self.snapshots: List[MemorySnapshot] = []
+        self.snapshots: list[MemorySnapshot] = []
         self.process = psutil.Process(os.getpid())
         self.start_time = None
         self._running = False
-        
+
     def start(self):
         """Start memory profiling."""
         if self._running:
             return
-            
+
         self.snapshots = []
         self.start_time = time.time()
-        
+
         if self.use_tracemalloc:
             tracemalloc.start()
-            
+
         self._running = True
         self._take_snapshot()
-    
-    def stop(self) -> List[MemorySnapshot]:
+
+    def stop(self) -> list[MemorySnapshot]:
         """Stop memory profiling and return the snapshots."""
         if not self._running:
             return self.snapshots
-            
+
         self._take_snapshot()  # Take one final snapshot
-        
+
         if self.use_tracemalloc:
             tracemalloc.stop()
-            
+
         self._running = False
         return self.snapshots
-    
+
     def _take_snapshot(self):
         """Take a snapshot of the current memory usage."""
         mem_info = self.process.memory_info()
-        
+
         snapshot = MemorySnapshot(
             timestamp=time.time() - self.start_time,
             rss_mb=mem_info.rss / (1024 * 1024),
             vms_mb=mem_info.vms / (1024 * 1024),
         )
-        
+
         if self.use_tracemalloc:
             current, peak = tracemalloc.get_traced_memory()
             snapshot.tracemalloc_mb = current / (1024 * 1024)
-            
+
         self.snapshots.append(snapshot)
-    
-    def get_peak_memory(self) -> Tuple[float, float]:
+
+    def get_peak_memory(self) -> tuple[float, float]:
         """Get the peak RSS and VMS memory usage in MB."""
         if not self.snapshots:
             return 0.0, 0.0
-            
+
         peak_rss = max(s.rss_mb for s in self.snapshots)
         peak_vms = max(s.vms_mb for s in self.snapshots)
         return peak_rss, peak_vms
-    
-    def get_tracemalloc_stats(self, top_n: int = 10) -> List:
+
+    def get_tracemalloc_stats(self, top_n: int = 10) -> list:
         """Get the top memory allocations from tracemalloc."""
         if not self.use_tracemalloc or not tracemalloc.is_tracing():
             return []
-            
+
         snapshot = tracemalloc.take_snapshot()
-        stats = snapshot.statistics('lineno')
+        stats = snapshot.statistics("lineno")
         return stats[:top_n]
-    
+
     def save_report(self, output_dir: Union[str, Path], command_name: str):
         """Save a memory profiling report to the specified directory."""
         output_dir = Path(output_dir)
         output_dir.mkdir(parents=True, exist_ok=True)
-        
+
         # Save raw data as JSON
         data_file = output_dir / f"{command_name}_memory_profile.json"
-        with open(data_file, 'w') as f:
+        with open(data_file, "w") as f:
             json.dump([s.to_dict() for s in self.snapshots], f, indent=2)
-        
+
         # Generate and save plot
         self._generate_plot(output_dir / f"{command_name}_memory_profile.png")
-        
+
         # Generate text report with tracemalloc stats if available
         report_file = output_dir / f"{command_name}_memory_report.txt"
-        with open(report_file, 'w') as f:
+        with open(report_file, "w") as f:
             peak_rss, peak_vms = self.get_peak_memory()
             f.write(f"Memory Profile for: {command_name}\n")
             f.write(f"{'=' * 50}\n")
-            f.write(f"Duration: {self.snapshots[-1].timestamp:.2f} seconds\n")
+            f.write(f"Duration: {(self.snapshots[-1].timestamp if self.snapshots else 0.0):.2f} seconds\n")  # 0.0 when nothing was recorded, instead of IndexError
             f.write(f"Peak RSS: {peak_rss:.2f} MB\n")
             f.write(f"Peak VMS: {peak_vms:.2f} MB\n\n")
-            
+
             if self.use_tracemalloc:
                 f.write("Top Memory Allocations:\n")
                 f.write(f"{'-' * 50}\n")
-                for stat in self.get_tracemalloc_stats(top_n=20):
+                for stat in self.get_tracemalloc_stats(top_n=20):  # NOTE(review): yields nothing once stop() has run, since stop() calls tracemalloc.stop() and the getter returns [] when not tracing
                     f.write(f"{stat.size / (1024 * 1024):.2f} MB: {stat.traceback.format()[0]}\n")
-        
+
         return output_dir
-    
+
     def _generate_plot(self, output_file: Path):
         """Generate a plot of memory usage over time."""
         if not self.snapshots:
             return
-            
+
         timestamps = [s.timestamp for s in self.snapshots]
         rss_values = [s.rss_mb for s in self.snapshots]
         vms_values = [s.vms_mb for s in self.snapshots]
-        
+
         plt.figure(figsize=(10, 6))
-        plt.plot(timestamps, rss_values, label='RSS (MB)', linewidth=2)
-        plt.plot(timestamps, vms_values, label='VMS (MB)', linewidth=2)
-        
+        plt.plot(timestamps, rss_values, label="RSS (MB)", linewidth=2)
+        plt.plot(timestamps, vms_values, label="VMS (MB)", linewidth=2)
+
         if self.use_tracemalloc:
             tracemalloc_values = [s.tracemalloc_mb for s in self.snapshots if s.tracemalloc_mb is not None]
             if tracemalloc_values:
-                tracemalloc_timestamps = timestamps[:len(tracemalloc_values)]
-                plt.plot(tracemalloc_timestamps, tracemalloc_values, label='Tracemalloc (MB)', linewidth=2, linestyle='--')
-        
-        plt.xlabel('Time (seconds)')
-        plt.ylabel('Memory Usage (MB)')
-        plt.title('Memory Usage Over Time')
-        plt.grid(True, linestyle='--', alpha=0.7)
+                tracemalloc_timestamps = [s.timestamp for s in self.snapshots if s.tracemalloc_mb is not None]  # same filter as the values, so each point keeps its own timestamp
+                plt.plot(tracemalloc_timestamps, tracemalloc_values, label="Tracemalloc (MB)", linewidth=2, linestyle="--")
+
+        plt.xlabel("Time (seconds)")
+        plt.ylabel("Memory Usage (MB)")
+        plt.title("Memory Usage Over Time")
+        plt.grid(True, linestyle="--", alpha=0.7)
         plt.legend()
-        
+
         plt.tight_layout()
         plt.savefig(output_file)
         plt.close()
 
-def profile_memory(func=None, *, interval: float = 0.1, use_tracemalloc: bool = True, 
-                  output_dir: Optional[Union[str, Path]] = None):
-    """
-    Decorator to profile memory usage of a function.
-    
+
+def profile_memory(func=None, *, interval: float = 0.1, use_tracemalloc: bool = True, output_dir: Optional[Union[str, Path]] = None):
+    """Decorator to profile memory usage of a function.
+
     Args:
         func: The function to profile.
         interval: The interval in seconds between memory snapshots.
         use_tracemalloc: Whether to use tracemalloc for detailed memory tracking.
         output_dir: Directory to save the memory profile report. If None, a temporary directory is used.
     """
+
     def decorator(func):
         @functools.wraps(func)
         def wrapper(*args, **kwargs):
             profiler = MemoryProfiler(interval=interval, use_tracemalloc=use_tracemalloc)
             profiler.start()
-            
+
             try:
                 result = func(*args, **kwargs)
                 return result
             finally:
                 profiler.stop()
-                
+
                 # Determine output directory
                 out_dir = output_dir
                 if out_dir is None:
                     out_dir = Path(tempfile.gettempdir()) / "codegen_memory_profiles"
-                
+
                 # Save report
                 func_name = func.__name__
                 report_dir = profiler.save_report(out_dir, func_name)
-                
+
                 console.print(f"\n[bold green]Memory profile saved to:[/bold green] {report_dir}")
-                
+
                 # Print summary
                 peak_rss, peak_vms = profiler.get_peak_memory()
                 table = Table(title="Memory Usage Summary")
                 table.add_column("Metric", style="cyan")
                 table.add_column("Value", style="green")
-                
+
                 table.add_row("Peak RSS", f"{peak_rss:.2f} MB")
                 table.add_row("Peak VMS", f"{peak_vms:.2f} MB")
                 table.add_row("Duration", f"{profiler.snapshots[-1].timestamp:.2f} seconds")
-                
+
                 console.print(table)
-        
+
         return wrapper
-    
+
     if func is None:
         return decorator
     return decorator(func)
 
-def profile_command(cmd_args: List[str], output_dir: Optional[Union[str, Path]] = None) -> Path:
-    """
-    Profile memory usage of a command.
-    
+
+def profile_command(cmd_args: list[str], output_dir: Optional[Union[str, Path]] = None) -> Path:
+    """Profile memory usage of a command.
+
     Args:
         cmd_args: The command arguments to profile.
         output_dir: Directory to save the memory profile report. If None, a temporary directory is used.
-        
+
     Returns:
         Path to the output directory containing the profile report.
     """
@@ -238,16 +236,16 @@ def profile_command(cmd_args: List[str], output_dir: Optional[Union[str, Path]]
         output_dir = Path(tempfile.gettempdir()) / "codegen_memory_profiles"
     output_dir = Path(output_dir)
     output_dir.mkdir(parents=True, exist_ok=True)
-    
+
     # Create a unique filename for this run
     timestamp = int(time.time())
     cmd_name = "_".join(cmd_args).replace("/", "_")[:50]  # Limit length and remove problematic chars
     output_file = output_dir / f"{cmd_name}_{timestamp}_memory.json"
-    
+
     # Run the command with memory profiling
     env = os.environ.copy()
     env["PYTHONPATH"] = f"{os.getcwd()}:{env.get('PYTHONPATH', '')}"
-    
+
     # Prepare the profiling script
     script = f"""
 import sys
@@ -271,14 +269,14 @@ def profile_command(cmd_args: List[str], output_dir: Optional[Union[str, Path]]
     while True:
         mem_info = process.memory_info()
         current, peak = tracemalloc.get_traced_memory()
-        
+
         snapshots.append({{
             "timestamp": time.time() - start_time,
             "rss_mb": mem_info.rss / (1024 * 1024),
             "vms_mb": mem_info.vms / (1024 * 1024),
             "tracemalloc_mb": current / (1024 * 1024)
         }})
-        
+
         time.sleep(interval)
 except KeyboardInterrupt:
     pass
@@ -286,86 +284,83 @@ def profile_command(cmd_args: List[str], output_dir: Optional[Union[str, Path]]
     # Save the snapshots
     with open(output_file, 'w') as f:
         json.dump(snapshots, f, indent=2)
-    
+
     # Print summary
     if snapshots:
         peak_rss = max(s["rss_mb"] for s in snapshots)
         peak_vms = max(s["vms_mb"] for s in snapshots)
         duration = snapshots[-1]["timestamp"]
-        
+
         print(f"\\nMemory Profile Summary:")
         print(f"Peak RSS: {{peak_rss:.2f}} MB")
         print(f"Peak VMS: {{peak_vms:.2f}} MB")
         print(f"Duration: {{duration:.2f}} seconds")
         print(f"Profile saved to: {{output_file}}")
     """
-    
+
     script_file = output_dir / f"memory_profiler_{timestamp}.py"
-    with open(script_file, 'w') as f:
+    with open(script_file, "w") as f:
         f.write(script)
-    
+
     # Start the profiler in a separate process
-    profiler_process = subprocess.Popen([sys.executable, str(script_file)], 
-                                        env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-    
+    profiler_process = subprocess.Popen([sys.executable, str(script_file)], env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+
     # Run the command
-    cmd_process = subprocess.Popen([sys.executable, "-m", "codegen.cli.cli"] + cmd_args, 
-                                   env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-    
+    cmd_process = subprocess.Popen([sys.executable, "-m", "codegen.cli.cli", *cmd_args], env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+
     # Wait for the command to finish
     stdout, stderr = cmd_process.communicate()
-    
+
     # Stop the profiler
     profiler_process.terminate()
     profiler_stdout, profiler_stderr = profiler_process.communicate()
-    
+
     # Print command output
     console.print("[bold]Command Output:[/bold]")
     console.print(stdout.decode())
     if stderr:
         console.print("[bold red]Command Errors:[/bold red]")
         console.print(stderr.decode())
-    
+
     # Print profiler output
     if profiler_stdout:
         console.print(profiler_stdout.decode())
-    
+
     # Generate visualization if the profile data exists
     if output_file.exists():
         try:
-            with open(output_file, 'r') as f:
+            with open(output_file) as f:
                 data = json.load(f)
-            
+
             if data:
                 # Generate plot
                 plot_file = output_dir / f"{cmd_name}_{timestamp}_memory.png"
-                
+
                 timestamps = [s["timestamp"] for s in data]
                 rss_values = [s["rss_mb"] for s in data]
                 vms_values = [s["vms_mb"] for s in data]
                 tracemalloc_values = [s["tracemalloc_mb"] for s in data if "tracemalloc_mb" in s]
-                
+
                 plt.figure(figsize=(10, 6))
-                plt.plot(timestamps, rss_values, label='RSS (MB)', linewidth=2)
-                plt.plot(timestamps, vms_values, label='VMS (MB)', linewidth=2)
-                
+                plt.plot(timestamps, rss_values, label="RSS (MB)", linewidth=2)
+                plt.plot(timestamps, vms_values, label="VMS (MB)", linewidth=2)
+
                 if tracemalloc_values:
-                    tracemalloc_timestamps = timestamps[:len(tracemalloc_values)]
-                    plt.plot(tracemalloc_timestamps, tracemalloc_values, 
-                             label='Tracemalloc (MB)', linewidth=2, linestyle='--')
-                
-                plt.xlabel('Time (seconds)')
-                plt.ylabel('Memory Usage (MB)')
-                plt.title(f'Memory Usage: {" ".join(cmd_args)}')
-                plt.grid(True, linestyle='--', alpha=0.7)
+                    tracemalloc_timestamps = [s["timestamp"] for s in data if "tracemalloc_mb" in s]  # same filter as the values, so each point keeps its own timestamp
+                    plt.plot(tracemalloc_timestamps, tracemalloc_values, label="Tracemalloc (MB)", linewidth=2, linestyle="--")
+
+                plt.xlabel("Time (seconds)")
+                plt.ylabel("Memory Usage (MB)")
+                plt.title(f"Memory Usage: {' '.join(cmd_args)}")
+                plt.grid(True, linestyle="--", alpha=0.7)
                 plt.legend()
-                
+
                 plt.tight_layout()
                 plt.savefig(plot_file)
                 plt.close()
-                
+
                 console.print(f"[bold green]Memory profile visualization saved to:[/bold green] {plot_file}")
         except Exception as e:
             console.print(f"[bold red]Error generating visualization:[/bold red] {e}")
-    
+
     return output_dir