203 changes: 203 additions & 0 deletions .codegen/codemods/dict_to_schema/dict_to_schema.py
@@ -0,0 +1,203 @@
import codegen
from codegen import Codebase
import sys


def infer_type(value) -> str:
    """Infer type hint from a value."""
    if isinstance(value, bool):
        return "bool"
    elif isinstance(value, int):
        return "int"
    elif isinstance(value, float):
        return "float"
    elif isinstance(value, str):
        return "str"
    elif isinstance(value, list):
        return "List[Any]"
    elif isinstance(value, dict):
        return "Dict[str, Any]"
    return "Any"


def print_progress(current: int, total: int, width: int = 40) -> None:
    """Render a simple progress bar on stderr."""
    filled = int(width * current / total)
    bar = "█" * filled + "░" * (width - filled)
    percent = int(100 * current / total)
    print(f"\r[{bar}] {percent}% ({current}/{total})", end="", file=sys.stderr)
    if current == total:
        print(file=sys.stderr)


@codegen.function('dict-to-schema')
def run(codebase: Codebase):
    """Convert dictionary literals to dataclasses with proper type hints."""
    files_modified = 0
    models_created = 0

    # Process all Python files in the codebase
    python_files = [f for f in codebase.files if str(f.path).endswith('.py')]
    total_files = len(python_files)
    print("\n\033[1;36m📁 Scanning files for dictionary literals...\033[0m")
    print(f"Found {total_files} Python files to process")

    def to_class_name(name: str) -> str:
        """Convert a snake_case name like 'app_config' to a CamelCase class name ('AppConfig')."""
        return "".join(part.capitalize() for part in name.split("_"))

    def parse_dict_str(dict_str: str) -> list:
        """Parse dictionary string into list of (key, value, comment) tuples."""
        items = []
        lines = dict_str.strip("{}").split("\n")
        for line in lines:
            line = line.strip()
            if not line or line.startswith("#"):
                continue

            # Split line into key-value and comment
            parts = line.split("#", 1)
            kv_part = parts[0].strip().rstrip(",")
            comment = parts[1].strip() if len(parts) > 1 else None

            if ":" not in kv_part:
                continue

            key, value = kv_part.split(":", 1)
            key = key.strip().strip('"\'')
            value = value.strip()
            items.append((key, value, comment))
        return items

    def infer_type_from_value(value: str) -> str:
        """Infer type hint from a string value."""
        value = value.strip()
        if value.startswith('"') or value.startswith("'"):
            return "str"
        elif value in ("True", "False"):
            return "bool"
        elif "." in value and value.replace(".", "").isdigit():
            return "float"
        elif value.isdigit():
            return "int"
        return "Any"

    def process_dict_assignment(source: str, name: str) -> tuple[str | None, str | None]:
        """Process dictionary assignment and return model definition and initialization."""
        dict_str = source.split("=", 1)[1].strip()
        if not dict_str.startswith("{") or not dict_str.endswith("}"):
            return None, None

        dict_items = parse_dict_str(dict_str)
        if not dict_items:
            return None, None

        class_name = to_class_name(name)
        fields = []
        for key, value, comment in dict_items:
            type_hint = infer_type_from_value(value)
            field = f"    {key}: {type_hint} | None = None"
            if comment:
                field += f"  # {comment}"
            fields.append(field)

        model_def = f"@dataclass\nclass {class_name}:\n" + "\n".join(fields)
        init_code = f"{name} = {class_name}(**{dict_str})"
        return model_def, init_code

    for i, file in enumerate(python_files, 1):
        needs_imports = False
        file_modified = False

        print_progress(i, total_files)
        print(f"\n\033[1;34m🔍 Processing: {file.path}\033[0m")

        # Process global variables
        for global_var in file.global_vars:
            try:
                if "=" not in global_var.source:
                    continue
                # Branch on the assigned value so plain dicts and lists of dicts
                # get distinct handling.
                value_str = global_var.source.split("=", 1)[1].strip()

                if value_str.startswith("{") and value_str.endswith("}"):
                    model_def, init_code = process_dict_assignment(global_var.source, global_var.name)
                    if not model_def:
                        continue

                    print("\n" + "═" * 60)
                    print(f"\033[1;32m🔄 Converting global variable '{global_var.name}' to schema\033[0m")
                    print("─" * 60)
                    print("\033[1;34m📝 Original code:\033[0m")
                    print(f" {global_var.source}")
                    print("\n\033[1;35m✨ Generated schema:\033[0m")
                    print(" " + model_def.replace("\n", "\n "))
                    print("\n\033[1;32m✅ Updated code:\033[0m")
                    print(f" {init_code}")
                    print("═" * 60)

                    global_var.file.add_symbol_from_source(model_def + "\n")
                    global_var.edit(init_code)
                    needs_imports = True
                    models_created += 1
                    file_modified = True
                elif value_str.startswith("[") and value_str.endswith("]") and "{" in value_str:
                    # List of dictionaries: build a model from the first dict literal
                    list_str = value_str
                    dict_start = list_str.find("{")
                    dict_end = list_str.find("}")
                    if dict_start == -1 or dict_end == -1:
                        continue

                    dict_str = list_str[dict_start:dict_end + 1]
                    # Naive singularization: rstrip removes all trailing 's' characters
                    model_def, _ = process_dict_assignment(f"temp = {dict_str}", global_var.name.rstrip('s'))
                    if not model_def:
                        continue

                    list_init = f"[{to_class_name(global_var.name.rstrip('s'))}(**item) for item in {list_str}]"

                    print("\n" + "═" * 60)
                    print(f"\033[1;32m🔄 Converting list items in '{global_var.name}' to schema\033[0m")
                    print("─" * 60)
                    print("\033[1;34m📝 Original code:\033[0m")
                    print(f" {global_var.source}")
                    print("\n\033[1;35m✨ Generated schema:\033[0m")
                    print(" " + model_def.replace("\n", "\n "))
                    print("\n\033[1;32m✅ Updated code:\033[0m")
                    print(f" {global_var.name} = {list_init}")
                    print("═" * 60)

                    global_var.file.add_symbol_from_source(model_def + "\n")
                    global_var.edit(list_init)
                    needs_imports = True
                    models_created += 1
                    file_modified = True
            except Exception as e:
                print(f"\n❌ Error processing global variable '{global_var.name}':")
                print(f" {str(e)}")
                print(" Skipping this variable and continuing...\n")

        # Process class attributes
        for cls in file.classes:
            for attr in cls.attributes:
                try:
                    if "{" in attr.source and "}" in attr.source:
                        model_def, init_code = process_dict_assignment(attr.source, attr.name)
                        if not model_def:
                            continue

                        cls.insert_before(model_def + "\n")
                        attr.edit(init_code.split("=", 1)[1].strip())
                        needs_imports = True
                        models_created += 1
                        file_modified = True
                except Exception as e:
                    print(f"\n❌ Error processing class attribute '{attr.name}':")
                    print(f" {str(e)}")
                    print(" Skipping this attribute and continuing...\n")

        # Add the imports once per file, after both global variables and class attributes
        if needs_imports:
            print(f" ➕ Adding dataclass imports to {file.path}")
            file.add_import_from_import_string("from dataclasses import dataclass")
            file.add_import_from_import_string("from typing import Any, Dict, List, Optional")

        if file_modified:
            print(f" ✅ Successfully modified {file.path}")
            files_modified += 1

    print("\n" + "═" * 60)
    print("\033[1;35m📊 Summary of Changes\033[0m")
    print("═" * 60)
    print(f"\033[1;32m✨ Files modified: {files_modified}\033[0m")
    print(f"\033[1;32m🔄 Schemas created: {models_created}\033[0m")
    print("═" * 60)
93 changes: 41 additions & 52 deletions examples/dict_to_schema/README.md
@@ -1,96 +1,85 @@
# Dict to Schema

This example demonstrates how to automatically convert Python dictionary literals into dataclasses with proper type hints. The codemod makes this process simple by handling all the tedious manual updates automatically.

> [!NOTE]
> View example transformations created by this codemod on the `modal-labs/modal-client` repository [here](https://www.codegen.sh/codemod/6b5f2dfa-948a-4953-b283-9bd4b8545632/public/diff).

## How the Conversion Script Works

The codemod automates the entire conversion process in a few key steps:

1. **Dictionary Detection**
   - Automatically identifies dictionary literals in your code
   - Processes both global variables and class attributes
   - Skips empty dictionaries to avoid unnecessary conversions

2. **Schema Creation**
   - Generates meaningful dataclass names based on the original variable names
   - Converts dictionary key-value pairs to typed class attributes (see the type-inference sketch below)
   - Maintains proper Python indentation

3. **Code Updates**
   - Inserts new dataclass definitions in appropriate locations
   - Updates dictionary assignments to use the new dataclasses
   - Automatically adds the required `dataclasses` and `typing` imports
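
Type hints are inferred from each value's source text. Here is a minimal sketch of that heuristic, mirroring the `infer_type_from_value` helper in `dict_to_schema.py`; the calls at the end are illustrative only:

```python
def infer_type_from_value(value: str) -> str:
    """Map a dictionary value's source text to a type hint."""
    value = value.strip()
    if value.startswith('"') or value.startswith("'"):
        return "str"                      # quoted literal -> str
    elif value in ("True", "False"):
        return "bool"
    elif "." in value and value.replace(".", "").isdigit():
        return "float"
    elif value.isdigit():
        return "int"
    return "Any"                          # anything else falls back to Any

infer_type_from_value('"localhost"')  # -> "str"
infer_type_from_value("8080")         # -> "int"
infer_type_from_value("1.5")          # -> "float"
```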

## Example Transformations

### Global Variables
```python
# Before
app_config = {"host": "localhost", "port": 8080}

# After
@dataclass
class AppConfig:
    host: str | None = None
    port: int | None = None

app_config = AppConfig(host="localhost", port=8080)

# List Example
books = [
    {"id": 1, "title": "Book One", "author": "Author A"},
    {"id": 2, "title": "Book Two", "author": "Author B"}
]

# After
@dataclass
class Book:
    id: int | None = None
    title: str | None = None
    author: str | None = None

books = [Book(**item) for item in books]
```

### Class Attributes
```python
# Before
class Service:
    defaults = {"timeout": 30, "retries": 3, "backoff": 1.5}

# After
@dataclass
class Defaults:
    timeout: int | None = None
    retries: int | None = None
    backoff: float | None = None

class Service:
    defaults = Defaults(timeout=30, retries=3, backoff=1.5)
```
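
After the conversion, call sites use attribute access instead of string keys, so typos become visible to linters and type checkers. A small self-contained sketch based on the `Service` example above:

```python
from dataclasses import dataclass

@dataclass
class Defaults:
    timeout: int | None = None
    retries: int | None = None
    backoff: float | None = None

class Service:
    defaults = Defaults(timeout=30, retries=3, backoff=1.5)

service = Service()

# Before the conversion, values were fetched by string key and typed as Any:
#     timeout = service.defaults["timeout"]
# After the conversion, fields are plain attributes with inferred type hints:
timeout = service.defaults.timeout   # 30  (int | None)
backoff = service.defaults.backoff   # 1.5 (float | None)
```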

## Running the Conversion

```bash
# Install Codegen
pip install codegen

# Initialize Codegen in your project
codegen init

# Run the codemod
codegen run dict_to_schema
```

## Learn More

- [Python Dataclasses Documentation](https://docs.python.org/3/library/dataclasses.html)
- [Codegen Documentation](https://docs.codegen.com)

## Contributing

Feel free to submit issues and enhancement requests!