203 changes: 203 additions & 0 deletions .codegen/codemods/dict_to_schema/dict_to_schema.py
@@ -0,0 +1,203 @@
import codegen
from codegen import Codebase
import sys


def infer_type(value) -> str:
    """Infer type hint from a value."""
    if isinstance(value, bool):
        return "bool"
    elif isinstance(value, int):
        return "int"
    elif isinstance(value, float):
        return "float"
    elif isinstance(value, str):
        return "str"
    elif isinstance(value, list):
        return "List[Any]"
    elif isinstance(value, dict):
        return "Dict[str, Any]"
    return "Any"


def print_progress(current: int, total: int, width: int = 40) -> None:
    """Render a simple progress bar on stderr."""
    filled = int(width * current / total)
    bar = "█" * filled + "░" * (width - filled)
    percent = int(100 * current / total)
    print(f"\r[{bar}] {percent}% ({current}/{total})", end="", file=sys.stderr)
    if current == total:
        print(file=sys.stderr)


@codegen.function('dict-to-schema')
def run(codebase: Codebase):
    """Convert dictionary literals to dataclasses with proper type hints."""
    files_modified = 0
    models_created = 0

    # Process all Python files in the codebase
    python_files = [f for f in codebase.files if str(f.path).endswith('.py')]
    total_files = len(python_files)
    print("\n\033[1;36m📁 Scanning files for dictionary literals...\033[0m")
    print(f"Found {total_files} Python files to process")

    def to_class_name(name: str) -> str:
        """Convert a snake_case name like 'app_config' to a CamelCase class name ('AppConfig')."""
        return "".join(part.capitalize() for part in name.split("_"))

    def parse_dict_str(dict_str: str) -> list:
        """Parse dictionary string into list of (key, value, comment) tuples."""
        items = []
        lines = dict_str.strip("{}").split("\n")
        for line in lines:
            line = line.strip()
            if not line or line.startswith("#"):
                continue

            # Split line into key-value and comment
            parts = line.split("#", 1)
            kv_part = parts[0].strip().rstrip(",")
            comment = parts[1].strip() if len(parts) > 1 else None

            if ":" not in kv_part:
                continue

            key, value = kv_part.split(":", 1)
            key = key.strip().strip('"\'')
            value = value.strip()
            items.append((key, value, comment))
        return items

    def infer_type_from_value(value: str) -> str:
        """Infer type hint from a string value."""
        value = value.strip()
        if value.startswith('"') or value.startswith("'"):
            return "str"
        elif value in ("True", "False"):
            return "bool"
        elif "." in value and value.replace(".", "").isdigit():
            return "float"
        elif value.isdigit():
            return "int"
        return "Any"

    def process_dict_assignment(source: str, name: str) -> tuple[str | None, str | None]:
        """Process dictionary assignment and return model definition and initialization."""
        dict_str = source.split("=", 1)[1].strip()
        if not dict_str.startswith("{") or not dict_str.endswith("}"):
            return None, None

        dict_items = parse_dict_str(dict_str)
        if not dict_items:
            return None, None

        class_name = to_class_name(name)
        fields = []
        for key, value, comment in dict_items:
            type_hint = infer_type_from_value(value)
            field = f"    {key}: {type_hint} | None = None"
            if comment:
                field += f"  # {comment}"
            fields.append(field)

        model_def = f"@dataclass\nclass {class_name}:\n" + "\n".join(fields)
        init_code = f"{name} = {class_name}(**{dict_str})"
        return model_def, init_code

    for i, file in enumerate(python_files, 1):
        needs_imports = False
        file_modified = False

        print_progress(i, total_files)
        print(f"\n\033[1;34m🔍 Processing: {file.path}\033[0m")

        # Process global variables
        for global_var in file.global_vars:
            try:
                if "=" not in global_var.source:
                    continue
                # Branch on the assigned value so plain dicts and lists of dicts
                # get distinct handling.
                value_str = global_var.source.split("=", 1)[1].strip()

                if value_str.startswith("{") and value_str.endswith("}"):
                    model_def, init_code = process_dict_assignment(global_var.source, global_var.name)
                    if not model_def:
                        continue

                    print("\n" + "═" * 60)
                    print(f"\033[1;32m🔄 Converting global variable '{global_var.name}' to schema\033[0m")
                    print("─" * 60)
                    print("\033[1;34m📝 Original code:\033[0m")
                    print(f" {global_var.source}")
                    print("\n\033[1;35m✨ Generated schema:\033[0m")
                    print(" " + model_def.replace("\n", "\n "))
                    print("\n\033[1;32m✅ Updated code:\033[0m")
                    print(f" {init_code}")
                    print("═" * 60)

                    global_var.file.add_symbol_from_source(model_def + "\n")
                    global_var.edit(init_code)
                    needs_imports = True
                    models_created += 1
                    file_modified = True
                elif value_str.startswith("[") and value_str.endswith("]") and "{" in value_str:
                    # List of dictionaries: build a model from the first dict literal
                    list_str = value_str
                    dict_start = list_str.find("{")
                    dict_end = list_str.find("}")
                    if dict_start == -1 or dict_end == -1:
                        continue

                    dict_str = list_str[dict_start:dict_end + 1]
                    # Naive singularization: rstrip removes all trailing 's' characters
                    model_def, _ = process_dict_assignment(f"temp = {dict_str}", global_var.name.rstrip('s'))
                    if not model_def:
                        continue

                    list_init = f"[{to_class_name(global_var.name.rstrip('s'))}(**item) for item in {list_str}]"

                    print("\n" + "═" * 60)
                    print(f"\033[1;32m🔄 Converting list items in '{global_var.name}' to schema\033[0m")
                    print("─" * 60)
                    print("\033[1;34m📝 Original code:\033[0m")
                    print(f" {global_var.source}")
                    print("\n\033[1;35m✨ Generated schema:\033[0m")
                    print(" " + model_def.replace("\n", "\n "))
                    print("\n\033[1;32m✅ Updated code:\033[0m")
                    print(f" {global_var.name} = {list_init}")
                    print("═" * 60)

                    global_var.file.add_symbol_from_source(model_def + "\n")
                    global_var.edit(list_init)
                    needs_imports = True
                    models_created += 1
                    file_modified = True
            except Exception as e:
                print(f"\n❌ Error processing global variable '{global_var.name}':")
                print(f" {str(e)}")
                print(" Skipping this variable and continuing...\n")

        # Process class attributes
        for cls in file.classes:
            for attr in cls.attributes:
                try:
                    if "{" in attr.source and "}" in attr.source:
                        model_def, init_code = process_dict_assignment(attr.source, attr.name)
                        if not model_def:
                            continue

                        cls.insert_before(model_def + "\n")
                        attr.edit(init_code.split("=", 1)[1].strip())
                        needs_imports = True
                        models_created += 1
                        file_modified = True
                except Exception as e:
                    print(f"\n❌ Error processing class attribute '{attr.name}':")
                    print(f" {str(e)}")
                    print(" Skipping this attribute and continuing...\n")

        # Add the imports once per file, after both global variables and class attributes
        if needs_imports:
            print(f" ➕ Adding dataclass imports to {file.path}")
            file.add_import_from_import_string("from dataclasses import dataclass")
            file.add_import_from_import_string("from typing import Any, Dict, List, Optional")

        if file_modified:
            print(f" ✅ Successfully modified {file.path}")
            files_modified += 1

    print("\n" + "═" * 60)
    print("\033[1;35m📊 Summary of Changes\033[0m")
    print("═" * 60)
    print(f"\033[1;32m✨ Files modified: {files_modified}\033[0m")
    print(f"\033[1;32m🔄 Schemas created: {models_created}\033[0m")
    print("═" * 60)
93 changes: 41 additions & 52 deletions examples/dict_to_schema/README.md
@@ -1,96 +1,85 @@
# Dict to Schema

This example demonstrates how to automatically convert Python dictionary literals into dataclasses with proper type hints. The codemod makes this process simple by handling all the tedious manual updates automatically.

> [!NOTE]
> View example transformations created by this codemod on the `modal-labs/modal-client` repository [here](https://www.codegen.sh/codemod/6b5f2dfa-948a-4953-b283-9bd4b8545632/public/diff).

## How the Conversion Script Works

The codemod automates the entire conversion process in a few key steps:

1. **Dictionary Detection**
   - Automatically identifies dictionary literals in your code
   - Processes both global variables and class attributes
   - Skips empty dictionaries to avoid unnecessary conversions

2. **Schema Creation**
   - Generates meaningful dataclass names based on the original variable names
   - Converts dictionary key-value pairs to typed class attributes (see the type-inference sketch below)
   - Maintains proper Python indentation

3. **Code Updates**
   - Inserts new dataclass definitions in appropriate locations
   - Updates dictionary assignments to use the new dataclasses
   - Automatically adds the required `dataclasses` and `typing` imports
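
Type hints are inferred from each value's source text. Here is a minimal sketch of that heuristic, mirroring the `infer_type_from_value` helper in `dict_to_schema.py`; the calls at the end are illustrative only:

```python
def infer_type_from_value(value: str) -> str:
    """Map a dictionary value's source text to a type hint."""
    value = value.strip()
    if value.startswith('"') or value.startswith("'"):
        return "str"                      # quoted literal -> str
    elif value in ("True", "False"):
        return "bool"
    elif "." in value and value.replace(".", "").isdigit():
        return "float"
    elif value.isdigit():
        return "int"
    return "Any"                          # anything else falls back to Any

infer_type_from_value('"localhost"')  # -> "str"
infer_type_from_value("8080")         # -> "int"
infer_type_from_value("1.5")          # -> "float"
```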

## Example Transformations

### Global Variables
```python
# Before
app_config = {"host": "localhost", "port": 8080}

# After
@dataclass
class AppConfig:
    host: str | None = None
    port: int | None = None

app_config = AppConfig(host="localhost", port=8080)

# List Example
books = [
    {"id": 1, "title": "Book One", "author": "Author A"},
    {"id": 2, "title": "Book Two", "author": "Author B"}
]

# After
@dataclass
class Book:
    id: int | None = None
    title: str | None = None
    author: str | None = None

books = [Book(**item) for item in books]
```

### Class Attributes
```python
# Before
class Service:
    defaults = {"timeout": 30, "retries": 3, "backoff": 1.5}

# After
@dataclass
class Defaults:
    timeout: int | None = None
    retries: int | None = None
    backoff: float | None = None

class Service:
    defaults = Defaults(timeout=30, retries=3, backoff=1.5)
```
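
After the conversion, call sites use attribute access instead of string keys, so typos become visible to linters and type checkers. A small self-contained sketch based on the `Service` example above:

```python
from dataclasses import dataclass

@dataclass
class Defaults:
    timeout: int | None = None
    retries: int | None = None
    backoff: float | None = None

class Service:
    defaults = Defaults(timeout=30, retries=3, backoff=1.5)

service = Service()

# Before the conversion, values were fetched by string key and typed as Any:
#     timeout = service.defaults["timeout"]
# After the conversion, fields are plain attributes with inferred type hints:
timeout = service.defaults.timeout   # 30  (int | None)
backoff = service.defaults.backoff   # 1.5 (float | None)
```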

## Running the Conversion

```bash
# Install Codegen
pip install codegen

# Initialize Codegen in your project
codegen init

# Run the codemod
codegen run dict_to_schema
```

## Learn More

- [Python Dataclasses Documentation](https://docs.python.org/3/library/dataclasses.html)
- [Codegen Documentation](https://docs.codegen.com)

## Contributing

Feel free to submit issues and enhancement requests!