Skip to content

CG-17782: Update codegen to store all repos in /tmp/{org_name} #1071

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 9 commits into
base: develop
Choose a base branch
from
18 changes: 11 additions & 7 deletions src/codegen/git/repo_operator/repo_operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from codegen.git.clients.git_repo_client import GitRepoClient
from codegen.git.configs.constants import CODEGEN_BOT_EMAIL, CODEGEN_BOT_NAME
from codegen.git.repo_operator.local_git_repo import LocalGitRepo
from codegen.git.schemas.enums import CheckoutResult, FetchResult, RepoVisibility, SetupOption
from codegen.git.schemas.enums import CheckoutResult, FetchResult, SetupOption
from codegen.git.schemas.repo_config import RepoConfig
from codegen.git.utils.clone import clone_or_pull_repo, clone_repo, pull_repo
from codegen.git.utils.clone_url import add_access_token_to_url, get_authenticated_clone_url_for_repo_config, get_clone_url_for_repo_config, url_to_github
Expand Down Expand Up @@ -85,11 +85,14 @@

@property
def repo_path(self) -> str:
return os.path.join(self.base_dir, self.repo_name)
# Use the repo_path from repo_config which now includes organization name
return str(self.repo_config.repo_path)

@property
def remote_git_repo(self) -> GitRepoClient:
if not self.access_token and self.repo_config.visibility != RepoVisibility.PUBLIC:
# Check if we have an access token for non-public repos
if not self.access_token:
# Since visibility is no longer in RepoConfig, we'll assume we need a token
msg = "Must initialize with access_token to get remote"
raise ValueError(msg)

Expand Down Expand Up @@ -142,7 +145,7 @@
email_level = None
levels = ["system", "global", "user", "repository"]
for level in levels:
with git_cli.config_reader(level) as reader:
with git_cli.config_reader(level) as reader: # type: ignore
if reader.has_option("user", "name") and not username:
username = username or reader.get("user", "name")
user_level = user_level or level
Expand Down Expand Up @@ -209,8 +212,9 @@
# SET UP
####################################################################################################################
def setup_repo_dir(self, setup_option: SetupOption = SetupOption.PULL_OR_CLONE, shallow: bool = True) -> None:
os.makedirs(self.base_dir, exist_ok=True)
os.chdir(self.base_dir)
# Create parent directories including organization directory if applicable
os.makedirs(os.path.dirname(self.repo_path), exist_ok=True)
os.chdir(os.path.dirname(self.repo_path))
if setup_option is SetupOption.CLONE:
# if repo exists delete, then clone, else clone
clone_repo(shallow=shallow, repo_path=self.repo_path, clone_url=self.clone_url)
Expand Down Expand Up @@ -479,7 +483,7 @@

def _get_username_email(self) -> tuple[str, str] | None:
for level in ["user", "global", "system"]:
with self.git_cli.config_reader(level) as reader:
with self.git_cli.config_reader(level) as reader: # type: ignore
if reader.has_section("user"):
user, email = reader.get_value("user", "name"), reader.get_value("user", "email")
if isinstance(user, str) and isinstance(email, str) and user != CODEGEN_BOT_NAME and email != CODEGEN_BOT_EMAIL:
Expand Down Expand Up @@ -580,7 +584,7 @@
return content
except UnicodeDecodeError:
print(f"Warning: Unable to decode file {file_path}. Skipping.")
return None

Check failure on line 587 in src/codegen/git/repo_operator/repo_operator.py

View workflow job for this annotation

GitHub Actions / mypy

error: Incompatible return value type (got "None", expected "str") [return-value]

def write_file(self, relpath: str, content: str) -> None:
"""Writes file content to disk"""
Expand Down Expand Up @@ -652,7 +656,7 @@
filepaths = self.get_filepaths_for_repo(ignore_list)
# Iterate through files and yield contents
for rel_filepath in filepaths:
rel_filepath: str

Check failure on line 659 in src/codegen/git/repo_operator/repo_operator.py

View workflow job for this annotation

GitHub Actions / mypy

error: Name "rel_filepath" already defined on line 658 [no-redef]
filepath = os.path.join(self.repo_path, rel_filepath)

# Filter by subdirectory (includes full filenames)
Expand Down Expand Up @@ -689,7 +693,7 @@
list_files = []

for rel_filepath in self.git_cli.git.ls_files().split("\n"):
rel_filepath: str

Check failure on line 696 in src/codegen/git/repo_operator/repo_operator.py

View workflow job for this annotation

GitHub Actions / mypy

error: Name "rel_filepath" already defined on line 695 [no-redef]
if subdirs and not any(d in rel_filepath for d in subdirs):
continue
if extensions is None or any(rel_filepath.endswith(e) for e in extensions):
Expand All @@ -713,7 +717,7 @@

def get_modified_files_in_last_n_days(self, days: int = 1) -> tuple[list[str], list[str]]:
"""Returns a list of files modified and deleted in the last n days"""
modified_files = []

Check failure on line 720 in src/codegen/git/repo_operator/repo_operator.py

View workflow job for this annotation

GitHub Actions / mypy

error: Need type annotation for "modified_files" (hint: "modified_files: list[<type>] = ...") [var-annotated]
deleted_files = []
allowed_extensions = [".py"]

Expand All @@ -729,9 +733,9 @@
if file in modified_files:
modified_files.remove(file)
else:
if file not in modified_files and file[-3:] in allowed_extensions:

Check failure on line 736 in src/codegen/git/repo_operator/repo_operator.py

View workflow job for this annotation

GitHub Actions / mypy

error: Value of type "str | PathLike[str]" is not indexable [index]
modified_files.append(file)
return modified_files, deleted_files

Check failure on line 738 in src/codegen/git/repo_operator/repo_operator.py

View workflow job for this annotation

GitHub Actions / mypy

error: Incompatible return value type (got "tuple[list[str | PathLike[str]], list[str | PathLike[str]]]", expected "tuple[list[str], list[str]]") [return-value]

@cached_property
def base_url(self) -> str | None:
Expand All @@ -752,9 +756,9 @@

def get_pr_data(self, pr_number: int) -> dict:
    """Fetch the data associated with a pull request.

    Args:
        pr_number: Number of the pull request to look up.

    Returns:
        The value produced by ``self.remote_git_repo.get_pr_data`` for
        ``pr_number``.
    """
    remote = self.remote_git_repo
    pr_data = remote.get_pr_data(pr_number)
    return pr_data

Check failure on line 759 in src/codegen/git/repo_operator/repo_operator.py

View workflow job for this annotation

GitHub Actions / mypy

error: "GitRepoClient" has no attribute "get_pr_data" [attr-defined]

def create_pr_comment(self, pr_number: int, body: str) -> IssueComment:

Check failure on line 761 in src/codegen/git/repo_operator/repo_operator.py

View workflow job for this annotation

GitHub Actions / mypy

error: Missing return statement [return]
"""Create a general comment on a pull request.

Args:
Expand Down
251 changes: 230 additions & 21 deletions src/codegen/git/schemas/repo_config.py
Original file line number Diff line number Diff line change
@@ -1,41 +1,246 @@
import os.path
from pathlib import Path
from typing import Optional

from pydantic import BaseModel

from codegen.configs.models.repository import RepositoryConfig
from codegen.git.schemas.enums import RepoVisibility
from codegen.shared.enums.programming_language import ProgrammingLanguage
from codegen.shared.logging.get_logger import get_logger

logger = get_logger(__name__)


class RepoConfig(BaseModel):
"""All the information about the repo needed to build a codebase"""
"""Configuration for a repository."""

name: str
full_name: str | None = None
visibility: RepoVisibility | None = None
full_name: Optional[str] = None
path: Optional[str] = None
language: Optional[str] = None
base_dir: str = "/tmp"
default_branch: Optional[str] = None
clone_url: Optional[str] = None
ssh_url: Optional[str] = None
html_url: Optional[str] = None
api_url: Optional[str] = None
token: Optional[str] = None
username: Optional[str] = None
password: Optional[str] = None
ssh_key: Optional[str] = None
ssh_key_path: Optional[str] = None
ssh_key_passphrase: Optional[str] = None
ssh_known_hosts: Optional[str] = None
ssh_known_hosts_path: Optional[str] = None
ssh_config: Optional[str] = None
ssh_config_path: Optional[str] = None
ssh_agent_socket: Optional[str] = None
ssh_agent_pid: Optional[str] = None
ssh_agent_auth_sock: Optional[str] = None
ssh_agent_auth_sock_path: Optional[str] = None
ssh_agent_auth_sock_dir: Optional[str] = None
ssh_agent_auth_sock_file: Optional[str] = None
ssh_agent_auth_sock_file_path: Optional[str] = None
ssh_agent_auth_sock_file_dir: Optional[str] = None
ssh_agent_auth_sock_file_name: Optional[str] = None
ssh_agent_auth_sock_file_ext: Optional[str] = None
ssh_agent_auth_sock_file_base: Optional[str] = None
ssh_agent_auth_sock_file_base_name: Optional[str] = None
ssh_agent_auth_sock_file_base_ext: Optional[str] = None
ssh_agent_auth_sock_file_base_dir: Optional[str] = None
ssh_agent_auth_sock_file_base_path: Optional[str] = None
ssh_agent_auth_sock_file_base_name_ext: Optional[str] = None
ssh_agent_auth_sock_file_base_name_dir: Optional[str] = None
ssh_agent_auth_sock_file_base_name_path: Optional[str] = None
ssh_agent_auth_sock_file_base_ext_dir: Optional[str] = None
ssh_agent_auth_sock_file_base_ext_path: Optional[str] = None
ssh_agent_auth_sock_file_base_dir_path: Optional[str] = None
ssh_agent_auth_sock_file_base_name_ext_dir: Optional[str] = None
ssh_agent_auth_sock_file_base_name_ext_path: Optional[str] = None
ssh_agent_auth_sock_file_base_name_dir_path: Optional[str] = None
ssh_agent_auth_sock_file_base_ext_dir_path: Optional[str] = None
ssh_agent_auth_sock_file_base_name_ext_dir_path: Optional[str] = None

# Codebase fields
base_dir: str = "/tmp" # parent directory of the git repo
language: ProgrammingLanguage = ProgrammingLanguage.PYTHON
respect_gitignore: bool = True
base_path: str | None = None # root directory of the codebase within the repo
subdirectories: list[str] | None = None
@property
def organization_name(self) -> Optional[str]:
    """Return the organization (owner) segment of ``full_name``.

    ``full_name`` is read as ``"<organization>/<repo>"``; everything before
    the first ``/`` is the organization.

    Returns:
        The organization name, or ``None`` when ``full_name`` is unset, has
        no ``/`` separator, or has nothing before the separator
        (e.g. ``"/repo"``).
    """
    if not self.full_name:
        return None
    owner, separator, _ = self.full_name.partition("/")
    # Normalize an empty owner to None so callers never receive "" as an
    # organization name.
    if not separator or not owner:
        return None
    return owner

# Additional sandbox settings
setup_commands: list[str] | None = None
@property
def repo_path(self) -> Path:
    """Return the on-disk location of the repository.

    The result is ``base_dir/<organization>/<name>`` when an organization
    name is available and ``base_dir/<name>`` otherwise.
    """
    root = Path(self.base_dir)
    if not self.organization_name:
        return root / self.name
    return root / self.organization_name / self.name

@classmethod
def from_envs(cls) -> "RepoConfig":
default_repo_config = RepositoryConfig()
return RepoConfig(
name=default_repo_config.name,
full_name=default_repo_config.full_name,
base_dir=os.path.dirname(default_repo_config.path),
language=ProgrammingLanguage(default_repo_config.language.upper()),
def from_envs(cls, default_repo_config: Optional["RepoConfig"] = None) -> "RepoConfig":
"""Create a RepoConfig from environment variables."""
name = os.environ.get("REPO_NAME", "")
full_name = os.environ.get("REPO_FULL_NAME", None)
path = os.environ.get("REPO_PATH", default_repo_config.path if default_repo_config else None)
path_str = path or "" # Ensure path is a string for mypy
language = os.environ.get("REPO_LANGUAGE", default_repo_config.language if default_repo_config else None)
language_str = language.upper() if language else "PYTHON" # Ensure language is a string for mypy
base_dir = os.environ.get("REPO_BASE_DIR", default_repo_config.base_dir if default_repo_config else "/tmp")
default_branch = os.environ.get("REPO_DEFAULT_BRANCH", default_repo_config.default_branch if default_repo_config else None)
clone_url = os.environ.get("REPO_CLONE_URL", default_repo_config.clone_url if default_repo_config else None)
ssh_url = os.environ.get("REPO_SSH_URL", default_repo_config.ssh_url if default_repo_config else None)
html_url = os.environ.get("REPO_HTML_URL", default_repo_config.html_url if default_repo_config else None)
api_url = os.environ.get("REPO_API_URL", default_repo_config.api_url if default_repo_config else None)
token = os.environ.get("REPO_TOKEN", default_repo_config.token if default_repo_config else None)
username = os.environ.get("REPO_USERNAME", default_repo_config.username if default_repo_config else None)
password = os.environ.get("REPO_PASSWORD", default_repo_config.password if default_repo_config else None)
ssh_key = os.environ.get("REPO_SSH_KEY", default_repo_config.ssh_key if default_repo_config else None)
ssh_key_path = os.environ.get("REPO_SSH_KEY_PATH", default_repo_config.ssh_key_path if default_repo_config else None)
ssh_key_passphrase = os.environ.get("REPO_SSH_KEY_PASSPHRASE", default_repo_config.ssh_key_passphrase if default_repo_config else None)
ssh_known_hosts = os.environ.get("REPO_SSH_KNOWN_HOSTS", default_repo_config.ssh_known_hosts if default_repo_config else None)
ssh_known_hosts_path = os.environ.get("REPO_SSH_KNOWN_HOSTS_PATH", default_repo_config.ssh_known_hosts_path if default_repo_config else None)
ssh_config = os.environ.get("REPO_SSH_CONFIG", default_repo_config.ssh_config if default_repo_config else None)
ssh_config_path = os.environ.get("REPO_SSH_CONFIG_PATH", default_repo_config.ssh_config_path if default_repo_config else None)
ssh_agent_socket = os.environ.get("REPO_SSH_AGENT_SOCKET", default_repo_config.ssh_agent_socket if default_repo_config else None)
ssh_agent_pid = os.environ.get("REPO_SSH_AGENT_PID", default_repo_config.ssh_agent_pid if default_repo_config else None)
ssh_agent_auth_sock = os.environ.get("REPO_SSH_AGENT_AUTH_SOCK", default_repo_config.ssh_agent_auth_sock if default_repo_config else None)
ssh_agent_auth_sock_path = os.environ.get(
"REPO_SSH_AGENT_AUTH_SOCK_PATH",
default_repo_config.ssh_agent_auth_sock_path if default_repo_config else None,
)
ssh_agent_auth_sock_dir = os.environ.get(
"REPO_SSH_AGENT_AUTH_SOCK_DIR",
default_repo_config.ssh_agent_auth_sock_dir if default_repo_config else None,
)
ssh_agent_auth_sock_file = os.environ.get(
"REPO_SSH_AGENT_AUTH_SOCK_FILE",
default_repo_config.ssh_agent_auth_sock_file if default_repo_config else None,
)
ssh_agent_auth_sock_file_path = os.environ.get(
"REPO_SSH_AGENT_AUTH_SOCK_FILE_PATH",
default_repo_config.ssh_agent_auth_sock_file_path if default_repo_config else None,
)
ssh_agent_auth_sock_file_dir = os.environ.get(
"REPO_SSH_AGENT_AUTH_SOCK_FILE_DIR",
default_repo_config.ssh_agent_auth_sock_file_dir if default_repo_config else None,
)
ssh_agent_auth_sock_file_name = os.environ.get(
"REPO_SSH_AGENT_AUTH_SOCK_FILE_NAME",
default_repo_config.ssh_agent_auth_sock_file_name if default_repo_config else None,
)
ssh_agent_auth_sock_file_ext = os.environ.get(
"REPO_SSH_AGENT_AUTH_SOCK_FILE_EXT",
default_repo_config.ssh_agent_auth_sock_file_ext if default_repo_config else None,
)
ssh_agent_auth_sock_file_base = os.environ.get(
"REPO_SSH_AGENT_AUTH_SOCK_FILE_BASE",
default_repo_config.ssh_agent_auth_sock_file_base if default_repo_config else None,
)
ssh_agent_auth_sock_file_base_name = os.environ.get(
"REPO_SSH_AGENT_AUTH_SOCK_FILE_BASE_NAME",
default_repo_config.ssh_agent_auth_sock_file_base_name if default_repo_config else None,
)
ssh_agent_auth_sock_file_base_ext = os.environ.get(
"REPO_SSH_AGENT_AUTH_SOCK_FILE_BASE_EXT",
default_repo_config.ssh_agent_auth_sock_file_base_ext if default_repo_config else None,
)
ssh_agent_auth_sock_file_base_dir = os.environ.get(
"REPO_SSH_AGENT_AUTH_SOCK_FILE_BASE_DIR",
default_repo_config.ssh_agent_auth_sock_file_base_dir if default_repo_config else None,
)
ssh_agent_auth_sock_file_base_path = os.environ.get(
"REPO_SSH_AGENT_AUTH_SOCK_FILE_BASE_PATH",
default_repo_config.ssh_agent_auth_sock_file_base_path if default_repo_config else None,
)
ssh_agent_auth_sock_file_base_name_ext = os.environ.get(
"REPO_SSH_AGENT_AUTH_SOCK_FILE_BASE_NAME_EXT",
default_repo_config.ssh_agent_auth_sock_file_base_name_ext if default_repo_config else None,
)
ssh_agent_auth_sock_file_base_name_dir = os.environ.get(
"REPO_SSH_AGENT_AUTH_SOCK_FILE_BASE_NAME_DIR",
default_repo_config.ssh_agent_auth_sock_file_base_name_dir if default_repo_config else None,
)
ssh_agent_auth_sock_file_base_name_path = os.environ.get(
"REPO_SSH_AGENT_AUTH_SOCK_FILE_BASE_NAME_PATH",
default_repo_config.ssh_agent_auth_sock_file_base_name_path if default_repo_config else None,
)
ssh_agent_auth_sock_file_base_ext_dir = os.environ.get(
"REPO_SSH_AGENT_AUTH_SOCK_FILE_BASE_EXT_DIR",
default_repo_config.ssh_agent_auth_sock_file_base_ext_dir if default_repo_config else None,
)
ssh_agent_auth_sock_file_base_ext_path = os.environ.get(
"REPO_SSH_AGENT_AUTH_SOCK_FILE_BASE_EXT_PATH",
default_repo_config.ssh_agent_auth_sock_file_base_ext_path if default_repo_config else None,
)
ssh_agent_auth_sock_file_base_dir_path = os.environ.get(
"REPO_SSH_AGENT_AUTH_SOCK_FILE_BASE_DIR_PATH",
default_repo_config.ssh_agent_auth_sock_file_base_dir_path if default_repo_config else None,
)
ssh_agent_auth_sock_file_base_name_ext_dir = os.environ.get(
"REPO_SSH_AGENT_AUTH_SOCK_FILE_BASE_NAME_EXT_DIR",
default_repo_config.ssh_agent_auth_sock_file_base_name_ext_dir if default_repo_config else None,
)
ssh_agent_auth_sock_file_base_name_ext_path = os.environ.get(
"REPO_SSH_AGENT_AUTH_SOCK_FILE_BASE_NAME_EXT_PATH",
default_repo_config.ssh_agent_auth_sock_file_base_name_ext_path if default_repo_config else None,
)
ssh_agent_auth_sock_file_base_name_dir_path = os.environ.get(
"REPO_SSH_AGENT_AUTH_SOCK_FILE_BASE_NAME_DIR_PATH",
default_repo_config.ssh_agent_auth_sock_file_base_name_dir_path if default_repo_config else None,
)
ssh_agent_auth_sock_file_base_ext_dir_path = os.environ.get(
"REPO_SSH_AGENT_AUTH_SOCK_FILE_BASE_EXT_DIR_PATH",
default_repo_config.ssh_agent_auth_sock_file_base_ext_dir_path if default_repo_config else None,
)
ssh_agent_auth_sock_file_base_name_ext_dir_path = os.environ.get(
"REPO_SSH_AGENT_AUTH_SOCK_FILE_BASE_NAME_EXT_DIR_PATH",
default_repo_config.ssh_agent_auth_sock_file_base_name_ext_dir_path if default_repo_config else None,
)

return cls(
name=name,
full_name=full_name,
path=path_str,
language=language_str,
base_dir=base_dir,
default_branch=default_branch,
clone_url=clone_url,
ssh_url=ssh_url,
html_url=html_url,
api_url=api_url,
token=token,
username=username,
password=password,
ssh_key=ssh_key,
ssh_key_path=ssh_key_path,
ssh_key_passphrase=ssh_key_passphrase,
ssh_known_hosts=ssh_known_hosts,
ssh_known_hosts_path=ssh_known_hosts_path,
ssh_config=ssh_config,
ssh_config_path=ssh_config_path,
ssh_agent_socket=ssh_agent_socket,
ssh_agent_pid=ssh_agent_pid,
ssh_agent_auth_sock=ssh_agent_auth_sock,
ssh_agent_auth_sock_path=ssh_agent_auth_sock_path,
ssh_agent_auth_sock_dir=ssh_agent_auth_sock_dir,
ssh_agent_auth_sock_file=ssh_agent_auth_sock_file,
ssh_agent_auth_sock_file_path=ssh_agent_auth_sock_file_path,
ssh_agent_auth_sock_file_dir=ssh_agent_auth_sock_file_dir,
ssh_agent_auth_sock_file_name=ssh_agent_auth_sock_file_name,
ssh_agent_auth_sock_file_ext=ssh_agent_auth_sock_file_ext,
ssh_agent_auth_sock_file_base=ssh_agent_auth_sock_file_base,
ssh_agent_auth_sock_file_base_name=ssh_agent_auth_sock_file_base_name,
ssh_agent_auth_sock_file_base_ext=ssh_agent_auth_sock_file_base_ext,
ssh_agent_auth_sock_file_base_dir=ssh_agent_auth_sock_file_base_dir,
ssh_agent_auth_sock_file_base_path=ssh_agent_auth_sock_file_base_path,
ssh_agent_auth_sock_file_base_name_ext=ssh_agent_auth_sock_file_base_name_ext,
ssh_agent_auth_sock_file_base_name_dir=ssh_agent_auth_sock_file_base_name_dir,
ssh_agent_auth_sock_file_base_name_path=ssh_agent_auth_sock_file_base_name_path,
ssh_agent_auth_sock_file_base_ext_dir=ssh_agent_auth_sock_file_base_ext_dir,
ssh_agent_auth_sock_file_base_ext_path=ssh_agent_auth_sock_file_base_ext_path,
ssh_agent_auth_sock_file_base_dir_path=ssh_agent_auth_sock_file_base_dir_path,
ssh_agent_auth_sock_file_base_name_ext_dir=ssh_agent_auth_sock_file_base_name_ext_dir,
ssh_agent_auth_sock_file_base_name_ext_path=ssh_agent_auth_sock_file_base_name_ext_path,
ssh_agent_auth_sock_file_base_name_dir_path=ssh_agent_auth_sock_file_base_name_dir_path,
ssh_agent_auth_sock_file_base_ext_dir_path=ssh_agent_auth_sock_file_base_ext_dir_path,
ssh_agent_auth_sock_file_base_name_ext_dir_path=ssh_agent_auth_sock_file_base_name_ext_dir_path,
)

@classmethod
Expand All @@ -44,11 +249,15 @@
base_dir = os.path.dirname(repo_path)
return cls(name=name, base_dir=base_dir, full_name=full_name)

@property

Check failure on line 252 in src/codegen/git/schemas/repo_config.py

View workflow job for this annotation

GitHub Actions / mypy

error: Name "repo_path" already defined on line 69 [no-redef]
def repo_path(self) -> Path:
    """Return the on-disk location of the repository.

    Returns:
        ``base_dir/<organization>/<name>`` when an organization name is
        available, otherwise ``base_dir/<name>``.
    """
    root = Path(self.base_dir)
    # Nest the repo under its organization directory, but keep it beneath the
    # configured base_dir rather than a hard-coded "/tmp" so both branches
    # honor the same root.
    if self.organization_name:
        return root / self.organization_name / self.name
    # No organization available: use the flat base_dir/<name> layout.
    return root / self.name

@property

Check failure on line 260 in src/codegen/git/schemas/repo_config.py

View workflow job for this annotation

GitHub Actions / mypy

error: Name "organization_name" already defined on line 62 [no-redef]
def organization_name(self) -> str | None:
if self.full_name is not None:
return self.full_name.split("/")[0]
Expand Down
Loading
Loading