Skip to content

Commit 979f85c

Browse files
Fix language detection bug in monorepos with mixed languages
1 parent 9a829f0 commit 979f85c

File tree

4 files changed

+99
-7
lines changed

4 files changed

+99
-7
lines changed

src/codegen/git/utils/language.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,8 +132,11 @@ def _determine_language_by_git_file_count(folder_path: str) -> ProgrammingLangua
132132
repo_config = RepoConfig.from_repo_path(repo_path=git_root)
133133
repo_operator = RepoOperator(repo_config=repo_config)
134134

135+
# Use the specified subfolder path for language detection if provided
136+
subdirs = [base_path] if base_path else None
137+
135138
# Walk through the directory
136-
for rel_path, _ in repo_operator.iter_files(subdirs=[base_path] if base_path else None, ignore_list=GLOBAL_FILE_IGNORE_LIST):
139+
for rel_path, _ in repo_operator.iter_files(subdirs=subdirs, ignore_list=GLOBAL_FILE_IGNORE_LIST):
137140
# Convert to Path object
138141
file_path = Path(git_root) / Path(rel_path)
139142

src/codegen/sdk/codebase/config.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,23 +46,29 @@ def from_path(cls, path: str, programming_language: ProgrammingLanguage | None =
4646
repo_path = os.path.abspath(path)
4747
git_root, base_path = split_git_path(repo_path)
4848
subdirectories = [base_path] if base_path else None
49-
programming_language = programming_language or determine_project_language(repo_path)
49+
50+
# Only determine project language if not explicitly provided
51+
detected_language = programming_language or determine_project_language(repo_path)
52+
5053
repo_config = RepoConfig.from_repo_path(repo_path=git_root)
51-
repo_config.language = programming_language
54+
repo_config.language = detected_language
5255
repo_config.subdirectories = subdirectories
5356
# Create main project
5457
return cls(
5558
repo_operator=RepoOperator(repo_config=repo_config),
56-
programming_language=programming_language,
59+
programming_language=detected_language,
5760
base_path=base_path,
5861
subdirectories=subdirectories,
5962
)
6063

6164
@classmethod
6265
def from_repo_operator(cls, repo_operator: RepoOperator, programming_language: ProgrammingLanguage | None = None, base_path: str | None = None) -> Self:
66+
# Only determine project language if not explicitly provided
67+
detected_language = programming_language or determine_project_language(repo_operator.repo_path)
68+
6369
return cls(
6470
repo_operator=repo_operator,
65-
programming_language=programming_language or determine_project_language(repo_operator.repo_path),
71+
programming_language=detected_language,
6672
base_path=base_path,
6773
subdirectories=[base_path] if base_path else None,
6874
)

src/codegen/sdk/core/codebase.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,7 @@ def __init__(
200200
if repo_path is not None:
201201
main_project = ProjectConfig.from_path(
202202
repo_path,
203-
programming_language=ProgrammingLanguage(language.upper()) if language else None,
203+
programming_language=ProgrammingLanguage(language.upper()) if isinstance(language, str) and language else language,
204204
)
205205
projects = [main_project]
206206
else:
@@ -1392,7 +1392,7 @@ def from_repo(
13921392
logger.info("Initializing Codebase...")
13931393
project = ProjectConfig.from_repo_operator(
13941394
repo_operator=repo_operator,
1395-
programming_language=ProgrammingLanguage(language.upper()) if language else None,
1395+
programming_language=ProgrammingLanguage(language.upper()) if isinstance(language, str) and language else language,
13961396
)
13971397
codebase = Codebase(projects=[project], config=config, secrets=secrets)
13981398
logger.info("Codebase initialization complete")
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
import os
2+
import tempfile
3+
from pathlib import Path
4+
5+
import pytest
6+
7+
from codegen.sdk.codebase.config import ProjectConfig
8+
from codegen.shared.enums.programming_language import ProgrammingLanguage
9+
10+
11+
def test_explicit_language_respected():
    """Check that an explicitly passed language wins over auto-detection.

    Builds a throwaway git repository containing five ``.ts`` files under
    ``ts/`` and two ``.py`` files under ``py/``, then calls
    ``ProjectConfig.from_path`` twice with an explicit
    ``programming_language`` and asserts that the resulting config reports
    exactly the language that was passed in.

    Raises:
        subprocess.CalledProcessError: if any git setup command fails, so a
            broken fixture is reported instead of silently producing a
            misleading assertion failure later on.
    """
    # Local import keeps this test self-contained; the module-level imports
    # only provide `os`, `tempfile` and `Path`.
    import subprocess

    with tempfile.TemporaryDirectory() as tmp_dir:
        root = Path(tmp_dir)
        ts_dir = root / "ts"
        py_dir = root / "py"
        ts_dir.mkdir()
        py_dir.mkdir()

        # More TypeScript files than Python files, so that file-count based
        # detection (if it ran) would presumably pick TypeScript for the root.
        for i in range(5):
            (ts_dir / f"file{i}.ts").write_text(f"// TypeScript file {i}", encoding="utf-8")
        for i in range(2):
            (py_dir / f"file{i}.py").write_text(f"# Python file {i}", encoding="utf-8")

        # Initialize a git repo with one commit. Argument lists (no shell)
        # avoid quoting problems with the temp path, and check=True surfaces
        # a failing git command immediately.
        for git_args in (
            ["init"],
            ["config", "user.email", "test@example.com"],
            ["config", "user.name", "Test User"],
            ["add", "."],
            ["commit", "-m", "Initial commit"],
        ):
            subprocess.run(["git", *git_args], cwd=tmp_dir, check=True)

        # Explicit Python on the repository root.
        project_config = ProjectConfig.from_path(
            path=str(tmp_dir),
            programming_language=ProgrammingLanguage.PYTHON,
        )
        assert project_config.programming_language == ProgrammingLanguage.PYTHON

        # Explicit TypeScript on the directory that only contains Python files.
        project_config = ProjectConfig.from_path(
            path=str(py_dir),
            programming_language=ProgrammingLanguage.TYPESCRIPT,
        )
        assert project_config.programming_language == ProgrammingLanguage.TYPESCRIPT
50+
51+
52+
def test_subfolder_language_detection():
    """Check that language detection is scoped to the path that was passed in.

    Builds a throwaway git repository with five ``.ts`` files in the root and
    three ``.py`` files in a ``python_only/`` subfolder. Without an explicit
    language, ``ProjectConfig.from_path`` is expected to report TypeScript
    for the repository root and Python for the subfolder.

    Raises:
        subprocess.CalledProcessError: if any git setup command fails, so a
            broken fixture is reported instead of silently producing a
            misleading assertion failure later on.
    """
    # Local import keeps this test self-contained; the module-level imports
    # only provide `os`, `tempfile` and `Path`.
    import subprocess

    with tempfile.TemporaryDirectory() as tmp_dir:
        ts_dir = Path(tmp_dir)
        py_dir = ts_dir / "python_only"
        py_dir.mkdir()

        # TypeScript files live directly in the repository root.
        for i in range(5):
            (ts_dir / f"file{i}.ts").write_text(f"// TypeScript file {i}", encoding="utf-8")

        # Python files live only in the subfolder.
        for i in range(3):
            (py_dir / f"file{i}.py").write_text(f"# Python file {i}", encoding="utf-8")

        # Initialize a git repo with one commit. Argument lists (no shell)
        # avoid quoting problems with the temp path, and check=True surfaces
        # a failing git command immediately.
        for git_args in (
            ["init"],
            ["config", "user.email", "test@example.com"],
            ["config", "user.name", "Test User"],
            ["add", "."],
            ["commit", "-m", "Initial commit"],
        ):
            subprocess.run(["git", *git_args], cwd=tmp_dir, check=True)

        # Root path: TypeScript files outnumber Python files.
        project_config = ProjectConfig.from_path(path=str(tmp_dir))
        assert project_config.programming_language == ProgrammingLanguage.TYPESCRIPT

        # Subfolder path: only Python files are in scope.
        project_config = ProjectConfig.from_path(path=str(py_dir))
        assert project_config.programming_language == ProgrammingLanguage.PYTHON

0 commit comments

Comments
 (0)