InternScience · ChenZiHong-Gavin · Nov 19, 2025 · Nov 19, 2025
diff --git a/tests/e2e_tests/conftest.py b/tests/e2e_tests/conftest.py
@@ -0,0 +1,63 @@
+import json
+import os
+import subprocess
+from pathlib import Path
+
+
+def run_generate_test(tmp_path: Path, config_name: str):
+    """
+    Run the generate test with the given configuration file and temporary path.
+
+    Args:
+        tmp_path: pytest temporary path
+        config_name: configuration file name (e.g. "atomic_config.yaml")
+
+    Returns:
+        tuple: (run_folder, json_files[0])
+    """
+    repo_root = Path(__file__).resolve().parents[2]
+    os.chdir(repo_root)
+
+    config_path = repo_root / "graphgen" / "configs" / config_name
+    output_dir = tmp_path / "output"
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    result = subprocess.run(
+        [
+            "python",
+            "-m",
+            "graphgen.generate",
+            "--config_file",
+            str(config_path),
+            "--output_dir",
+            str(output_dir),
+        ],
+        capture_output=True,
+        text=True,
+        check=False,
+    )
+    assert result.returncode == 0, f"Script failed with error: {result.stderr}"
+
+    data_root = output_dir / "data" / "graphgen"
+    assert data_root.exists(), f"{data_root} does not exist"
+    run_folders = sorted(data_root.iterdir(), key=lambda p: p.name, reverse=True)
+    assert run_folders, f"No run folders found in {data_root}"
+    run_folder = run_folders[0]
+
+    config_saved = run_folder / "config.yaml"
+    assert config_saved.exists(), f"{config_saved} not found"
+
+    json_files = list(run_folder.glob("*.json"))
+    assert json_files, f"No JSON output found in {run_folder}"
+
+    log_files = list(run_folder.glob("*.log"))
+    assert log_files, "No log file generated"
+
+    with open(json_files[0], "r", encoding="utf-8") as f:
+        data = json.load(f)
+    assert (
+        isinstance(data, list) and len(data) > 0
+    ), "JSON output is empty or not a list"
+
+    return run_folder, json_files[0]
+
diff --git a/tests/e2e_tests/test_generate_aggregated.py b/tests/e2e_tests/test_generate_aggregated.py
@@ -1,50 +1,7 @@
-import json
-import os
-import subprocess
 from pathlib import Path
 
+from .conftest import run_generate_test
 
-def test_generate_aggregated(tmp_path: Path):
-    repo_root = Path(__file__).resolve().parents[2]
-    os.chdir(repo_root)
-
-    config_path = repo_root / "graphgen" / "configs" / "aggregated_config.yaml"
-    output_dir = tmp_path / "output"
-    output_dir.mkdir(parents=True, exist_ok=True)
-
-    result = subprocess.run(
-        [
-            "python",
-            "-m",
-            "graphgen.generate",
-            "--config_file",
-            str(config_path),
-            "--output_dir",
-            str(output_dir),
-        ],
-        capture_output=True,
-        text=True,
-        check=False,
-    )
-    assert result.returncode == 0, f"Script failed with error: {result.stderr}"
-
-    data_root = output_dir / "data" / "graphgen"
-    assert data_root.exists(), f"{data_root} does not exist"
-    run_folders = sorted(data_root.iterdir(), key=lambda p: p.name, reverse=True)
-    assert run_folders, f"No run folders found in {data_root}"
-    run_folder = run_folders[0]
 
-    config_saved = run_folder / "config.yaml"
-    assert config_saved.exists(), f"{config_saved} not found"
-
-    json_files = list(run_folder.glob("*.json"))
-    assert json_files, f"No JSON output found in {run_folder}"
-
-    log_files = list(run_folder.glob("*.log"))
-    assert log_files, "No log file generated"
-
-    with open(json_files[0], "r", encoding="utf-8") as f:
-        data = json.load(f)
-    assert (
-        isinstance(data, list) and len(data) > 0
-    ), "JSON output is empty or not a list"
+def test_generate_aggregated(tmp_path: Path):
+    run_generate_test(tmp_path, "aggregated_config.yaml")
diff --git a/tests/e2e_tests/test_generate_atomic.py b/tests/e2e_tests/test_generate_atomic.py
@@ -1,50 +1,7 @@
-import json
-import os
-import subprocess
 from pathlib import Path
 
+from .conftest import run_generate_test
 
-def test_generate_atomic(tmp_path: Path):
-    repo_root = Path(__file__).resolve().parents[2]
-    os.chdir(repo_root)
-
-    config_path = repo_root / "graphgen" / "configs" / "atomic_config.yaml"
-    output_dir = tmp_path / "output"
-    output_dir.mkdir(parents=True, exist_ok=True)
-
-    result = subprocess.run(
-        [
-            "python",
-            "-m",
-            "graphgen.generate",
-            "--config_file",
-            str(config_path),
-            "--output_dir",
-            str(output_dir),
-        ],
-        capture_output=True,
-        text=True,
-        check=False,
-    )
-    assert result.returncode == 0, f"Script failed with error: {result.stderr}"
-
-    data_root = output_dir / "data" / "graphgen"
-    assert data_root.exists(), f"{data_root} does not exist"
-    run_folders = sorted(data_root.iterdir(), key=lambda p: p.name, reverse=True)
-    assert run_folders, f"No run folders found in {data_root}"
-    run_folder = run_folders[0]
 
-    config_saved = run_folder / "config.yaml"
-    assert config_saved.exists(), f"{config_saved} not found"
-
-    json_files = list(run_folder.glob("*.json"))
-    assert json_files, f"No JSON output found in {run_folder}"
-
-    log_files = list(run_folder.glob("*.log"))
-    assert log_files, "No log file generated"
-
-    with open(json_files[0], "r", encoding="utf-8") as f:
-        data = json.load(f)
-    assert (
-        isinstance(data, list) and len(data) > 0
-    ), "JSON output is empty or not a list"
+def test_generate_atomic(tmp_path: Path):
+    run_generate_test(tmp_path, "atomic_config.yaml")
diff --git a/tests/e2e_tests/test_generate_cot.py b/tests/e2e_tests/test_generate_cot.py
@@ -1,50 +1,7 @@
-import json
-import os
-import subprocess
 from pathlib import Path
 
+from .conftest import run_generate_test
 
-def test_generate_aggregated(tmp_path: Path):
-    repo_root = Path(__file__).resolve().parents[2]
-    os.chdir(repo_root)
 
-    config_path = repo_root / "graphgen" / "configs" / "cot_config.yaml"
-    output_dir = tmp_path / "output"
-    output_dir.mkdir(parents=True, exist_ok=True)
-
-    result = subprocess.run(
-        [
-            "python",
-            "-m",
-            "graphgen.generate",
-            "--config_file",
-            str(config_path),
-            "--output_dir",
-            str(output_dir),
-        ],
-        capture_output=True,
-        text=True,
-        check=False,
-    )
-    assert result.returncode == 0, f"Script failed with error: {result.stderr}"
-
-    data_root = output_dir / "data" / "graphgen"
-    assert data_root.exists(), f"{data_root} does not exist"
-    run_folders = sorted(data_root.iterdir(), key=lambda p: p.name, reverse=True)
-    assert run_folders, f"No run folders found in {data_root}"
-    run_folder = run_folders[0]
-
-    config_saved = run_folder / "config.yaml"
-    assert config_saved.exists(), f"{config_saved} not found"
-
-    json_files = list(run_folder.glob("*.json"))
-    assert json_files, f"No JSON output found in {run_folder}"
-
-    log_files = list(run_folder.glob("*.log"))
-    assert log_files, "No log file generated"
-
-    with open(json_files[0], "r", encoding="utf-8") as f:
-        data = json.load(f)
-    assert (
-        isinstance(data, list) and len(data) > 0
-    ), "JSON output is empty or not a list"
+def test_generate_cot(tmp_path: Path):
+    run_generate_test(tmp_path, "cot_config.yaml")
diff --git a/tests/e2e_tests/test_generate_multi_hop.py b/tests/e2e_tests/test_generate_multi_hop.py
@@ -1,50 +1,7 @@
-import json
-import os
-import subprocess
 from pathlib import Path
 
+from .conftest import run_generate_test
 
-def test_generate_aggregated(tmp_path: Path):
-    repo_root = Path(__file__).resolve().parents[2]
-    os.chdir(repo_root)
 
-    config_path = repo_root / "graphgen" / "configs" / "multi_hop_config.yaml"
-    output_dir = tmp_path / "output"
-    output_dir.mkdir(parents=True, exist_ok=True)
-
-    result = subprocess.run(
-        [
-            "python",
-            "-m",
-            "graphgen.generate",
-            "--config_file",
-            str(config_path),
-            "--output_dir",
-            str(output_dir),
-        ],
-        capture_output=True,
-        text=True,
-        check=False,
-    )
-    assert result.returncode == 0, f"Script failed with error: {result.stderr}"
-
-    data_root = output_dir / "data" / "graphgen"
-    assert data_root.exists() and data_root.is_dir(), f"{data_root} does not exist or is not a directory"
-    run_folders = sorted(list(data_root.iterdir()), key=lambda p: p.name, reverse=True)
-    assert run_folders, f"No run folders found in {data_root}"
-    run_folder = run_folders[0]
-
-    config_saved = run_folder / "config.yaml"
-    assert config_saved.exists(), f"{config_saved} not found"
-
-    json_files = list(run_folder.glob("*.json"))
-    assert json_files, f"No JSON output found in {run_folder}"
-
-    log_files = list(run_folder.glob("*.log"))
-    assert log_files, "No log file generated"
-
-    with open(json_files[0], "r", encoding="utf-8") as f:
-        data = json.load(f)
-    assert (
-        isinstance(data, list) and len(data) > 0
-    ), "JSON output is empty or not a list"
+def test_generate_multi_hop(tmp_path: Path):
+    run_generate_test(tmp_path, "multi_hop_config.yaml")
diff --git a/tests/e2e_tests/test_generate_vqa.py b/tests/e2e_tests/test_generate_vqa.py
@@ -1,50 +1,7 @@
-import json
-import os
-import subprocess
 from pathlib import Path
 
+from .conftest import run_generate_test
 
-def test_generate_vqa(tmp_path: Path):
-    repo_root = Path(__file__).resolve().parents[2]
-    os.chdir(repo_root)
-
-    config_path = repo_root / "graphgen" / "configs" / "vqa_config.yaml"
-    output_dir = tmp_path / "output"
-    output_dir.mkdir(parents=True, exist_ok=True)
-
-    result = subprocess.run(
-        [
-            "python",
-            "-m",
-            "graphgen.generate",
-            "--config_file",
-            str(config_path),
-            "--output_dir",
-            str(output_dir),
-        ],
-        capture_output=True,
-        text=True,
-        check=False,
-    )
-    assert result.returncode == 0, f"Script failed with error: {result.stderr}"
-
-    data_root = output_dir / "data" / "graphgen"
-    assert data_root.exists(), f"{data_root} does not exist"
-    run_folders = sorted(data_root.iterdir(), key=lambda p: p.name, reverse=True)
-    assert run_folders, f"No run folders found in {data_root}"
-    run_folder = run_folders[0]
 
-    config_saved = run_folder / "config.yaml"
-    assert config_saved.exists(), f"{config_saved} not found"
-
-    json_files = list(run_folder.glob("*.json"))
-    assert json_files, f"No JSON output found in {run_folder}"
-
-    log_files = list(run_folder.glob("*.log"))
-    assert log_files, "No log file generated"
-
-    with open(json_files[0], "r", encoding="utf-8") as f:
-        data = json.load(f)
-    assert (
-        isinstance(data, list) and len(data) > 0
-    ), "JSON output is empty or not a list"
+def test_generate_vqa(tmp_path: Path):
+    run_generate_test(tmp_path, "vqa_config.yaml")