Skip to content

Commit 42693df

Browse files
wip: refactor evaluator structure
1 parent 8ef5f47 commit 42693df

File tree

10 files changed

+220
-40
lines changed

10 files changed

+220
-40
lines changed

examples/evaluate/evaluate.sh

Lines changed: 0 additions & 3 deletions
This file was deleted.

examples/evaluate/evaluate_kg/evaluate_kg.sh (new file; name inferred from the config path it invokes)

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# Run the knowledge-graph quality evaluation pipeline.
python3 -m graphgen.run --config_file examples/evaluate/evaluate_kg/evaluate_kg_config.yaml
examples/evaluate/evaluate_kg/evaluate_kg_config.yaml (new file; path taken from the script above)

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
global_params:
  working_dir: cache
  graph_backend: kuzu # graph database backend, support: kuzu, networkx
  kv_backend: rocksdb # key-value store backend, support: rocksdb, json_kv

nodes:
  - id: read
    op_name: read
    type: source
    dependencies: []
    params:
      input_path:
        - examples/input_examples/extract_demo.txt

  - id: chunk
    op_name: chunk
    type: map_batch
    dependencies:
      - read
    execution_params:
      replicas: 4
    params:
      chunk_size: 20480 # larger chunk size for better context
      chunk_overlap: 2000

  - id: build_kg
    op_name: build_kg
    type: map_batch
    dependencies:
      # fix: was "chunk_documents", which is not a node id in this pipeline
      # (that id belongs to the QA config); the chunking node above is "chunk"
      - chunk
    execution_params:
      replicas: 1
      batch_size: 128

  - id: evaluate
    op_name: evaluate
    type: aggregate
    dependencies:
      - build_kg
    params:
      metrics: # NOTE(review): left empty in this WIP commit — confirm intended metric list
examples/evaluate/evaluate_qa/evaluate_qa.sh (new file; name inferred from the config path it invokes)

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# Run the QA-quality evaluation pipeline.
python3 -m graphgen.run --config_file examples/evaluate/evaluate_qa/evaluate_qa_config.yaml
examples/evaluate/evaluate_qa/evaluate_qa_config.yaml (new file; path taken from the script above)

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
global_params:
  working_dir: cache
  graph_backend: kuzu # graph database backend: kuzu or networkx
  kv_backend: rocksdb # key-value store backend: rocksdb or json_kv

nodes:
  # Each node id is unique within the pipeline and is what other nodes
  # reference in their `dependencies` lists.
  - id: read_files
    op_name: read
    type: source
    dependencies: []
    params:
      input_path:
        - examples/input_examples/jsonl_demo.jsonl # json, jsonl, txt, pdf supported; see examples/input_examples

  - id: chunk_documents
    op_name: chunk
    type: map_batch
    dependencies: [read_files]
    execution_params:
      replicas: 4
    params:
      chunk_size: 1024 # chunk size for text splitting
      chunk_overlap: 100 # chunk overlap for text splitting

  - id: build_kg
    op_name: build_kg
    type: map_batch
    dependencies: [chunk_documents]
    execution_params:
      replicas: 1
      batch_size: 128

  - id: quiz
    op_name: quiz
    type: aggregate
    dependencies: [build_kg]
    execution_params:
      replicas: 1
      batch_size: 128
    params:
      quiz_samples: 2 # number of quiz samples to generate
      concurrency_limit: 200

  - id: judge
    op_name: judge
    type: map_batch
    dependencies: [quiz]
    execution_params:
      replicas: 1
      batch_size: 128

  - id: partition
    op_name: partition
    type: aggregate
    dependencies: [judge]
    params:
      method: ece # custom partition method based on comprehension loss
      method_params:
        max_units_per_community: 20 # max nodes and edges per community
        min_units_per_community: 5 # min nodes and edges per community
        max_tokens_per_community: 10240 # max tokens per community
        unit_sampling: max_loss # unit sampling strategy: random, max_loss, min_loss

  - id: generate
    op_name: generate
    type: map_batch
    dependencies: [partition]
    execution_params:
      replicas: 1
      batch_size: 128
    params:
      method: aggregated # atomic, aggregated, multi_hop, cot, vqa
      data_format: ChatML # Alpaca, Sharegpt, ChatML

  - id: evaluate
    op_name: evaluate
    type: map_batch
    dependencies: [generate]
    execution_params:
      replicas: 1
      batch_size: 128
    params:
      metrics: # NOTE(review): left empty in this WIP commit — confirm intended metric list

examples/evaluate_kg/evaluate_kg.sh

Lines changed: 0 additions & 5 deletions
This file was deleted.

graphgen/operators/evaluate_kg/evaluate_kg.py renamed to graphgen/operators/evaluate/evaluate_kg.py

Lines changed: 63 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import argparse
22
import json
33
from pathlib import Path
4+
45
from dotenv import load_dotenv
56

67
from graphgen.models import KGQualityEvaluator
@@ -37,14 +38,22 @@ def _print_accuracy_summary(acc):
3738
precision = e.get("precision", {})
3839

3940
print(" Entity Extraction Quality:")
40-
print(f" Overall Score: {overall.get('mean', 0):.3f} (mean), "
41-
f"{overall.get('median', 0):.3f} (median)")
42-
print(f" Accuracy: {accuracy.get('mean', 0):.3f} (mean), "
43-
f"{accuracy.get('median', 0):.3f} (median)")
44-
print(f" Completeness: {completeness.get('mean', 0):.3f} (mean), "
45-
f"{completeness.get('median', 0):.3f} (median)")
46-
print(f" Precision: {precision.get('mean', 0):.3f} (mean), "
47-
f"{precision.get('median', 0):.3f} (median)")
41+
print(
42+
f" Overall Score: {overall.get('mean', 0):.3f} (mean), "
43+
f"{overall.get('median', 0):.3f} (median)"
44+
)
45+
print(
46+
f" Accuracy: {accuracy.get('mean', 0):.3f} (mean), "
47+
f"{accuracy.get('median', 0):.3f} (median)"
48+
)
49+
print(
50+
f" Completeness: {completeness.get('mean', 0):.3f} (mean), "
51+
f"{completeness.get('median', 0):.3f} (median)"
52+
)
53+
print(
54+
f" Precision: {precision.get('mean', 0):.3f} (mean), "
55+
f"{precision.get('median', 0):.3f} (median)"
56+
)
4857
print(f" Total Chunks Evaluated: {e.get('total_chunks', 0)}")
4958

5059
if "relation_accuracy" in acc:
@@ -55,14 +64,22 @@ def _print_accuracy_summary(acc):
5564
precision = r.get("precision", {})
5665

5766
print(" Relation Extraction Quality:")
58-
print(f" Overall Score: {overall.get('mean', 0):.3f} (mean), "
59-
f"{overall.get('median', 0):.3f} (median)")
60-
print(f" Accuracy: {accuracy.get('mean', 0):.3f} (mean), "
61-
f"{accuracy.get('median', 0):.3f} (median)")
62-
print(f" Completeness: {completeness.get('mean', 0):.3f} (mean), "
63-
f"{completeness.get('median', 0):.3f} (median)")
64-
print(f" Precision: {precision.get('mean', 0):.3f} (mean), "
65-
f"{precision.get('median', 0):.3f} (median)")
67+
print(
68+
f" Overall Score: {overall.get('mean', 0):.3f} (mean), "
69+
f"{overall.get('median', 0):.3f} (median)"
70+
)
71+
print(
72+
f" Accuracy: {accuracy.get('mean', 0):.3f} (mean), "
73+
f"{accuracy.get('median', 0):.3f} (median)"
74+
)
75+
print(
76+
f" Completeness: {completeness.get('mean', 0):.3f} (mean), "
77+
f"{completeness.get('median', 0):.3f} (median)"
78+
)
79+
print(
80+
f" Precision: {precision.get('mean', 0):.3f} (mean), "
81+
f"{precision.get('median', 0):.3f} (median)"
82+
)
6683
print(f" Total Chunks Evaluated: {r.get('total_chunks', 0)}")
6784
else:
6885
print(f"\n[Accuracy] Error: {acc['error']}")
@@ -73,19 +90,25 @@ def _print_consistency_summary(cons):
7390
if "error" not in cons:
7491
print("\n[Consistency]")
7592
print(f" Conflict Rate: {cons.get('conflict_rate', 0):.3f}")
76-
print(f" Conflict Entities: {cons.get('conflict_entities_count', 0)} / "
77-
f"{cons.get('total_entities', 0)}")
78-
entities_checked = cons.get('entities_checked', 0)
93+
print(
94+
f" Conflict Entities: {cons.get('conflict_entities_count', 0)} / "
95+
f"{cons.get('total_entities', 0)}"
96+
)
97+
entities_checked = cons.get("entities_checked", 0)
7998
if entities_checked > 0:
80-
print(f" Entities Checked: {entities_checked} (entities with multiple sources)")
81-
conflicts = cons.get('conflicts', [])
99+
print(
100+
f" Entities Checked: {entities_checked} (entities with multiple sources)"
101+
)
102+
conflicts = cons.get("conflicts", [])
82103
if conflicts:
83104
print(f" Total Conflicts Found: {len(conflicts)}")
84105
# Show sample conflicts
85106
sample_conflicts = conflicts[:3]
86107
for conflict in sample_conflicts:
87-
print(f" - {conflict.get('entity_id', 'N/A')}: {conflict.get('conflict_type', 'N/A')} "
88-
f"(severity: {conflict.get('conflict_severity', 0):.2f})")
108+
print(
109+
f" - {conflict.get('entity_id', 'N/A')}: {conflict.get('conflict_type', 'N/A')} "
110+
f"(severity: {conflict.get('conflict_severity', 0):.2f})"
111+
)
89112
else:
90113
print(f"\n[Consistency] Error: {cons['error']}")
91114

@@ -103,15 +126,19 @@ def _print_structure_summary(struct):
103126
noise_check = thresholds.get("noise_ratio", {})
104127
noise_threshold = noise_check.get("threshold", "N/A")
105128
noise_pass = noise_check.get("pass", False)
106-
print(f" Noise Ratio: {struct.get('noise_ratio', 0):.3f} "
107-
f"({'✓' if noise_pass else '✗'} < {noise_threshold})")
129+
print(
130+
f" Noise Ratio: {struct.get('noise_ratio', 0):.3f} "
131+
f"({'✓' if noise_pass else '✗'} < {noise_threshold})"
132+
)
108133

109134
# Largest CC Ratio
110135
lcc_check = thresholds.get("largest_cc_ratio", {})
111136
lcc_threshold = lcc_check.get("threshold", "N/A")
112137
lcc_pass = lcc_check.get("pass", False)
113-
print(f" Largest CC Ratio: {struct.get('largest_cc_ratio', 0):.3f} "
114-
f"({'✓' if lcc_pass else '✗'} > {lcc_threshold})")
138+
print(
139+
f" Largest CC Ratio: {struct.get('largest_cc_ratio', 0):.3f} "
140+
f"({'✓' if lcc_pass else '✗'} > {lcc_threshold})"
141+
)
115142

116143
# Avg Degree
117144
avg_degree_check = thresholds.get("avg_degree", {})
@@ -122,16 +149,20 @@ def _print_structure_summary(struct):
122149
threshold_str = f"{avg_degree_threshold[0]}-{avg_degree_threshold[1]}"
123150
else:
124151
threshold_str = str(avg_degree_threshold)
125-
print(f" Avg Degree: {struct.get('avg_degree', 0):.2f} "
126-
f"({'✓' if avg_degree_pass else '✗'} {threshold_str})")
152+
print(
153+
f" Avg Degree: {struct.get('avg_degree', 0):.2f} "
154+
f"({'✓' if avg_degree_pass else '✗'} {threshold_str})"
155+
)
127156

128157
# Power Law R²
129-
if struct.get('powerlaw_r2') is not None:
158+
if struct.get("powerlaw_r2") is not None:
130159
powerlaw_check = thresholds.get("powerlaw_r2", {})
131160
powerlaw_threshold = powerlaw_check.get("threshold", "N/A")
132161
powerlaw_pass = powerlaw_check.get("pass", False)
133-
print(f" Power Law R²: {struct.get('powerlaw_r2', 0):.3f} "
134-
f"({'✓' if powerlaw_pass else '✗'} > {powerlaw_threshold})")
162+
print(
163+
f" Power Law R²: {struct.get('powerlaw_r2', 0):.3f} "
164+
f"({'✓' if powerlaw_pass else '✗'} > {powerlaw_threshold})"
165+
)
135166
else:
136167
print(f"\n[Structural Robustness] Error: {struct['error']}")
137168

File renamed without changes.
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
import pandas as pd
2+
3+
from graphgen.bases import BaseLLMWrapper, BaseOperator
4+
from graphgen.common import init_llm
5+
6+
7+
class EvaluateService(BaseOperator):
    """Pipeline operator that scores generated artifacts.

    Per the original docstring, this operator is meant to cover:
      1. KG quality evaluation
      2. QA quality evaluation

    The concrete scoring logic is not implemented yet in this WIP commit;
    see :meth:`evaluate`.
    """

    def __init__(self, working_dir: str = "cache"):
        super().__init__(working_dir=working_dir, op_name="evaluate_service")
        # LLM used for evaluation; "synthesizer" presumably selects the
        # synthesizer model configuration — TODO confirm against init_llm.
        self.llm_client: BaseLLMWrapper = init_llm("synthesizer")

    def process(self, batch: pd.DataFrame) -> pd.DataFrame:
        """Evaluate one batch: rows in, one scored row per item out."""
        items = batch.to_dict(orient="records")
        return pd.DataFrame(self.evaluate(items))

    def evaluate(self, items: list[dict]) -> list[dict]:
        """Score *items* and return one result dict per input item.

        Fix: the original body was ``pass`` (returning ``None``), so
        ``process`` would call ``pd.DataFrame(None)`` and silently emit an
        empty frame, dropping the entire batch. Raise instead so the
        unfinished state fails loudly at the call site.
        """
        raise NotImplementedError(
            "EvaluateService.evaluate is not implemented yet"
        )

graphgen/operators/evaluate_kg/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)