Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
fbc8d52
wip: refactor data evaluators & add kg evaluators
ChenZiHong-Gavin Dec 17, 2025
18be127
feat: add KG quality evaluation module
CHERRY-ui8 Dec 23, 2025
a44b1f3
refactor: removed repeated calculations and remove hardcoded params
CHERRY-ui8 Dec 23, 2025
6c77734
add: add kg_evaluate config file for params
CHERRY-ui8 Dec 23, 2025
93abd00
fix: correct relation acc evaluation logic
CHERRY-ui8 Dec 23, 2025
777cb25
refactor: enhance KG evaluator to use llm-as judge; remove evaluate_k…
CHERRY-ui8 Dec 23, 2025
5bfdc0a
fix: fix format and clean up imports
CHERRY-ui8 Dec 23, 2025
8ef5f47
Merge branch 'main' of https://github.com/open-sciencelab/GraphGen in…
ChenZiHong-Gavin Dec 24, 2025
42693df
wip: refactor evaluator structure
ChenZiHong-Gavin Dec 24, 2025
09072f0
Merge branch 'main' of https://github.com/open-sciencelab/GraphGen in…
ChenZiHong-Gavin Dec 24, 2025
a257246
wip: add annotations
ChenZiHong-Gavin Dec 24, 2025
41015a2
refactor: refactor proj structure & configs
ChenZiHong-Gavin Dec 25, 2025
978b76c
wip: split prompts
ChenZiHong-Gavin Dec 25, 2025
77bb00d
refactor: refactor base_evaluator
ChenZiHong-Gavin Dec 25, 2025
19510d9
refactor: refactor LengthEvaluator
ChenZiHong-Gavin Dec 25, 2025
028b043
refactor: refactor MTLDEvaluator
ChenZiHong-Gavin Dec 25, 2025
c161358
refactor: refactor NLTKHelper
ChenZiHong-Gavin Dec 25, 2025
58ede2e
refactor: refactor RewardEvaluator
ChenZiHong-Gavin Dec 25, 2025
f3a0391
refactor: refactor UniEvaluator
ChenZiHong-Gavin Dec 25, 2025
2a3f09f
refactor: refactor evaluator structure
ChenZiHong-Gavin Dec 25, 2025
a4d7993
refactor: change evaluation methods in acc and consistency to sync
CHERRY-ui8 Dec 25, 2025
3ae2321
refactor: streamline evaluation functions for accuracy, consistency, …
CHERRY-ui8 Dec 25, 2025
f5b2254
Merge branch 'main' of https://github.com/open-sciencelab/GraphGen in…
ChenZiHong-Gavin Dec 25, 2025
86fa173
wip: perf evaluate_service
ChenZiHong-Gavin Dec 25, 2025
8d7e6b4
merge
ChenZiHong-Gavin Dec 25, 2025
06fc6e3
perf: perf evaluate_service
ChenZiHong-Gavin Dec 25, 2025
f9d6dc3
fix: fix output node
ChenZiHong-Gavin Dec 26, 2025
4d022fb
merge
CHERRY-ui8 Dec 26, 2025
084cb08
feat: add KGQualityEvaluator and integrate into EvaluateService for K…
CHERRY-ui8 Dec 26, 2025
98968e6
refactor: remove KGQualityEvaluator and restructure KG evaluation int…
CHERRY-ui8 Dec 26, 2025
71ebba2
pylints
CHERRY-ui8 Dec 26, 2025
f6cce9b
feat: add kg_structure evaluation
ChenZiHong-Gavin Dec 26, 2025
4f0350b
feat: add kg_structure evaluation
ChenZiHong-Gavin Dec 26, 2025
e10b391
feat: add kg_accuracy & kg_consistency metrics
ChenZiHong-Gavin Dec 26, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions examples/evaluate/evaluate.sh

This file was deleted.

2 changes: 2 additions & 0 deletions examples/evaluate/evaluate_kg/evaluate_kg.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
python3 -m graphgen.run \
--config_file examples/evaluate/evaluate_kg/kg_evaluation_config.yaml
45 changes: 45 additions & 0 deletions examples/evaluate/evaluate_kg/kg_evaluation_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
global_params:
working_dir: cache
graph_backend: kuzu # graph database backend, support: kuzu, networkx
kv_backend: rocksdb # key-value store backend, support: rocksdb, json_kv

nodes:
- id: read
op_name: read
type: source
dependencies: []
params:
input_path:
- examples/input_examples/extract_demo.txt

- id: chunk
op_name: chunk
type: map_batch
dependencies:
- read
execution_params:
replicas: 4
params:
chunk_size: 20480 # larger chunk size for better context
chunk_overlap: 2000

- id: build_kg
op_name: build_kg
type: map_batch
dependencies:
- chunk
execution_params:
replicas: 1
batch_size: 128

- id: evaluate
op_name: evaluate
type: aggregate
save_output: true
dependencies:
- build_kg
params:
metrics:
- kg_structure
- kg_accuracy
- kg_consistency
2 changes: 2 additions & 0 deletions examples/evaluate/evaluate_qa/evaluate_qa.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
python3 -m graphgen.run \
--config_file examples/evaluate/evaluate_qa/qa_evaluation_config.yaml
98 changes: 98 additions & 0 deletions examples/evaluate/evaluate_qa/qa_evaluation_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
global_params:
working_dir: cache
graph_backend: kuzu # graph database backend, support: kuzu, networkx
kv_backend: rocksdb # key-value store backend, support: rocksdb, json_kv

nodes:
- id: read_files # id is unique in the pipeline, and can be referenced by other steps
op_name: read
type: source
dependencies: []
params:
input_path:
- examples/input_examples/jsonl_demo.jsonl # input file path, support json, jsonl, txt, pdf. See examples/input_examples for examples

- id: chunk_documents
op_name: chunk
type: map_batch
dependencies:
- read_files
execution_params:
replicas: 4
params:
chunk_size: 1024 # chunk size for text splitting
chunk_overlap: 100 # chunk overlap for text splitting

- id: build_kg
op_name: build_kg
type: map_batch
dependencies:
- chunk_documents
execution_params:
replicas: 1
batch_size: 128

- id: quiz
op_name: quiz
type: aggregate
dependencies:
- build_kg
execution_params:
replicas: 1
batch_size: 128
params:
quiz_samples: 2 # number of quiz samples to generate
concurrency_limit: 200

- id: judge
op_name: judge
type: map_batch
dependencies:
- quiz
execution_params:
replicas: 1
batch_size: 128

- id: partition
op_name: partition
type: aggregate
dependencies:
- judge
params:
method: ece # ece is a custom partition method based on comprehension loss
method_params:
max_units_per_community: 20 # max nodes and edges per community
min_units_per_community: 5 # min nodes and edges per community
max_tokens_per_community: 10240 # max tokens per community
unit_sampling: max_loss # unit sampling strategy, support: random, max_loss, min_loss

- id: generate
op_name: generate
type: map_batch
dependencies:
- partition
execution_params:
replicas: 1
batch_size: 128
save_output: true
params:
method: aggregated # atomic, aggregated, multi_hop, cot, vqa
data_format: ChatML # Alpaca, Sharegpt, ChatML

- id: evaluate
op_name: evaluate
type: map_batch
dependencies:
- generate
execution_params:
replicas: 1
batch_size: 128
save_output: true
params:
metrics:
- qa_length
- qa_mtld
- qa_reward_score
- qa_uni_score
mtld_params:
threshold: 0.7
1 change: 1 addition & 0 deletions graphgen/bases/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,5 @@
from .base_splitter import BaseSplitter
from .base_storage import BaseGraphStorage, BaseKVStorage, StorageNameSpace
from .base_tokenizer import BaseTokenizer
from .base_evaluator import BaseEvaluator
from .datatypes import Chunk, Config, Node, QAPair, Token
10 changes: 10 additions & 0 deletions graphgen/bases/base_evaluator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from abc import ABC, abstractmethod
from .datatypes import QAPair


class BaseEvaluator(ABC):
    """Abstract interface for evaluators that score a single ``QAPair``."""

    @abstractmethod
    def evaluate(self, pair: QAPair) -> float:
        """
        Evaluate the given QA pair and return a score.

        :param pair: the ``QAPair`` to score.
        :return: the score as a float.
        """
47 changes: 43 additions & 4 deletions graphgen/bases/base_storage.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Generic, TypeVar, Union
from typing import Dict, Generic, List, Set, TypeVar, Union

T = TypeVar("T")

Expand Down Expand Up @@ -45,52 +46,90 @@ def reload(self):
raise NotImplementedError


class BaseGraphStorage(StorageNameSpace):
class BaseGraphStorage(StorageNameSpace, ABC):
    """
    Abstract contract for graph storage backends, layered on ``StorageNameSpace``.

    Every method decorated with ``@abstractmethod`` must be overridden before a
    subclass can be instantiated; ``get_isolated_nodes`` is the only method
    with a concrete implementation here.
    """

    @abstractmethod
    def is_directed(self) -> bool:
        pass

    @abstractmethod
    def has_node(self, node_id: str) -> bool:
        raise NotImplementedError

    @abstractmethod
    def has_edge(self, source_node_id: str, target_node_id: str) -> bool:
        raise NotImplementedError

    @abstractmethod
    def node_degree(self, node_id: str) -> int:
        raise NotImplementedError

    # NOTE(review): unlike its neighbours this method has no @abstractmethod
    # decorator, so subclasses are not forced to override it — confirm whether
    # it is still meant to be part of the interface.
    def edge_degree(self, src_id: str, tgt_id: str) -> int:
        raise NotImplementedError

    @abstractmethod
    def get_all_node_degrees(self) -> Dict[str, int]:
        # get_isolated_nodes() below reads this mapping as node id -> degree.
        pass

    def get_isolated_nodes(self) -> List[str]:
        """Return the ids whose degree, per ``get_all_node_degrees()``, is 0."""
        return [
            node_id
            for node_id, degree in self.get_all_node_degrees().items()
            if degree == 0
        ]

    @abstractmethod
    def get_node(self, node_id: str) -> Union[dict, None]:
        raise NotImplementedError

    @abstractmethod
    def update_node(self, node_id: str, node_data: dict[str, str]):
        raise NotImplementedError

    @abstractmethod
    def get_all_nodes(self) -> Union[list[tuple[str, dict]], None]:
        raise NotImplementedError

    @abstractmethod
    def get_node_count(self) -> int:
        pass

    @abstractmethod
    def get_edge(self, source_node_id: str, target_node_id: str) -> Union[dict, None]:
        raise NotImplementedError

    @abstractmethod
    def update_edge(
        self, source_node_id: str, target_node_id: str, edge_data: dict[str, str]
    ):
        raise NotImplementedError

    @abstractmethod
    def get_all_edges(self) -> Union[list[tuple[str, str, dict]], None]:
        raise NotImplementedError

    @abstractmethod
    def get_edge_count(self) -> int:
        pass

    @abstractmethod
    def get_node_edges(self, source_node_id: str) -> Union[list[tuple[str, str]], None]:
        raise NotImplementedError

    @abstractmethod
    def upsert_node(self, node_id: str, node_data: dict[str, str]):
        raise NotImplementedError

    @abstractmethod
    def upsert_edge(
        self, source_node_id: str, target_node_id: str, edge_data: dict[str, str]
    ):
        raise NotImplementedError

    @abstractmethod
    def delete_node(self, node_id: str):
        raise NotImplementedError

    @abstractmethod
    def reload(self):
        raise NotImplementedError

    @abstractmethod
    def get_connected_components(self, undirected: bool = True) -> List[Set[str]]:
        # Returns a list of string sets (presumably node ids per component).
        # NOTE(review): `undirected` presumably ignores edge direction when
        # True — confirm against the concrete backends.
        raise NotImplementedError
44 changes: 39 additions & 5 deletions graphgen/common/init_storage.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Any, Dict, Union
from typing import Any, Dict, List, Set, Union

import ray

Expand Down Expand Up @@ -68,6 +68,21 @@ def __init__(self, backend: str, working_dir: str, namespace: str):
def index_done_callback(self):
    """Forward the index-done notification to the wrapped graph and return its result."""
    backend = self.graph
    return backend.index_done_callback()

def is_directed(self) -> bool:
    """Return whatever the wrapped graph reports from ``is_directed()``."""
    backend = self.graph
    return backend.is_directed()

def get_all_node_degrees(self) -> Dict[str, int]:
    """Return the degree mapping produced by the wrapped graph."""
    backend = self.graph
    return backend.get_all_node_degrees()

def get_node_count(self) -> int:
    """Return the node count reported by the wrapped graph."""
    backend = self.graph
    return backend.get_node_count()

def get_edge_count(self) -> int:
    """Return the edge count reported by the wrapped graph."""
    backend = self.graph
    return backend.get_edge_count()

def get_connected_components(self, undirected: bool = True) -> List[Set[str]]:
    """Return the wrapped graph's connected components, passing ``undirected`` through."""
    backend = self.graph
    components = backend.get_connected_components(undirected)
    return components

def has_node(self, node_id: str) -> bool:
    """Return the wrapped graph's answer to ``has_node(node_id)``."""
    backend = self.graph
    return backend.has_node(node_id)

Expand Down Expand Up @@ -165,6 +180,21 @@ def __init__(self, actor_handle: ray.actor.ActorHandle):
def index_done_callback(self):
    """Invoke ``index_done_callback`` on the remote actor and return the resolved result."""
    pending = self.actor.index_done_callback.remote()
    return ray.get(pending)

def is_directed(self) -> bool:
    """Invoke ``is_directed`` on the remote actor and return the resolved result."""
    pending = self.actor.is_directed.remote()
    return ray.get(pending)

def get_all_node_degrees(self) -> Dict[str, int]:
    """Invoke ``get_all_node_degrees`` on the remote actor and return the resolved mapping."""
    pending = self.actor.get_all_node_degrees.remote()
    return ray.get(pending)

def get_node_count(self) -> int:
    """Invoke ``get_node_count`` on the remote actor and return the resolved count."""
    pending = self.actor.get_node_count.remote()
    return ray.get(pending)

def get_edge_count(self) -> int:
    """Invoke ``get_edge_count`` on the remote actor and return the resolved count."""
    pending = self.actor.get_edge_count.remote()
    return ray.get(pending)

def get_connected_components(self, undirected: bool = True) -> List[Set[str]]:
    """Invoke ``get_connected_components`` on the remote actor, passing ``undirected`` through."""
    pending = self.actor.get_connected_components.remote(undirected)
    return ray.get(pending)

def has_node(self, node_id: str) -> bool:
    """Invoke ``has_node(node_id)`` on the remote actor and return the resolved result."""
    pending = self.actor.has_node.remote(node_id)
    return ray.get(pending)

Expand Down Expand Up @@ -239,10 +269,14 @@ def create_storage(backend: str, working_dir: str, namespace: str):
try:
actor_handle = ray.get_actor(actor_name)
except ValueError:
actor_handle = ray.remote(actor_class).options(
name=actor_name,
get_if_exists=True,
).remote(backend, working_dir, namespace)
actor_handle = (
ray.remote(actor_class)
.options(
name=actor_name,
get_if_exists=True,
)
.remote(backend, working_dir, namespace)
)
ray.get(actor_handle.ready.remote())
return proxy_class(actor_handle)

Expand Down
2 changes: 2 additions & 0 deletions graphgen/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,8 @@ def execute(self, initial_ds: ray.data.Dataset) -> Dict[str, ray.data.Dataset]:

for node in sorted_nodes:
self._execute_node(node, initial_ds)
if getattr(node, "save_output", False):
self.datasets[node.id] = self.datasets[node.id].materialize()

output_nodes = [n for n in sorted_nodes if getattr(n, "save_output", False)]
return {node.id: self.datasets[node.id] for node in output_nodes}
10 changes: 9 additions & 1 deletion graphgen/models/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,12 @@
from .evaluator import LengthEvaluator, MTLDEvaluator, RewardEvaluator, UniEvaluator
from .evaluator import (
AccuracyEvaluator,
ConsistencyEvaluator,
LengthEvaluator,
MTLDEvaluator,
RewardEvaluator,
StructureEvaluator,
UniEvaluator,
)
from .generator import (
AggregatedGenerator,
AtomicGenerator,
Expand Down
6 changes: 2 additions & 4 deletions graphgen/models/evaluator/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,2 @@
from .length_evaluator import LengthEvaluator
from .mtld_evaluator import MTLDEvaluator
from .reward_evaluator import RewardEvaluator
from .uni_evaluator import UniEvaluator
from .kg import AccuracyEvaluator, ConsistencyEvaluator, StructureEvaluator
from .qa import LengthEvaluator, MTLDEvaluator, RewardEvaluator, UniEvaluator
Loading