Skip to content

Commit 86760e9

Browse files
feat: make storage backend configurable
1 parent ee6a927 commit 86760e9

File tree

12 files changed

+42
-14
lines changed

12 files changed

+42
-14
lines changed

examples/extract/extract_schema_guided/schema_guided_extraction_config.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
global_params:
22
working_dir: cache
3+
kv_backend: rocksdb # key-value store backend, support: rocksdb, json_kv
34

45
nodes:
56
- id: read

examples/generate/generate_aggregated_qa/aggregated_config.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
global_params:
22
working_dir: cache
3+
graph_backend: kuzu # graph database backend, support: kuzu, networkx
4+
kv_backend: rocksdb # key-value store backend, support: rocksdb, json_kv
35

46
nodes:
57
- id: read_files # id is unique in the pipeline, and can be referenced by other steps

examples/generate/generate_atomic_qa/atomic_config.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
global_params:
22
working_dir: cache
3+
graph_backend: kuzu # graph database backend, support: kuzu, networkx
4+
kv_backend: rocksdb # key-value store backend, support: rocksdb, json_kv
35

46
nodes:
57
- id: read

examples/generate/generate_cot_qa/cot_config.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
global_params:
22
working_dir: cache
3+
graph_backend: kuzu # graph database backend, support: kuzu, networkx
4+
kv_backend: rocksdb # key-value store backend, support: rocksdb, json_kv
35

46
nodes:
57
- id: read

examples/generate/generate_multi_hop_qa/multi_hop_config.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
global_params:
22
working_dir: cache
3+
graph_backend: kuzu # graph database backend, support: kuzu, networkx
4+
kv_backend: rocksdb # key-value store backend, support: rocksdb, json_kv
35

46
nodes:
57
- id: read

examples/generate/generate_vqa/vqa_config.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
global_params:
22
working_dir: cache
3+
graph_backend: kuzu # graph database backend, support: kuzu, networkx
4+
kv_backend: rocksdb # key-value store backend, support: rocksdb, json_kv
35

46
nodes:
57
- id: read

graphgen/models/llm/local/vllm_wrapper.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,16 @@
1-
import uuid
21
import math
2+
import uuid
33
from typing import Any, List, Optional
4+
45
from graphgen.bases.base_llm_wrapper import BaseLLMWrapper
56
from graphgen.bases.datatypes import Token
67

8+
79
class VLLMWrapper(BaseLLMWrapper):
810
"""
911
Async inference backend based on vLLM.
1012
"""
13+
1114
def __init__(
1215
self,
1316
model: str,
@@ -33,7 +36,7 @@ def __init__(
3336
tensor_parallel_size=tensor_parallel_size,
3437
gpu_memory_utilization=gpu_memory_utilization,
3538
trust_remote_code=kwargs.get("trust_remote_code", True),
36-
disable_log_stats=False,
39+
disable_log_stats=False,
3740
)
3841
self.engine = AsyncLLMEngine.from_engine_args(engine_args)
3942
self.temperature = temperature
@@ -96,7 +99,11 @@ async def generate_topk_per_token(
9699
async for request_output in result_generator:
97100
final_output = request_output
98101

99-
if not final_output or not final_output.outputs or not final_output.outputs[0].logprobs:
102+
if (
103+
not final_output
104+
or not final_output.outputs
105+
or not final_output.outputs[0].logprobs
106+
):
100107
return []
101108

102109
top_logprobs = final_output.outputs[0].logprobs[0]

graphgen/operators/build_kg/build_kg_service.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,11 @@
1212

1313

1414
class BuildKGService(BaseOperator):
15-
def __init__(self, working_dir: str = "cache"):
15+
def __init__(self, working_dir: str = "cache", graph_backend: str = "kuzu"):
1616
super().__init__(working_dir=working_dir, op_name="build_kg_service")
1717
self.llm_client: BaseLLMWrapper = init_llm("synthesizer")
1818
self.graph_storage: BaseGraphStorage = init_storage(
19-
backend="kuzu", working_dir=working_dir, namespace="graph"
19+
backend=graph_backend, working_dir=working_dir, namespace="graph"
2020
)
2121

2222
def process(self, batch: pd.DataFrame) -> pd.DataFrame:

graphgen/operators/chunk/chunk_service.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,12 +42,14 @@ def split_chunks(text: str, language: str = "en", **kwargs) -> list:
4242

4343

4444
class ChunkService(BaseOperator):
45-
def __init__(self, working_dir: str = "cache", **chunk_kwargs):
45+
def __init__(
46+
self, working_dir: str = "cache", kv_backend: str = "rocksdb", **chunk_kwargs
47+
):
4648
super().__init__(working_dir=working_dir, op_name="chunk_service")
4749
tokenizer_model = os.getenv("TOKENIZER_MODEL", "cl100k_base")
4850
self.tokenizer_instance: Tokenizer = Tokenizer(model_name=tokenizer_model)
4951
self.chunk_storage = init_storage(
50-
backend="rocksdb",
52+
backend=kv_backend,
5153
working_dir=working_dir,
5254
namespace="chunk",
5355
)

graphgen/operators/judge/judge_service.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,11 @@
1111
class JudgeService(BaseOperator):
1212
"""Service for judging graph edges and nodes using a trainee LLM."""
1313

14-
def __init__(self, working_dir: str = "cache"):
14+
def __init__(self, working_dir: str = "cache", graph_backend: str = "kuzu"):
1515
super().__init__(working_dir=working_dir, op_name="judge_service")
1616
self.llm_client: BaseLLMWrapper = init_llm("trainee")
1717
self.graph_storage: BaseGraphStorage = init_storage(
18-
backend="kuzu",
18+
backend=graph_backend,
1919
working_dir=working_dir,
2020
namespace="graph",
2121
)

0 commit comments

Comments
 (0)