Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion graphgen/operators/build_kg/build_kg_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,16 @@


class BuildKGService(BaseOperator):
def __init__(self, working_dir: str = "cache", graph_backend: str = "kuzu"):
def __init__(
self, working_dir: str = "cache", graph_backend: str = "kuzu", **build_kwargs
):
super().__init__(working_dir=working_dir, op_name="build_kg_service")
self.llm_client: BaseLLMWrapper = init_llm("synthesizer")
self.graph_storage: BaseGraphStorage = init_storage(
backend=graph_backend, working_dir=working_dir, namespace="graph"
)
self.build_kwargs = build_kwargs
self.max_loop: int = int(self.build_kwargs.get("max_loop", 3))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The direct conversion using int() can raise a ValueError if the max_loop value in build_kwargs is not a valid integer (e.g., an empty string or non-numeric text), or a TypeError if it is None or another non-numeric object. Either exception would cause the operator initialization to fail. It's safer to catch both with a try-except block, log a warning, and fall back to the default value of 3.

Suggested change
self.max_loop: int = int(self.build_kwargs.get("max_loop", 3))
max_loop_val = self.build_kwargs.get("max_loop", 3)
try:
self.max_loop: int = int(max_loop_val)
except (ValueError, TypeError):
self.logger.warning(
f"Invalid value for max_loop: '{max_loop_val}'. Using default value of 3."
)
self.max_loop = 3


def process(self, batch: pd.DataFrame) -> pd.DataFrame:
docs = batch.to_dict(orient="records")
Expand Down Expand Up @@ -46,6 +50,7 @@ def build_kg(self, chunks: List[Chunk]) -> None:
llm_client=self.llm_client,
kg_instance=self.graph_storage,
chunks=text_chunks,
max_loop=self.max_loop,
)
if len(mm_chunks) == 0:
logger.info("All multi-modal chunks are already in the storage")
Expand Down
4 changes: 3 additions & 1 deletion graphgen/operators/build_kg/build_text_kg.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,17 @@ def build_text_kg(
llm_client: BaseLLMWrapper,
kg_instance: BaseGraphStorage,
chunks: List[Chunk],
max_loop: int = 3,
):
"""
:param llm_client: Synthesizer LLM model to extract entities and relationships
:param kg_instance
:param chunks
:param max_loop: Maximum number of loops for entity and relationship extraction
:return:
"""

kg_builder = LightRAGKGBuilder(llm_client=llm_client, max_loop=3)
kg_builder = LightRAGKGBuilder(llm_client=llm_client, max_loop=max_loop)

results = run_concurrent(
kg_builder.extract,
Expand Down
Loading