Skip to content

Commit 2192ee8

Browse files
fix: fix lint errors
1 parent 27ab285 commit 2192ee8

File tree

3 files changed: +18 -61 lines changed

graphgen/graphgen.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -108,16 +108,16 @@ async def insert(self, read_config: Dict, split_config: Dict):
108108
self.progress_bar,
109109
)
110110

111-
# _add_chunk_keys = await self.chunks_storage.filter_keys(
112-
# list(inserting_chunks.keys())
113-
# )
114-
# inserting_chunks = {
115-
# k: v for k, v in inserting_chunks.items() if k in _add_chunk_keys
116-
# }
117-
#
118-
# if len(inserting_chunks) == 0:
119-
# logger.warning("All chunks are already in the storage")
120-
# return
111+
_add_chunk_keys = await self.chunks_storage.filter_keys(
112+
list(inserting_chunks.keys())
113+
)
114+
inserting_chunks = {
115+
k: v for k, v in inserting_chunks.items() if k in _add_chunk_keys
116+
}
117+
118+
if len(inserting_chunks) == 0:
119+
logger.warning("All chunks are already in the storage")
120+
return
121121

122122
logger.info("[New Chunks] inserting %d chunks", len(inserting_chunks))
123123
await self.chunks_storage.upsert(inserting_chunks)

graphgen/operators/build_kg/build_kg.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@
22

33
import gradio as gr
44

5+
from graphgen.bases import BaseLLMWrapper
56
from graphgen.bases.base_storage import BaseGraphStorage
67
from graphgen.bases.datatypes import Chunk
7-
from graphgen.models import OpenAIClient
88
from graphgen.utils import logger
99

1010
from .build_mm_kg import build_mm_kg
@@ -13,7 +13,7 @@
1313

1414

1515
async def build_kg(
16-
llm_client: OpenAIClient,
16+
llm_client: BaseLLMWrapper,
1717
kg_instance: BaseGraphStorage,
1818
chunks: List[Chunk],
1919
anchor_type: Optional[str] = None,
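(third changed file; its path header was not captured in this extract; the hunks below show it defines `build_mo_kg`)

Lines changed: 6 additions & 49 deletions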
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,17 @@
11
import json
2-
import re
32
from typing import List
43

54
import gradio as gr
65

6+
from graphgen.bases import BaseLLMWrapper
77
from graphgen.bases.base_storage import BaseGraphStorage
88
from graphgen.bases.datatypes import Chunk
9-
from graphgen.models import OpenAIClient
109
from graphgen.templates import PROTEIN_ANCHOR_PROMPT, PROTEIN_KG_EXTRACTION_PROMPT
11-
from graphgen.utils import (
12-
detect_main_language,
13-
handle_single_entity_extraction,
14-
handle_single_relationship_extraction,
15-
logger,
16-
run_concurrent,
17-
split_string_by_multi_markers,
18-
)
10+
from graphgen.utils import detect_main_language, logger, run_concurrent
1911

2012

2113
async def build_mo_kg(
22-
llm_client: OpenAIClient,
14+
llm_client: BaseLLMWrapper,
2315
kg_instance: BaseGraphStorage,
2416
chunks: List[Chunk],
2517
progress_bar: gr.Progress = None,
@@ -73,48 +65,13 @@ async def extract_mo_info(chunk: Chunk):
7365
# logger.warning("Failed to search for protein info: %s", e)
7466
# search_results = {}
7567

76-
# 组织成文本
7768
mo_text = "\n".join([f"{k}: {v}" for k, v in merged.items()])
7869
lang = detect_main_language(mo_text)
7970
prompt = PROTEIN_KG_EXTRACTION_PROMPT[lang].format(
8071
input_text=mo_text,
8172
**PROTEIN_KG_EXTRACTION_PROMPT["FORMAT"],
8273
)
8374
kg_output = await llm_client.generate_answer(prompt)
84-
85-
logger.debug("Image chunk extraction result: %s", kg_output)
86-
87-
# parse the result
88-
records = split_string_by_multi_markers(
89-
kg_output,
90-
[
91-
PROTEIN_KG_EXTRACTION_PROMPT["FORMAT"]["record_delimiter"],
92-
PROTEIN_KG_EXTRACTION_PROMPT["FORMAT"]["completion_delimiter"],
93-
],
94-
)
95-
96-
print(records)
97-
raise NotImplementedError
98-
99-
nodes = defaultdict(list)
100-
edges = defaultdict(list)
101-
102-
for record in records:
103-
match = re.search(r"\((.*)\)", record)
104-
if not match:
105-
continue
106-
inner = match.group(1)
107-
108-
attributes = split_string_by_multi_markers(
109-
inner, [PROTEIN_KG_EXTRACTION_PROMPT["FORMAT"]["tuple_delimiter"]]
110-
)
111-
112-
entity = await handle_single_entity_extraction(attributes, "temp")
113-
if entity is not None:
114-
nodes[entity["entity_name"]].append(entity)
115-
continue
116-
117-
relation = await handle_single_relationship_extraction(attributes, "temp")
118-
if relation is not None:
119-
key = (relation["src_id"], relation["tgt_id"])
120-
edges[key].append(relation)
75+
print(kg_output)
76+
# TODO: parse kg_output and insert into kg_instance
77+
return kg_instance

0 commit comments

Comments
 (0)