Skip to content

Commit 556341c

Browse files
feat: support Reader classes
1 parent e7b0268 commit 556341c

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

44 files changed

+273
-360
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ For any questions, please check [FAQ](https://github.com/open-sciencelab/GraphGe
100100
### Run Gradio Demo
101101

102102
```bash
103-
python -m webui.app.py
103+
python -m webui.app
104104
```
105105

106106
![ui](https://github.com/user-attachments/assets/3024e9bc-5d45-45f8-a4e6-b57bd2350d84)
@@ -148,7 +148,7 @@ For any questions, please check [FAQ](https://github.com/open-sciencelab/GraphGe
148148
```yaml
149149
# configs/cot_config.yaml
150150
input_data_type: raw
151-
input_file: resources/input_examples/raw_demo.jsonl
151+
input_file: resources/input_examples/jsonl_demo.jsonl
152152
output_data_type: cot
153153
tokenizer: cl100k_base
154154
# additional settings...

README_ZH.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ GraphGen 首先根据源文本构建细粒度的知识图谱,然后利用期
9999
### 运行 Gradio 演示
100100

101101
```bash
102-
python -m webui.app.py
102+
python -m webui.app
103103
```
104104

105105
![ui](https://github.com/user-attachments/assets/3024e9bc-5d45-45f8-a4e6-b57bd2350d84)
@@ -147,7 +147,7 @@ GraphGen 首先根据源文本构建细粒度的知识图谱,然后利用期
147147
```yaml
148148
# configs/cot_config.yaml
149149
input_data_type: raw
150-
input_file: resources/input_examples/raw_demo.jsonl
150+
input_file: resources/input_examples/jsonl_demo.jsonl
151151
output_data_type: cot
152152
tokenizer: cl100k_base
153153
# 其他设置...

baselines/EntiGraph/entigraph.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,7 @@ async def generate_qa_sft(content):
232232
parser.add_argument(
233233
"--input_file",
234234
help="Raw context jsonl path.",
235-
default="resources/input_examples/chunked_demo.json",
235+
default="resources/input_examples/json_demo.json",
236236
type=str,
237237
)
238238
parser.add_argument(

baselines/Genie/genie.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ async def process_chunk(content: str):
100100
parser.add_argument(
101101
"--input_file",
102102
help="Raw context jsonl path.",
103-
default="resources/input_examples/chunked_demo.json",
103+
default="resources/input_examples/json_demo.json",
104104
type=str,
105105
)
106106
parser.add_argument(

baselines/LongForm/longform.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ async def process_chunk(content: str):
6767
parser.add_argument(
6868
"--input_file",
6969
help="Raw context jsonl path.",
70-
default="resources/input_examples/chunked_demo.json",
70+
default="resources/input_examples/json_demo.json",
7171
type=str,
7272
)
7373
parser.add_argument(

baselines/SELF-QA/self-qa.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ async def process_chunk(content: str):
134134
parser.add_argument(
135135
"--input_file",
136136
help="Raw context jsonl path.",
137-
default="resources/input_examples/chunked_demo.json",
137+
default="resources/input_examples/json_demo.json",
138138
type=str,
139139
)
140140
parser.add_argument(

baselines/Wrap/wrap.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ async def process_chunk(content: str):
8787
parser.add_argument(
8888
"--input_file",
8989
help="Raw context jsonl path.",
90-
default="resources/input_examples/chunked_demo.json",
90+
default="resources/input_examples/json_demo.json",
9191
type=str,
9292
)
9393
parser.add_argument(
Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
1-
21
from typing import Tuple
32

4-
__version__ = '20250416'
3+
__version__ = "20250416"
54
short_version = __version__
65

76

@@ -15,13 +14,13 @@ def parse_version_info(version_str: str) -> Tuple:
1514
tuple: A sequence of integers and strings representing the version.
1615
"""
1716
_version_info = []
18-
for x in version_str.split('.'):
17+
for x in version_str.split("."):
1918
if x.isdigit():
2019
_version_info.append(int(x))
21-
elif x.find('rc') != -1:
22-
patch_version = x.split('rc')
20+
elif x.find("rc") != -1:
21+
patch_version = x.split("rc")
2322
_version_info.append(int(patch_version[0]))
24-
_version_info.append(f'rc{patch_version[1]}')
23+
_version_info.append(f"rc{patch_version[1]}")
2524
return tuple(_version_info)
2625

2726

graphgen/bases/base_reader.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
from abc import ABC, abstractmethod
2+
from typing import Any, Dict, List
3+
4+
5+
class BaseReader(ABC):
6+
"""
7+
Abstract base class for reading and processing data.
8+
"""
9+
10+
def __init__(self, text_column: str = "content"):
11+
self.text_column = text_column
12+
13+
@abstractmethod
14+
def read(self, file_path: str) -> List[Dict[str, Any]]:
15+
"""
16+
Read data from the specified file path.
17+
18+
:param file_path: Path to the input file.
19+
:return: List of dictionaries containing the data.
20+
"""

0 commit comments

Comments
 (0)