Commit 978b76c

wip: split prompts

1 parent 41015a2 commit 978b76c

File tree

10 files changed: 451 additions & 383 deletions


graphgen/models/evaluator/kg/__init__.py

Lines changed: 9 additions & 0 deletions

@@ -1,3 +1,12 @@
+"""
+Knowledge Graph Quality Evaluator
+
+This module provides comprehensive quality evaluation for knowledge graphs, covering:
+1. accuracy assessment (entity/relation/triple validation),
+2. consistency assessment (attribute conflict detection), and
+3. structural robustness assessment (noise ratio, connectivity, degree distribution).
+"""
+
 from .accuracy_evaluator import AccuracyEvaluator
 from .consistency_evaluator import ConsistencyEvaluator
 from .structure_evaluator import StructureEvaluator
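For orientation, here is a minimal sketch of how the three evaluators exported above might be driven together. Only the class names come from this module; the zero-argument constructors and the evaluate() method are assumptions, not this repo's actual API.

```python
# Hypothetical usage sketch: the class names come from
# graphgen/models/evaluator/kg/__init__.py; the zero-argument constructors
# and the evaluate(graph) method are assumptions, not code from this commit.
from graphgen.models.evaluator.kg import (
    AccuracyEvaluator,
    ConsistencyEvaluator,
    StructureEvaluator,
)


def run_kg_quality_checks(graph):
    """Run all three assessment dimensions over a knowledge graph."""
    evaluators = {
        "accuracy": AccuracyEvaluator(),
        "consistency": ConsistencyEvaluator(),
        "structure": StructureEvaluator(),
    }
    # One result per dimension, keyed by dimension name.
    return {name: ev.evaluate(graph) for name, ev in evaluators.items()}
```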

graphgen/models/evaluator/kg/accuracy_evaluator.py

Lines changed: 87 additions & 125 deletions
Large diffs are not rendered by default.

graphgen/models/evaluator/kg/consistency_evaluator.py

Lines changed: 94 additions & 149 deletions
Large diffs are not rendered by default.

graphgen/models/evaluator/kg/kg_quality_evaluator.py

Lines changed: 0 additions & 107 deletions
This file was deleted.

graphgen/operators/evaluate/evaluate_service.py

Lines changed: 5 additions & 2 deletions
@@ -11,10 +11,9 @@ class EvaluateService(BaseOperator):
     """
 
     def __init__(self, working_dir: str = "cache", metrics: list[str] = None):
-        # optional: pass in graph
         super().__init__(working_dir=working_dir, op_name="evaluate_service")
         self.llm_client: BaseLLMWrapper = init_llm("synthesizer")
-        self.metrics = metrics or []
+        self.metrics = metrics
 
         self.evaluators = {
             "xxx": "xxxEvaluator"
@@ -24,6 +23,10 @@ def __init__(self, working_dir: str = "cache", metrics: list[str] = None):
             xx, xx, xx
         )
 
+    def _init_evaluators(self):
+        for metric in self.metrics:
+
+
     def process(self, batch: pd.DataFrame) -> pd.DataFrame:
         items = batch.to_dict(orient="records")
         return pd.DataFrame(self.evaluate(items))
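The new _init_evaluators is left empty in this wip commit. Purely as a sketch of where it might be headed, under loud assumptions (the metric-to-class mapping, the zero-argument constructors, and the guard for metrics=None are all hypothetical, not code from this commit):

```python
# Hypothetical completion of the wip _init_evaluators; this would live on
# EvaluateService. Everything below is an assumption: the mapping, the
# constructors, and the None guard (needed because __init__ no longer
# defaults self.metrics to []).
from graphgen.models.evaluator.kg import (
    AccuracyEvaluator,
    ConsistencyEvaluator,
    StructureEvaluator,
)

METRIC_TO_EVALUATOR = {
    "accuracy": AccuracyEvaluator,
    "consistency": ConsistencyEvaluator,
    "structure": StructureEvaluator,
}


def _init_evaluators(self):
    self.evaluators = {}
    for metric in self.metrics or []:  # self.metrics may now be None
        try:
            self.evaluators[metric] = METRIC_TO_EVALUATOR[metric]()
        except KeyError:
            raise ValueError(f"unknown metric: {metric!r}") from None
```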

graphgen/templates/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -1,5 +1,6 @@
 from .coreference_resolution import COREFERENCE_RESOLUTION_PROMPT
 from .description_rephrasing import DESCRIPTION_REPHRASING_PROMPT
+from .evaluation import ACCURACY_EVALUATION_PROMPT, CONSISTENCY_EVALUATION_PROMPT
 from .extraction import SCHEMA_GUIDED_EXTRACTION_PROMPT
 from .generation import (
     AGGREGATED_GENERATION_PROMPT,
graphgen/templates/evaluation/__init__.py

Lines changed: 1 addition & 0 deletions

@@ -0,0 +1 @@
+from .kg import ACCURACY_EVALUATION_PROMPT
graphgen/templates/evaluation/kg/__init__.py

Lines changed: 1 addition & 0 deletions

@@ -0,0 +1 @@
+from .accuracy_evaluation import ACCURACY_EVALUATION_PROMPT
graphgen/templates/evaluation/kg/accuracy_evaluation.py

Lines changed: 156 additions & 0 deletions

@@ -0,0 +1,156 @@
+ENTITY_EVALUATION_PROMPT_ZH = """你是一个知识图谱质量评估专家。你的任务是从给定的文本块和提取的实体列表,评估实体提取的质量。
+
+评估维度:
+1. ACCURACY (准确性, 权重: 40%): 提取的实体是否正确,是否有误提取或错误识别
+2. COMPLETENESS (完整性, 权重: 40%): 是否遗漏了文本中的重要实体
+3. PRECISION (精确性, 权重: 20%): 提取的实体是否精确,命名是否准确
+
+评分标准(每个维度 0-1 分):
+- EXCELLENT (0.8-1.0): 高质量提取
+- GOOD (0.6-0.79): 良好质量,有少量问题
+- ACCEPTABLE (0.4-0.59): 可接受,有明显问题
+- POOR (0.0-0.39): 质量差,需要改进
+
+综合评分 = 0.4 × Accuracy + 0.4 × Completeness + 0.2 × Precision
+
+请评估以下内容:
+
+原始文本块:
+{chunk_content}
+
+提取的实体列表:
+{extracted_entities}
+
+请以 JSON 格式返回评估结果:
+{{
+    "accuracy": <0-1之间的浮点数>,
+    "completeness": <0-1之间的浮点数>,
+    "precision": <0-1之间的浮点数>,
+    "overall_score": <综合评分>,
+    "accuracy_reasoning": "<准确性评估理由>",
+    "completeness_reasoning": "<完整性评估理由,包括遗漏的重要实体>",
+    "precision_reasoning": "<精确性评估理由>",
+    "issues": ["<发现的问题列表>"]
+}}
+"""
+
+ENTITY_EVALUATION_PROMPT_EN = """You are a Knowledge Graph Quality Assessment Expert. \
+Your task is to evaluate the quality of entity extraction from a given text block and extracted entity list.
+
+Evaluation Dimensions:
+1. ACCURACY (Weight: 40%): Whether the extracted entities are correct, and if there are any false extractions or misidentifications
+2. COMPLETENESS (Weight: 40%): Whether important entities from the text are missing
+3. PRECISION (Weight: 20%): Whether the extracted entities are precise and accurately named
+
+Scoring Criteria (0-1 scale for each dimension):
+- EXCELLENT (0.8-1.0): High-quality extraction
+- GOOD (0.6-0.79): Good quality with minor issues
+- ACCEPTABLE (0.4-0.59): Acceptable with noticeable issues
+- POOR (0.0-0.39): Poor quality, needs improvement
+
+Overall Score = 0.4 × Accuracy + 0.4 × Completeness + 0.2 × Precision
+
+Please evaluate the following:
+
+Original Text Block:
+{chunk_content}
+
+Extracted Entity List:
+{extracted_entities}
+
+Please return the evaluation result in JSON format:
+{{
+    "accuracy": <float between 0-1>,
+    "completeness": <float between 0-1>,
+    "precision": <float between 0-1>,
+    "overall_score": <overall score>,
+    "accuracy_reasoning": "<reasoning for accuracy assessment>",
+    "completeness_reasoning": "<reasoning for completeness assessment, including important missing entities>",
+    "precision_reasoning": "<reasoning for precision assessment>",
+    "issues": ["<list of identified issues>"]
+}}
+"""
+
+RELATION_EVALUATION_PROMPT_ZH = """你是一个知识图谱质量评估专家。你的任务是从给定的文本块和提取的关系列表,评估关系抽取的质量。
+
+评估维度:
+1. ACCURACY (准确性, 权重: 40%): 提取的关系是否正确,关系描述是否准确
+2. COMPLETENESS (完整性, 权重: 40%): 是否遗漏了文本中的重要关系
+3. PRECISION (精确性, 权重: 20%): 关系描述是否精确,是否过于宽泛
+
+评分标准(每个维度 0-1 分):
+- EXCELLENT (0.8-1.0): 高质量提取
+- GOOD (0.6-0.79): 良好质量,有少量问题
+- ACCEPTABLE (0.4-0.59): 可接受,有明显问题
+- POOR (0.0-0.39): 质量差,需要改进
+
+综合评分 = 0.4 × Accuracy + 0.4 × Completeness + 0.2 × Precision
+
+请评估以下内容:
+
+原始文本块:
+{chunk_content}
+
+提取的关系列表:
+{extracted_relations}
+
+请以 JSON 格式返回评估结果:
+{{
+    "accuracy": <0-1之间的浮点数>,
+    "completeness": <0-1之间的浮点数>,
+    "precision": <0-1之间的浮点数>,
+    "overall_score": <综合评分>,
+    "accuracy_reasoning": "<准确性评估理由>",
+    "completeness_reasoning": "<完整性评估理由,包括遗漏的重要关系>",
+    "precision_reasoning": "<精确性评估理由>",
+    "issues": ["<发现的问题列表>"]
+}}
+"""
+
+RELATION_EVALUATION_PROMPT_EN = """You are a Knowledge Graph Quality Assessment Expert. \
+Your task is to evaluate the quality of relation extraction from a given text block and extracted relation list.
+
+Evaluation Dimensions:
+1. ACCURACY (Weight: 40%): Whether the extracted relations are correct and the relation descriptions are accurate
+2. COMPLETENESS (Weight: 40%): Whether important relations from the text are missing
+3. PRECISION (Weight: 20%): Whether the relation descriptions are precise and not overly broad
+
+Scoring Criteria (0-1 scale for each dimension):
+- EXCELLENT (0.8-1.0): High-quality extraction
+- GOOD (0.6-0.79): Good quality with minor issues
+- ACCEPTABLE (0.4-0.59): Acceptable with noticeable issues
+- POOR (0.0-0.39): Poor quality, needs improvement
+
+Overall Score = 0.4 × Accuracy + 0.4 × Completeness + 0.2 × Precision
+
+Please evaluate the following:
+
+Original Text Block:
+{chunk_content}
+
+Extracted Relation List:
+{extracted_relations}
+
+Please return the evaluation result in JSON format:
+{{
+    "accuracy": <float between 0-1>,
+    "completeness": <float between 0-1>,
+    "precision": <float between 0-1>,
+    "overall_score": <overall score>,
+    "accuracy_reasoning": "<reasoning for accuracy assessment>",
+    "completeness_reasoning": "<reasoning for completeness assessment, including important missing relations>",
+    "precision_reasoning": "<reasoning for precision assessment>",
+    "issues": ["<list of identified issues>"]
+}}
+"""
+
+ACCURACY_EVALUATION_PROMPT = {
+    "zh": {
+        "ENTITY": ENTITY_EVALUATION_PROMPT_ZH,
+        "RELATION": RELATION_EVALUATION_PROMPT_ZH,
+    },
+    "en": {
+        "ENTITY": ENTITY_EVALUATION_PROMPT_EN,
+        "RELATION": RELATION_EVALUATION_PROMPT_EN,
+    },
+}
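Because the templates escape literal braces as {{ }} and expose {chunk_content} / {extracted_entities} / {extracted_relations} placeholders, they are evidently meant to be filled with str.format and answered in JSON. A minimal consumption sketch follows; the helper names and the serialization of the entity list are assumptions, while the dict lookup, the placeholders, and the 0.4/0.4/0.2 weights come from this file:

```python
import json

# Re-exported by graphgen/templates/__init__.py in this commit.
from graphgen.templates import ACCURACY_EVALUATION_PROMPT


def build_entity_eval_prompt(chunk_content: str, entities: list, lang: str = "en") -> str:
    # Pick language ("zh"/"en") and prompt type ("ENTITY"/"RELATION"), then
    # fill the placeholders the template defines.
    template = ACCURACY_EVALUATION_PROMPT[lang]["ENTITY"]
    return template.format(
        chunk_content=chunk_content,
        extracted_entities=json.dumps(entities, ensure_ascii=False),
    )


def parse_eval_response(raw: str) -> dict:
    # The prompt instructs the model to return a JSON object; recompute the
    # weighted score (0.4*accuracy + 0.4*completeness + 0.2*precision) as a
    # sanity check against the model-reported overall_score.
    result = json.loads(raw)
    result["overall_score_check"] = round(
        0.4 * result["accuracy"]
        + 0.4 * result["completeness"]
        + 0.2 * result["precision"],
        3,
    )
    return result
```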
