Skip to content

Commit c5407b5

Browse files
feat: add schema_guided_extraction config
1 parent b7af2e4 commit c5407b5

1 file changed

Lines changed: 4 additions & 18 deletions

File tree

Lines changed: 4 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -1,22 +1,8 @@
11
read:
22
input_file: resources/input_examples/jsonl_demo.jsonl # input file path; supports json, jsonl, txt, and pdf. See resources/input_examples for examples.
33
split:
4-
chunk_size: 1024 # chunk size for text splitting
4+
chunk_size: 10240 # chunk size for text splitting
55
chunk_overlap: 100 # chunk overlap for text splitting
6-
search: # web search configuration
7-
enabled: false # whether to enable web search
8-
search_types: ["google"] # search engine types, support: google, bing, uniprot, wikipedia
9-
quiz_and_judge: # quiz and test whether the LLM masters the knowledge points
10-
enabled: true
11-
quiz_samples: 2 # number of quiz samples to generate
12-
re_judge: false # whether to re-judge the existing quiz samples
13-
partition: # graph partition configuration
14-
method: ece # ece is a custom partition method based on comprehension loss
15-
method_params:
16-
max_units_per_community: 20 # max nodes and edges per community
17-
min_units_per_community: 5 # min nodes and edges per community
18-
max_tokens_per_community: 10240 # max tokens per community
19-
unit_sampling: max_loss # unit sampling strategy, support: random, max_loss, min_loss
20-
generate:
21-
mode: aggregated # atomic, aggregated, multi_hop, cot, vqa
22-
data_format: ChatML # Alpaca, Sharegpt, ChatML
6+
extract:
7+
method: schema_guided # extraction method; supported: schema_guided
8+
schema_file: resources/schemas/legal_contract.json # schema file path for schema_guided method

0 commit comments

Comments
 (0)