Skip to content

Commit 8b68414

Browse files
committed
improvement(app.py): use async for
1 parent 855217c commit 8b68414

File tree

6 files changed

+28
-20
lines changed

6 files changed

+28
-20
lines changed

README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,3 +108,9 @@ Experience it on the [OpenXLab Application Center](https://openxlab.org.cn/apps/
108108

109109
### Workflow
110110
![workflow](resources/images/flow.png)
111+
112+
113+
## 🍀 Acknowledgements
114+
- [SiliconCloud](https://siliconflow.cn) Abundant LLM API, some models are free
115+
- [LightRAG](https://github.com/HKUDS/LightRAG) Simple and efficient graph retrieval solution
116+
- [ROGRAG](https://github.com/tpoisonooo/ROGRAG) ROGRAG: A Robustly Optimized GraphRAG Framework

graphgen/graphgen.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -85,8 +85,8 @@ async def async_split_chunks(self, data: Union[List[list], List[dict]], data_typ
8585

8686
cur_index = 1
8787
doc_number = len(new_docs)
88-
for doc_key, doc in tqdm_async(
89-
new_docs.items(), desc="Chunking documents", unit="doc"
88+
async for doc_key, doc in tqdm_async(
89+
new_docs.items(), desc="[1/4]Chunking documents", unit="doc"
9090
):
9191
chunks = {
9292
compute_content_hash(dp["content"], prefix="chunk-"): {
@@ -117,7 +117,7 @@ async def async_split_chunks(self, data: Union[List[list], List[dict]], data_typ
117117
logger.warning("All docs are already in the storage")
118118
return {}
119119
logger.info("[New Docs] inserting %d docs", len(new_docs))
120-
for doc in tqdm_async(data, desc="Chunking documents", unit="doc"):
120+
async for doc in tqdm_async(data, desc="[1/4]Chunking documents", unit="doc"):
121121
doc_str = "".join([chunk['content'] for chunk in doc])
122122
for chunk in doc:
123123
chunk_key = compute_content_hash(chunk['content'], prefix="chunk-")

graphgen/operators/extract_kg.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -103,16 +103,16 @@ async def _process_single_content(chunk: Chunk, max_loop: int = 3):
103103

104104
results = []
105105
chunk_number = len(chunks)
106-
for result in tqdm_async(
106+
async for result in tqdm_async(
107107
asyncio.as_completed([_process_single_content(c) for c in chunks]),
108108
total=len(chunks),
109-
desc="Extracting entities and relationships from chunks",
109+
desc="[3/4]Extracting entities and relationships from chunks",
110110
unit="chunk",
111111
):
112112
try:
113113
results.append(await result)
114114
if progress_bar is not None:
115-
progress_bar(len(results) / chunk_number, desc="Extracting entities and relationships from chunks")
115+
progress_bar(len(results) / chunk_number, desc="[3/4]Extracting entities and relationships from chunks")
116116
except Exception as e: # pylint: disable=broad-except
117117
logger.error("Error occurred while extracting entities and relationships from chunks: %s", e)
118118

graphgen/operators/traverse_graph.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -292,11 +292,11 @@ async def _process_single_batch(
292292

293293
for result in tqdm_async(asyncio.as_completed(
294294
[_process_single_batch(batch) for batch in processing_batches]
295-
), total=len(processing_batches), desc="Generating QAs"):
295+
), total=len(processing_batches), desc="[4/4]Generating QAs"):
296296
try:
297297
results.update(await result)
298298
if progress_bar is not None:
299-
progress_bar(len(results) / len(processing_batches), desc="Generating QAs")
299+
progress_bar(len(results) / len(processing_batches), desc="[4/4]Generating QAs")
300300
except Exception as e: # pylint: disable=broad-except
301301
logger.error("Error occurred while generating QA: %s", e)
302302

@@ -398,12 +398,12 @@ async def _generate_question(
398398
for result in tqdm_async(
399399
asyncio.as_completed([_generate_question(task) for task in tasks]),
400400
total=len(tasks),
401-
desc="Generating QAs"
401+
desc="[4/4]Generating QAs"
402402
):
403403
try:
404404
results.update(await result)
405405
if progress_bar is not None:
406-
progress_bar(len(results) / len(tasks), desc="Generating QAs")
406+
progress_bar(len(results) / len(tasks), desc="[4/4]Generating QAs")
407407
except Exception as e: # pylint: disable=broad-except
408408
logger.error("Error occurred while generating QA: %s", e)
409409
return results
@@ -507,15 +507,15 @@ async def _process_single_batch(
507507
logger.error("Error occurred while processing batch: %s", e)
508508
return {}
509509

510-
for result in tqdm_async(
510+
async for result in tqdm_async(
511511
asyncio.as_completed([_process_single_batch(batch) for batch in processing_batches]),
512512
total=len(processing_batches),
513-
desc="Generating QAs"
513+
desc="[4/4]Generating QAs"
514514
):
515515
try:
516516
results.update(await result)
517517
if progress_bar is not None:
518-
progress_bar(len(results) / len(processing_batches), desc="Generating QAs")
518+
progress_bar(len(results) / len(processing_batches), desc="[4/4]Generating QAs")
519519
except Exception as e: # pylint: disable=broad-except
520520
logger.error("Error occurred while generating QA: %s", e)
521521
return results

webui/app.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ def sum_tokens(client):
119119
# Initialize GraphGen
120120
graph_gen = init_graph_gen(config, env)
121121
graph_gen.clear()
122-
progress(0.2, "Model Initialized")
122+
progress(0.2, "[2/4]Model Initialized")
123123

124124
graph_gen.progress_bar = progress
125125

@@ -378,7 +378,7 @@ def sum_tokens(client):
378378
with gr.Column():
379379
rpm = gr.Slider(
380380
label="RPM",
381-
minimum=500,
381+
minimum=10,
382382
maximum=10000,
383383
value=1000,
384384
step=100,
@@ -388,7 +388,7 @@ def sum_tokens(client):
388388
tpm = gr.Slider(
389389
label="TPM",
390390
minimum=5000,
391-
maximum=100000,
391+
maximum=5000000,
392392
value=50000,
393393
step=1000,
394394
interactive=True,
@@ -435,9 +435,11 @@ def sum_tokens(client):
435435
test_api_connection,
436436
inputs=[base_url, api_key, synthesizer_model],
437437
outputs=[])
438-
test_connection_btn.click(test_api_connection,
439-
inputs=[base_url, api_key, trainee_model],
440-
outputs=[])
438+
439+
if if_trainee_model.value:
440+
test_connection_btn.click(test_api_connection,
441+
inputs=[base_url, api_key, trainee_model],
442+
outputs=[])
441443

442444
expand_method.change(lambda method:
443445
(gr.update(visible=method == "max_width"),

webui/translation.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
},
1717
"zh": {
1818
"Title": "✨开箱即用的LLM训练数据生成框架✨",
19-
"Intro": "是一个基于知识图谱的合成数据生成框架,旨在解决知识密集型问答生成的挑战\n\n 上传你的文本块(如农业、医疗、海洋知识),填写 LLM api key,即可在线生成 **[LLaMA-Factory](https://github.com/hiyouga/LLaMA-Factory)**、**[xtuner](https://github.com/InternLM/xtuner)** 所需训练数据。结束后我们将自动删除用户信息。",
19+
"Intro": "是一个基于知识图谱的数据合成框架,旨在知识密集型任务中生成问答\n\n 上传你的文本块(如农业、医疗、海洋知识),填写 LLM api key,即可在线生成 **[LLaMA-Factory](https://github.com/hiyouga/LLaMA-Factory)**、**[xtuner](https://github.com/InternLM/xtuner)** 所需训练数据。结束后我们将自动删除用户信息。",
2020
"Use Trainee Model": "使用Trainee Model来识别知识盲区,使用硅基流动时请保持禁用",
2121
"Base URL Info": "调用模型API的URL,默认使用硅基流动",
2222
"Synthesizer Model Info": "用于构建知识图谱和生成问答的模型",

0 commit comments

Comments (0)