diff --git a/docs/en/notes/api/operators/pdf2vqa/generate/LLMOutputParser.md b/docs/en/notes/api/operators/pdf2vqa/generate/LLMOutputParser.md
index 249001a00..68662c931 100644
--- a/docs/en/notes/api/operators/pdf2vqa/generate/LLMOutputParser.md
+++ b/docs/en/notes/api/operators/pdf2vqa/generate/LLMOutputParser.md
@@ -1,7 +1,7 @@
 ---
 title: LLMOutputParser
 createTime: 2026/01/20 20:15:00
-permalink: /en/api/operators/core_text/parse/llmoutputparser/
+permalink: /en/api/operators/pdf2vqa/generate/llmoutputparser/
 ---
 
 ## 📘 Overview
@@ -16,8 +16,7 @@ The core functionalities of this operator include:
 ## `__init__` Function
 
 ```python
-def __init__(self, 
-             mode: Literal['question', 'answer'], 
+def __init__(self,  
              output_dir: str, 
              intermediate_dir: str = "intermediate"
              )
@@ -28,7 +27,6 @@ def __init__(self,
 
 | Parameter | Type | Default | Description |
 | --- | --- | --- | --- |
-| **mode** | str | Required | Parsing mode. Options are `'question'` or `'answer'`, which affects the output filename and the image subdirectory name. |
 | **output_dir** | str | Required | The final root directory for structured data and images. |
 | **intermediate_dir** | str | "intermediate" | The intermediate directory where original image resources processed by MinerU are located. |
 
@@ -76,7 +74,7 @@ Suppose the LLM returns: `<question>1, 3</question>`
 The operator looks up entries with `id` 1 and 3 in the layout JSON:
 
 * If `id: 1` is the text "What is AI?" and `id: 3` is the image `path/to/img.png`.
-* The restored content will be: `What is AI?\n![image](images/img.png)`.
+* The restored content will be: `What is AI?\n![image](vqa_images/img.png)`.
 
 ### 2. Output File Structure
 
@@ -86,7 +84,7 @@ After execution, the directory structure under `output_dir` (referenced as `cach
 output_dir/
 └── {name}/
     ├── extracted_questions.jsonl  # Structured data
-    └── question_images/           # Automatically synchronized images
+    └── vqa_images/                # Automatically synchronized images
         ├── img1.png
         └── ...
 
@@ -96,7 +94,7 @@ output_dir/
 
 ```json
 {
-  "question": "Please analyze the image below:\n![image](question_images/fig1.png)",
+  "question": "Please analyze the image below:\n![image](vqa_images/img1.png)",
   "answer": "This is the parsed answer text.",
   "solution": "Detailed step-by-step solution...",
   "label": "1",
diff --git a/docs/en/notes/api/operators/pdf2vqa/generate/MineruToLLMInputOperator.md b/docs/en/notes/api/operators/pdf2vqa/generate/MineruToLLMInputOperator.md
index 6d056fe5a..523dbafee 100644
--- a/docs/en/notes/api/operators/pdf2vqa/generate/MineruToLLMInputOperator.md
+++ b/docs/en/notes/api/operators/pdf2vqa/generate/MineruToLLMInputOperator.md
@@ -1,7 +1,7 @@
 ---
 title: MinerU2LLMInputOperator
 createTime: 2026/01/20 20:10:00
-permalink: /en/api/operators/core_text/convert/mineru2llminputoperator/
+permalink: /en/api/operators/pdf2vqa/generate/mineru2llminputoperator/
 ---
 
 ## 📘 Overview
diff --git a/docs/en/notes/api/operators/pdf2vqa/generate/QAMerger.md b/docs/en/notes/api/operators/pdf2vqa/generate/QAMerger.md
index 3b7a70e01..6dfadb0d4 100644
--- a/docs/en/notes/api/operators/pdf2vqa/generate/QAMerger.md
+++ b/docs/en/notes/api/operators/pdf2vqa/generate/QAMerger.md
@@ -1,7 +1,7 @@
 ---
 title: QA_Merger
 createTime: 2026/01/20 20:25:00
-permalink: /en/api/operators/core_text/merge/qamerger/
+permalink: /en/api/operators/pdf2vqa/generate/qamerger/
 ---
 
 ## 📘 Overview
diff --git a/docs/en/notes/guide/quickstart/PDFVQAExtract.md b/docs/en/notes/guide/quickstart/PDFVQAExtract.md
index cedfea0f7..c80527992 100644
--- a/docs/en/notes/guide/quickstart/PDFVQAExtract.md
+++ b/docs/en/notes/guide/quickstart/PDFVQAExtract.md
@@ -22,7 +22,7 @@ Major stages:
 
 ## 2. Quick Start
 
-### Step 1: Install Dataflow (and MinerU)
+### Step 1: Install Dataflow
 Install Dataflow:
 ```shell
 pip install "open-dataflow[pdf2vqa]"
@@ -35,12 +35,6 @@ cd Dataflow
 pip install -e ".[pdf2vqa]"
 ```
 
-Then install MinerU and download models:
-```shell
-pip install "mineru[vllm]>=2.5.0,<2.7.0"
-mineru-models-download
-```
-
 ### Step 2: Create a workspace
 ```shell
 cd /your/working/directory
@@ -55,13 +49,18 @@ dataflow init
 You can then add your pipeline script under `pipelines/` or any custom path.
 
 ### Step 4: Configure API credentials
+`DF_API_KEY` is used to call the LLM API, and `MINERU_API_KEY` is used to call MinerU for layout analysis.
+`MINERU_API_KEY` can be obtained from https://mineru.net/apiManage/token, and `DF_API_KEY` can be obtained from your LLM provider (e.g., OpenAI or Google Gemini). Set them as environment variables:
+
 Linux / macOS:
 ```shell
 export DF_API_KEY="sk-xxxxx"
+export MINERU_API_KEY="sk2-xxxxx"
 ```
 Windows PowerShell:
 ```powershell
 $env:DF_API_KEY = "sk-xxxxx"
+$env:MINERU_API_KEY = "sk2-xxxxx"
 ```
 In the pipeline script, set your API endpoint:
 ```python
@@ -72,12 +71,7 @@ self.llm_serving = APILLMServing_request(
     max_workers=100,
 )
 ```
-and set MinerU backend ('vlm-vllm-engine' or 'vlm-transformers') and LLM max token length (recommended not to exceed 128000 to avoid LLM forgetting details).
-**Caution: The pipeline was only tested with the `vlm` backend; compatibility with the `pipeline` backend is uncertain due to format differences. Using the `vlm` backend is recommended.**
-The `vlm-vllm-engine` backend requires GPU support.
-```python
-self.mineru_executor = FileOrURLToMarkdownConverterBatch(intermediate_dir = "intermediate", mineru_backend="vlm-vllm-engine")
-```
+and set LLM max token length (recommended not to exceed 128000 to avoid LLM forgetting details).
 
 ```python
 self.vqa_extractor = ChunkedPromptedGenerator(
@@ -97,16 +91,12 @@ You can also import the operators into other workflows; the remainder of this do
 
 ### 1. Input data
 
-Each job is defined by a JSONL row. Two modes are supported:
+Each job is defined by a JSONL row. `input_pdf_paths` can be a single PDF path or a list of PDF paths; when a list is given, question PDFs must come before answer PDFs. `name` is an identifier for the job. Questions and answers can be interleaved or separated, and they can come from the same PDF or from different PDFs.
 
-- **QA-Separated PDFs**
-  ```jsonl
-  {"question_pdf_path": "/abs/path/questions.pdf", "answer_pdf_path": "/abs/path/answers.pdf", "subject": "math", "output_dir": "./output/math"}
-  ```
-- **QA-Interleaved PDFs**
-  ```jsonl
-  {"question_pdf_path": "/abs/path/qa.pdf", "answer_pdf_path": "/abs/path/qa.pdf", "name": "math2"}
-  ```
+```jsonl
+{"input_pdf_paths": "./example_data/PDF2VQAPipeline/questionextract_test.pdf", "name": "math1"}
+{"input_pdf_paths": ["./example_data/PDF2VQAPipeline/math_question.pdf", "./example_data/PDF2VQAPipeline/math_answer.pdf"], "name": "math2"}
+```
 
 `FileStorage` handles batching/cache management:
 ```python
@@ -120,15 +110,48 @@ self.storage = FileStorage(
 
 ### 2. Document layout extraction (MinerU)
 
-For each PDF (question, answer, or mixed), the pipeline calls `_parse_file_with_mineru` inside `FileOrURLToMarkdownConverterBatch`. MinerU outputs:
+For each PDF (question, answer, or mixed), the pipeline calls `_parse_file_with_mineru` inside `FileOrURLToMarkdownConverterAPI`. MinerU outputs:
 
-- `<book>/<backend>/<book>_content_list.json`: structured layout tokens (texts, figures, tables, IDs)
-- `<book>/<backend>/images/`: cropped page images
+- `*_content_list.json`: structured layout tokens (texts, figures, tables, IDs)
+- `images/`: cropped page images
 
-The backend can be:
+---
+**Note**:
+If you want to use a locally deployed MinerU model, you can replace the operator with `FileOrURLToMarkdownConverterLocal` (the original version from opendatalab) or `FileOrURLToMarkdownConverterFlash` (our accelerated version), and provide the corresponding model path and deployment parameters.
 
-- `vlm-transformers`: CPU/GPU compatible
-- `vlm-vllm-engine`: high-throughput GPU mode (requires CUDA)
+For example:
+
+```python
+self.mineru_executor = FileOrURLToMarkdownConverterAPI(intermediate_dir = "intermediate")
+```
+
+can be replaced with
+
+```python
+self.mineru_executor = FileOrURLToMarkdownConverterLocal(
+    intermediate_dir = "intermediate",
+    mineru_model_path = "path/to/mineru/model",
+)
+```
+
+or
+
+```python
+self.mineru_executor = FileOrURLToMarkdownConverterFlash(
+    intermediate_dir = "intermediate",
+    mineru_model_path = "path/to/mineru/model",
+    batch_size = 4,
+    replicas = 1,
+    num_gpus_per_replica = 1,
+    engine_gpu_util_rate_to_ray_cap = 0.9
+)
+```
+
+You can refer to https://github.com/OpenDCAI/DataFlow/blob/main/dataflow/operators/knowledge_cleaning/generate/mineru_operators.py for specific parameters and usage.
+
+---
+
+Afterwards, the `MinerU2LLMInputOperator` flattens list items and re-indexes them to create LLM-friendly input.
 
 ### 3. QA extraction (VQAExtractor)
 
@@ -136,7 +159,7 @@ The backend can be:
 
 - Grouping and pairing Q&A based, and inserting images to proper positions.
 - Supports QA separated or interleaved PDFs.
-- Copies rendered images into `output_dir/question_images` and/or `answer_images`.
+- Copies rendered images into `cache_path/name/vqa_images`.
 - Parses `<qa_pair>`, `<question>`, `<answer>`, `<solution>`, `<chapter>`, `<label>` tags from the LLM response.
 
 ### 4. Post-processing and outputs
@@ -155,11 +178,10 @@ This operator includes a `strict_title_match` parameter:
 
 For each `output_dir` (under cache_path/name/), the pipeline writes:
 
-1. `vqa_extracted_questions.jsonl`
-2. `vqa_extracted_answers.jsonl`
-3. `vqa_merged_qa_pairs.jsonl`
-4. `vqa_merged_qa_pairs.md`
-5. `question_images/`, `answer_images/` (depending on mode)
+1. `extracted_vqa.jsonl` (extracted questions and answers, could be separate or interleaved depending on input)
+2. `merged_qa_pairs.jsonl` (fully merged question-answer pairs)
+3. `merged_qa_pairs.md` (markdown version of the merged QA pairs)
+4. `vqa_images/` (containing all images extracted for the QA pairs)
 
 Furthermore, the final step of the cache main file will contain all extracted qa pairs, making it easier to connect subsequent operators for downstream post-processing.
 
@@ -185,17 +207,19 @@ Example:
 ## 5. Pipeline Example
 
 ```python
-from dataflow.operators.knowledge_cleaning import FileOrURLToMarkdownConverterBatch
+from dataflow.operators.knowledge_cleaning import FileOrURLToMarkdownConverterAPI
 
 from dataflow.serving import APILLMServing_request
 from dataflow.utils.storage import FileStorage
-from dataflow.operators.pdf2vqa import MinerU2LLMInputOperator, LLMOutputParser, QA_Merger
+from dataflow.operators.pdf2vqa import MinerU2LLMInputOperator, LLMOutputParser, QA_Merger, PDF_Merger
 from dataflow.operators.core_text import ChunkedPromptedGenerator
 
 from dataflow.pipeline import PipelineABC
 from dataflow.prompts.pdf2vqa import QAExtractPrompt
 
-class VQA_extract_optimized_pipeline(PipelineABC):
+from pypdf import PdfWriter
+    
+class PDF_VQA_extract_optimized_pipeline(PipelineABC):
     def __init__(self):
         super().__init__()
         self.storage = FileStorage(
@@ -214,82 +238,59 @@ class VQA_extract_optimized_pipeline(PipelineABC):
         
         self.vqa_extract_prompt = QAExtractPrompt()
         
-        self.mineru_executor = FileOrURLToMarkdownConverterBatch(intermediate_dir = "intermediate", mineru_backend="vlm-vllm-engine")
+        self.pdf_merger = PDF_Merger(output_dir="./cache")
+        self.mineru_executor = FileOrURLToMarkdownConverterAPI(intermediate_dir = "intermediate")
         self.input_formatter = MinerU2LLMInputOperator()
         self.vqa_extractor = ChunkedPromptedGenerator(
             llm_serving=self.llm_serving,
             system_prompt = self.vqa_extract_prompt.build_prompt(),
             max_chunk_len=128000,
         )
-        self.llm_output_question_parser = LLMOutputParser(mode="question", output_dir="./cache", intermediate_dir="intermediate")
-        self.llm_output_answer_parser = LLMOutputParser(mode="answer", output_dir="./cache", intermediate_dir="intermediate")
+        self.llm_output_parser = LLMOutputParser(output_dir="./cache", intermediate_dir="intermediate")
         self.qa_merger = QA_Merger(output_dir="./cache", strict_title_match=False)
     def forward(self):
-        # The current processing logic is: MinerU processes questions -> MinerU processes answers -> Format question text -> Format answer text -> Input question text into LLM -> Input answer text into LLM -> Parse question output -> Parse answer output -> Merge QA pairs.
-        # Since QA pairs may originate from the same PDF or different PDFs, and DataFlow currently does not support branching, both question and answer PDFs must be processed even when they are the same PDF.
-        # This means if they come from the same PDF, it will be processed twice before the final QA merging step.
-        # Future optimizations will be considered to refine this workflow, avoid redundant processing of the same PDF, and improve performance.
-        
-        self.mineru_executor.run(
+        self.pdf_merger.run(
             storage=self.storage.step(),
-            input_key="question_pdf_path",
-            output_key="question_markdown_path",
+            input_pdf_list_key="input_pdf_paths",
+            input_name_key="name",
+            output_pdf_path_key="merged_pdf_path",
         )
         self.mineru_executor.run(
             storage=self.storage.step(),
-            input_key="answer_pdf_path",
-            output_key="answer_markdown_path",
+            input_key="merged_pdf_path",
+            output_key="vqa_markdown_path",
         )
         self.input_formatter.run(
             storage=self.storage.step(),
-            input_markdown_path_key="question_markdown_path",
-            output_converted_layout_key="converted_question_layout_path",
-        )
-        self.input_formatter.run(
-            storage=self.storage.step(),
-            input_markdown_path_key="answer_markdown_path",
-            output_converted_layout_key="converted_answer_layout_path",
+            input_markdown_path_key="vqa_markdown_path",
+            output_converted_layout_key="converted_vqa_layout_path",
         )
         self.vqa_extractor.run(
             storage=self.storage.step(),
-            input_path_key="converted_question_layout_path",
-            output_path_key="vqa_extracted_questions_path",
-        )
-        self.vqa_extractor.run(
-            storage=self.storage.step(),
-            input_path_key="converted_answer_layout_path",
-            output_path_key="vqa_extracted_answers_path",
-        )
-        self.llm_output_question_parser.run(
-            storage=self.storage.step(),
-            input_response_path_key="vqa_extracted_questions_path",
-            input_converted_layout_path_key="converted_question_layout_path",
-            input_name_key="name",
-            output_qalist_path_key="extracted_questions_path",
+            input_path_key="converted_vqa_layout_path",
+            output_path_key="extracted_llm_vqa_path",
         )
-        self.llm_output_answer_parser.run(
+        self.llm_output_parser.run(
             storage=self.storage.step(),
-            input_response_path_key="vqa_extracted_answers_path",
-            input_converted_layout_path_key="converted_answer_layout_path",
+            input_response_path_key="extracted_llm_vqa_path",
+            input_converted_layout_path_key="converted_vqa_layout_path",
             input_name_key="name",
-            output_qalist_path_key="extracted_answers_path",
+            output_qalist_path_key="extracted_vqa_path",
         )
         self.qa_merger.run(
             storage=self.storage.step(),
-            input_question_qalist_path_key="extracted_questions_path",
-            input_answer_qalist_path_key="extracted_answers_path",
+            input_qalist_path_key="extracted_vqa_path",
             input_name_key="name",
-            output_merged_qalist_path_key="output_merged_qalist_path",
+            output_merged_qalist_path_key="output_merged_vqalist_path",
             output_merged_md_path_key="output_merged_md_path",
-            output_qa_item_key="qa_pair",
+            output_qa_item_key="vqa_pair",
         )
 
 
 
 if __name__ == "__main__":
-    # Each line in the JSONL file contains `question_pdf_path`, `answer_pdf_path`, and `name` (e.g., math1, math2, physics1, chemistry1, ...).
-    # If the questions and answers are located within the same PDF, set both question_pdf_path and answer_pdf_path to the same file path.
-    pipeline = VQA_extract_optimized_pipeline()
+    # Each line in the JSONL file contains `input_pdf_paths` and `name` (e.g., math1, math2, physics1, chemistry1, ...).
+    pipeline = PDF_VQA_extract_optimized_pipeline()
     pipeline.compile()
     pipeline.forward()
 ```
diff --git a/docs/zh/notes/api/operators/pdf2vqa/generate/LLMOutputParser.md b/docs/zh/notes/api/operators/pdf2vqa/generate/LLMOutputParser.md
index 31c7cfe68..44453d139 100644
--- a/docs/zh/notes/api/operators/pdf2vqa/generate/LLMOutputParser.md
+++ b/docs/zh/notes/api/operators/pdf2vqa/generate/LLMOutputParser.md
@@ -1,7 +1,7 @@
 ---
 title: LLMOutputParser
 createTime: 2026/01/20 20:15:00
-permalink: /zh/api/operators/core_text/parse/llmoutputparser/
+permalink: /zh/api/operators/pdf2vqa/generate/llmoutputparser/
 ---
 
 ## 📘 概述
@@ -16,8 +16,7 @@ permalink: /zh/api/operators/core_text/parse/llmoutputparser/
 ## `__init__` 函数
 
 ```python
-def __init__(self, 
-             mode: Literal['question', 'answer'], 
+def __init__(self,  
              output_dir: str, 
              intermediate_dir: str = "intermediate"
              )
@@ -28,7 +27,6 @@ def __init__(self,
 
 | 参数名 | 类型 | 默认值 | 说明 |
 | --- | --- | --- | --- |
-| **mode** | str | 必需 | 解析模式。可选 `'question'` 或 `'answer'`，影响输出文件名及图片子目录名。 |
 | **output_dir** | str | 必需 | 结构化数据及图片的最终输出根目录。 |
 | **intermediate_dir** | str | "intermediate" | 中间件目录，用于寻找 MinerU 处理后的原始图片资源。 |
 
@@ -76,7 +74,7 @@ def run(self,
 算子会查找布局 JSON 中 `id` 为 1 和 3 的项：
 
 * 如果 `id: 1` 是文本 "什么是 AI？"，`id: 3` 是图片 `path/to/img.png`。
-* 还原后的内容为：`什么是 AI？\n![image](images/img.png)`。
+* 还原后的内容为：`什么是 AI？\n![image](vqa_images/img.png)`。
 
 ### 2. 输出文件结构
 
@@ -86,7 +84,7 @@ def run(self,
 cache_path/
 └── {name}/
     ├── extracted_questions.jsonl  # 结构化数据
-    └── question_images/           # 自动同步过来的图片
+    └── vqa_images/                # 自动同步过来的图片
         ├── img1.png
         └── ...
 
@@ -96,7 +94,7 @@ cache_path/
 
 ```json
 {
-  "question": "请看下图并分析：\n![image](question_images/fig1.png)",
+  "question": "请看下图并分析：\n![image](vqa_images/img1.png)",
   "answer": "这是解析后的答案文本",
   "solution": "详细的解题步骤...",
   "label": "1",
diff --git a/docs/zh/notes/api/operators/pdf2vqa/generate/MineruToLLMInputOperator.md b/docs/zh/notes/api/operators/pdf2vqa/generate/MineruToLLMInputOperator.md
index a12d60541..7adde1fa3 100644
--- a/docs/zh/notes/api/operators/pdf2vqa/generate/MineruToLLMInputOperator.md
+++ b/docs/zh/notes/api/operators/pdf2vqa/generate/MineruToLLMInputOperator.md
@@ -1,7 +1,7 @@
 ---
 title: MinerU2LLMInputOperator
 createTime: 2026/01/20 20:10:00
-permalink: /zh/api/operators/core_text/convert/mineru2llminputoperator/
+permalink: /zh/api/operators/pdf2vqa/generate/mineru2llminputoperator/
 ---
 
 ## 📘 概述
diff --git a/docs/zh/notes/api/operators/pdf2vqa/generate/QAMerger.md b/docs/zh/notes/api/operators/pdf2vqa/generate/QAMerger.md
index d0d7f1914..ced34345e 100644
--- a/docs/zh/notes/api/operators/pdf2vqa/generate/QAMerger.md
+++ b/docs/zh/notes/api/operators/pdf2vqa/generate/QAMerger.md
@@ -1,7 +1,7 @@
 ---
 title: QA_Merger
 createTime: 2026/01/20 20:25:00
-permalink: /zh/api/operators/core_text/merge/qamerger/
+permalink: /zh/api/operators/pdf2vqa/generate/qamerger/
 ---
 
 ## 📘 概述
diff --git a/docs/zh/notes/guide/quickstart/PDFVQAExtract.md b/docs/zh/notes/guide/quickstart/PDFVQAExtract.md
index d9d8ade80..2b76cafab 100644
--- a/docs/zh/notes/guide/quickstart/PDFVQAExtract.md
+++ b/docs/zh/notes/guide/quickstart/PDFVQAExtract.md
@@ -22,7 +22,7 @@ icon: heroicons:document-text
 
 ## 2. 快速开始
 
-### 步骤 1：安装 Dataflow（以及 MinerU）
+### 步骤 1：安装 Dataflow
 安装 Dataflow：
 ```shell
 pip install "open-dataflow[pdf2vqa]"
@@ -35,12 +35,6 @@ cd Dataflow
 pip install -e ".[pdf2vqa]"
 ```
 
-安装 MinerU 及下载模型：
-```shell
-pip install "mineru[vllm]>=2.5.0,<2.7.0"
-mineru-models-download
-```
-
 ### 步骤 2：创建工作区
 ```shell
 cd /your/working/directory
@@ -55,13 +49,19 @@ dataflow init
 初始化后即可在 `pipelines/` 或任意自定义目录编写脚本。
 
 ### 步骤 4：配置 API 凭证
+
+`DF_API_KEY` 用于调用 LLM API，`MINERU_API_KEY` 用于调用 MinerU 进行布局解析。
+`MINERU_API_KEY` 可以在 https://mineru.net/apiManage/token 获取，`DF_API_KEY` 可以在你的 LLM 提供商处获取（例如 OpenAI、Google Gemini 等）。将它们设置为环境变量：
+
 Linux / macOS:
 ```shell
 export DF_API_KEY="sk-xxxxx"
+export MINERU_API_KEY="sk2-xxxxx"
 ```
 Windows PowerShell:
 ```powershell
 $env:DF_API_KEY = "sk-xxxxx"
+$env:MINERU_API_KEY = "sk2-xxxxx"
 ```
 在脚本中设置接口：
 ```python
@@ -72,12 +72,7 @@ self.llm_serving = APILLMServing_request(
     max_workers=100,
 )
 ```
-并设置MinerU后端（'vlm-vllm-engine'或者'vlm-transformers'）和LLM最大token数量（建议不要设置大于128000，否则LLM因为无法记住细节而效果不好）。`vlm-vllm-engine` 模式需要 GPU。
-**目前这个pipeline只在`vlm`后端下经过测试，不确定是否能支持`pipeline`后端，根据官方文档两个后端格式有区别，因此建议使用`vlm`后端。**
-```python
-self.mineru_executor = FileOrURLToMarkdownConverterBatch(intermediate_dir = "intermediate", mineru_backend="vlm-vllm-engine")
-```
-
+并设置LLM最大token数量（建议不要设置大于128000，否则LLM因为无法记住细节而效果不好）。
 ```python
 self.vqa_extractor = ChunkedPromptedGenerator(
     llm_serving=self.llm_serving,
@@ -96,17 +91,12 @@ python api_pipelines/pdf_vqa_extract_pipeline.py
 
 ### 1. 输入数据
 
-使用 JSONL 描述任务，支持两种模式：
+使用 JSONL 描述任务，每行包含 `input_pdf_paths` 和 `name`。`input_pdf_paths` 可以是单个 PDF 或 PDF 列表（问题在前，答案在后）。`name` 是该任务的标识符。问题和答案可以交错或者分开；它们可以来自同一 PDF 或不同 PDF。
 
-- **题答分离**
-  ```jsonl
-  {"question_pdf_path": "/abs/path/questions.pdf", "answer_pdf_path": "/abs/path/answers.pdf", "name": "math1"}
-  ```
-- **题答混排**
-  问题和答案设置成同一个pdf即可
-  ```jsonl
-  {"question_pdf_path": "/abs/path/qa.pdf", "answer_pdf_path": "/abs/path/qa.pdf", "name": "math2"}
-  ```
+```jsonl
+{"input_pdf_paths": "./example_data/PDF2VQAPipeline/questionextract_test.pdf", "name": "math1"}
+{"input_pdf_paths": ["./example_data/PDF2VQAPipeline/math_question.pdf", "./example_data/PDF2VQAPipeline/math_answer.pdf"], "name": "math2"}
+```
 
 `FileStorage` 负责读取与缓存：
 ```python
@@ -120,17 +110,48 @@ self.storage = FileStorage(
 
 ### 2. 文档布局解析（MinerU）
 
-对每个 PDF（题目、答案或混排）调用 `FileOrURLToMarkdownConverterBatch` 内部的 `_parse_file_with_mineru`，MinerU 会产出：
+对每个 PDF（题目、答案或混排）调用 `FileOrURLToMarkdownConverterAPI` 内部的 `_parse_file_with_mineru`，MinerU 会产出：
+
+- `*_content_list.json`：结构化布局 token
+- `images/`：对应页面切图
+
+---
+**注意**：
+如果想要使用本地部署的 MinerU 模型，可以将算子替换为 `FileOrURLToMarkdownConverterLocal`（opendatalab 原版）或 `FileOrURLToMarkdownConverterFlash`（我们的加速版），并提供相应的模型路径和部署参数。
+
+例如：
+
+```python
+self.mineru_executor = FileOrURLToMarkdownConverterAPI(intermediate_dir = "intermediate")
+```
+
+可以等价替换为
+
+```python
+self.mineru_executor = FileOrURLToMarkdownConverterLocal(
+    intermediate_dir = "intermediate",
+    mineru_model_path = "path/to/mineru/model",
+)
+```
+
+或者
 
-- `<book>/<backend>/<book>_content_list.json`：结构化布局 token
-- `<book>/<backend>/images/`：对应页面切图
+```python
+self.mineru_executor = FileOrURLToMarkdownConverterFlash(
+    intermediate_dir = "intermediate",
+    mineru_model_path = "path/to/mineru/model",
+    batch_size = 4,
+    replicas = 1,
+    num_gpus_per_replica = 1,
+    engine_gpu_util_rate_to_ray_cap = 0.9
+)
+```
 
-可选后端：
+具体参数和使用方法可以参考 https://github.com/OpenDCAI/DataFlow/blob/main/dataflow/operators/knowledge_cleaning/generate/mineru_operators.py 。
 
-- `vlm-transformers`：CPU/GPU 均可
-- `vlm-vllm-engine`：高吞吐 GPU 模式（需 CUDA）
+---
 
-之后会使用MinerU2LLMInputOperator处理成给llm的输入，主要包括展平列表项并重新编号。
+之后会使用`MinerU2LLMInputOperator`处理成给llm的输入，主要包括展平列表项并重新编号。
 
 ### 3. 问答抽取（VQAExtractor）
 
@@ -138,7 +159,7 @@ self.storage = FileStorage(
 
 - 整合、匹配问答对，并将图片插入到正确位置。
 - 同时支持题目答案在不同pdf，以及题目答案混排（question1-answer1-question2-answer2-...）。
-- 将 MinerU 切图复制到 `cache_path/name/question_images`、`answer_images`。
+- 将 MinerU 切图复制到 `cache_path/name/vqa_images`。
 - 解析 `<qa_pair>`、`<question>`、`<answer>`、`<solution>`、`<chapter>`、`<label>` 标签。
 
 ### 4. 后处理与产物
@@ -151,11 +172,10 @@ self.storage = FileStorage(
 
 每个 output_dir （cache_path/name/下面） 会得到：
 
-1. `vqa_extracted_questions.jsonl`
-2. `vqa_extracted_answers.jsonl`
-3. `vqa_merged_qa_pairs.jsonl`
-4. `vqa_merged_qa_pairs.md`
-5. `question_images/`、`answer_images/`
+1. `extracted_vqa.jsonl`
+2. `merged_qa_pairs.jsonl`
+3. `merged_qa_pairs.md`
+4. `vqa_images/`
 
 此外，cache的主文件最后一个step会包含提取出来的所有问答对，方便后面直接接算子做后处理。
 
@@ -181,17 +201,19 @@ self.storage = FileStorage(
 ## 5. 流水线示例
 
 ```python
-from dataflow.operators.knowledge_cleaning import FileOrURLToMarkdownConverterBatch
+from dataflow.operators.knowledge_cleaning import FileOrURLToMarkdownConverterAPI
 
 from dataflow.serving import APILLMServing_request
 from dataflow.utils.storage import FileStorage
-from dataflow.operators.pdf2vqa import MinerU2LLMInputOperator, LLMOutputParser, QA_Merger
+from dataflow.operators.pdf2vqa import MinerU2LLMInputOperator, LLMOutputParser, QA_Merger, PDF_Merger
 from dataflow.operators.core_text import ChunkedPromptedGenerator
 
 from dataflow.pipeline import PipelineABC
 from dataflow.prompts.pdf2vqa import QAExtractPrompt
 
-class VQA_extract_optimized_pipeline(PipelineABC):
+from pypdf import PdfWriter
+    
+class PDF_VQA_extract_optimized_pipeline(PipelineABC):
     def __init__(self):
         super().__init__()
         self.storage = FileStorage(
@@ -210,82 +232,59 @@ class VQA_extract_optimized_pipeline(PipelineABC):
         
         self.vqa_extract_prompt = QAExtractPrompt()
         
-        self.mineru_executor = FileOrURLToMarkdownConverterBatch(intermediate_dir = "intermediate", mineru_backend="vlm-vllm-engine")
+        self.pdf_merger = PDF_Merger(output_dir="./cache")
+        self.mineru_executor = FileOrURLToMarkdownConverterAPI(intermediate_dir = "intermediate")
         self.input_formatter = MinerU2LLMInputOperator()
         self.vqa_extractor = ChunkedPromptedGenerator(
             llm_serving=self.llm_serving,
             system_prompt = self.vqa_extract_prompt.build_prompt(),
             max_chunk_len=128000,
         )
-        self.llm_output_question_parser = LLMOutputParser(mode="question", output_dir="./cache", intermediate_dir="intermediate")
-        self.llm_output_answer_parser = LLMOutputParser(mode="answer", output_dir="./cache", intermediate_dir="intermediate")
+        self.llm_output_parser = LLMOutputParser(output_dir="./cache", intermediate_dir="intermediate")
         self.qa_merger = QA_Merger(output_dir="./cache", strict_title_match=False)
     def forward(self):
-        # 目前的处理逻辑是：MinerU处理问题-MinerU处理答案-格式化问题文本-格式化答案文本-问题文本输入LLM-答案文本输入LLM-解析问题输出-解析答案输出-合并问答对
-        # 由于问答对可能来自同一份pdf，也有可能来自不同pdf，而dataflow目前不支持分支，因此这里只能将question和answer的pdf都进行一次处理，
-        # 即使是同一份pdf也会被处理两次，最后再合并问答对。
-        # 未来会再思考如何优化这个流程，避免重复处理同一份pdf，提升性能。
-        
-        self.mineru_executor.run(
+        self.pdf_merger.run(
             storage=self.storage.step(),
-            input_key="question_pdf_path",
-            output_key="question_markdown_path",
+            input_pdf_list_key="input_pdf_paths",
+            input_name_key="name",
+            output_pdf_path_key="merged_pdf_path",
         )
         self.mineru_executor.run(
             storage=self.storage.step(),
-            input_key="answer_pdf_path",
-            output_key="answer_markdown_path",
+            input_key="merged_pdf_path",
+            output_key="vqa_markdown_path",
         )
         self.input_formatter.run(
             storage=self.storage.step(),
-            input_markdown_path_key="question_markdown_path",
-            output_converted_layout_key="converted_question_layout_path",
-        )
-        self.input_formatter.run(
-            storage=self.storage.step(),
-            input_markdown_path_key="answer_markdown_path",
-            output_converted_layout_key="converted_answer_layout_path",
+            input_markdown_path_key="vqa_markdown_path",
+            output_converted_layout_key="converted_vqa_layout_path",
         )
         self.vqa_extractor.run(
             storage=self.storage.step(),
-            input_path_key="converted_question_layout_path",
-            output_path_key="vqa_extracted_questions_path",
-        )
-        self.vqa_extractor.run(
-            storage=self.storage.step(),
-            input_path_key="converted_answer_layout_path",
-            output_path_key="vqa_extracted_answers_path",
-        )
-        self.llm_output_question_parser.run(
-            storage=self.storage.step(),
-            input_response_path_key="vqa_extracted_questions_path",
-            input_converted_layout_path_key="converted_question_layout_path",
-            input_name_key="name",
-            output_qalist_path_key="extracted_questions_path",
+            input_path_key="converted_vqa_layout_path",
+            output_path_key="extracted_llm_vqa_path",
         )
-        self.llm_output_answer_parser.run(
+        self.llm_output_parser.run(
             storage=self.storage.step(),
-            input_response_path_key="vqa_extracted_answers_path",
-            input_converted_layout_path_key="converted_answer_layout_path",
+            input_response_path_key="extracted_llm_vqa_path",
+            input_converted_layout_path_key="converted_vqa_layout_path",
             input_name_key="name",
-            output_qalist_path_key="extracted_answers_path",
+            output_qalist_path_key="extracted_vqa_path",
         )
         self.qa_merger.run(
             storage=self.storage.step(),
-            input_question_qalist_path_key="extracted_questions_path",
-            input_answer_qalist_path_key="extracted_answers_path",
+            input_qalist_path_key="extracted_vqa_path",
             input_name_key="name",
-            output_merged_qalist_path_key="output_merged_qalist_path",
+            output_merged_qalist_path_key="output_merged_vqalist_path",
             output_merged_md_path_key="output_merged_md_path",
-            output_qa_item_key="qa_pair",
+            output_qa_item_key="vqa_pair",
         )
 
 
 
 if __name__ == "__main__":
-    # jsonl中每一行包含question_pdf_path, answer_pdf_path, name (math1, math2, physics1, chemistry1, ...)
-    # 如果question和answer在同一份pdf中，请将question_pdf_path和answer_pdf_path设置为相同的路径
-    pipeline = VQA_extract_optimized_pipeline()
+    # jsonl中每一行包含input_pdf_paths, name (math1, math2, physics1, chemistry1, ...)
+    pipeline = PDF_VQA_extract_optimized_pipeline()
     pipeline.compile()
     pipeline.forward()
 ```