generated from allenai/python-package-template
-
Notifications
You must be signed in to change notification settings - Fork 591
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
fb0a729
commit 50e55f4
Showing
74 changed files
with
1,107 additions
and
121 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
import asyncio | ||
import logging | ||
from dataclasses import dataclass | ||
from typing import Optional | ||
|
||
# Import necessary components from olmocr | ||
from olmocr.pipeline import ( | ||
MetricsKeeper, | ||
PageResult, | ||
WorkerTracker, | ||
process_page, | ||
sglang_server_host, | ||
sglang_server_ready | ||
) | ||
|
||
# Configure root logging once at import time: INFO level, with timestamp,
# logger name and severity in front of every message.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)

# Logger used by everything in this runner script.
logger = logging.getLogger("olmocr_runner")
|
||
|
||
# Basic configuration | ||
@dataclass
class Args:
    """Default settings passed as ``args`` to the olmocr pipeline functions.

    An instance is handed to ``sglang_server_host`` and ``process_page``.
    Every field has a default, so ``Args()`` yields a usable configuration.
    """

    # Model served by sglang (looks like a Hugging Face repo id -- confirm).
    model: str = "allenai/olmOCR-7B-0225-preview"
    # Chat template name given to the server.
    model_chat_template: str = "qwen2-vl"
    # Maximum context length (presumably in tokens -- confirm).
    model_max_context: int = 8192

    # Page rendering / prompt sizing knobs read by the pipeline
    # (presumably pixels and characters respectively -- confirm).
    target_longest_image_dim: int = 1024
    target_anchor_text_len: int = 6000

    # Retry and failure-tolerance limits read by the pipeline.
    max_page_retries: int = 8
    max_page_error_rate: float = 0.004
|
||
|
||
async def run_olmocr_pipeline(pdf_path: str, page_num: int = 1) -> Optional[str]:
    """
    Process a single page of a PDF using the official olmocr pipeline's process_page function.

    An sglang server is reused if one answers within 5 seconds; otherwise a new
    one is started as a background task and awaited until it reports ready.

    Args:
        pdf_path: Path to the PDF file
        page_num: Page number to process (1-indexed)

    Returns:
        The extracted text from the page, or None if the page produced no
        response or processing raised an exception.

    Raises:
        Whatever ``sglang_server_ready`` raises if a freshly started server
        never becomes ready (that wait is not wrapped in a handler).
    """
    # Lazily create module-level state on first use.
    # NOTE(review): ``metrics`` and ``tracker`` are created in THIS module's
    # namespace and are not referenced again below; confirm whether
    # olmocr.pipeline actually reads them or uses its own globals.
    global metrics, tracker, _server_tasks
    if "metrics" not in globals() or metrics is None:
        metrics = MetricsKeeper(window=60 * 5)
    if "tracker" not in globals() or tracker is None:
        tracker = WorkerTracker()
    if "_server_tasks" not in globals() or _server_tasks is None:
        _server_tasks = set()

    args = Args()
    semaphore = asyncio.Semaphore(1)
    worker_id = 0  # Using 0 as default worker ID

    # Ensure server is running
    try:
        await asyncio.wait_for(sglang_server_ready(), timeout=5)
        logger.info("Using existing sglang server")
    except Exception:
        logger.info("Starting new sglang server")
        server_task = asyncio.create_task(sglang_server_host(args, semaphore))
        # The event loop only holds weak references to tasks. Keep a strong
        # reference at module level so the server task is not garbage
        # collected once this coroutine returns; drop it when the task ends.
        _server_tasks.add(server_task)
        server_task.add_done_callback(_server_tasks.discard)
        await sglang_server_ready()

    try:
        # process_page expects both the original path and a local path;
        # here the same path is used for both.
        page_result: PageResult = await process_page(
            args=args,
            worker_id=worker_id,
            pdf_orig_path=pdf_path,
            pdf_local_path=pdf_path,
            page_num=page_num,
        )

        # Return the natural text from the response
        if page_result and page_result.response:
            return page_result.response.natural_text
        return None

    except Exception as e:
        # Best-effort API: report the failure and signal it with None.
        logger.error(f"Error processing page: {type(e).__name__} - {str(e)}")
        return None
    # The server is intentionally left running for potential reuse.
|
||
|
||
async def main():
    """Example entry point: OCR page 1 of a placeholder PDF and print a preview."""
    extracted = await run_olmocr_pipeline("your_pdf_path.pdf", 1)

    if not extracted:
        print("Failed to extract text from the page")
        return

    # Only the first 200 characters are shown to keep the output short.
    print(f"Extracted text: {extracted[:200]}...")
|
||
|
||
# Run the example only when executed as a script, not when imported.
if __name__ == "__main__":
    asyncio.run(main())
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
51 changes: 51 additions & 0 deletions
51
olmocr/bench/sample_data/olmocr_pipeline/discoverworld_crazy_table4_1.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
Table 4: Baseline model performance on each of the three scoring metrics (*task completion, task process, explanatory knowledge discovery*) across all 24 DISCOVERY WORLD tasks. Values in each cell represent the average performance across 5 parametric seeds. *Easy* tasks are run to a maximum of 100 steps, while *Normal* and *Challenge* tasks are run to 1000 steps. | ||
|
||
| # | Topic | Task | ReACT Procedure Completion Knowledge | Plan+Execute Procedure Completion Knowledge | Hypothesizer Procedure Completion Knowledge | | ||
|---|---------------------|--------------------|--------------------------------------|-------------------------------------------|------------------------------------------| | ||
| | | | 0.87 0.20 0.20 | 0.89 0.00 0.00 | 0.90 0.40 1.00 | | ||
| 1 | Proteomics | Simple Clustering | 0.87 0.20 0.20 | 0.89 0.00 0.00 | 0.90 0.40 1.00 | | ||
| 2 | | Clustering (2D) | 0.88 0.40 0.40 | 0.68 0.20 0.00 | 0.93 0.40 0.40 | | ||
| 3 | | Clustering (3D) | 0.88 0.40 0.60 | 0.55 0.20 0.00 | 0.93 0.40 0.60 | | ||
| 4 | Chemistry | Exploring Combinations and Hill Climbing | 0.87 1.00 1.00 | 0.70 0.60 0.40 | 0.90 0.00 0.40 | | ||
| 5 | | Single substances | 0.82 0.00 0.00 | 0.87 0.40 0.00 | 0.93 0.60 0.40 | | ||
| 6 | | Mix of 3 substances | 0.82 0.00 0.00 | 0.87 0.40 0.00 | 0.93 0.60 0.40 | | ||
| 7 | Archaeology | Single instrument | 0.27 0.60 0.00 | 0.33 0.20 0.00 | 0.60 0.20 0.50 | | ||
| 8 | | Instrument Use | 0.72 0.40 0.30 | 0.74 0.00 0.00 | 0.64 0.40 0.40 | | ||
| 9 | | Correlation | 0.46 0.20 0.00 | 0.46 0.00 0.05 | 0.55 0.20 0.05 | | ||
| 10 | Reactor Lab | Regression | 0.42 0.00 0.40 | 0.44 0.00 0.10 | 0.38 0.00 0.20 | | ||
| 11 | | Slope only | 0.44 0.00 0.20 | 0.49 0.00 0.00 | 0.51 0.00 0.00 | | ||
| 12 | | Quadratic regression | 0.43 0.00 0.20 | 0.39 0.00 0.00 | 0.39 0.00 0.00 | | ||
| 13 | Plant Nutrients | Uncovering systems of rules | 0.80 0.20 0.20 | 0.70 0.20 0.20 | 0.60 0.00 0.00 | | ||
| 14 | | Presence rules | 0.91 0.60 0.00 | 0.84 0.40 0.00 | 0.56 0.00 0.00 | | ||
| 15 | | Logical Rules | 0.89 0.40 0.00 | 0.73 0.40 0.00 | 0.62 0.00 0.00 | | ||
| 16 | Space Sick | Open-ended discovery | 0.78 0.60 0.00 | 0.68 0.40 0.10 | 0.80 1.00 0.60 | | ||
| 17 | | Single instrument | 0.58 0.00 0.13 | 0.45 0.00 0.13 | 0.16 0.00 0.33 | | ||
| 18 | | Multiple instruments | 0.55 0.00 0.00 | 0.26 0.00 0.00 | 0.20 0.00 0.00 | | ||
| 19 | Rocket Science | Novel instruments | 0.53 0.00 0.00 | 0.34 0.00 0.00 | 0.17 0.00 0.00 | | ||
| 20 | | Look-up variables | 0.51 0.00 0.05 | 0.51 0.00 0.00 | 0.11 0.00 0.00 | | ||
| 21 | | Measure 5 variables | 0.43 0.00 0.00 | 0.34 0.00 0.00 | 0.22 0.00 0.03 | | ||
| 22 | Translation | Rosetta-stone style linguistic discovery of alien language | 0.40 0.40 0.20 | 0.30 0.00 0.00 | 0.20 0.20 0.00 | | ||
| 23 | | Single noun | 0.20 0.00 0.00 | 0.68 0.40 0.00 | 0.84 0.40 0.00 | | ||
| 24 | | Noun and verb | 0.49 0.00 0.00 | 0.55 0.20 0.05 | 0.15 0.00 0.00 | | ||
| | Average (Easy) | | 0.59 0.38 0.25 | 0.56 0.18 0.11 | 0.56 0.28 0.34 | | ||
| | Average (Normal) | | 0.63 0.18 0.14 | 0.64 0.18 0.02 | 0.58 0.23 0.19 | | ||
| | Average (Challenge) | | 0.63 0.18 0.10 | 0.50 0.15 0.01 | 0.49 0.08 0.08 | | ||
|
||
Table 5: Baseline model performance on each of the three scoring metrics (*task completion, task process, explanatory knowledge discovery*) across all 10 unit test tasks. Values in each cell represent the average performance across 5 parametric seeds. Unit tests tasks are run to a maximum of 100 steps. | ||
|
||
| # | Unit Test Topic | ReACT Procedure Completion | Plan+Execute Procedure Completion | Hypothesizer Procedure Completion | | ||
|---|-----------------|----------------------------|-----------------------------------|-----------------------------------| | ||
| 25 | Multi-turn dialog with an agent | 1.00 1.00 | 1.00 1.00 | 1.00 1.00 | | ||
| 26 | Measure an object with an instrument | 0.87 0.60 | 0.73 0.40 | 1.00 1.00 | | ||
| 27 | Pick-and-place object | 0.90 0.80 | 0.80 0.60 | 1.00 1.00 | | ||
| 28 | React Discovery Feed posts | 1.00 1.00 | 0.90 0.80 | 1.00 1.00 | | ||
| 30 | Move through doors | 0.58 0.20 | 0.25 0.00 | 0.30 0.00 | | ||
| 31 | Using keys with doors | 0.69 0.20 | 0.54 0.00 | 0.69 0.00 | | ||
| 32 | Navigate to a specific room in a house | 0.20 0.20 | 0.20 0.00 | 0.20 0.20 | | ||
| 33 | Search an environment for an object | 0.80 0.80 | 0.60 0.60 | 1.00 1.00 | | ||
| 34 | Interact with a moving agent | 0.60 0.20 | 0.53 0.00 | 0.53 0.20 | | ||
| | Average (Unit Tests) | 0.76 0.60 | 0.66 0.44 | 0.77 0.64 | | ||
|
||
4.2 Baseline Agent Models | ||
|
||
The baseline agents are described below, with model performance on Discovery tasks shown in **Table 4,** and performance on Unit Tests shown in **Table 5.** We use the GPT-4o model for all our agents due to its higher performance and lower cost compared to other models. For space we provide |
Oops, something went wrong.