Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,10 @@
src/.DS_Store

# Pycache
__pycache__/

# Python virtualenv (in case you aren't using Docker)
env/

# Output folder
output/
3 changes: 3 additions & 0 deletions src/lib/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .highlightextractorargs import HighlightExtractorArgs
from .pdfhighlightedtextextractor import PDFHighlighedTextExtractor, PDFImageOcrStrategy
from .report_writer import ReportWriter
31 changes: 31 additions & 0 deletions src/lib/highlightextractorargs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import argparse
from typing import Any


class HighlightExtractorArgsParseExcetion(Exception):
    """Raised when parsed command-line arguments are read before parsing.

    The class adds nothing on top of ``Exception``; it exists so callers can
    catch this specific failure. The (misspelled) name is kept as-is because
    it is part of the module's importable interface.
    """


class HighlightExtractorArgs:
    """Command-line arguments of the highlight extractor.

    Wraps an ``argparse.ArgumentParser`` that accepts one positional
    argument: the path of the PDF file to analyze. Call ``parse()`` first,
    then read the value through the ``pdf_filename`` property.
    """

    GENERAL_DESCRIPTION = 'Extract text from yellow highlighted areas in a PDF.'

    PDF_PATH_ARG_NAME = 'pdf_filename'
    PDF_PATH_ARG_DESCRIPTION = 'The path to the PDF file to analyze.'

    def __init__(self):
        """Build the argument parser; no arguments are parsed yet."""
        self.parser = argparse.ArgumentParser(
            description=self.GENERAL_DESCRIPTION
        )
        self.parser.add_argument(self.PDF_PATH_ARG_NAME, help=self.PDF_PATH_ARG_DESCRIPTION)

        # Remains None until parse() runs; pdf_filename uses this to detect
        # that parsing has not happened.
        self.args = None

    def parse(self, argv=None):
        """Parse the command line and store the result in ``self.args``.

        Args:
            argv: Optional sequence of argument strings to parse instead of
                the process command line. When left as ``None`` argparse
                reads ``sys.argv[1:]``, which is the original behavior.

        Raises:
            SystemExit: Raised by argparse when the arguments are invalid
                or when help is requested.
        """
        self.args = self.parser.parse_args(argv)

    @property
    def pdf_filename(self) -> Any:
        """Return the PDF path given on the command line.

        Raises:
            HighlightExtractorArgsParseExcetion: If ``parse()`` has not been
                called yet.
        """
        if self.args is None:
            raise HighlightExtractorArgsParseExcetion('No args parsed, maybe you forgot to call parse() method?')
        return getattr(self.args, self.PDF_PATH_ARG_NAME)
File renamed without changes.
Loading