Replace the IMPORTS registry with sys.modules lookups; quote CSV output.

sys.modules is keyed by the real module name, not by the lowercase labels
the removed IMPORTS list used ('pypdf2', 'pdfminer', 'beautifulsoup',
'requests'). Looking those labels up directly would make the BeautifulSoup
check fail always (the module registers as 'bs4') and the PyPDF2 check fail
always (it registers as 'PyPDF2'). This patch therefore:

  * adds PDF_LIBRARY_MODULES, mapping each PDF library label to the module
    name to look for; an unknown label maps to None, which is never a key
    of sys.modules, so it still raises ImportError as before;
  * tests for 'bs4' instead of 'beautifulsoup';
  * leaves the 'requests' check alone, since label and module name agree.

Notes for the reviewer:
  * Assumes iocp.py already imports sys (not visible in these hunks).
  * 'pdfminer' in sys.modules only shows that the package itself loaded;
    presumably a failed submodule import could still leave it registered.
  * Line breaks and indentation of context lines were reconstructed; use
    `git apply --ignore-whitespace` if a context line fails to match.
  * The post-image blob hash on the iocp.py index line predates these
    corrections.

diff --git a/iocp.py b/iocp.py
index 9968cc2..65f0fb2 100755
--- a/iocp.py
+++ b/iocp.py
@@ -47,10 +47,10 @@
 import ConfigParser
 
 # Import optional third-party libraries
-IMPORTS = []
+# Map each PDF library label to the module name it registers in sys.modules
+PDF_LIBRARY_MODULES = {'pypdf2': 'PyPDF2', 'pdfminer': 'pdfminer'}
 try:
     from PyPDF2 import PdfFileReader
-    IMPORTS.append('pypdf2')
 except ImportError:
     pass
 try:
@@ -59,17 +59,14 @@
     from pdfminer.converter import TextConverter
     from pdfminer.pdfinterp import PDFPageInterpreter
     from pdfminer.layout import LAParams
-    IMPORTS.append('pdfminer')
 except ImportError:
     pass
 try:
     from bs4 import BeautifulSoup
-    IMPORTS.append('beautifulsoup')
 except ImportError:
     pass
 try:
     import requests
-    IMPORTS.append('requests')
 except ImportError:
     pass
 
@@ -104,11 +101,11 @@ def __init__(self, patterns_ini=None, input_format='pdf', dedup=False, library='
 
         self.library = library
         if input_format == 'pdf':
-            if library not in IMPORTS:
+            if PDF_LIBRARY_MODULES.get(library) not in sys.modules:
                 e = 'Selected PDF parser library not found: %s' % (library)
                 raise ImportError(e)
         elif input_format == 'html':
-            if 'beautifulsoup' not in IMPORTS:
+            if 'bs4' not in sys.modules:
                 e = 'HTML parser library not found: BeautifulSoup'
                 raise ImportError(e)
 
@@ -269,7 +266,7 @@ def parse_html(self, f, fpath):
     def parse(self, path):
         try:
             if path.startswith('http://') or path.startswith('https://'):
-                if 'requests' not in IMPORTS:
+                if 'requests' not in sys.modules:
                     e = 'HTTP library not found: requests'
                     raise ImportError(e)
                 headers = { 'User-Agent': 'Mozilla/5.0 Gecko Firefox' }
diff --git a/output.py b/output.py
index d71c92c..0503774 100644
--- a/output.py
+++ b/output.py
@@ -31,7 +31,7 @@ def print_error(self, fpath, exception):
 
 class OutputHandler_csv(OutputHandler):
     def __init__(self):
-        self.csv_writer = csv.writer(sys.stdout, delimiter = '\t')
+        self.csv_writer = csv.writer(sys.stdout, delimiter = '\t', quoting=csv.QUOTE_NONNUMERIC)
 
     def print_match(self, fpath, page, name, match):
         self.csv_writer.writerow((fpath, page, name, match))