Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,14 @@ the log file and collect the statistics.
from texoutparse import LatexLogParser

parser = LatexLogParser()

# If using a unicode-supporting engine, e.g. XeTeX or LuaTeX/LuaHBTeX
with open('sample.log') as f:
parser.process(f)

# If using an 8-bit engine, e.g. TeX or pdfTeX
with open('sample.log', encoding='latin-1') as f:
parser.process(f)
```
The `parser` object contains lists of errors, warnings, and bad boxes, each described by an
`LogFileMessage` object. Both objects provide a `__str__` method that prints a summary of the
Expand Down Expand Up @@ -55,4 +61,4 @@ Sam Morley - [inakleinbottle.com](https://inakleinbottle.com) - admin@inakleinbo
Distributed under the MIT license. See `LICENSE` for more information.

## Release History
- 1.0. Initial release
- 1.0. Initial release
56 changes: 49 additions & 7 deletions texoutparse.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
"""
Parser for LaTeX log files.
"""
import io
import re
import codecs
import warnings
from collections import deque


KNOWN_NONUNICODE_ENGINES = ['TeX', 'eTeX', 'pdfTeX']


class LogFileMessage:
"""
Helper class for storing log file messages.
Expand Down Expand Up @@ -74,15 +80,18 @@ class LatexLogParser:
list.
"""

engine = re.compile(
r"^This is (\w+), Version ([\w\d.-]+)"
)
error = re.compile(
r"^(?:! ((?:La|pdf)TeX|Package|Class)(?: (\w+))? [eE]rror(?: \(([\\]?\w+)\))?: (.*)|! (.*))"
r"^(?:! ((?:\w*)TeX|Package|Class)(?: (\w+))? [eE]rror(?: \(([\\]?\w+)\))?: (.*)|! (.*))"
)
warning = re.compile(
r"^((?:La|pdf)TeX|Package|Class)(?: (\w+))? [wW]arning(?: \(([\\]?\w+)\))?: (.*)"
r"^((?:\w*)TeX|Package|Class)(?: (\w+))? [wW]arning(?: \(([\\]?\w+)\))?: (.*)"
)

info = re.compile(
r"^((?:La|pdf)TeX|Package|Class)(?: (\w+))? [iI]nfo(?: \(([\\]?\w+)\))?: (.*)"
r"^((?:\w*)TeX|Package|Class)(?: (\w+))? [iI]nfo(?: \(([\\]?\w+)\))?: (.*)"
)
badbox = re.compile(
r"^(Over|Under)full "
Expand All @@ -101,6 +110,7 @@ def __init__(self, context_lines=2):
self.errors = []
self.badboxes = []
self.missing_refs = []
self.version = None
self.context_lines = context_lines

def __str__(self):
Expand All @@ -117,6 +127,8 @@ def process(self, lines):

:param lines: Iterable over lines of log.
"""
self.process_header(lines)

lines_iterable = _LineIterWrapper(lines, self.context_lines)

# cache the line processor for speed
Expand Down Expand Up @@ -207,7 +219,7 @@ def process_warning(self, match):

# Regex match groups
# 0 - Whole match (line)
# 1 - Type ((?:La|pdf)TeX|Package|Class)
# 1 - Type ((?:\w*)TeX|Package|Class)
# 2 - Package or Class name (\w*)
# 3 - extra
# 4 - Warning message (.*)
Expand Down Expand Up @@ -243,7 +255,7 @@ def process_error(self, match):

# Regex match groups
# 0 - Whole match (line)
# 1 - Type (LaTeX|Package|Class)
# 1 - Type ((?:\w*)TeX|Package|Class)
# 2 - Package or Class (\w+)
# 3 - extra (\(([\\]\w+)\))
# 4 - Error message for typed error (.*)
Expand Down Expand Up @@ -288,7 +300,37 @@ def process_missing_ref(self, match):
self.missing_refs.append(message)
return message

def process_engine(self, match):
    """
    Record the TeX engine name and version captured from the log banner.

    NOTE: the result is stored on ``self.engine``, which from then on hides
    the class-level ``engine`` pattern for this instance.

    :param match: Match object whose group 1 is the engine name and whose
        group 2 is the version string.
    :return: The message object holding the ``'engine'`` and ``'version'``
        entries.
    """
    engine_name, engine_version = match.group(1), match.group(2)

    info = LogFileMessage()
    info['engine'] = engine_name
    info['version'] = engine_version

    self.engine = info
    return info



def process_header(self, lines):
    """
    Read the engine banner from the first line of the log.

    The first line of output should contain information about the engine,
    e.g. ``This is LuaHBTeX, Version 1.13.0``, among other information. We
    attempt to read it and silently give up if we can't, since it is not
    crucial for the subsequent work of the parser.

    When ``lines`` is an iterator (such as an open file), its first line is
    consumed by this call. A ``UnicodeWarning`` is issued when a text stream
    decoded as UTF-8 was produced by a known non-unicode engine.

    :param lines: Iterable over lines of log.
    """
    # next() requires an iterator. iter() hands iterators (e.g. open files)
    # back unchanged and additionally lets plain iterables such as lists work.
    try:
        first_line = next(iter(lines))
    except StopIteration:
        return

    # process_engine() rebinds ``self.engine`` on the instance, so look the
    # compiled pattern up on the class; otherwise a second call on the same
    # parser would try to call .match() on the stored message object.
    engine_match = type(self).engine.match(first_line)
    if engine_match is None:
        return

    self.process_engine(engine_match)
    engine_name = engine_match.group(1)

    # Only real text streams carry an encoding worth checking.
    if not isinstance(lines, io.TextIOBase):
        return

    # In-memory streams such as io.StringIO report ``encoding`` as None,
    # which codecs.lookup() rejects with a TypeError.
    encoding = lines.encoding
    if encoding is None:
        return

    try:
        reads_utf8 = codecs.lookup(encoding).name == codecs.lookup('utf-8').name
    except LookupError:
        # Unknown codec name: nothing sensible to warn about.
        return

    if reads_utf8 and engine_name in KNOWN_NONUNICODE_ENGINES:
        warnings.warn(
            ' '.join((
                f'You are attempting to read unicode output from the non-unicode engine {engine_name}.',
                'This will likely result in a UnicodeDecodeError.',
                "Consider changing the encoding to 'latin-1' when reading the file."
            )),
            UnicodeWarning
        )