Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
Empty file.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
13 changes: 13 additions & 0 deletions tex2pdf-tools/tests/preflight/fixture/html_multi/paper.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"><html lang='en'>
<!-- Minimal page used as one of the HTML files in the html_multi preflight fixture. -->
<head>
<meta http-equiv='Content-Type' content='text/html; charset=utf-8'>
<title>HTML Submission Test</title>
</head>

<body>
<center>
Hello World
</center>
</body>
</html>

Empty file.
Binary file not shown.
44 changes: 44 additions & 0 deletions tex2pdf-tools/tests/preflight/test_preflight.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,25 @@ def test_preflight_pdf_only_submission(self):
)
self.assertEqual(pf.detected_toplevel_files[0].process.compiler.compiler_string, "pdf_submission")

def test_preflight_multi_pdf_only_submission(self):
    """Test a PDF only submission that contains more than one PDF file.

    The ``multi-pdf`` fixture must produce a successful preflight with two
    top-level files, ``foo.pdf`` followed by ``hello-world.pdf``, each of
    them carrying the PDF submission compiler spec.
    """
    submission_dir = os.path.join(self.fixture_dir, "multi-pdf")
    pf: PreflightResponse = generate_preflight_response(submission_dir)
    self.assertEqual(pf.status.key.value, "success")
    self.assertEqual(len(pf.detected_toplevel_files), 2)

    # Exactly two entries are guaranteed by the length assertion above.
    first, second = pf.detected_toplevel_files[0], pf.detected_toplevel_files[1]
    self.assertEqual(first.filename, "foo.pdf")
    self.assertEqual(second.filename, "hello-world.pdf")

    # Both files share the same serialized compiler spec and compiler string.
    expected_spec = """{"engine":"unknown","lang":"pdf","output":"unknown","postp":"none"}"""
    for toplevel in (first, second):
        self.assertEqual(
            toplevel.process.compiler.model_dump_json(exclude_none=True, exclude_defaults=True),
            expected_spec,
        )
    for toplevel in (first, second):
        self.assertEqual(toplevel.process.compiler.compiler_string, "pdf_submission")

def test_preflight_html_only_submission(self):
"""Test HTML only submission."""
dir_path = os.path.join(self.fixture_dir, "html_1")
Expand All @@ -179,6 +198,31 @@ def test_preflight_html_only_submission(self):
"html_submission"
)

def test_preflight_multi_html_only_submission(self):
    """Test an HTML only submission that contains more than one HTML file.

    The ``html_multi`` fixture must produce a successful preflight with two
    top-level files, ``another.html`` followed by ``paper.html``, each of
    them carrying the HTML submission compiler spec.
    """
    submission_dir = os.path.join(self.fixture_dir, "html_multi")
    pf: PreflightResponse = generate_preflight_response(submission_dir)
    self.assertEqual(pf.status.key.value, "success")
    self.assertEqual(len(pf.detected_toplevel_files), 2)

    # Exactly two entries are guaranteed by the length assertion above.
    first, second = pf.detected_toplevel_files[0], pf.detected_toplevel_files[1]
    self.assertEqual(first.filename, "another.html")
    self.assertEqual(second.filename, "paper.html")

    # Both files share the same serialized compiler spec and compiler string.
    expected_spec = """{"engine":"unknown","lang":"html","output":"unknown","postp":"none"}"""
    for toplevel in (first, second):
        self.assertEqual(
            toplevel.process.compiler.model_dump_json(exclude_none=True, exclude_defaults=True),
            expected_spec,
        )
    for toplevel in (first, second):
        self.assertEqual(toplevel.process.compiler.compiler_string, "html_submission")

def test_anc_files_submission(self):
"""Test submission with ancillary files."""
dir_path = os.path.join(self.fixture_dir, "anc_files_1")
Expand Down
57 changes: 38 additions & 19 deletions tex2pdf-tools/tex2pdf_tools/preflight/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from enum import Enum
from itertools import zip_longest
from pprint import pformat
from typing import TypeVar
from typing import TypeVar, cast

import chardet
from pydantic import BaseModel, Field
Expand Down Expand Up @@ -993,7 +993,7 @@ def parse_file(basedir: str, filename: str) -> ParsedTeXFile:
return n


def parse_dir(rundir: str) -> tuple[dict[str, ParsedTeXFile] | ToplevelFile, list[str]]:
def parse_dir(rundir: str) -> tuple[dict[str, ParsedTeXFile] | dict[str, ToplevelFile], list[str]]:
"""Parse all TeX files in a directory."""
glob_files = glob.glob(f"{rundir}/**/*", recursive=True)
# strip rundir/ prefix
Expand All @@ -1007,19 +1007,36 @@ def parse_dir(rundir: str) -> tuple[dict[str, ParsedTeXFile] | ToplevelFile, lis
# needs more extensions that we support
tex_files = [t for t in files if os.path.splitext(t)[1].lower() in PARSED_FILE_EXTENSIONS]
if not tex_files:
# we didn't find any tex file, check for a single PDF file
if len(files) == 1 and files[0].lower().endswith(".pdf"):
# PDF only submission, only one PDF file, nothing else
return ToplevelFile(
filename=files[0], process=MainProcessSpec(compiler=CompilerSpec(compiler=PDF_SUBMISSION_STRING))
), anc_files
else:
# check for HTML submissions
ret: dict[str, ToplevelFile] = {}
# we didn't find any tex file, check for a PDF only submission
only_pdf: bool | None = None
for f in sorted(files):
if f.lower().endswith(".pdf"):
if only_pdf is None:
only_pdf = True
# if it was True or False already, we can leave it

else:
only_pdf = False
break
if only_pdf:
for f in sorted(files):
if f.lower().endswith(".html"):
return ToplevelFile(
filename=f, process=MainProcessSpec(compiler=CompilerSpec(compiler=HTML_SUBMISSION_STRING))
), anc_files
ret[f] = ToplevelFile(
filename=f, process=MainProcessSpec(compiler=CompilerSpec(compiler=PDF_SUBMISSION_STRING))
)
return ret, anc_files
# if we are still here, then it is not a PDF only submission.
# Check for the presence of HTML files
has_html: bool | None = None
for f in sorted(files):
if f.lower().endswith(".html"):
has_html = True
ret[f] = ToplevelFile(
filename=f, process=MainProcessSpec(compiler=CompilerSpec(compiler=HTML_SUBMISSION_STRING))
)
if has_html:
return ret, anc_files

nodes = {f: parse_file(rundir, f) for f in tex_files}
# print(nodes)
return nodes, anc_files
Expand Down Expand Up @@ -1381,20 +1398,22 @@ def deal_with_indices(rundir: str, toplevel_files: dict[str, ToplevelFile], node
def _generate_preflight_response_dict(rundir: str) -> PreflightResponse:
"""Parse submission and generated preflight response as dictionary."""
# parse files
n: dict[str, ParsedTeXFile] | ToplevelFile
n: dict[str, ParsedTeXFile] | dict[str, ToplevelFile]
anc_files: list[str]
nodes: dict[str, ParsedTeXFile]
roots: dict[str, ParsedTeXFile]
toplevel_files: dict[str, ToplevelFile]

n, anc_files = parse_dir(rundir)
if isinstance(n, ToplevelFile):
# pdf only submission, we received the toplevel file already
toplevel_files = {n.filename: n}
# we cannot do isinstance(n, dict[str, ToplevelFile]), so we check that
# the first (or any) value is a ToplevelFile
if n and isinstance(list(n.values())[0], ToplevelFile): # noqa
# pdf or html submission, we received the toplevel file already
toplevel_files = cast(dict[str, ToplevelFile], n) # mypy cannot deduce this
nodes = {}
status = PreflightStatus(key=PreflightStatusValues.success)
else:
nodes = n
nodes = cast(dict[str, ParsedTeXFile], n) # mypy cannot deduce this
if nodes == {}:
roots = {}
toplevel_files = {}
Expand Down