arXiv · norbusan · Feb 19, 2025
diff --git a/tex2pdf-tools/tests/preflight/fixture/html_multi/another.html b/tex2pdf-tools/tests/preflight/fixture/html_multi/another.html
diff --git a/tex2pdf-tools/tests/preflight/fixture/html_multi/figure1.eps b/tex2pdf-tools/tests/preflight/fixture/html_multi/figure1.eps
diff --git a/tex2pdf-tools/tests/preflight/fixture/html_multi/figure2.png b/tex2pdf-tools/tests/preflight/fixture/html_multi/figure2.png
diff --git a/tex2pdf-tools/tests/preflight/fixture/html_multi/figure3.jpg b/tex2pdf-tools/tests/preflight/fixture/html_multi/figure3.jpg
diff --git a/tex2pdf-tools/tests/preflight/fixture/html_multi/paper.html b/tex2pdf-tools/tests/preflight/fixture/html_multi/paper.html
@@ -0,0 +1,13 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/strict.dtd"><html lang='eng'>
+<head>
+<meta http-equiv='Content-Type' content='text/html; charset=utf-8'>
+<title>HTML Submission Test</title>
+</head>
+
+<body>
+<center>
+	Hello World
+</center>
+</body>
+</html>
+
diff --git a/tex2pdf-tools/tests/preflight/fixture/multi-pdf/foo.pdf b/tex2pdf-tools/tests/preflight/fixture/multi-pdf/foo.pdf
diff --git a/tex2pdf-tools/tests/preflight/fixture/multi-pdf/hello-world.pdf b/tex2pdf-tools/tests/preflight/fixture/multi-pdf/hello-world.pdf
diff --git a/tex2pdf-tools/tests/preflight/test_preflight.py b/tex2pdf-tools/tests/preflight/test_preflight.py
@@ -163,6 +163,25 @@ def test_preflight_pdf_only_submission(self):
         )
         self.assertEqual(pf.detected_toplevel_files[0].process.compiler.compiler_string, "pdf_submission")
 
+    def test_preflight_multi_pdf_only_submission(self):
+        """Test PDF only submission consisting of multiple PDF files."""
+        dir_path = os.path.join(self.fixture_dir, "multi-pdf")
+        pf: PreflightResponse = generate_preflight_response(dir_path)
+        self.assertEqual(pf.status.key.value, "success")
+        self.assertEqual(len(pf.detected_toplevel_files), 2)
+        self.assertEqual(pf.detected_toplevel_files[0].filename, "foo.pdf")
+        self.assertEqual(pf.detected_toplevel_files[1].filename, "hello-world.pdf")
+        self.assertEqual(
+            pf.detected_toplevel_files[0].process.compiler.model_dump_json(exclude_none=True, exclude_defaults=True),
+            """{"engine":"unknown","lang":"pdf","output":"unknown","postp":"none"}""",
+        )
+        self.assertEqual(
+            pf.detected_toplevel_files[1].process.compiler.model_dump_json(exclude_none=True, exclude_defaults=True),
+            """{"engine":"unknown","lang":"pdf","output":"unknown","postp":"none"}""",
+        )
+        self.assertEqual(pf.detected_toplevel_files[0].process.compiler.compiler_string, "pdf_submission")
+        self.assertEqual(pf.detected_toplevel_files[1].process.compiler.compiler_string, "pdf_submission")
+
     def test_preflight_html_only_submission(self):
         """Test HTML only submission."""
         dir_path = os.path.join(self.fixture_dir, "html_1")
@@ -179,6 +198,31 @@ def test_preflight_html_only_submission(self):
             "html_submission"
         )
 
+    def test_preflight_multi_html_only_submission(self):
+        """Test HTML submission containing multiple HTML files."""
+        dir_path = os.path.join(self.fixture_dir, "html_multi")
+        pf: PreflightResponse = generate_preflight_response(dir_path)
+        self.assertEqual(pf.status.key.value, "success")
+        self.assertEqual(len(pf.detected_toplevel_files), 2)
+        self.assertEqual(pf.detected_toplevel_files[0].filename, "another.html")
+        self.assertEqual(pf.detected_toplevel_files[1].filename, "paper.html")
+        self.assertEqual(
+            pf.detected_toplevel_files[0].process.compiler.model_dump_json(exclude_none=True, exclude_defaults=True),
+            """{"engine":"unknown","lang":"html","output":"unknown","postp":"none"}""",
+        )
+        self.assertEqual(
+            pf.detected_toplevel_files[1].process.compiler.model_dump_json(exclude_none=True, exclude_defaults=True),
+            """{"engine":"unknown","lang":"html","output":"unknown","postp":"none"}""",
+        )
+        self.assertEqual(
+            pf.detected_toplevel_files[0].process.compiler.compiler_string,
+            "html_submission"
+        )
+        self.assertEqual(
+            pf.detected_toplevel_files[1].process.compiler.compiler_string,
+            "html_submission"
+        )
+
     def test_anc_files_submission(self):
         """Test submission with ancillary files."""
         dir_path = os.path.join(self.fixture_dir, "anc_files_1")

diff --git a/tex2pdf-tools/tex2pdf_tools/preflight/__init__.py b/tex2pdf-tools/tex2pdf_tools/preflight/__init__.py
@@ -10,7 +10,7 @@
 from enum import Enum
 from itertools import zip_longest
 from pprint import pformat
-from typing import TypeVar
+from typing import TypeVar, cast
 
 import chardet
 from pydantic import BaseModel, Field
@@ -993,7 +993,7 @@ def parse_file(basedir: str, filename: str) -> ParsedTeXFile:
     return n
 
 
-def parse_dir(rundir: str) -> tuple[dict[str, ParsedTeXFile] | ToplevelFile, list[str]]:
+def parse_dir(rundir: str) -> tuple[dict[str, ParsedTeXFile] | dict[str, ToplevelFile], list[str]]:
     """Parse all TeX files in a directory."""
     glob_files = glob.glob(f"{rundir}/**/*", recursive=True)
     # strip rundir/ prefix
@@ -1007,19 +1007,36 @@ def parse_dir(rundir: str) -> tuple[dict[str, ParsedTeXFile] | ToplevelFile, lis
     # needs more extensions that we support
     tex_files = [t for t in files if os.path.splitext(t)[1].lower() in PARSED_FILE_EXTENSIONS]
     if not tex_files:
-        # we didn't find any tex file, check for a single PDF file
-        if len(files) == 1 and files[0].lower().endswith(".pdf"):
-            # PDF only submission, only one PDF file, nothing else
-            return ToplevelFile(
-                filename=files[0], process=MainProcessSpec(compiler=CompilerSpec(compiler=PDF_SUBMISSION_STRING))
-            ), anc_files
-        else:
-            # check for HTML submissions
+        ret: dict[str, ToplevelFile] = {}
+        # we didn't find any tex file, check for a PDF only submission
+        only_pdf: bool | None = None
+        for f in sorted(files):
+            if f.lower().endswith(".pdf"):
+                if only_pdf is None:
+                    only_pdf = True
+                # if it was True already, we can leave it (False exits the loop via break)
+
+            else:
+                only_pdf = False
+                break
+        if only_pdf:
             for f in sorted(files):
-                if f.lower().endswith(".html"):
-                    return ToplevelFile(
-                        filename=f, process=MainProcessSpec(compiler=CompilerSpec(compiler=HTML_SUBMISSION_STRING))
-                    ), anc_files
+                ret[f] = ToplevelFile(
+                    filename=f, process=MainProcessSpec(compiler=CompilerSpec(compiler=PDF_SUBMISSION_STRING))
+                )
+            return ret, anc_files
+        # if we are still here, then it is not a PDF only submission.
+        # Check for the presence of HTML files
+        has_html: bool | None = None
+        for f in sorted(files):
+            if f.lower().endswith(".html"):
+                has_html = True
+                ret[f] = ToplevelFile(
+                    filename=f, process=MainProcessSpec(compiler=CompilerSpec(compiler=HTML_SUBMISSION_STRING))
+                )
+        if has_html:
+            return ret, anc_files
+
     nodes = {f: parse_file(rundir, f) for f in tex_files}
     # print(nodes)
     return nodes, anc_files
@@ -1381,20 +1398,22 @@ def deal_with_indices(rundir: str, toplevel_files: dict[str, ToplevelFile], node
 def _generate_preflight_response_dict(rundir: str) -> PreflightResponse:
     """Parse submission and generated preflight response as dictionary."""
     # parse files
-    n: dict[str, ParsedTeXFile] | ToplevelFile
+    n: dict[str, ParsedTeXFile] | dict[str, ToplevelFile]
     anc_files: list[str]
     nodes: dict[str, ParsedTeXFile]
     roots: dict[str, ParsedTeXFile]
     toplevel_files: dict[str, ToplevelFile]
 
     n, anc_files = parse_dir(rundir)
-    if isinstance(n, ToplevelFile):
-        # pdf only submission, we received the toplevel file already
-        toplevel_files = {n.filename: n}
+    # we cannot do isinstance(n, dict[str, ToplevelFile]), so we check that
+    # the first value is a ToplevelFile instead
+    if n and isinstance(list(n.values())[0], ToplevelFile):  # noqa
+        # pdf or html submission, we received the toplevel file already
+        toplevel_files = cast(dict[str, ToplevelFile], n)  # mypy cannot deduce this
         nodes = {}
         status = PreflightStatus(key=PreflightStatusValues.success)
     else:
-        nodes = n
+        nodes = cast(dict[str, ParsedTeXFile], n)  # mypy cannot deduce this
         if nodes == {}:
             roots = {}
             toplevel_files = {}