Skip to content

Commit a7df985

Browse files
committed
Handle nested XACTDOC.ZIPXML structure in ESX files
1 parent a8bb6ac commit a7df985

1 file changed

Lines changed: 39 additions & 11 deletions

File tree

backend/app/services/esx_parser.py

Lines changed: 39 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -29,21 +29,49 @@ async def parse_esx(self, file_contents: bytes, filename: str) -> Dict[str, Any]
2929
file_list = zf.namelist()
3030
logger.info(f"ESX contains {len(file_list)} files: {file_list}")
3131

32-
# Case-insensitive XML file search
33-
xml_files = [f for f in file_list if f.lower().endswith('.xml')]
34-
35-
if not xml_files:
36-
logger.error(f"No XML files found. Files in archive: {file_list}")
37-
raise ValueError(f"No XML files found in ESX archive. Found {len(file_list)} files: {', '.join(file_list[:10])}")
32+
# Check for nested XACTDOC.ZIPXML structure
33+
xactdoc_files = [f for f in file_list if 'XACTDOC' in f.upper() and 'ZIP' in f.upper()]
3834

35+
xml_files = []
3936
estimate_data = {}
40-
for xml_file in xml_files:
41-
content = zf.read(xml_file)
37+
38+
if xactdoc_files:
39+
# Extract and parse nested ZIP
40+
logger.info(f"Found nested XACTDOC file: {xactdoc_files[0]}")
41+
nested_zip_content = zf.read(xactdoc_files[0])
42+
4243
try:
43-
parsed = xmltodict.parse(content)
44-
estimate_data[xml_file] = parsed
44+
with zipfile.ZipFile(io.BytesIO(nested_zip_content)) as nested_zf:
45+
nested_file_list = nested_zf.namelist()
46+
logger.info(f"Nested ZIP contains {len(nested_file_list)} files: {nested_file_list}")
47+
48+
xml_files = [f for f in nested_file_list if f.lower().endswith('.xml')]
49+
50+
for xml_file in xml_files:
51+
content = nested_zf.read(xml_file)
52+
try:
53+
parsed = xmltodict.parse(content)
54+
estimate_data[xml_file] = parsed
55+
except Exception as e:
56+
logger.warning(f"Could not parse {xml_file}: {e}")
4557
except Exception as e:
46-
logger.warning(f"Could not parse {xml_file}: {e}")
58+
logger.error(f"Could not extract nested ZIP: {e}")
59+
raise ValueError(f"Failed to extract nested XACTDOC archive: {e}")
60+
else:
61+
# Direct XML files in root
62+
xml_files = [f for f in file_list if f.lower().endswith('.xml')]
63+
64+
for xml_file in xml_files:
65+
content = zf.read(xml_file)
66+
try:
67+
parsed = xmltodict.parse(content)
68+
estimate_data[xml_file] = parsed
69+
except Exception as e:
70+
logger.warning(f"Could not parse {xml_file}: {e}")
71+
72+
if not xml_files:
73+
logger.error(f"No XML files found. Files in archive: {file_list}")
74+
raise ValueError(f"No XML files found in ESX archive. Found {len(file_list)} files: {', '.join(file_list[:10])}")
4775

4876
metadata = self._extract_metadata(estimate_data)
4977
preview = self._create_preview(estimate_data)

0 commit comments

Comments
 (0)