Skip to content

Commit 70941dc

Browse files
committed
Search for XML markers in raw XACTDOC data as last resort
1 parent 28c78fc commit 70941dc

File tree

1 file changed

+31
-8
lines changed

1 file changed

+31
-8
lines changed

backend/app/services/esx_parser.py

Lines changed: 31 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -142,16 +142,39 @@ async def parse_esx(self, file_contents: bytes, filename: str) -> Dict[str, Any]
142142
logger.error(f"First 200 bytes of decompressed: {decompressed[:200]}")
143143

144144
if not xml_files:
145-
# Not compressed, try parsing as raw XML
146-
logger.info("Not compressed, attempting to parse as XML directly")
145+
# Try searching for XML content in raw bytes (PKZip stored method)
146+
logger.info("Searching for XML markers in raw XACTDOC data")
147147
try:
148-
parsed = xmltodict.parse(nested_content)
149-
estimate_data[xactdoc_files[0]] = parsed
150-
xml_files = [xactdoc_files[0]]
148+
# Search for common XML start patterns
149+
xml_start_markers = [b'<?xml', b'<EstimateFile', b'<Estimate', b'<Project', b'<Claim']
150+
found_xml = False
151+
152+
for marker in xml_start_markers:
153+
pos = nested_content.find(marker)
154+
if pos >= 0:
155+
logger.info(f"Found XML marker '{marker.decode()}' at position {pos}")
156+
xml_data = nested_content[pos:]
157+
# Try to find end of XML
158+
xml_end_markers = [b'</EstimateFile>', b'</Estimate>', b'</Project>', b'</Claim>']
159+
for end_marker in xml_end_markers:
160+
end_pos = xml_data.find(end_marker)
161+
if end_pos >= 0:
162+
xml_data = xml_data[:end_pos + len(end_marker)]
163+
break
164+
165+
parsed = xmltodict.parse(xml_data)
166+
estimate_data[xactdoc_files[0]] = parsed
167+
xml_files = [xactdoc_files[0]]
168+
found_xml = True
169+
break
170+
171+
if not found_xml:
172+
raise ValueError("No XML markers found in XACTDOC data")
151173
except Exception as e:
152-
logger.error(f"Failed to parse XACTDOC as XML: {e}")
153-
logger.error(f"First 100 bytes: {nested_content[:100]}")
154-
raise ValueError(f"XACTDOC file could not be parsed: {e}")
174+
logger.error(f"Failed to extract XML from XACTDOC: {e}")
175+
logger.error(f"First 200 bytes: {nested_content[:200]}")
176+
logger.error(f"XACTDOC.ZIPXML uses proprietary Xactimate compression that cannot be decoded with standard libraries")
177+
raise ValueError(f"XACTDOC file uses proprietary compression format. Please contact Xactimate support for SDK/API access or export data in a different format.")
155178
else:
156179
# Direct XML files in root
157180
xml_files = [f for f in file_list if f.lower().endswith('.xml')]

0 commit comments

Comments
 (0)