anthropics · cdxiaodong · Dec 14, 2025 · Dec 14, 2025 · Dec 14, 2025
diff --git a/skills/docx/ooxml.md b/skills/docx/ooxml.md
@@ -19,6 +19,25 @@
   - **trackRevisions placement**: Add `<w:trackRevisions/>` after `<w:proofState>` in settings.xml
 - **Images**: Add to `word/media/`, reference in `document.xml`, set dimensions to prevent overflow
 
+### XML Serialization Guidelines
+**CRITICAL: XML Serialization**
+When writing XML back to a file after manipulating it with `defusedxml.minidom`, you MUST preserve the original formatting:
+- ✅ **USE**: `doc.toxml(encoding='utf-8')` - Preserves original formatting, Word-compatible
+- ❌ **NEVER USE**: `doc.toprettyxml()` - Adds extra whitespace that breaks Word compatibility
+
+Microsoft Word is extremely sensitive to XML whitespace. `toprettyxml()` inserts newlines and indentation between nodes as extra whitespace text, which Word interprets as part of the document structure, causing documents to become unreadable. Always use `toxml()`, which serializes the DOM without adding any whitespace, to maintain the compact format Word expects.
+
+**When writing XML manually (not using the `Document` class):**
+```python
+# ✅ CORRECT - preserves formatting:
+with open(doc_path, 'wb') as f:
+    f.write(doc.toxml(encoding='utf-8'))
+
+# ❌ WRONG - breaks Word compatibility:
+with open(doc_path, 'wb') as f:
+    f.write(doc.toprettyxml(indent='  ', encoding='utf-8'))
+```
+
 ## Document Content Patterns
 
 ### Basic Structure
@@ -607,4 +626,4 @@ The validator checks that the document text matches the original after reverting
 <w:ins w:author="Claude" w:id="51">
   <w:r><w:t>within 30 days</w:t></w:r>
 </w:ins>
-```
+```
diff --git a/skills/docx/ooxml/scripts/unpack.py b/skills/docx/ooxml/scripts/unpack.py
@@ -16,12 +16,16 @@
 output_path.mkdir(parents=True, exist_ok=True)
 zipfile.ZipFile(input_file).extractall(output_path)
 
-# Pretty print all XML files
+# Re-serialize all XML files without reformatting - CRITICAL: Use toxml() to preserve Word compatibility
+# Microsoft Word is extremely sensitive to XML whitespace. Using toprettyxml()
+# adds newlines and indentation that break Word compatibility.
 xml_files = list(output_path.rglob("*.xml")) + list(output_path.rglob("*.rels"))
 for xml_file in xml_files:
     content = xml_file.read_text(encoding="utf-8")
     dom = defusedxml.minidom.parseString(content)
-    xml_file.write_bytes(dom.toprettyxml(indent="  ", encoding="ascii"))
+    # ✅ CORRECT: Use toxml() to preserve original formatting and Word compatibility
+    # ❌ WRONG: Never use toprettyxml() - it adds whitespace that breaks Word
+    xml_file.write_bytes(dom.toxml(encoding="ascii"))
 
 # For .docx files, suggest an RSID for tracked changes
 if input_file.endswith(".docx"):

diff --git a/skills/docx/scripts/utilities.py b/skills/docx/scripts/utilities.py
@@ -305,7 +305,13 @@ def save(self):
 
         Serializes the DOM tree and writes it back to the original file path,
         preserving the original encoding (ascii or utf-8).
+
+        CRITICAL: This method uses toxml() to preserve original formatting and
+        Word compatibility. Never use toprettyxml() as it adds whitespace that
+        breaks Word's ability to read the document.
         """
+        # ✅ CORRECT: Use toxml() to preserve original formatting and Word compatibility
+        # ❌ WRONG: Never use toprettyxml() - it adds whitespace that breaks Word
         content = self.dom.toxml(encoding=self.encoding)
         self.xml_path.write_bytes(content)