diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index bd170c9..8e0142d 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -13,10 +13,12 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - uses: actions/setup-node@v2-beta + - uses: actions/checkout@v4 with: - node-version: '20' + submodules: recursive + - uses: actions/setup-node@v4 + with: + node-version: '22' - uses: ArtiomTr/jest-coverage-report-action@v2 id: coverage with: diff --git a/README.md b/README.md index df3acf0..49956b2 100644 --- a/README.md +++ b/README.md @@ -219,6 +219,54 @@ HTML per se is not strict XML. Because of that, starting on version 2.0.0, this - Tags like `
`, `` and `` don't need to be closed. The output for these tags doesn't close them (adding a `/` before the tag closes, or a corresponding close tag); - This rule doesn't apply for XHTML, which is strict XML. +### Whitespace Handling + +This library supports `xsl:strip-space` and `xsl:preserve-space` for controlling whitespace in the input document. + +#### `xsl:strip-space` + +Use `` to remove whitespace-only text nodes from specified elements in the input document: + +```xml + + + + + + + + + +``` + +The `elements` attribute accepts: +- `*` - matches all elements +- `name` - matches elements with the specified local name +- `prefix:*` - matches all elements in a namespace +- `prefix:name` - matches a specific element in a namespace +- Multiple patterns separated by whitespace (e.g., `"book chapter section"`) + +#### `xsl:preserve-space` + +Use `` to preserve whitespace in specific elements, overriding `xsl:strip-space`: + +```xml + + + + + + + +``` + +#### Precedence Rules + +1. `xml:space="preserve"` attribute on an element takes highest precedence +2. `xsl:preserve-space` overrides `xsl:strip-space` for matching elements +3. `xsl:strip-space` applies to remaining matches +4. By default (no declarations), whitespace is preserved + ## References - XPath Specification: http://www.w3.org/TR/1999/REC-xpath-19991116 diff --git a/src/xslt/xslt.ts b/src/xslt/xslt.ts index 0c0c4d2..03ded8c 100644 --- a/src/xslt/xslt.ts +++ b/src/xslt/xslt.ts @@ -81,6 +81,19 @@ export class Xslt { version: string; firstTemplateRan: boolean; + /** + * List of element name patterns from xsl:strip-space declarations. + * Whitespace-only text nodes inside matching elements will be stripped. + */ + stripSpacePatterns: string[]; + + /** + * List of element name patterns from xsl:preserve-space declarations. + * Whitespace-only text nodes inside matching elements will be preserved. + * preserve-space takes precedence over strip-space for conflicting patterns. + */ + preserveSpacePatterns: string[]; + constructor( options: Partial = { cData: true, @@ -100,6 +113,8 @@ export class Xslt { }; this.outputMethod = 'xml'; this.outputOmitXmlDeclaration = 'no'; + this.stripSpacePatterns = []; + this.preserveSpacePatterns = []; this.decimalFormatSettings = { decimalSeparator: '.', groupingSeparator: ',', @@ -237,14 +252,16 @@ export class Xslt { await this.xsltVariable(context, template, false); break; case 'preserve-space': - throw new Error(`not implemented: ${template.localName}`); + this.xsltPreserveSpace(template); + break; case 'processing-instruction': throw new Error(`not implemented: ${template.localName}`); case 'sort': this.xsltSort(context, template); break; case 'strip-space': - throw new Error(`not implemented: ${template.localName}`); + this.xsltStripSpace(template); + break; case 'stylesheet': case 'transform': await this.xsltTransformOrStylesheet(context, template, output); @@ -435,6 +452,10 @@ export class Xslt { } if (source.nodeType == DOM_TEXT_NODE) { + // Check if this whitespace-only text node should be stripped + if (this.shouldStripWhitespaceNode(source)) { + return null; + } let node = domCreateTextNode(this.outputDocument, source.nodeValue); node.siblingPosition = destination.childNodes.length; domAppendChild(destination, node); @@ -722,6 +743,127 @@ export class Xslt { this.xPath.xPathSort(context, sort); } + /** + * Implements `xsl:strip-space`. + * Collects element name patterns for which whitespace-only text nodes should be stripped. + * @param template The `` node. + */ + protected xsltStripSpace(template: XNode) { + const elements = xmlGetAttribute(template, 'elements'); + if (elements) { + // Split on whitespace to get individual patterns (e.g., "* book" becomes ["*", "book"]) + const patterns = elements.trim().split(/\s+/); + this.stripSpacePatterns.push(...patterns); + } + } + + /** + * Implements `xsl:preserve-space`. + * Collects element name patterns for which whitespace-only text nodes should be preserved. + * preserve-space takes precedence over strip-space for matching elements. + * @param template The `` node. + */ + protected xsltPreserveSpace(template: XNode) { + const elements = xmlGetAttribute(template, 'elements'); + if (elements) { + // Split on whitespace to get individual patterns (e.g., "pre code" becomes ["pre", "code"]) + const patterns = elements.trim().split(/\s+/); + this.preserveSpacePatterns.push(...patterns); + } + } + + /** + * Determines if a text node from the input document should be stripped. + * This applies xsl:strip-space and xsl:preserve-space rules to whitespace-only text nodes. + * @param textNode The text node to check. + * @returns True if the text node should be stripped (not included in output). + */ + protected shouldStripWhitespaceNode(textNode: XNode): boolean { + // Only strip whitespace-only text nodes + if (!textNode.nodeValue || !textNode.nodeValue.match(/^\s*$/)) { + return false; + } + + // If no strip-space patterns are defined, don't strip + if (this.stripSpacePatterns.length === 0) { + return false; + } + + const parentElement = textNode.parentNode; + if (!parentElement || parentElement.nodeType !== DOM_ELEMENT_NODE) { + return false; + } + + // Check for xml:space="preserve" on parent or ancestors (highest precedence) + let ancestor = parentElement; + while (ancestor && ancestor.nodeType === DOM_ELEMENT_NODE) { + const xmlspace = domGetAttributeValue(ancestor, 'xml:space'); + if (xmlspace === 'preserve') { + return false; + } + if (xmlspace === 'default') { + break; // Continue to check strip-space/preserve-space rules + } + ancestor = ancestor.parentNode; + } + + const parentName = parentElement.localName || parentElement.nodeName; + + // Check preserve-space patterns first (they take precedence over strip-space) + for (const pattern of this.preserveSpacePatterns) { + if (this.matchesNamePattern(parentName, pattern, parentElement)) { + return false; + } + } + + // Check strip-space patterns + for (const pattern of this.stripSpacePatterns) { + if (this.matchesNamePattern(parentName, pattern, parentElement)) { + return true; + } + } + + return false; + } + + /** + * Matches an element name against a strip-space/preserve-space pattern. + * Supports: + * - "*" matches any element + * - "prefix:*" matches any element in a namespace + * - "name" matches elements with that local name + * - "prefix:name" matches elements with that QName + * @param elementName The local name of the element. + * @param pattern The pattern to match against. + * @param element The element node (for namespace checking). + * @returns True if the element matches the pattern. + */ + protected matchesNamePattern(elementName: string, pattern: string, element: XNode): boolean { + // Universal match + if (pattern === '*') { + return true; + } + + // Handle patterns with namespace prefixes + if (pattern.includes(':')) { + const [prefix, localPart] = pattern.split(':'); + + // Check if element has a matching prefix + const elementPrefix = element.prefix || ''; + + if (localPart === '*') { + // prefix:* - match any element in that namespace + return elementPrefix === prefix; + } else { + // prefix:name - match specific element in namespace + return elementPrefix === prefix && elementName === localPart; + } + } + + // Simple name match (no namespace prefix in pattern) + return elementName === pattern; + } + /** * Implements `xsl:template`. * @param context The Expression Context. @@ -966,6 +1108,12 @@ export class Xslt { */ private commonLogicTextNode(context: ExprContext, template: XNode, output: XNode) { if (output) { + // Check if this whitespace-only text node should be stripped based on + // xsl:strip-space and xsl:preserve-space declarations + if (this.shouldStripWhitespaceNode(template)) { + return; + } + let node = domCreateTextNode(this.outputDocument, template.nodeValue); // Set siblingPosition to preserve insertion order during serialization node.siblingPosition = output.childNodes.length; diff --git a/tests/xslt/strip-space.test.ts b/tests/xslt/strip-space.test.ts new file mode 100644 index 0000000..f930054 --- /dev/null +++ b/tests/xslt/strip-space.test.ts @@ -0,0 +1,290 @@ +import assert from 'assert'; + +import { XmlParser } from "../../src/dom"; +import { Xslt } from "../../src/xslt"; + +describe('xsl:strip-space', () => { + it('Basic strip-space with wildcard (*) - Issue 100', async () => { + const xmlString = ` + + + First + + + Second + +`; + + const xsltString = ` + + + + + + + + + + + +`; + + const xsltClass = new Xslt(); + const xmlParser = new XmlParser(); + const xml = xmlParser.xmlParse(xmlString); + const xslt = xmlParser.xmlParse(xsltString); + + const outXmlString = await xsltClass.xsltProcess(xml, xslt); + + // Without strip-space, there would be whitespace between entries + assert.equal(outXmlString, `FirstSecond`); + }); + + it('Strip-space with specific element name', async () => { + const xmlString = ` + + Text with spaces + Text with spaces +`; + + const xsltString = ` + + + + + + + +`; + + const xsltClass = new Xslt(); + const xmlParser = new XmlParser(); + const xml = xmlParser.xmlParse(xmlString); + const xslt = xmlParser.xmlParse(xsltString); + + const outXmlString = await xsltClass.xsltProcess(xml, xslt); + + // Whitespace between root's children is stripped, but content of item/other is preserved + assert.equal(outXmlString, ` Text with spaces Text with spaces `); + }); + + it('Strip-space with multiple element names', async () => { + const xmlString = ` + + + content + + + other + +`; + + const xsltString = ` + + + + + + + +`; + + const xsltClass = new Xslt(); + const xmlParser = new XmlParser(); + const xml = xmlParser.xmlParse(xmlString); + const xslt = xmlParser.xmlParse(xsltString); + + const outXmlString = await xsltClass.xsltProcess(xml, xslt); + + // Whitespace inside root and a is stripped, but inside c it's preserved + assert.ok(outXmlString.includes('content')); + assert.ok(outXmlString.includes('')); + }); + + it('Preserve-space overrides strip-space', async () => { + const xmlString = ` + +
+        preserved
+    
+ + also preserved + +
`; + + const xsltString = ` + + + + + + + + +`; + + const xsltClass = new Xslt(); + const xmlParser = new XmlParser(); + const xml = xmlParser.xmlParse(xmlString); + const xslt = xmlParser.xmlParse(xsltString); + + const outXmlString = await xsltClass.xsltProcess(xml, xslt); + + // Whitespace inside pre and code should be preserved despite strip-space="*" + assert.ok(outXmlString.includes('preserved')); + assert.ok(outXmlString.includes('also preserved')); + // Root's direct whitespace children should be stripped + assert.ok(outXmlString.startsWith('
'));
+    });
+
+    it('xml:space="preserve" takes precedence over strip-space', async () => {
+        const xmlString = `
+
+    
+        whitespace here
+    
+`;
+
+        const xsltString = `
+
+    
+    
+
+    
+        
+    
+`;
+
+        const xsltClass = new Xslt();
+        const xmlParser = new XmlParser();
+        const xml = xmlParser.xmlParse(xmlString);
+        const xslt = xmlParser.xmlParse(xsltString);
+
+        const outXmlString = await xsltClass.xsltProcess(xml, xslt);
+
+        // Whitespace inside item should be preserved due to xml:space="preserve"
+        assert.ok(outXmlString.includes('whitespace here'));
+    });
+
+    it('No strip-space by default - whitespace text nodes are processed', async () => {
+        // This test verifies that without strip-space, whitespace-only text nodes
+        // from the input are processed. With apply-templates, text nodes create
+        // output (though serialization may normalize whitespace for display).
+        const xmlString = `
+text`;
+
+        const xsltString = `
+
+    
+
+    
+        
+    
+`;
+
+        const xsltClass = new Xslt();
+        const xmlParser = new XmlParser();
+        const xml = xmlParser.xmlParse(xmlString);
+        const xslt = xmlParser.xmlParse(xsltString);
+
+        const outXmlString = await xsltClass.xsltProcess(xml, xslt);
+
+        // Without strip-space, content should be preserved
+        assert.equal(outXmlString, `text`);
+    });
+
+    it('Strip-space applies to apply-templates', async () => {
+        const xmlString = `
+
+    
+        First Book
+    
+    
+        Second Book
+    
+`;
+
+        const xsltString = `
+
+    
+    
+
+    
+        
+    
+
+    
+        
+    
+`;
+
+        const xsltClass = new Xslt();
+        const xmlParser = new XmlParser();
+        const xml = xmlParser.xmlParse(xmlString);
+        const xslt = xmlParser.xmlParse(xsltString);
+
+        const outXmlString = await xsltClass.xsltProcess(xml, xslt);
+
+        assert.equal(outXmlString, `First BookSecond Book`);
+    });
+
+    it('Non-whitespace text is never stripped', async () => {
+        const xmlString = `
+actual text content`;
+
+        const xsltString = `
+
+    
+    
+
+    
+        
+    
+`;
+
+        const xsltClass = new Xslt();
+        const xmlParser = new XmlParser();
+        const xml = xmlParser.xmlParse(xmlString);
+        const xslt = xmlParser.xmlParse(xsltString);
+
+        const outXmlString = await xsltClass.xsltProcess(xml, xslt);
+
+        // Text content should never be stripped
+        assert.equal(outXmlString, `actual text content`);
+    });
+});
+
+describe('xsl:preserve-space', () => {
+    it('Preserve-space keeps whitespace in specified elements', async () => {
+        const xmlString = `
+
+    
+    function hello() {
+        return "world";
+    }
+    
+`;
+
+        const xsltString = `
+
+    
+    
+    
+
+    
+        
+    
+`;
+
+        const xsltClass = new Xslt();
+        const xmlParser = new XmlParser();
+        const xml = xmlParser.xmlParse(xmlString);
+        const xslt = xmlParser.xmlParse(xsltString);
+
+        const outXmlString = await xsltClass.xsltProcess(xml, xslt);
+
+        // Code content formatting should be preserved
+        assert.ok(outXmlString.includes('function hello()'));
+        assert.ok(outXmlString.includes('return "world"'));
+    });
+});