HenrikJoreteg · adrai · Feb 7, 2025
diff --git a/src/parse.js b/src/parse.js
@@ -24,7 +24,23 @@ export default function parse(html, options) {
     })
   }
 
-  html.replace(tagRE, function (tag, index) {
+  const matches = Array.from(html.matchAll(tagRE)) // materialized so entries can be patched in place below
+  matches.forEach(function (match) { // pre-pass: trim a leading unclosed '<...' off a match, e.g. '<10 <div>'
+    const tag = match[0]
+    if (!tag) return // nothing to normalize; library code must not log the input to the console
+    const amountOfLts = tag.split('<').length // number of '<' characters, plus one
+    const amountOfGts = tag.split('>').length // number of '>' characters, plus one
+    if (amountOfLts > amountOfGts) { // more '<' than '>': the matched text contains an unclosed '<'
+      const firstPart = tag.substring(0, tag.indexOf('<', tag.indexOf('<') + 1)) // everything before the second '<'
+      const secondPart = tag.substring(firstPart.length) // remainder, starting at the second '<'
+      match[0] = secondPart
+      match.index += firstPart.length // keep index pointing at where the trimmed tag starts in html
+    }
+  })
+  matches.forEach(function (match, i) {
+    const tag = match[0]
+    if (!tag) return
+    const index = match.index
     if (inComponent) {
       if (tag !== '</' + current.name + '>') {
         return
@@ -36,6 +52,13 @@ export default function parse(html, options) {
     const isComment = tag.startsWith('<!--')
     const start = index + tag.length
     const nextChar = html.charAt(start)
+    const nextMatch = matches[i + 1]
+    let isText
+    if (nextChar === '<' && nextMatch)  {
+      const nextTag = html.substring(start, nextMatch.index)
+      isText = nextTag.split('<').length >  nextTag.split('>').length
+    }
+
     let parent
 
     if (isComment) {
@@ -66,9 +89,17 @@ export default function parse(html, options) {
         nextChar &&
         nextChar !== '<'
       ) {
+        let possibleContent = html.slice(start, html.indexOf('<', start))
+        const indexOfPossibleContent = html.indexOf(possibleContent, start)
+        const startAfterPossibleContent = indexOfPossibleContent + possibleContent.length + 1
+        const nextLt = html.indexOf('<', startAfterPossibleContent)
+        const nextGt = html.indexOf('>', startAfterPossibleContent)
+        if (nextLt > -1 && nextLt < nextGt) {
+          possibleContent = html.slice(start, html.indexOf('<', startAfterPossibleContent))
+        }
         current.children.push({
           type: 'text',
-          content: html.slice(start, html.indexOf('<', start)),
+          content: possibleContent,
         })
       }
 
@@ -95,15 +126,19 @@ export default function parse(html, options) {
         // move current up a level to match the end tag
         current = level === -1 ? result : arr[level]
       }
-      if (!inComponent && nextChar !== '<' && nextChar) {
+      if (!inComponent && (nextChar !== '<' || isText) && nextChar) {
         // trailing text node
         // if we're at the root, push a base text node. otherwise add as
         // a child to the current node.
         parent = level === -1 ? result : arr[level].children
 
         // calculate correct end of the content slice in case there's
         // no tag after the text node.
-        const end = html.indexOf('<', start)
+        let end = html.indexOf('<', start)
+        if (isText) {
+          const nextTag = html.substring(nextMatch.index)
+          end = html.indexOf(nextTag, start)
+        }
         let content = html.slice(start, end === -1 ? undefined : end)
         // if a node is nothing but whitespace, collapse it as the spec states:
         // https://www.w3.org/TR/html4/struct/text.html#h-9.1

diff --git a/test/parse.js b/test/parse.js
@@ -724,7 +724,7 @@ test('parse', function (t) {
         children: [
           {
             content:
-              "\n      !function() {\n        var cookies = document.cookie ? document.cookie.split(';') : [];\n        //                |   this less than is triggering probems\n        for (var i = 0; i ",
+              "\n      !function() {\n        var cookies = document.cookie ? document.cookie.split(';') : [];\n        //                |   this less than is triggering probems\n        for (var i = 0; i < cookies.length; i++) {\n          var splitted = cookies[i].split(\'=\');\n          var name = splitted[0];\n        }\n      }();\n      ",
             type: 'text',
           },
         ],
@@ -971,4 +971,81 @@ test('uppercase tags', function (t) {
     }
   ], 'should handle uppercase tags correctly')
   t.end()
+})
+
+test('open tag in html string', function (t) {
+  const html = '<0>hello under <10 <div>under 10</div> ok?</0>' // '<10 ' is never closed, so it must end up as text
+  const parsed = HTML.parse(html)
+  t.deepEqual(parsed, [
+    {
+      "type": "tag",
+      "name": "0",
+      "voidElement": false,
+      "attrs": {},
+      "children": [
+        {
+          "type": "text",
+          "content": "hello under <10 "
+        },
+        {
+          "type": "tag",
+          "name": "div",
+          "voidElement": false,
+          "attrs": {},
+          "children": [
+            {
+              "type": "text",
+              "content": "under 10"
+            }
+          ]
+        },
+        {
+          "type": "text",
+          "content": " ok?"
+        }
+      ]
+    }
+  ], 'should keep an unclosed < inside an element as text content')
+  t.end()
+})
+
+
+test('open tag in html string complex', function (t) {
+  const html = 'hello <italic>under ten</italic><10 this text after the sign should be rendered<bold>END</bold>' // unclosed '<' right after a closing tag
+  const parsed = HTML.parse(html)
+  t.deepEqual(parsed, [
+    {
+      "type": "text",
+      "content": "hello "
+    },
+    {
+      "type": "tag",
+      "name": "italic",
+      "voidElement": false,
+      "attrs": {},
+      "children": [
+        {
+          "type": "text",
+          "content": "under ten"
+        }
+      ]
+    },
+    {
+      "type": "text",
+      "content": "<10 this text after the sign should be rendered"
+    },
+    {
+      "type": "tag",
+      "name": "bold",
+      "voidElement": false,
+      "attrs": {},
+      "children": [
+        {
+          "type": "text",
+          "content": "END"
+        }
+      ]
+    }
+  ], 'should keep text starting with an unclosed < between sibling tags')
+  t.end()
 })