Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,12 @@ and this project adheres to the
[Python Version Specification](https://packaging.python.org/en/latest/specifications/version-specifiers/).
See the [Contributing Guide](contributing.md) for details.

## [Unreleased]

### Fixed

* Fix an infinite loop that could occur when parsing certain HTML comments in some Python versions (#1554).

## [3.9.0] - 2025-09-04

### Changed
Expand Down
19 changes: 19 additions & 0 deletions markdown/htmlparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@
if TYPE_CHECKING: # pragma: no cover
from markdown import Markdown

# Comment-terminator patterns used by the fallback `parse_comment` below, which is only
# defined when `htmlparser` does not already provide `commentabruptclose` (i.e. Python
# versions which do not have the current comment fix).
# `commentclose` matches a regular comment terminator: `-->` or `--!>`.
commentclose = re.compile(r'--!?>')
# `commentabruptclose` matches `>` or `->`. It is applied with `.match()` at the position
# immediately after `<!--`, so it only recognizes abruptly closed comments (`<!-->`, `<!--->`).
commentabruptclose = re.compile(r'-?>')

# Import a copy of the html.parser lib as `htmlparser` so we can monkeypatch it.
# Users can still do `from html import parser` and get the default behavior.
Expand Down Expand Up @@ -302,6 +305,22 @@ def parse_pi(self, i: int) -> int:
self.handle_data('<?')
return i + 2

if not hasattr(htmlparser, 'commentabruptclose'):
    # Fallback used only when the bundled `htmlparser` lacks `commentabruptclose`.
    # see https://html.spec.whatwg.org/multipage/parsing.html#comment-start-state
    def parse_comment(self, i, report=True):
        """
        Parse the HTML comment that starts at index `i` of `self.rawdata`.

        Arguments:
            i: Index of the opening `<!--` in `self.rawdata`.
            report: When true, pass the comment text to `self.handle_comment`.

        Returns:
            The index just past the comment terminator, or `-1` if no terminator was found.
        """
        text = self.rawdata
        assert text.startswith('<!--', i), 'unexpected call to parse_comment()'
        body_start = i + 4  # first character after the opening `<!--`
        # Look for a regular terminator anywhere after the opener; failing that, accept
        # an abrupt close only if it sits directly after the opener.
        closer = commentclose.search(text, body_start) or commentabruptclose.match(text, body_start)
        if closer is None:
            return -1
        if report:
            self.handle_comment(text[body_start:closer.start()])
        return closer.end()

def parse_html_declaration(self, i: int) -> int:
if self.at_line_start() or self.intail:
if self.rawdata[i:i+3] == '<![' and not self.rawdata[i:i+9] == '<![CDATA[':
Expand Down
18 changes: 17 additions & 1 deletion tests/test_syntax/blocks/test_html_blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1018,7 +1018,7 @@ def test_comment_in_code_block(self):
# Note: This is a change in behavior. Previously, Python-Markdown interpreted this in the same manner
# as browsers and all text after the opening comment tag was considered to be in a comment. However,
# that did not match the reference implementation. The new behavior does.
def test_unclosed_comment_(self):
def test_unclosed_comment(self):
self.assertMarkdownRenders(
self.dedent(
"""
Expand All @@ -1035,6 +1035,22 @@ def test_unclosed_comment_(self):
)
)

# A comment opener followed only by `-- >` (note the space before `>`) has no valid terminator.
# The expected output is therefore the whole input escaped as ordinary paragraph text rather
# than raw HTML. NOTE(review): presumably the regression test for the comment-parsing fix
# mentioned in the changelog -- confirm.
def test_invalid_comment_end(self):
self.assertMarkdownRenders(
self.dedent(
"""
<!-- This comment is malformed and never closes -- >
Some content after the bad comment.
"""
),
self.dedent(
"""
<p>&lt;!-- This comment is malformed and never closes -- &gt;
Some content after the bad comment.</p>
"""
)
)

def test_raw_processing_instruction_one_line(self):
self.assertMarkdownRenders(
"<?php echo '>'; ?>",
Expand Down