Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion src/main/java/org/owasp/html/HtmlLexer.java
Original file line number Diff line number Diff line change
Expand Up @@ -476,6 +476,7 @@ private static enum State {
COMMENT,
COMMENT_DASH,
COMMENT_DASH_DASH,
COMMENT_DASH_AFTER_BANG,
DIRECTIVE,
DONE,
BOGUS_COMMENT,
Expand Down Expand Up @@ -640,17 +641,30 @@ && canonicalElementName(start + 2, end)
case BANG:
if ('-' == ch) {
state = State.BANG_DASH;
} else if('>' == ch) { // <!> is a valid html comment
state = State.DONE;
type = HtmlTokenType.COMMENT;
} else {
state = State.DIRECTIVE;
}
break;
case BANG_DASH:
if ('-' == ch) {
state = State.COMMENT;
state = State.COMMENT_DASH_AFTER_BANG;
} else {
state = State.DIRECTIVE;
}
break;
case COMMENT_DASH_AFTER_BANG:
if ('>' == ch) { // <!--> is a valid html comment
state = State.DONE;
type = HtmlTokenType.COMMENT;
} else if ('-' == ch) { // <!---> is a valid html comment
state = State.COMMENT_DASH_AFTER_BANG;
} else {
state = State.COMMENT;
}
break;
case COMMENT:
if ('-' == ch) {
state = State.COMMENT_DASH;
Expand All @@ -665,6 +679,8 @@ && canonicalElementName(start + 2, end)
if ('>' == ch) {
state = State.DONE;
type = HtmlTokenType.COMMENT;
} else if ('!' == ch) { // --!> is also valid closing sequence
state = State.COMMENT_DASH_DASH;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This would seem to suggest that<!-- --!-> is a whole comment tag.
iiuc, after <!-- --!- we should be in the comment_dash state.

Perhaps we need ! to transition to COMMENT_DASH_DASH_BANG here which transitions as does case COMMENT above.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this look right?

flowchart TD
  BANG -- "-" --> BANG_DASH;
  BANG_DASH -- "-" --> COMMENT_DASH_AFTER_BANG;
  BANG_DASH -- "else" --> DIRECTIVE;
  COMMENT_DASH_AFTER_BANG -- "-" --> COMMENT_DASH_AFTER_BANG;
  COMMENT_DASH_AFTER_BANG -- ">" --> DONE;
  COMMENT_DASH_AFTER_BANG -- "else" --> COMMENT;
  COMMENT -- "-" --> COMMENT_DASH;
  COMMENT -- "else" --> COMMENT;
  COMMENT_DASH -- "-" --> COMMENT_DASH_DASH;
  COMMENT_DASH -- "else" --> COMMENT;
  COMMENT_DASH_DASH -- ">" --> DONE;
  COMMENT_DASH_DASH -- "-" --> COMMENT_DASH_DASH;
  COMMENT_DASH_DASH -- "!" --> COMMENT_DASH_DASH_BANG;
  COMMENT_DASH_DASH_BANG -- ">" --> DONE;
  COMMENT_DASH_DASH_BANG -- "-" --> COMMENT_DASH;
  COMMENT_DASH_DASH_BANG -- "else" --> COMMENT;
Loading

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this seems correct. The missing part is the COMMENT_DASH_AFTER_BANG case and the <!> scenario too. But the new COMMENT_DASH_DASH_BANG suggestion seems correct to me.

Copy link
Author

@jfbyers jfbyers Sep 24, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @mikesamuel , this makes sense. I implemented your proposed changes and added new tests. All the use cases discussed so far pass the tests. Can you please validate / approve the MR? Thanks!

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jfbyers Changes look good to me, let see what mike's thoughts are :)

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@subbudvk @mikesamuel are you ok moving forward with this PR ? What are the next steps? Thank you.

} else if ('-' == ch) {
state = State.COMMENT_DASH_DASH;
} else {
Expand Down
39 changes: 39 additions & 0 deletions src/test/java/org/owasp/html/HtmlLexerTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,45 @@ public static final void testShortTags() {
"TAGEND: >");
}

@Test
public static final void testCommentDeclarationWith0CommentsAndXss() throws Exception
{
//check https://datatracker.ietf.org/doc/html/rfc1866#section-3.2.5
assertTokens("<!><img src=1 onError=alert(\"nice\")>",
"COMMENT: <!>",
"TAGBEGIN: <img",
"ATTRNAME: src",
"ATTRVALUE: 1",
"ATTRNAME: onError",
"ATTRVALUE: alert(\"nice\")",
"TAGEND: >"
);
}

@Test
public static final void testAbruptClosingOfEmptyComment() throws Exception
{
assertTokens("<!--><img>a<!--->b<!->c",
"COMMENT: <!-->",
"TAGBEGIN: <img",
"TAGEND: >",
"TEXT: a",
"COMMENT: <!--->",
"TEXT: b",
"SERVERCODE: <!->c"
);
}

@Test
public static final void testIncorrectlyClosedComment() throws Exception
{
assertTokens("<!-- Comment --!><img>",
"COMMENT: <!-- Comment --!>",
"TAGBEGIN: <img",
"TAGEND: >"
);
}

private static void lex(String input, Appendable out) throws Exception {
HtmlLexer lexer = new HtmlLexer(input);
int maxTypeLength = 0;
Expand Down