Skip to content

Commit 3ad5799

Browse files
authored
Fix GH-20395: \Dom\ParentNode::querySelector and \Dom\ParentNode::querySelectorAll requires elements in $selectors to be lowercase (#20409)
The selector needs to be compared in a lowercase manner. This also almost completely obsoletes the interned string optimization, so get rid of that for simplicity sake. While there is still theoretical benefit, it is only 1-2% in my random tests, not worth it anymore.
1 parent eee5dcc commit 3ad5799

File tree

5 files changed

+96
-47
lines changed

5 files changed

+96
-47
lines changed

NEWS

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ PHP NEWS
1414
- DOM:
1515
. Fix memory leak when edge case is hit when registering xpath callback.
1616
(ndossche)
17+
. Fixed bug GH-20395 (querySelector and querySelectorAll requires elements
18+
in $selectors to be lowercase). (ndossche)
1719

1820
- Opcache:
1921
. Fixed bug GH-20329 (opcache.file_cache broken with full interned string

ext/dom/lexbor/lexbor/selectors-adapted/selectors.c

Lines changed: 50 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,22 @@ static void dom_lxb_str_wrapper_release(dom_lxb_str_wrapper *wrapper)
3535
}
3636
}
3737

38-
static zend_always_inline bool lxb_selectors_adapted_cmp_local_name_literal(const xmlNode *node, const char *name)
38+
static bool lxb_selectors_str_cmp_loright(const char *lhs, const char *rhs)
39+
{
40+
while (true) {
41+
if (*rhs != zend_tolower_ascii(*lhs)) {
42+
return false;
43+
}
44+
if (!*lhs) {
45+
return true;
46+
}
47+
++rhs;
48+
++lhs;
49+
}
50+
}
51+
52+
/* `name` is lowercase */
53+
static zend_always_inline bool lxb_selectors_cmp_html_name_lit(const xmlNode *node, const char *name)
3954
{
4055
return strcmp((const char *) node->name, name) == 0;
4156
}
@@ -48,14 +63,15 @@ static zend_always_inline bool lxb_selectors_adapted_cmp_ns(const xmlNode *a, co
4863

4964
static zend_always_inline bool lxb_selectors_adapted_cmp_local_name_id(const xmlNode *node, const lxb_selectors_adapted_id *id)
5065
{
51-
uintptr_t ptr = (uintptr_t) node->name;
52-
if (id->interned && (ptr & (ZEND_MM_ALIGNMENT - 1)) != 0) {
53-
/* It cannot be a heap-allocated string because the pointer is not properly aligned for a heap allocation.
54-
* Therefore, it must be interned into the dictionary pool. */
55-
return node->name == id->name;
66+
ZEND_ASSERT(node->doc != NULL);
67+
if (php_dom_ns_is_html_and_document_is_html(node)) {
68+
/* From https://html.spec.whatwg.org/#case-sensitivity-of-selectors:
69+
* The element name must be compared case sensitively _after_ converting the selector to lowercase.
70+
* E.g. selector "DIV" must match element "div" but not "Div". */
71+
return lxb_selectors_str_cmp_loright((const char *) id->name, (const char *) node->name);
72+
} else {
73+
return strcmp((const char *) node->name, (const char *) id->name) == 0;
5674
}
57-
58-
return strcmp((const char *) node->name, (const char *) id->name) == 0;
5975
}
6076

6177
static zend_always_inline const xmlAttr *lxb_selectors_adapted_attr(const xmlNode *node, const lxb_char_t *name)
@@ -64,9 +80,8 @@ static zend_always_inline const xmlAttr *lxb_selectors_adapted_attr(const xmlNod
6480
ZEND_ASSERT(node->doc != NULL);
6581
if (php_dom_ns_is_html_and_document_is_html(node)) {
6682
/* No need to handle DTD entities as we're in HTML. */
67-
size_t name_bound = strlen((const char *) name) + 1;
6883
for (const xmlAttr *cur = node->properties; cur != NULL; cur = cur->next) {
69-
if (lexbor_str_data_nlocmp_right(cur->name, name, name_bound)) {
84+
if (lxb_selectors_str_cmp_loright((const char *) name, (const char *) cur->name)) {
7085
attr = cur;
7186
break;
7287
}
@@ -154,18 +169,7 @@ static bool lxb_selectors_is_lowercased_html_attrib_name(const lxb_css_selector_
154169
static void lxb_selectors_adapted_set_entry_id_ex(lxb_selectors_entry_t *entry, const lxb_css_selector_t *selector, const xmlNode *node)
155170
{
156171
entry->id.attr_case_insensitive = lxb_selectors_is_lowercased_html_attrib_name(selector);
157-
158-
if (node->doc != NULL && node->doc->dict != NULL) {
159-
const xmlChar *interned = xmlDictExists(node->doc->dict, selector->name.data, selector->name.length);
160-
if (interned != NULL) {
161-
entry->id.name = interned;
162-
entry->id.interned = true;
163-
return;
164-
}
165-
}
166-
167172
entry->id.name = selector->name.data;
168-
entry->id.interned = false;
169173
}
170174

171175
static zend_always_inline void lxb_selectors_adapted_set_entry_id(lxb_selectors_entry_t *entry, const lxb_css_selector_t *selector, const xmlNode *node)
@@ -1686,8 +1690,8 @@ lxb_selectors_pseudo_class(const lxb_css_selector_t *selector,
16861690
case LXB_CSS_SELECTOR_PSEUDO_CLASS_ANY_LINK:
16871691
/* https://drafts.csswg.org/selectors/#the-any-link-pseudo */
16881692
if (php_dom_ns_is_fast(node, php_dom_ns_is_html_magic_token)
1689-
&& (lxb_selectors_adapted_cmp_local_name_literal(node, "a")
1690-
|| lxb_selectors_adapted_cmp_local_name_literal(node, "area")))
1693+
&& (lxb_selectors_cmp_html_name_lit(node, "a")
1694+
|| lxb_selectors_cmp_html_name_lit(node, "area")))
16911695
{
16921696
return lxb_selectors_adapted_has_attr(node, "href");
16931697
}
@@ -1705,7 +1709,7 @@ lxb_selectors_pseudo_class(const lxb_css_selector_t *selector,
17051709
if (!php_dom_ns_is_fast(node, php_dom_ns_is_html_magic_token)) {
17061710
return false;
17071711
}
1708-
if (lxb_selectors_adapted_cmp_local_name_literal(node, "input")) {
1712+
if (lxb_selectors_cmp_html_name_lit(node, "input")) {
17091713
const xmlAttr *dom_attr = lxb_selectors_adapted_attr(node, (const lxb_char_t *) "type");
17101714
if (dom_attr == NULL) {
17111715
return false;
@@ -1729,7 +1733,7 @@ lxb_selectors_pseudo_class(const lxb_css_selector_t *selector,
17291733

17301734
return res;
17311735
}
1732-
else if(lxb_selectors_adapted_cmp_local_name_literal(node, "option")) {
1736+
else if(lxb_selectors_cmp_html_name_lit(node, "option")) {
17331737
return lxb_selectors_adapted_has_attr(node, "selected");
17341738
}
17351739

@@ -1802,8 +1806,8 @@ lxb_selectors_pseudo_class(const lxb_css_selector_t *selector,
18021806
case LXB_CSS_SELECTOR_PSEUDO_CLASS_LINK:
18031807
/* https://html.spec.whatwg.org/multipage/semantics-other.html#selector-link */
18041808
if (php_dom_ns_is_fast(node, php_dom_ns_is_html_magic_token)
1805-
&& (lxb_selectors_adapted_cmp_local_name_literal(node, "a")
1806-
|| lxb_selectors_adapted_cmp_local_name_literal(node, "area")))
1809+
&& (lxb_selectors_cmp_html_name_lit(node, "a")
1810+
|| lxb_selectors_cmp_html_name_lit(node, "area")))
18071811
{
18081812
return lxb_selectors_adapted_has_attr(node, "href");
18091813
}
@@ -1823,9 +1827,9 @@ lxb_selectors_pseudo_class(const lxb_css_selector_t *selector,
18231827

18241828
case LXB_CSS_SELECTOR_PSEUDO_CLASS_OPTIONAL:
18251829
if (php_dom_ns_is_fast(node, php_dom_ns_is_html_magic_token)
1826-
&& (lxb_selectors_adapted_cmp_local_name_literal(node, "input")
1827-
|| lxb_selectors_adapted_cmp_local_name_literal(node, "select")
1828-
|| lxb_selectors_adapted_cmp_local_name_literal(node, "textarea")))
1830+
&& (lxb_selectors_cmp_html_name_lit(node, "input")
1831+
|| lxb_selectors_cmp_html_name_lit(node, "select")
1832+
|| lxb_selectors_cmp_html_name_lit(node, "textarea")))
18291833
{
18301834
return !lxb_selectors_adapted_has_attr(node, "required");
18311835
}
@@ -1840,8 +1844,8 @@ lxb_selectors_pseudo_class(const lxb_css_selector_t *selector,
18401844

18411845
case LXB_CSS_SELECTOR_PSEUDO_CLASS_PLACEHOLDER_SHOWN:
18421846
if (php_dom_ns_is_fast(node, php_dom_ns_is_html_magic_token)
1843-
&& (lxb_selectors_adapted_cmp_local_name_literal(node, "input")
1844-
|| lxb_selectors_adapted_cmp_local_name_literal(node, "textarea")))
1847+
&& (lxb_selectors_cmp_html_name_lit(node, "input")
1848+
|| lxb_selectors_cmp_html_name_lit(node, "textarea")))
18451849
{
18461850
return lxb_selectors_adapted_has_attr(node, "placeholder");
18471851
}
@@ -1856,9 +1860,9 @@ lxb_selectors_pseudo_class(const lxb_css_selector_t *selector,
18561860

18571861
case LXB_CSS_SELECTOR_PSEUDO_CLASS_REQUIRED:
18581862
if (php_dom_ns_is_fast(node, php_dom_ns_is_html_magic_token)
1859-
&& (lxb_selectors_adapted_cmp_local_name_literal(node, "input")
1860-
|| lxb_selectors_adapted_cmp_local_name_literal(node, "select")
1861-
|| lxb_selectors_adapted_cmp_local_name_literal(node, "textarea")))
1863+
&& (lxb_selectors_cmp_html_name_lit(node, "input")
1864+
|| lxb_selectors_cmp_html_name_lit(node, "select")
1865+
|| lxb_selectors_cmp_html_name_lit(node, "textarea")))
18621866
{
18631867
return lxb_selectors_adapted_has_attr(node, "required");
18641868
}
@@ -2104,32 +2108,32 @@ lxb_selectors_pseudo_class_disabled(const xmlNode *node)
21042108
}
21052109

21062110
if (lxb_selectors_adapted_has_attr(node, "disabled")
2107-
&& (lxb_selectors_adapted_cmp_local_name_literal(node, "button")
2108-
|| lxb_selectors_adapted_cmp_local_name_literal(node, "input")
2109-
|| lxb_selectors_adapted_cmp_local_name_literal(node, "select")
2110-
|| lxb_selectors_adapted_cmp_local_name_literal(node, "textarea")
2111-
|| lxb_selectors_adapted_cmp_local_name_literal(node, "optgroup")
2112-
|| lxb_selectors_adapted_cmp_local_name_literal(node, "fieldset")))
2111+
&& (lxb_selectors_cmp_html_name_lit(node, "button")
2112+
|| lxb_selectors_cmp_html_name_lit(node, "input")
2113+
|| lxb_selectors_cmp_html_name_lit(node, "select")
2114+
|| lxb_selectors_cmp_html_name_lit(node, "textarea")
2115+
|| lxb_selectors_cmp_html_name_lit(node, "optgroup")
2116+
|| lxb_selectors_cmp_html_name_lit(node, "fieldset")))
21132117
{
21142118
return true;
21152119
}
21162120

2117-
if (lxb_selectors_adapted_cmp_local_name_literal(node, "fieldset")) {
2121+
if (lxb_selectors_cmp_html_name_lit(node, "fieldset")) {
21182122
const xmlNode *fieldset = node;
21192123
node = node->parent;
21202124

21212125
while (node != NULL && CMP_NODE_TYPE(node, XML_ELEMENT_NODE)) {
21222126
/* node is a disabled fieldset that is an ancestor of fieldset */
21232127
if (php_dom_ns_is_fast(node, php_dom_ns_is_html_magic_token)
2124-
&& lxb_selectors_adapted_cmp_local_name_literal(node, "fieldset")
2128+
&& lxb_selectors_cmp_html_name_lit(node, "fieldset")
21252129
&& lxb_selectors_adapted_has_attr(node, "disabled"))
21262130
{
21272131
/* Search first legend child and figure out if fieldset is a descendent from that. */
21282132
const xmlNode *search_current = node->children;
21292133
do {
21302134
if (search_current->type == XML_ELEMENT_NODE
21312135
&& php_dom_ns_is_fast(search_current, php_dom_ns_is_html_magic_token)
2132-
&& lxb_selectors_adapted_cmp_local_name_literal(search_current, "legend")) {
2136+
&& lxb_selectors_cmp_html_name_lit(search_current, "legend")) {
21332137
/* search_current is a legend element. */
21342138
const xmlNode *inner_search_current = fieldset;
21352139

@@ -2235,8 +2239,8 @@ static bool
22352239
lxb_selectors_pseudo_class_read_write(const xmlNode *node)
22362240
{
22372241
if (php_dom_ns_is_fast(node, php_dom_ns_is_html_magic_token)) {
2238-
if (lxb_selectors_adapted_cmp_local_name_literal(node, "input")
2239-
|| lxb_selectors_adapted_cmp_local_name_literal(node, "textarea")) {
2242+
if (lxb_selectors_cmp_html_name_lit(node, "input")
2243+
|| lxb_selectors_cmp_html_name_lit(node, "textarea")) {
22402244
return !lxb_selectors_adapted_has_attr(node, "readonly") && !lxb_selectors_adapted_has_attr(node, "disabled");
22412245
} else {
22422246
const xmlAttr *attr = lxb_selectors_adapted_attr(node, (const lxb_char_t *) "contenteditable");

ext/dom/lexbor/lexbor/selectors-adapted/selectors.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,6 @@ typedef lxb_selectors_entry_t *
7777

7878
typedef struct {
7979
const xmlChar *name;
80-
bool interned;
8180
bool attr_case_insensitive;
8281
} lxb_selectors_adapted_id;
8382

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
--TEST--
2+
GH-20395 (\Dom\ParentNode::querySelector and \Dom\ParentNode::querySelectorAll requires elements in $selectors to be lowercase)
3+
--EXTENSIONS--
4+
dom
5+
--CREDITS--
6+
DeveloperRob
7+
--FILE--
8+
<?php
9+
10+
$html = '<!doctype html><html><head></head><body></body></html>';
11+
$dom = Dom\HtmlDocument::createFromString($html);
12+
var_dump(is_null($dom->querySelector('html')));
13+
var_dump(is_null($dom->querySelector('Html')));
14+
var_dump(is_null($dom->querySelector('HTML')));
15+
16+
$dom->body->appendChild($dom->createElement('div'));
17+
$dom->body->appendChild($dom->createElementNS('http://www.w3.org/1999/xhtml', 'Div'));
18+
19+
foreach ($dom->querySelectorAll('div') as $div) {
20+
var_dump($div->localName);
21+
}
22+
23+
foreach ($dom->querySelectorAll('Div') as $div) {
24+
var_dump($div->localName);
25+
}
26+
27+
?>
28+
--EXPECT--
29+
bool(false)
30+
bool(false)
31+
bool(false)
32+
string(3) "div"
33+
string(3) "div"

ext/dom/tests/modern/css_selectors/pseudo_classes_links.phpt

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,15 @@ $dom = DOM\XMLDocument::createFromString(<<<XML
1111
<container>
1212
<a href="http://example.com">Link</a>
1313
<a xmlns="http://www.w3.org/1999/xhtml" href="http://example.com">Link</a>
14+
<A xmlns="http://www.w3.org/1999/xhtml" href="http://example.com">Not actually a link</A>
1415
<area xmlns="http://www.w3.org/1999/xhtml" href="http://example.com">Link</area>
1516
</container>
1617
XML);
1718

1819
test_helper($dom, ':any-link');
1920
test_helper($dom, ':link');
2021
test_helper($dom, 'a:not(:any-link)');
22+
test_helper($dom, ':not(:any-link)');
2123

2224
?>
2325
--EXPECT--
@@ -29,3 +31,12 @@ test_helper($dom, 'a:not(:any-link)');
2931
<area xmlns="http://www.w3.org/1999/xhtml" href="http://example.com">Link</area>
3032
--- Selector: a:not(:any-link) ---
3133
<a href="http://example.com">Link</a>
34+
--- Selector: :not(:any-link) ---
35+
<container>
36+
<a href="http://example.com">Link</a>
37+
<a xmlns="http://www.w3.org/1999/xhtml" href="http://example.com">Link</a>
38+
<A xmlns="http://www.w3.org/1999/xhtml" href="http://example.com">Not actually a link</A>
39+
<area xmlns="http://www.w3.org/1999/xhtml" href="http://example.com">Link</area>
40+
</container>
41+
<a href="http://example.com">Link</a>
42+
<A xmlns="http://www.w3.org/1999/xhtml" href="http://example.com">Not actually a link</A>

0 commit comments

Comments
 (0)