Skip to content

Commit 278d41c

Browse files
authored
Fix identifier (un)escaping (#47)
1 parent 82eed09 commit 278d41c

8 files changed

+772
-181
lines changed

tests/WP_SQLite_Driver_Tests.php

+399-12
Large diffs are not rendered by default.

tests/WP_SQLite_Driver_Translation_Tests.php

+60-60
Large diffs are not rendered by default.

wp-includes/mysql/class-wp-mysql-lexer.php

+18-5
Original file line numberDiff line numberDiff line change
@@ -2130,7 +2130,7 @@ class WP_MySQL_Lexer {
21302130
*
21312131
* @var int
21322132
*/
2133-
private $sql_modes;
2133+
private $sql_modes = 0;
21342134

21352135
/**
21362136
* How many bytes from the original SQL payload have been read and tokenized.
@@ -2181,16 +2181,28 @@ class WP_MySQL_Lexer {
21812181
/**
21822182
* @param string $sql The SQL payload to tokenize.
21832183
* @param int $mysql_version The version of the MySQL server that the SQL payload is intended for.
2184-
* @param int $sql_modes The SQL modes that should be considered active during tokenization.
2184+
* @param string[] $sql_modes The SQL modes that should be considered active during tokenization.
21852185
*/
21862186
public function __construct(
	string $sql,
	int $mysql_version = 80038,
	array $sql_modes = array()
) {
	$this->sql           = $sql;
	$this->mysql_version = $mysql_version;

	// SQL mode names recognized by this constructor, mapped to their flags.
	// Any mode name that is not listed here is silently ignored.
	$known_modes = array(
		'HIGH_NOT_PRECEDENCE'  => self::SQL_MODE_HIGH_NOT_PRECEDENCE,
		'PIPES_AS_CONCAT'      => self::SQL_MODE_PIPES_AS_CONCAT,
		'IGNORE_SPACE'         => self::SQL_MODE_IGNORE_SPACE,
		'NO_BACKSLASH_ESCAPES' => self::SQL_MODE_NO_BACKSLASH_ESCAPES,
	);

	// Combine the requested modes into one bitmask. Mode names are matched
	// case-insensitively by upper-casing them before the lookup.
	$mask = 0;
	foreach ( $sql_modes as $mode_name ) {
		$key = strtoupper( $mode_name );
		if ( isset( $known_modes[ $key ] ) ) {
			$mask |= $known_modes[ $key ];
		}
	}
	$this->sql_modes |= $mask;
}
21952207

21962208
/**
@@ -2251,7 +2263,8 @@ public function get_token(): ?WP_MySQL_Token {
22512263
$this->token_type,
22522264
$this->token_starts_at,
22532265
$this->bytes_already_read - $this->token_starts_at,
2254-
$this->sql
2266+
$this->sql,
2267+
$this->is_sql_mode_active( self::SQL_MODE_NO_BACKSLASH_ESCAPES )
22552268
);
22562269
}
22572270

wp-includes/mysql/class-wp-mysql-token.php

+144
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,33 @@
77
* and consumed by WP_MySQL_Parser during the parsing process.
88
*/
99
class WP_MySQL_Token extends WP_Parser_Token {
10+
/**
11+
* Whether the NO_BACKSLASH_ESCAPES SQL mode is enabled.
12+
*
13+
* @var bool
14+
*/
15+
private $sql_mode_no_backslash_escapes_enabled;
16+
17+
/**
18+
* Constructor.
19+
*
20+
* @param int $id Token type.
21+
* @param int $start Byte offset in the input where the token begins.
22+
* @param int $length Byte length of the token in the input.
23+
* @param string $input Input bytes from which the token was parsed.
24+
* @param bool $sql_mode_no_backslash_escapes_enabled Whether the NO_BACKSLASH_ESCAPES SQL mode is enabled.
25+
*/
26+
public function __construct(
	int $id,
	int $start,
	int $length,
	string $input,
	bool $sql_mode_no_backslash_escapes_enabled
) {
	// Remember whether backslash escapes are disabled; get_value() consults
	// this flag when it decodes quoted token values.
	$this->sql_mode_no_backslash_escapes_enabled = $sql_mode_no_backslash_escapes_enabled;

	// Hand the generic token data (type, position, and input) to the parent.
	parent::__construct( $id, $start, $length, $input );
}
36+
1037
/**
1138
* Get the name of the token.
1239
*
@@ -24,6 +51,123 @@ public function get_name(): string {
2451
return $name;
2552
}
2653

54+
/**
55+
* Get the real unquoted value of the token.
56+
*
57+
* @return string The token value.
58+
*/
59+
public function get_value(): string {
	$value = $this->get_bytes();

	$is_back_tick_id = WP_MySQL_Lexer::BACK_TICK_QUOTED_ID === $this->id;
	$is_quoted_text  = WP_MySQL_Lexer::SINGLE_QUOTED_TEXT === $this->id
		|| WP_MySQL_Lexer::DOUBLE_QUOTED_TEXT === $this->id;

	// Only quoted tokens need decoding; every other token is returned as-is.
	if ( ! $is_back_tick_id && ! $is_quoted_text ) {
		return $value;
	}

	// Remove bounding quotes.
	$quote = $value[0];
	$value = substr( $value, 1, -1 );

	/*
	 * In two cases, the backslash has no special meaning and only a doubled
	 * bounding quote needs to be resolved, as all the other characters
	 * preserve their literal values:
	 *
	 *   1. In backtick-quoted identifiers. MySQL does not process backslash
	 *      escape sequences in quoted identifiers; `a\nb` is a five-character
	 *      name containing a literal backslash.
	 *   2. In string literals when the NO_BACKSLASH_ESCAPES SQL mode is enabled.
	 *
	 * See: https://dev.mysql.com/doc/refman/8.4/en/identifiers.html
	 */
	if ( $is_back_tick_id || $this->sql_mode_no_backslash_escapes_enabled ) {
		return str_replace( $quote . $quote, $quote, $value );
	}

	/*
	 * Unescape MySQL escape sequences.
	 *
	 * MySQL string literals use backslash as an escape character, and
	 * the string bounding quotes can also be escaped by being doubled.
	 *
	 * The escaping is done according to the following rules:
	 *
	 *   1. Some special character escape sequences are recognized.
	 *      For example, "\n" is a newline character, "\0" is ASCII NULL.
	 *   2. A specific treatment is applied to "\%" and "\_" sequences.
	 *      This is due to their special meaning for pattern matching.
	 *   3. Other backslash-prefixed characters resolve to their literal
	 *      values. For example, "\x" represents "x", "\\" represents "\".
	 *
	 * Despite looking similar, these rules are different from the C-style
	 * string escaping, so we cannot use "strip(c)slashes()" in this case.
	 *
	 * See: https://dev.mysql.com/doc/refman/8.4/en/string-literals.html
	 */
	$backslash    = chr( 92 );
	$replacements = array(
		/*
		 * MySQL special character escape sequences.
		 */
		( $backslash . '0' )        => chr( 0 ),  // An ASCII NULL character (\0).
		( $backslash . "'" )        => chr( 39 ), // A single quote character (').
		( $backslash . '"' )        => chr( 34 ), // A double quote character (").
		( $backslash . 'b' )        => chr( 8 ),  // A backspace character.
		( $backslash . 'n' )        => chr( 10 ), // A newline (linefeed) character (\n).
		( $backslash . 'r' )        => chr( 13 ), // A carriage return character (\r).
		( $backslash . 't' )        => chr( 9 ),  // A tab character (\t).
		( $backslash . 'Z' )        => chr( 26 ), // An ASCII 26 (Control+Z) character.

		/*
		 * Normalize escaping of "%" and "_" characters.
		 *
		 * MySQL has unusual handling for "\%" and "\_" in all string literals.
		 * While other sequences follow the C-style escaping ("\?" is "?", etc.),
		 * "\%" resolves to "\%" and "\_" resolves to "\_" (unlike in C strings).
		 *
		 * This means that "\%" behaves like "\\%", and "\_" behaves like "\\_".
		 * To preserve this behavior, we need to add a second backslash here;
		 * it is removed again by the catch-all step below.
		 *
		 * From https://dev.mysql.com/doc/refman/8.4/en/string-literals.html:
		 *   > The \% and \_ sequences are used to search for literal instances
		 *   > of % and _ in pattern-matching contexts where they would otherwise
		 *   > be interpreted as wildcard characters. If you use \% or \_ outside
		 *   > of pattern-matching contexts, they evaluate to the strings \% and
		 *   > \_, not to % and _.
		 */
		( $backslash . '%' )        => $backslash . $backslash . '%',
		( $backslash . '_' )        => $backslash . $backslash . '_',

		/*
		 * Preserve a double backslash as-is, so that the trailing backslash
		 * is not consumed as the beginning of an escape sequence like "\n".
		 *
		 * Resolving "\\" to "\" will be handled in the next step, where all
		 * other backslash-prefixed characters resolve to their literal values.
		 */
		( $backslash . $backslash ) => $backslash . $backslash,

		/*
		 * The bounding quotes can also be escaped by being doubled.
		 */
		( $quote . $quote )         => $quote,
	);

	/*
	 * Apply the replacements.
	 *
	 * It is important to use "strtr()" and not "str_replace()", because
	 * "str_replace()" applies replacements one after another, modifying
	 * intermediate changes rather than just the original string:
	 *
	 *   - str_replace( [ 'a', 'b' ], [ 'b', 'c' ], 'ab' ); // 'cc' (bad)
	 *   - strtr( 'ab', [ 'a' => 'b', 'b' => 'c' ] );       // 'bc' (good)
	 */
	$value = strtr( $value, $replacements );

	/*
	 * A backslash with any other character represents the character itself.
	 * That is, \x evaluates to x, \\ evaluates to \, and \🙂 evaluates to 🙂.
	 *
	 * Two details of the pattern matter here:
	 *
	 *   1. The "s" modifier makes "." match a newline as well, so that
	 *      a backslash followed by a literal line break is also resolved.
	 *   2. The "u" modifier is intentionally not used. With it, the call
	 *      returns null for input that is not valid UTF-8 (such as binary
	 *      data), which would violate the string return type. Matching
	 *      bytes is safe: dropping the backslash in front of the first
	 *      byte of a multibyte character leaves that character intact,
	 *      and the backslash byte never occurs inside a UTF-8 sequence.
	 */
	$preg_quoted_backslash = preg_quote( $backslash, '/' );
	return preg_replace( "/$preg_quoted_backslash(.)/s", '$1', $value );
}
170+
27171
/**
28172
* Get the token representation as a string.
29173
*

wp-includes/parser/class-wp-parser-token.php

+11-2
Original file line numberDiff line numberDiff line change
@@ -58,11 +58,20 @@ public function __construct(
5858
}
5959

6060
/**
61-
* Get the token value as raw bytes from the input.
61+
* Get the raw bytes of the token from the input.
62+
*
63+
* @return string The token bytes.
64+
*/
65+
public function get_bytes(): string {
	// The token is the slice of the input that begins at the token's start
	// offset and spans the token's length.
	$offset = $this->start;
	$size   = $this->length;

	return substr( $this->input, $offset, $size );
}
68+
69+
/**
70+
* Get the real unquoted value of the token.
6271
*
6372
* @return string The token value.
6473
*/
6574
public function get_value(): string {
	// The base token applies no decoding: its value is exactly its raw bytes.
	$raw_bytes = $this->get_bytes();

	return $raw_bytes;
}
6877
}

0 commit comments

Comments
 (0)