77 * and consumed by WP_MySQL_Parser during the parsing process.
88 */
99class WP_MySQL_Token extends WP_Parser_Token {
10+ /**
11+ * Whether the NO_BACKSLASH_ESCAPES SQL mode is enabled.
12+ *
13+ * @var bool
14+ */
15+ private $ sql_mode_no_backslash_escapes_enabled ;
16+
17+ /**
18+ * Constructor.
19+ *
20+ * @param int $id Token type.
21+ * @param int $start Byte offset in the input where the token begins.
22+ * @param int $length Byte length of the token in the input.
23+ * @param string $input Input bytes from which the token was parsed.
24+ * @param bool $sql_mode_no_backslash_escapes_enabled Whether the NO_BACKSLASH_ESCAPES SQL mode is enabled.
25+ */
public function __construct(
	int $id,
	int $start,
	int $length,
	string $input,
	bool $sql_mode_no_backslash_escapes_enabled
) {
	// The token type, position, length, and input are passed to the parent constructor.
	parent::__construct( $id, $start, $length, $input );

	// Keep the SQL mode flag on the token itself; get_value() reads it to
	// decide whether backslash escape sequences should be resolved.
	$this->sql_mode_no_backslash_escapes_enabled = $sql_mode_no_backslash_escapes_enabled;
}
36+
1037 /**
1138 * Get the name of the token.
1239 *
@@ -24,6 +51,123 @@ public function get_name(): string {
2451 return $ name ;
2552 }
2653
54+ /**
55+ * Get the real unquoted value of the token.
56+ *
57+ * @return string The token value.
58+ */
public function get_value(): string {
	$value = $this->get_bytes();

	$is_quoted_identifier = WP_MySQL_Lexer::BACK_TICK_QUOTED_ID === $this->id;
	$is_quoted_text       = WP_MySQL_Lexer::SINGLE_QUOTED_TEXT === $this->id
		|| WP_MySQL_Lexer::DOUBLE_QUOTED_TEXT === $this->id;

	// Tokens that are not quoted are returned exactly as they appear in the input.
	if ( ! $is_quoted_identifier && ! $is_quoted_text ) {
		return $value;
	}

	// Remove bounding quotes.
	$quote = $value[0];
	$value = substr( $value, 1, -1 );

	/*
	 * In the following cases, only the doubled bounding quotes need to be
	 * resolved, as all other characters preserve their literal values:
	 *
	 *   1. Backtick-quoted identifiers. MySQL does not recognize backslash
	 *      escape sequences in identifiers; the quote character can only be
	 *      included by being doubled.
	 *   2. Any quoted token when the NO_BACKSLASH_ESCAPES SQL mode is enabled.
	 *
	 * See: https://dev.mysql.com/doc/refman/8.4/en/identifiers.html
	 */
	if ( $is_quoted_identifier || $this->sql_mode_no_backslash_escapes_enabled ) {
		return str_replace( $quote . $quote, $quote, $value );
	}

	/**
	 * Unescape MySQL escape sequences.
	 *
	 * MySQL string literals use backslash as an escape character, and
	 * the string bounding quotes can also be escaped by being doubled.
	 *
	 * The escaping is done according to the following rules:
	 *
	 *   1. Some special character escape sequences are recognized.
	 *      For example, "\n" is a newline character, "\0" is ASCII NULL.
	 *   2. A specific treatment is applied to "\%" and "\_" sequences.
	 *      This is due to their special meaning for pattern matching.
	 *   3. Other backslash-prefixed characters resolve to their literal
	 *      values. For example, "\x" represents "x", "\\" represents "\".
	 *
	 * Despite looking similar, these rules are different from the C-style
	 * string escaping, so we cannot use "strip(c)slashes()" in this case.
	 *
	 * See: https://dev.mysql.com/doc/refman/8.4/en/string-literals.html
	 */
	$backslash    = chr( 92 );
	$replacements = array(
		/*
		 * MySQL special character escape sequences.
		 */
		( $backslash . '0' ) => chr( 0 ),  // An ASCII NULL character (\0).
		( $backslash . "'" ) => chr( 39 ), // A single quote character (').
		( $backslash . '"' ) => chr( 34 ), // A double quote character (").
		( $backslash . 'b' ) => chr( 8 ),  // A backspace character.
		( $backslash . 'n' ) => chr( 10 ), // A newline (linefeed) character (\n).
		( $backslash . 'r' ) => chr( 13 ), // A carriage return character (\r).
		( $backslash . 't' ) => chr( 9 ),  // A tab character (\t).
		( $backslash . 'Z' ) => chr( 26 ), // An ASCII 26 (Control+Z) character.

		/*
		 * Normalize escaping of "%" and "_" characters.
		 *
		 * MySQL has unusual handling for "\%" and "\_" in all string literals.
		 * While other sequences follow the C-style escaping ("\?" is "?", etc.),
		 * "\%" resolves to "\%" and "\_" resolves to "\_" (unlike in C strings).
		 *
		 * This means that "\%" behaves like "\\%", and "\_" behaves like "\\_".
		 * To preserve this behavior, we need to add a second backslash here.
		 *
		 * From https://dev.mysql.com/doc/refman/8.4/en/string-literals.html:
		 *   > The \% and \_ sequences are used to search for literal instances
		 *   > of % and _ in pattern-matching contexts where they would otherwise
		 *   > be interpreted as wildcard characters. If you use \% or \_ outside
		 *   > of pattern-matching contexts, they evaluate to the strings \% and
		 *   > \_, not to % and _.
		 */
		( $backslash . '%' ) => $backslash . $backslash . '%',
		( $backslash . '_' ) => $backslash . $backslash . '_',

		/*
		 * Preserve a double backslash as-is, so that the trailing backslash
		 * is not consumed as the beginning of an escape sequence like "\n".
		 *
		 * Resolving "\\" to "\" will be handled in the next step, where all
		 * other backslash-prefixed characters resolve to their literal values.
		 */
		( $backslash . $backslash ) => $backslash . $backslash,

		/*
		 * The bounding quotes can also be escaped by being doubled.
		 */
		( $quote . $quote ) => $quote,
	);

	/*
	 * Apply the replacements.
	 *
	 * It is important to use "strtr()" and not "str_replace()", because
	 * "str_replace()" applies replacements one after another, modifying
	 * intermediate changes rather than just the original string:
	 *
	 *   - str_replace( [ 'a', 'b' ], [ 'b', 'c' ], 'ab' ); // 'cc' (bad)
	 *   - strtr( 'ab', [ 'a' => 'b', 'b' => 'c' ] );       // 'bc' (good)
	 */
	$value = strtr( $value, $replacements );

	/*
	 * A backslash with any other character represents the character itself.
	 * That is, \x evaluates to x, \\ evaluates to \, and \🙂 evaluates to 🙂.
	 *
	 * The pattern is intentionally matched byte by byte, without the "u" flag:
	 *
	 *   1. With the "u" flag, "preg_replace()" returns null when the value
	 *      is not valid UTF-8 (e.g., binary data in a string literal), which
	 *      would violate the string return type of this method.
	 *   2. Dropping the backslash in front of the first byte of a multibyte
	 *      UTF-8 character yields the same result as dropping it in front of
	 *      the whole character, and the backslash byte never occurs inside
	 *      of a multibyte UTF-8 sequence.
	 *
	 * The "s" flag makes "." match a line feed as well, so that a backslash
	 * followed by a literal line break resolves to the line break itself.
	 */
	$pattern = '/' . preg_quote( $backslash, '/' ) . '(.)/s';
	return preg_replace( $pattern, '$1', $value );
}
170+
27171 /**
28172 * Get the token representation as a string.
29173 *
0 commit comments