7
7
* and consumed by WP_MySQL_Parser during the parsing process.
8
8
*/
9
9
class WP_MySQL_Token extends WP_Parser_Token {
10
+ /**
11
+ * Whether the NO_BACKSLASH_ESCAPES SQL mode is enabled.
12
+ *
13
+ * @var bool
14
+ */
15
+ private $ sql_mode_no_backslash_escapes_enabled ;
16
+
17
/**
 * Constructor.
 *
 * The last parameter is optional so that the token can still be created
 * with the same four arguments that the parent constructor accepts.
 *
 * @param int    $id                                    Token type.
 * @param int    $start                                 Byte offset in the input where the token begins.
 * @param int    $length                                Byte length of the token in the input.
 * @param string $input                                 Input bytes from which the token was parsed.
 * @param bool   $sql_mode_no_backslash_escapes_enabled Optional. Whether the NO_BACKSLASH_ESCAPES SQL mode
 *                                                      is enabled. Default false, as the mode is not part
 *                                                      of the default MySQL SQL mode.
 */
public function __construct(
	int $id,
	int $start,
	int $length,
	string $input,
	bool $sql_mode_no_backslash_escapes_enabled = false
) {
	parent::__construct( $id, $start, $length, $input );

	// Stored for later use when resolving escape sequences in the token value.
	$this->sql_mode_no_backslash_escapes_enabled = $sql_mode_no_backslash_escapes_enabled;
}
36
+
10
37
/**
11
38
* Get the name of the token.
12
39
*
@@ -24,6 +51,123 @@ public function get_name(): string {
24
51
return $ name ;
25
52
}
26
53
54
/**
 * Get the real unquoted value of the token.
 *
 * For single-quoted, double-quoted, and backtick-quoted tokens, the bounding
 * quotes are removed and the escape sequences are resolved. For all other
 * tokens, the token bytes are returned unchanged.
 *
 * The value is processed as a sequence of bytes, so literals that contain
 * bytes that are not valid UTF-8 (e.g. binary data) are handled as well.
 *
 * @return string The token value.
 */
public function get_value(): string {
	$value = $this->get_bytes();

	$is_backtick_quoted_id = WP_MySQL_Lexer::BACK_TICK_QUOTED_ID === $this->id;
	if (
		! $is_backtick_quoted_id
		&& WP_MySQL_Lexer::SINGLE_QUOTED_TEXT !== $this->id
		&& WP_MySQL_Lexer::DOUBLE_QUOTED_TEXT !== $this->id
	) {
		// Not a quoted token; there is nothing to unquote or unescape.
		return $value;
	}

	// Remove bounding quotes.
	$quote = $value[0];
	$value = substr( $value, 1, -1 );

	/*
	 * In two cases, only the doubled bounding quote is an escape sequence,
	 * and all other characters (including backslashes) preserve their
	 * literal values:
	 *
	 *   1. Backtick-quoted identifiers. For quoted identifiers, the MySQL
	 *      manual describes only doubling of the quote character as an escape.
	 *   2. Any quoted text when the NO_BACKSLASH_ESCAPES SQL mode is enabled.
	 *
	 * See: https://dev.mysql.com/doc/refman/8.4/en/identifiers.html
	 */
	if ( $is_backtick_quoted_id || $this->sql_mode_no_backslash_escapes_enabled ) {
		return str_replace( $quote . $quote, $quote, $value );
	}

	/*
	 * Unescape MySQL escape sequences.
	 *
	 * MySQL string literals use backslash as an escape character, and
	 * the string bounding quotes can also be escaped by being doubled.
	 *
	 * The escaping is done according to the following rules:
	 *
	 *   1. Some special character escape sequences are recognized.
	 *      For example, "\n" is a newline character, "\0" is ASCII NULL.
	 *   2. A specific treatment is applied to "\%" and "\_" sequences.
	 *      This is due to their special meaning for pattern matching.
	 *   3. Other backslash-prefixed characters resolve to their literal
	 *      values. For example, "\x" represents "x", "\\" represents "\".
	 *
	 * Despite looking similar, these rules are different from the C-style
	 * string escaping, so we cannot use "strip(c)slashes()" in this case.
	 *
	 * See: https://dev.mysql.com/doc/refman/8.4/en/string-literals.html
	 */
	$backslash    = chr( 92 );
	$replacements = array(
		/*
		 * MySQL special character escape sequences.
		 */
		( $backslash . '0' )        => chr( 0 ),  // An ASCII NULL character (\0).
		( $backslash . "'" )        => chr( 39 ), // A single quote character (').
		( $backslash . '"' )        => chr( 34 ), // A double quote character (").
		( $backslash . 'b' )        => chr( 8 ),  // A backspace character.
		( $backslash . 'n' )        => chr( 10 ), // A newline (linefeed) character (\n).
		( $backslash . 'r' )        => chr( 13 ), // A carriage return character (\r).
		( $backslash . 't' )        => chr( 9 ),  // A tab character (\t).
		( $backslash . 'Z' )        => chr( 26 ), // An ASCII 26 (Control+Z) character.

		/*
		 * Normalize escaping of "%" and "_" characters.
		 *
		 * MySQL has unusual handling for "\%" and "\_" in all string literals.
		 * While other sequences follow the C-style escaping ("\?" is "?", etc.),
		 * "\%" resolves to "\%" and "\_" resolves to "\_" (unlike in C strings).
		 *
		 * This means that "\%" behaves like "\\%", and "\_" behaves like "\\_".
		 * To preserve this behavior, we need to add a second backslash here.
		 *
		 * From https://dev.mysql.com/doc/refman/8.4/en/string-literals.html:
		 *   > The \% and \_ sequences are used to search for literal instances
		 *   > of % and _ in pattern-matching contexts where they would otherwise
		 *   > be interpreted as wildcard characters. If you use \% or \_ outside
		 *   > of pattern-matching contexts, they evaluate to the strings \% and
		 *   > \_, not to % and _.
		 */
		( $backslash . '%' )        => $backslash . $backslash . '%',
		( $backslash . '_' )        => $backslash . $backslash . '_',

		/*
		 * Preserve a double backslash as-is, so that the trailing backslash
		 * is not consumed as the beginning of an escape sequence like "\n".
		 *
		 * Resolving "\\" to "\" will be handled in the next step, where all
		 * other backslash-prefixed characters resolve to their literal values.
		 */
		( $backslash . $backslash ) => $backslash . $backslash,

		/*
		 * The bounding quotes can also be escaped by being doubled.
		 */
		( $quote . $quote )         => $quote,
	);

	/*
	 * Apply the replacements.
	 *
	 * It is important to use "strtr()" and not "str_replace()", because
	 * "str_replace()" applies replacements one after another, modifying
	 * intermediate changes rather than just the original string:
	 *
	 *   - str_replace( [ 'a', 'b' ], [ 'b', 'c' ], 'ab' ); // 'cc' (bad)
	 *   - strtr( 'ab', [ 'a' => 'b', 'b' => 'c' ] );       // 'bc' (good)
	 */
	$value = strtr( $value, $replacements );

	/*
	 * A backslash with any other character represents the character itself.
	 * That is, \x evaluates to x, \\ evaluates to \, and \🙂 evaluates to 🙂.
	 *
	 * The pattern intentionally works on bytes rather than on UTF-8 characters:
	 *
	 *   - With the "u" modifier, "preg_replace()" returns null when the subject
	 *     is not valid UTF-8, which would violate the string return type for
	 *     literals that carry binary data.
	 *   - Dropping the backslash and keeping the first following byte gives
	 *     the same result for multi-byte UTF-8 characters, because the byte
	 *     0x5C (backslash) never occurs inside a multi-byte UTF-8 sequence.
	 *
	 * The "s" modifier makes "." match a line feed as well, so that a backslash
	 * followed by a literal newline resolves to the newline itself.
	 */
	$pattern = '/' . preg_quote( $backslash, '/' ) . '(.)/s';
	return preg_replace( $pattern, '$1', $value );
}
170
+
27
171
/**
28
172
* Get the token representation as a string.
29
173
*
0 commit comments