@@ -118,38 +118,88 @@ set_fstring_expr(struct tok_state* tok, struct token *token, char c) {
118
118
}
119
119
PyObject * res = NULL ;
120
120
121
- // Check if there is a # character in the expression
121
+ // Look for a # character outside of string literals
122
122
int hash_detected = 0 ;
123
+ int in_string = 0 ;
124
+ char quote_char = 0 ;
125
+
123
126
for (Py_ssize_t i = 0 ; i < tok_mode -> last_expr_size - tok_mode -> last_expr_end ; i ++ ) {
124
- if (tok_mode -> last_expr_buffer [i ] == '#' ) {
127
+ char ch = tok_mode -> last_expr_buffer [i ];
128
+
129
+ // Skip escaped characters
130
+ if (ch == '\\' ) {
131
+ i ++ ;
132
+ continue ;
133
+ }
134
+
135
+ // Handle quotes
136
+ if (ch == '"' || ch == '\'' ) {
137
+ // The following if/else block works because there is an odd number
138
+ // of quotes in STRING tokens and the lexer only ever reaches this
139
+ // function with valid STRING tokens.
140
+ // For example: """hello"""
141
+ // First quote: in_string = 1
142
+ // Second quote: in_string = 0
143
+ // Third quote: in_string = 1
144
+ if (!in_string ) {
145
+ in_string = 1 ;
146
+ quote_char = ch ;
147
+ }
148
+ else if (ch == quote_char ) {
149
+ in_string = 0 ;
150
+ }
151
+ continue ;
152
+ }
153
+
154
+ // Check for # outside strings
155
+ if (ch == '#' && !in_string ) {
125
156
hash_detected = 1 ;
126
157
break ;
127
158
}
128
159
}
129
-
160
+ // If we found a # character in the expression, we need to handle comments
130
161
if (hash_detected ) {
131
- Py_ssize_t input_length = tok_mode -> last_expr_size - tok_mode -> last_expr_end ;
132
- char * result = (char * )PyMem_Malloc ((input_length + 1 ) * sizeof (char ));
162
+ // Allocate buffer for processed result
163
+ char * result = (char * )PyMem_Malloc ((tok_mode -> last_expr_size - tok_mode -> last_expr_end + 1 ) * sizeof (char ));
133
164
if (!result ) {
134
165
return -1 ;
135
166
}
136
167
137
- Py_ssize_t i = 0 ;
138
- Py_ssize_t j = 0 ;
168
+ Py_ssize_t i = 0 ; // Input position
169
+ Py_ssize_t j = 0 ; // Output position
170
+ in_string = 0 ; // Whether we're in a string
171
+ quote_char = 0 ; // Current string quote char
139
172
140
- for (i = 0 , j = 0 ; i < input_length ; i ++ ) {
141
- if (tok_mode -> last_expr_buffer [i ] == '#' ) {
142
- // Skip characters until newline or end of string
143
- while (i < input_length && tok_mode -> last_expr_buffer [i ] != '\0' ) {
144
- if (tok_mode -> last_expr_buffer [i ] == '\n' ) {
145
- result [j ++ ] = tok_mode -> last_expr_buffer [i ];
146
- break ;
147
- }
173
+ // Process each character
174
+ while (i < tok_mode -> last_expr_size - tok_mode -> last_expr_end ) {
175
+ char ch = tok_mode -> last_expr_buffer [i ];
176
+
177
+ // Handle string quotes
178
+ if (ch == '"' || ch == '\'' ) {
179
+ // See comment above to understand this part
180
+ if (!in_string ) {
181
+ in_string = 1 ;
182
+ quote_char = ch ;
183
+ } else if (ch == quote_char ) {
184
+ in_string = 0 ;
185
+ }
186
+ result [j ++ ] = ch ;
187
+ }
188
+ // Skip comments
189
+ else if (ch == '#' && !in_string ) {
190
+ while (i < tok_mode -> last_expr_size - tok_mode -> last_expr_end &&
191
+ tok_mode -> last_expr_buffer [i ] != '\n' ) {
148
192
i ++ ;
149
193
}
150
- } else {
151
- result [j ++ ] = tok_mode -> last_expr_buffer [i ];
194
+ if (i < tok_mode -> last_expr_size - tok_mode -> last_expr_end ) {
195
+ result [j ++ ] = '\n' ;
196
+ }
197
+ }
198
+ // Copy other chars
199
+ else {
200
+ result [j ++ ] = ch ;
152
201
}
202
+ i ++ ;
153
203
}
154
204
155
205
result [j ] = '\0' ; // Null-terminate the result string
@@ -161,11 +211,9 @@ set_fstring_expr(struct tok_state* tok, struct token *token, char c) {
161
211
tok_mode -> last_expr_size - tok_mode -> last_expr_end ,
162
212
NULL
163
213
);
164
-
165
214
}
166
215
167
-
168
- if (!res ) {
216
+ if (!res ) {
169
217
return -1 ;
170
218
}
171
219
token -> metadata = res ;
0 commit comments