Skip to content

Commit c1085a0

Browse files
[3.13] gh-135148: Correctly handle f/t strings with comments and debug expressions (#135198) (#136899)
Co-authored-by: Pablo Galindo Salgado <[email protected]>
1 parent f2f30c4 commit c1085a0

File tree

3 files changed

+83
-20
lines changed

3 files changed

+83
-20
lines changed

Lib/test/test_fstring.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1641,6 +1641,18 @@ def __repr__(self):
16411641
self.assertEqual(f"{1+2 = # my comment
16421642
}", '1+2 = \n 3')
16431643

1644+
self.assertEqual(f'{""" # booo
1645+
"""=}', '""" # booo\n """=\' # booo\\n \'')
1646+
1647+
self.assertEqual(f'{" # nooo "=}', '" # nooo "=\' # nooo \'')
1648+
self.assertEqual(f'{" \" # nooo \" "=}', '" \\" # nooo \\" "=\' " # nooo " \'')
1649+
1650+
self.assertEqual(f'{ # some comment goes here
1651+
"""hello"""=}', ' \n """hello"""=\'hello\'')
1652+
self.assertEqual(f'{"""# this is not a comment
1653+
a""" # this is a comment
1654+
}', '# this is not a comment\n a')
1655+
16441656
# These next lines contain tabs. Backslash escapes don't
16451657
# work in f-strings.
16461658
# patchcheck doesn't like these tabs. So the only way to test
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Fixed a bug where f-string debug expressions (using =) would incorrectly
2+
strip out parts of strings containing escaped quotes and # characters. Patch
3+
by Pablo Galindo.

Parser/lexer/lexer.c

Lines changed: 68 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -118,38 +118,88 @@ set_fstring_expr(struct tok_state* tok, struct token *token, char c) {
118118
}
119119
PyObject *res = NULL;
120120

121-
// Check if there is a # character in the expression
121+
// Look for a # character outside of string literals
122122
int hash_detected = 0;
123+
int in_string = 0;
124+
char quote_char = 0;
125+
123126
for (Py_ssize_t i = 0; i < tok_mode->last_expr_size - tok_mode->last_expr_end; i++) {
124-
if (tok_mode->last_expr_buffer[i] == '#') {
127+
char ch = tok_mode->last_expr_buffer[i];
128+
129+
// Skip escaped characters
130+
if (ch == '\\') {
131+
i++;
132+
continue;
133+
}
134+
135+
// Handle quotes
136+
if (ch == '"' || ch == '\'') {
137+
// The following if/else block works because there is an odd number
138+
// of quotes in STRING tokens and the lexer only ever reaches this
139+
// function with valid STRING tokens.
140+
// For example: """hello"""
141+
// First quote: in_string = 1
142+
// Second quote: in_string = 0
143+
// Third quote: in_string = 1
144+
if (!in_string) {
145+
in_string = 1;
146+
quote_char = ch;
147+
}
148+
else if (ch == quote_char) {
149+
in_string = 0;
150+
}
151+
continue;
152+
}
153+
154+
// Check for # outside strings
155+
if (ch == '#' && !in_string) {
125156
hash_detected = 1;
126157
break;
127158
}
128159
}
129-
160+
// If we found a # character in the expression, we need to handle comments
130161
if (hash_detected) {
131-
Py_ssize_t input_length = tok_mode->last_expr_size - tok_mode->last_expr_end;
132-
char *result = (char *)PyMem_Malloc((input_length + 1) * sizeof(char));
162+
// Allocate buffer for processed result
163+
char *result = (char *)PyMem_Malloc((tok_mode->last_expr_size - tok_mode->last_expr_end + 1) * sizeof(char));
133164
if (!result) {
134165
return -1;
135166
}
136167

137-
Py_ssize_t i = 0;
138-
Py_ssize_t j = 0;
168+
Py_ssize_t i = 0; // Input position
169+
Py_ssize_t j = 0; // Output position
170+
in_string = 0; // Whether we're in a string
171+
quote_char = 0; // Current string quote char
139172

140-
for (i = 0, j = 0; i < input_length; i++) {
141-
if (tok_mode->last_expr_buffer[i] == '#') {
142-
// Skip characters until newline or end of string
143-
while (i < input_length && tok_mode->last_expr_buffer[i] != '\0') {
144-
if (tok_mode->last_expr_buffer[i] == '\n') {
145-
result[j++] = tok_mode->last_expr_buffer[i];
146-
break;
147-
}
173+
// Process each character
174+
while (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
175+
char ch = tok_mode->last_expr_buffer[i];
176+
177+
// Handle string quotes
178+
if (ch == '"' || ch == '\'') {
179+
// See comment above to understand this part
180+
if (!in_string) {
181+
in_string = 1;
182+
quote_char = ch;
183+
} else if (ch == quote_char) {
184+
in_string = 0;
185+
}
186+
result[j++] = ch;
187+
}
188+
// Skip comments
189+
else if (ch == '#' && !in_string) {
190+
while (i < tok_mode->last_expr_size - tok_mode->last_expr_end &&
191+
tok_mode->last_expr_buffer[i] != '\n') {
148192
i++;
149193
}
150-
} else {
151-
result[j++] = tok_mode->last_expr_buffer[i];
194+
if (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
195+
result[j++] = '\n';
196+
}
197+
}
198+
// Copy other chars
199+
else {
200+
result[j++] = ch;
152201
}
202+
i++;
153203
}
154204

155205
result[j] = '\0'; // Null-terminate the result string
@@ -161,11 +211,9 @@ set_fstring_expr(struct tok_state* tok, struct token *token, char c) {
161211
tok_mode->last_expr_size - tok_mode->last_expr_end,
162212
NULL
163213
);
164-
165214
}
166215

167-
168-
if (!res) {
216+
if (!res) {
169217
return -1;
170218
}
171219
token->metadata = res;

0 commit comments

Comments
 (0)