Skip to content

Commit 485c128

Browse files
committed
fix(embedded): Match rustc's whitespace definition
1 parent 30c8da1 commit 485c128

File tree

1 file changed

+37
-4
lines changed

1 file changed

+37
-4
lines changed

src/cargo/util/toml/embedded.rs

Lines changed: 37 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,7 @@ impl<'s> ScriptSource<'s> {
8181
let mut rest = source.content;
8282

8383
// Whitespace may precede a frontmatter but must end with a newline
84-
const WHITESPACE: [char; 4] = [' ', '\t', '\r', '\n'];
85-
let trimmed = rest.trim_start_matches(WHITESPACE);
84+
let trimmed = rest.trim_start_matches(is_whitespace);
8685
if trimmed.len() != rest.len() {
8786
let trimmed_len = rest.len() - trimmed.len();
8887
let last_trimmed_index = trimmed_len - 1;
@@ -116,7 +115,7 @@ impl<'s> ScriptSource<'s> {
116115
anyhow::bail!("no closing `{fence_pattern}` found for frontmatter");
117116
};
118117
let (info, rest) = rest.split_at(info_end_index);
119-
let info = info.trim_matches(WHITESPACE);
118+
let info = info.trim_matches(is_whitespace);
120119
if !info.is_empty() {
121120
source.info = Some(info);
122121
}
@@ -134,7 +133,7 @@ impl<'s> ScriptSource<'s> {
134133
let rest = &rest[frontmatter_nl + nl_fence_pattern.len()..];
135134

136135
let (after_closing_fence, rest) = rest.split_once("\n").unwrap_or((rest, ""));
137-
let after_closing_fence = after_closing_fence.trim_matches(WHITESPACE);
136+
let after_closing_fence = after_closing_fence.trim_matches(is_whitespace);
138137
if !after_closing_fence.is_empty() {
139138
// extra characters beyond the original fence pattern, even if they are extra `-`
140139
anyhow::bail!("trailing characters found after frontmatter close");
@@ -188,6 +187,40 @@ fn strip_shebang(input: &str) -> Option<usize> {
188187
None
189188
}
190189

190+
/// Returns `true` if `c` is whitespace according to the Rust language definition.
///
/// See the [Rust language reference](https://doc.rust-lang.org/reference/whitespace.html)
/// for the definition of this character class.
///
/// See rust-lang/rust's `compiler/rustc_lexer/src/lib.rs`, function `is_whitespace`.
fn is_whitespace(c: char) -> bool {
    // This is Pattern_White_Space.
    //
    // Note that this set is stable (i.e., it doesn't change with different
    // Unicode versions), so it's OK to just hard-code the values.

    matches!(
        c,
        // Usual ASCII suspects
        '\u{0009}'   // \t
        | '\u{000A}' // \n
        | '\u{000B}' // vertical tab
        | '\u{000C}' // form feed
        | '\u{000D}' // \r
        | '\u{0020}' // space

        // NEXT LINE from latin1
        | '\u{0085}'

        // Bidi markers
        | '\u{200E}' // LEFT-TO-RIGHT MARK
        | '\u{200F}' // RIGHT-TO-LEFT MARK

        // Dedicated whitespace characters from Unicode
        | '\u{2028}' // LINE SEPARATOR
        | '\u{2029}' // PARAGRAPH SEPARATOR
    )
}
223+
191224
#[cfg(test)]
192225
mod test_expand {
193226
use snapbox::assert_data_eq;

0 commit comments

Comments
 (0)