@@ -81,8 +81,7 @@ impl<'s> ScriptSource<'s> {
81
81
let mut rest = source. content ;
82
82
83
83
// Whitespace may precede a frontmatter but must end with a newline
84
- const WHITESPACE : [ char ; 4 ] = [ ' ' , '\t' , '\r' , '\n' ] ;
85
- let trimmed = rest. trim_start_matches ( WHITESPACE ) ;
84
+ let trimmed = rest. trim_start_matches ( is_whitespace) ;
86
85
if trimmed. len ( ) != rest. len ( ) {
87
86
let trimmed_len = rest. len ( ) - trimmed. len ( ) ;
88
87
let last_trimmed_index = trimmed_len - 1 ;
@@ -116,7 +115,7 @@ impl<'s> ScriptSource<'s> {
116
115
anyhow:: bail!( "no closing `{fence_pattern}` found for frontmatter" ) ;
117
116
} ;
118
117
let ( info, rest) = rest. split_at ( info_end_index) ;
119
- let info = info. trim_matches ( WHITESPACE ) ;
118
+ let info = info. trim_matches ( is_whitespace ) ;
120
119
if !info. is_empty ( ) {
121
120
source. info = Some ( info) ;
122
121
}
@@ -134,7 +133,7 @@ impl<'s> ScriptSource<'s> {
134
133
let rest = & rest[ frontmatter_nl + nl_fence_pattern. len ( ) ..] ;
135
134
136
135
let ( after_closing_fence, rest) = rest. split_once ( "\n " ) . unwrap_or ( ( rest, "" ) ) ;
137
- let after_closing_fence = after_closing_fence. trim_matches ( WHITESPACE ) ;
136
+ let after_closing_fence = after_closing_fence. trim_matches ( is_whitespace ) ;
138
137
if !after_closing_fence. is_empty ( ) {
139
138
// extra characters beyond the original fence pattern, even if they are extra `-`
140
139
anyhow:: bail!( "trailing characters found after frontmatter close" ) ;
@@ -188,6 +187,40 @@ fn strip_shebang(input: &str) -> Option<usize> {
188
187
None
189
188
}
190
189
190
/// Reports whether `c` counts as whitespace under the Rust language definition.
///
/// The character classes involved are described in the
/// [Rust language reference](https://doc.rust-lang.org/reference/whitespace.html).
///
/// See rust-lang/rust's compiler/rustc_lexer/src/lib.rs `is_whitespace`
fn is_whitespace(c: char) -> bool {
    // The accepted characters are exactly Pattern_White_Space. That set is
    // stable (it does not change between Unicode versions), so the values
    // can simply be hard-coded in a table.
    const PATTERN_WHITE_SPACE: [char; 11] = [
        // Usual ASCII suspects
        '\u{0009}', // \t
        '\u{000A}', // \n
        '\u{000B}', // vertical tab
        '\u{000C}', // form feed
        '\u{000D}', // \r
        '\u{0020}', // space
        // NEXT LINE from latin1
        '\u{0085}',
        // Bidi markers
        '\u{200E}', // LEFT-TO-RIGHT MARK
        '\u{200F}', // RIGHT-TO-LEFT MARK
        // Dedicated whitespace characters from Unicode
        '\u{2028}', // LINE SEPARATOR
        '\u{2029}', // PARAGRAPH SEPARATOR
    ];

    PATTERN_WHITE_SPACE.contains(&c)
}
223
+
191
224
#[ cfg( test) ]
192
225
mod test_expand {
193
226
use snapbox:: assert_data_eq;
0 commit comments