Skip to content

Commit 3570070

Browse files
committed
Parse optionals and repeats without regexes
Rather than parsing optionals and repeats fully in the recursive descent style, we were using regular expressions to do part of the matching and parsing. That's fine for what it is, but as we think about extending the grammar language surrounding repeats further, it might be more straightforward for this to be parsed in the more usual way. So let's do that. Doing this also results in better and more targeted errors when parsing malformed syntax. We had been supporting a space between an expression and the optional and repeat sigils `?`, `*`, and `+` (but not between an expression and the `{a..b}` ranged repeat syntax). In making this change, we drop this support and adjust the affected productions. We were only using this in a handful of places, and the clarity of the productions seems the same or better by removing these spaces. We verified that, setting aside the removal of these spaces, the rendered output of the Reference is byte identical before and after this change.
1 parent 048d75a commit 3570070

File tree

3 files changed

+57
-28
lines changed

3 files changed

+57
-28
lines changed

mdbook-spec/src/grammar/parser.rs

Lines changed: 54 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,7 @@ impl Parser<'_> {
214214
return Ok(None);
215215
};
216216

217-
let mut kind = if self.take_str("U+") {
217+
let kind = if self.take_str("U+") {
218218
self.parse_unicode()?
219219
} else if self.input[self.index..]
220220
.chars()
@@ -246,30 +246,13 @@ impl Parser<'_> {
246246
} else {
247247
return Ok(None);
248248
};
249-
250-
static REPEAT_RE: LazyLock<Regex> =
251-
LazyLock::new(|| Regex::new(r"^ ?(\*\?|\+\?|\?|\*|\+)").unwrap());
252-
static RANGE_RE: LazyLock<Regex> =
253-
LazyLock::new(|| Regex::new(r"^\{([0-9]+)?\.\.([0-9]+)?\}").unwrap());
254-
if let Some(cap) = self.take_re(&REPEAT_RE) {
255-
kind = match &cap[1] {
256-
"?" => ExpressionKind::Optional(box_kind(kind)),
257-
"*" => ExpressionKind::Repeat(box_kind(kind)),
258-
"*?" => ExpressionKind::RepeatNonGreedy(box_kind(kind)),
259-
"+" => ExpressionKind::RepeatPlus(box_kind(kind)),
260-
"+?" => ExpressionKind::RepeatPlusNonGreedy(box_kind(kind)),
261-
s => panic!("unexpected `{s}`"),
262-
};
263-
} else if let Some(cap) = self.take_re(&RANGE_RE) {
264-
let a = cap.get(1).map(|m| m.as_str().parse::<u32>().unwrap());
265-
let b = cap.get(2).map(|m| m.as_str().parse::<u32>().unwrap());
266-
match (a, b) {
267-
(Some(a), Some(b)) if b < a => bail!(self, "range {a}..{b} is malformed"),
268-
_ => {}
269-
}
270-
kind = ExpressionKind::RepeatRange(box_kind(kind), a, b);
271-
}
272-
249+
let kind = match self.peek() {
250+
Some(b'?') => self.parse_optional(kind)?,
251+
Some(b'*') => self.parse_repeat(kind)?,
252+
Some(b'+') => self.parse_repeat_plus(kind)?,
253+
Some(b'{') => self.parse_repeat_range(kind)?,
254+
_ => kind,
255+
};
273256
let suffix = self.parse_suffix()?;
274257
let footnote = self.parse_footnote()?;
275258

@@ -370,6 +353,52 @@ impl Parser<'_> {
370353
}
371354
}
372355

356+
/// Parse `?` after expression.
357+
fn parse_optional(&mut self, kind: ExpressionKind) -> Result<ExpressionKind> {
358+
self.expect("?", "expected `?`")?;
359+
Ok(ExpressionKind::Optional(box_kind(kind)))
360+
}
361+
362+
/// Parse `*` | `*?` after expression.
363+
fn parse_repeat(&mut self, kind: ExpressionKind) -> Result<ExpressionKind> {
364+
self.expect("*", "expected `*`")?;
365+
Ok(if self.take_str("?") {
366+
ExpressionKind::RepeatNonGreedy(box_kind(kind))
367+
} else {
368+
ExpressionKind::Repeat(box_kind(kind))
369+
})
370+
}
371+
372+
/// Parse `+` | `+?` after expression.
373+
fn parse_repeat_plus(&mut self, kind: ExpressionKind) -> Result<ExpressionKind> {
374+
self.expect("+", "expected `+`")?;
375+
Ok(if self.take_str("?") {
376+
ExpressionKind::RepeatPlusNonGreedy(box_kind(kind))
377+
} else {
378+
ExpressionKind::RepeatPlus(box_kind(kind))
379+
})
380+
}
381+
382+
/// Parse `{a..}` | `{..b}` | `{a..b}` after expression.
383+
fn parse_repeat_range(&mut self, kind: ExpressionKind) -> Result<ExpressionKind> {
384+
self.expect("{", "expected `{`")?;
385+
let a = self.take_while(&|x| x.is_ascii_digit());
386+
let Ok(a) = (!a.is_empty()).then(|| a.parse::<u32>()).transpose() else {
387+
bail!(self, "malformed range start");
388+
};
389+
self.expect("..", "expected `..`")?;
390+
let b = self.take_while(&|x| x.is_ascii_digit());
391+
let Ok(b) = (!b.is_empty()).then(|| b.parse::<u32>()).transpose() else {
392+
bail!(self, "malformed range end");
393+
};
394+
match (a, b) {
395+
(Some(a), Some(b)) if b < a => bail!(self, "range {a}..{b} is malformed"),
396+
_ => {}
397+
}
398+
self.expect("}", "expected `}`")?;
399+
Ok(ExpressionKind::RepeatRange(box_kind(kind), a, b))
400+
}
401+
373402
fn parse_suffix(&mut self) -> Result<Option<String>> {
374403
if !self.take_str(" _") {
375404
return Ok(None);

src/items/generics.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,7 @@ r[items.generics.where]
232232

233233
r[items.generics.where.syntax]
234234
```grammar,items
235-
WhereClause -> `where` ( WhereClauseItem `,` )* WhereClauseItem ?
235+
WhereClause -> `where` ( WhereClauseItem `,` )* WhereClauseItem?
236236
237237
WhereClauseItem ->
238238
LifetimeWhereClauseItem

src/patterns.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ r[patterns.ident]
181181

182182
r[patterns.ident.syntax]
183183
```grammar,patterns
184-
IdentifierPattern -> `ref`? `mut`? IDENTIFIER (`@` PatternNoTopAlt ) ?
184+
IdentifierPattern -> `ref`? `mut`? IDENTIFIER ( `@` PatternNoTopAlt )?
185185
```
186186

187187
r[patterns.ident.intro]
@@ -704,7 +704,7 @@ r[patterns.struct.syntax]
704704
```grammar,patterns
705705
StructPattern ->
706706
PathInExpression `{`
707-
StructPatternElements ?
707+
StructPatternElements?
708708
`}`
709709
710710
StructPatternElements ->

0 commit comments

Comments
 (0)