Skip to content

Call compute_locs once per rule #95669

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Apr 6, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
267 changes: 135 additions & 132 deletions compiler/rustc_expand/src/mbe/macro_parser.rs
Original file line number Diff line number Diff line change
@@ -106,9 +106,9 @@ rustc_data_structures::static_assert_size!(NamedMatchVec, 48);
///
/// This means a matcher can be represented by `&[MatcherLoc]`, and traversal mostly involves
/// simply incrementing the current matcher position index by one.
enum MatcherLoc<'tt> {
pub(super) enum MatcherLoc {
Token {
token: &'tt Token,
token: Token,
},
Delimited,
Sequence {
@@ -123,21 +123,93 @@ enum MatcherLoc<'tt> {
idx_first: usize,
},
SequenceSep {
separator: &'tt Token,
separator: Token,
},
SequenceKleeneOpAfterSep {
idx_first: usize,
},
MetaVarDecl {
span: Span,
bind: Ident,
kind: NonterminalKind,
kind: Option<NonterminalKind>,
next_metavar: usize,
seq_depth: usize,
},
Eof,
}

pub(super) fn compute_locs(sess: &ParseSess, matcher: &[TokenTree]) -> Vec<MatcherLoc> {
fn inner(
sess: &ParseSess,
tts: &[TokenTree],
locs: &mut Vec<MatcherLoc>,
next_metavar: &mut usize,
seq_depth: usize,
) {
for tt in tts {
match tt {
TokenTree::Token(token) => {
locs.push(MatcherLoc::Token { token: token.clone() });
}
TokenTree::Delimited(_, delimited) => {
locs.push(MatcherLoc::Delimited);
inner(sess, &delimited.all_tts, locs, next_metavar, seq_depth);
}
TokenTree::Sequence(_, seq) => {
// We can't determine `idx_first_after` and construct the final
// `MatcherLoc::Sequence` until after `inner()` is called and the sequence end
// pieces are processed. So we push a dummy value (`Eof` is cheapest to
// construct) now, and overwrite it with the proper value below.
let dummy = MatcherLoc::Eof;
locs.push(dummy);

let next_metavar_orig = *next_metavar;
let op = seq.kleene.op;
let idx_first = locs.len();
let idx_seq = idx_first - 1;
inner(sess, &seq.tts, locs, next_metavar, seq_depth + 1);

if let Some(separator) = &seq.separator {
locs.push(MatcherLoc::SequenceSep { separator: separator.clone() });
locs.push(MatcherLoc::SequenceKleeneOpAfterSep { idx_first });
} else {
locs.push(MatcherLoc::SequenceKleeneOpNoSep { op, idx_first });
}

// Overwrite the dummy value pushed above with the proper value.
locs[idx_seq] = MatcherLoc::Sequence {
op,
num_metavar_decls: seq.num_captures,
idx_first_after: locs.len(),
next_metavar: next_metavar_orig,
seq_depth,
};
}
&TokenTree::MetaVarDecl(span, bind, kind) => {
locs.push(MatcherLoc::MetaVarDecl {
span,
bind,
kind,
next_metavar: *next_metavar,
seq_depth,
});
*next_metavar += 1;
}
TokenTree::MetaVar(..) | TokenTree::MetaVarExpr(..) => unreachable!(),
}
}
}

let mut locs = vec![];
let mut next_metavar = 0;
inner(sess, matcher, &mut locs, &mut next_metavar, /* seq_depth */ 0);

// A final entry is needed for eof.
locs.push(MatcherLoc::Eof);

locs
}

/// A single matcher position, representing the state of matching.
struct MatcherPos {
/// The index into `TtParser::locs`, which represents the "dot".
@@ -298,12 +370,9 @@ fn token_name_eq(t1: &Token, t2: &Token) -> bool {

// Note: the vectors could be created and dropped within `parse_tt`, but to avoid excess
// allocations we have a single vector fo each kind that is cleared and reused repeatedly.
pub struct TtParser<'tt> {
pub struct TtParser {
macro_name: Ident,

/// The matcher of the current rule.
locs: Vec<MatcherLoc<'tt>>,

/// The set of current mps to be processed. This should be empty by the end of a successful
/// execution of `parse_tt_inner`.
cur_mps: Vec<MatcherPos>,
@@ -320,111 +389,17 @@ pub struct TtParser<'tt> {
empty_matches: Lrc<NamedMatchVec>,
}

impl<'tt> TtParser<'tt> {
pub(super) fn new(macro_name: Ident) -> TtParser<'tt> {
impl TtParser {
pub(super) fn new(macro_name: Ident) -> TtParser {
TtParser {
macro_name,
locs: vec![],
cur_mps: vec![],
next_mps: vec![],
bb_mps: vec![],
empty_matches: Lrc::new(smallvec![]),
}
}

/// Convert a `&[TokenTree]` to a `&[MatcherLoc]`. Note: this conversion happens every time the
/// macro is called, which may be many times if there are many call sites or if it is
/// recursive. This conversion is fairly cheap and the representation is sufficiently better
/// for matching than `&[TokenTree]` that it's a clear performance win even with the overhead.
/// But it might be possible to move the conversion outwards so it only occurs once per macro.
fn compute_locs(
&mut self,
sess: &ParseSess,
matcher: &'tt [TokenTree],
) -> Result<usize, (Span, String)> {
fn inner<'tt>(
sess: &ParseSess,
tts: &'tt [TokenTree],
locs: &mut Vec<MatcherLoc<'tt>>,
next_metavar: &mut usize,
seq_depth: usize,
) -> Result<(), (Span, String)> {
for tt in tts {
match tt {
TokenTree::Token(token) => {
locs.push(MatcherLoc::Token { token });
}
TokenTree::Delimited(_, delimited) => {
locs.push(MatcherLoc::Delimited);
inner(sess, &delimited.all_tts, locs, next_metavar, seq_depth)?;
}
TokenTree::Sequence(_, seq) => {
// We can't determine `idx_first_after` and construct the final
// `MatcherLoc::Sequence` until after `inner()` is called and the sequence
// end pieces are processed. So we push a dummy value (`Eof` is cheapest to
// construct) now, and overwrite it with the proper value below.
let dummy = MatcherLoc::Eof;
locs.push(dummy);

let next_metavar_orig = *next_metavar;
let op = seq.kleene.op;
let idx_first = locs.len();
let idx_seq = idx_first - 1;
inner(sess, &seq.tts, locs, next_metavar, seq_depth + 1)?;

if let Some(separator) = &seq.separator {
locs.push(MatcherLoc::SequenceSep { separator });
locs.push(MatcherLoc::SequenceKleeneOpAfterSep { idx_first });
} else {
locs.push(MatcherLoc::SequenceKleeneOpNoSep { op, idx_first });
}

// Overwrite the dummy value pushed above with the proper value.
locs[idx_seq] = MatcherLoc::Sequence {
op,
num_metavar_decls: seq.num_captures,
idx_first_after: locs.len(),
next_metavar: next_metavar_orig,
seq_depth,
};
}
&TokenTree::MetaVarDecl(span, bind, kind) => {
if let Some(kind) = kind {
locs.push(MatcherLoc::MetaVarDecl {
span,
bind,
kind,
next_metavar: *next_metavar,
seq_depth,
});
*next_metavar += 1;
} else if sess
.missing_fragment_specifiers
.borrow_mut()
.remove(&span)
.is_some()
{
// E.g. `$e` instead of `$e:expr`.
return Err((span, "missing fragment specifier".to_string()));
}
}
TokenTree::MetaVar(..) | TokenTree::MetaVarExpr(..) => unreachable!(),
}
}
Ok(())
}

self.locs.clear();
let mut next_metavar = 0;
inner(sess, matcher, &mut self.locs, &mut next_metavar, /* seq_depth */ 0)?;

// A final entry is needed for eof.
self.locs.push(MatcherLoc::Eof);

// This is the number of metavar decls.
Ok(next_metavar)
}

/// Process the matcher positions of `cur_mps` until it is empty. In the process, this will
/// produce more mps in `next_mps` and `bb_mps`.
///
@@ -434,15 +409,16 @@ impl<'tt> TtParser<'tt> {
/// track of through the mps generated.
fn parse_tt_inner(
&mut self,
num_metavar_decls: usize,
sess: &ParseSess,
matcher: &[MatcherLoc],
token: &Token,
) -> Option<NamedParseResult> {
// Matcher positions that would be valid if the macro invocation was over now. Only
// modified if `token == Eof`.
let mut eof_mps = EofMatcherPositions::None;

while let Some(mut mp) = self.cur_mps.pop() {
match &self.locs[mp.idx] {
match &matcher[mp.idx] {
MatcherLoc::Token { token: t } => {
// If it's a doc comment, we just ignore it and move on to the next tt in the
// matcher. This is a bug, but #95267 showed that existing programs rely on
@@ -532,17 +508,25 @@ impl<'tt> TtParser<'tt> {
mp.idx = idx_first;
self.cur_mps.push(mp);
}
MatcherLoc::MetaVarDecl { kind, .. } => {
&MatcherLoc::MetaVarDecl { span, kind, .. } => {
// Built-in nonterminals never start with these tokens, so we can eliminate
// them from consideration. We use the span of the metavariable declaration
// to determine any edition-specific matching behavior for non-terminals.
if Parser::nonterminal_may_begin_with(*kind, token) {
self.bb_mps.push(mp);
if let Some(kind) = kind {
if Parser::nonterminal_may_begin_with(kind, token) {
self.bb_mps.push(mp);
}
} else {
// Both this check and the one in `nameize` are necessary, surprisingly.
if sess.missing_fragment_specifiers.borrow_mut().remove(&span).is_some() {
// E.g. `$e` instead of `$e:expr`.
return Some(Error(span, "missing fragment specifier".to_string()));
}
}
}
MatcherLoc::Eof => {
// We are past the matcher's end, and not in a sequence. Try to end things.
debug_assert_eq!(mp.idx, self.locs.len() - 1);
debug_assert_eq!(mp.idx, matcher.len() - 1);
if *token == token::Eof {
eof_mps = match eof_mps {
EofMatcherPositions::None => EofMatcherPositions::One(mp),
@@ -560,11 +544,10 @@ impl<'tt> TtParser<'tt> {
if *token == token::Eof {
Some(match eof_mps {
EofMatcherPositions::One(mut eof_mp) => {
assert_eq!(eof_mp.matches.len(), num_metavar_decls);
// Need to take ownership of the matches from within the `Lrc`.
Lrc::make_mut(&mut eof_mp.matches);
let matches = Lrc::try_unwrap(eof_mp.matches).unwrap().into_iter();
self.nameize(matches)
self.nameize(sess, matcher, matches)
}
EofMatcherPositions::Multiple => {
Error(token.span, "ambiguity: multiple successful parses".to_string())
@@ -586,13 +569,8 @@ impl<'tt> TtParser<'tt> {
pub(super) fn parse_tt(
&mut self,
parser: &mut Cow<'_, Parser<'_>>,
matcher: &'tt [TokenTree],
matcher: &[MatcherLoc],
) -> NamedParseResult {
let num_metavar_decls = match self.compute_locs(parser.sess, matcher) {
Ok(num_metavar_decls) => num_metavar_decls,
Err((span, msg)) => return Error(span, msg),
};

// A queue of possible matcher positions. We initialize it with the matcher position in
// which the "dot" is before the first token of the first token tree in `matcher`.
// `parse_tt_inner` then processes all of these possible matcher positions and produces
@@ -607,7 +585,7 @@ impl<'tt> TtParser<'tt> {

// Process `cur_mps` until either we have finished the input or we need to get some
// parsing from the black-box parser done.
if let Some(res) = self.parse_tt_inner(num_metavar_decls, &parser.token) {
if let Some(res) = self.parse_tt_inner(&parser.sess, matcher, &parser.token) {
return res;
}

@@ -635,9 +613,13 @@ impl<'tt> TtParser<'tt> {
(0, 1) => {
// We need to call the black-box parser to get some nonterminal.
let mut mp = self.bb_mps.pop().unwrap();
let loc = &self.locs[mp.idx];
let loc = &matcher[mp.idx];
if let &MatcherLoc::MetaVarDecl {
span, kind, next_metavar, seq_depth, ..
span,
kind: Some(kind),
next_metavar,
seq_depth,
..
} = loc
{
// We use the span of the metavariable declaration to determine any
@@ -669,20 +651,26 @@ impl<'tt> TtParser<'tt> {

(_, _) => {
// Too many possibilities!
return self.ambiguity_error(parser.token.span);
return self.ambiguity_error(matcher, parser.token.span);
}
}

assert!(!self.cur_mps.is_empty());
}
}

fn ambiguity_error(&self, token_span: rustc_span::Span) -> NamedParseResult {
fn ambiguity_error(
&self,
matcher: &[MatcherLoc],
token_span: rustc_span::Span,
) -> NamedParseResult {
let nts = self
.bb_mps
.iter()
.map(|mp| match &self.locs[mp.idx] {
MatcherLoc::MetaVarDecl { bind, kind, .. } => format!("{} ('{}')", kind, bind),
.map(|mp| match &matcher[mp.idx] {
MatcherLoc::MetaVarDecl { bind, kind: Some(kind), .. } => {
format!("{} ('{}')", kind, bind)
}
_ => unreachable!(),
})
.collect::<Vec<String>>()
@@ -702,16 +690,31 @@ impl<'tt> TtParser<'tt> {
)
}

fn nameize<I: Iterator<Item = NamedMatch>>(&self, mut res: I) -> NamedParseResult {
fn nameize<I: Iterator<Item = NamedMatch>>(
&self,
sess: &ParseSess,
matcher: &[MatcherLoc],
mut res: I,
) -> NamedParseResult {
// Make that each metavar has _exactly one_ binding. If so, insert the binding into the
// `NamedParseResult`. Otherwise, it's an error.
let mut ret_val = FxHashMap::default();
for loc in self.locs.iter() {
if let &MatcherLoc::MetaVarDecl { span, bind, .. } = loc {
match ret_val.entry(MacroRulesNormalizedIdent::new(bind)) {
Vacant(spot) => spot.insert(res.next().unwrap()),
Occupied(..) => return Error(span, format!("duplicated bind name: {}", bind)),
};
for loc in matcher {
if let &MatcherLoc::MetaVarDecl { span, bind, kind, .. } = loc {
if kind.is_some() {
match ret_val.entry(MacroRulesNormalizedIdent::new(bind)) {
Vacant(spot) => spot.insert(res.next().unwrap()),
Occupied(..) => {
return Error(span, format!("duplicated bind name: {}", bind));
}
};
} else {
// Both this check and the one in `parse_tt_inner` are necessary, surprisingly.
if sess.missing_fragment_specifiers.borrow_mut().remove(&span).is_some() {
// E.g. `$e` instead of `$e:expr`.
return Error(span, "missing fragment specifier".to_string());
}
}
}
}
Success(ret_val)
51 changes: 34 additions & 17 deletions compiler/rustc_expand/src/mbe/macro_rules.rs
Original file line number Diff line number Diff line change
@@ -4,7 +4,7 @@ use crate::expand::{ensure_complete_parse, parse_ast_fragment, AstFragment, AstF
use crate::mbe;
use crate::mbe::macro_check;
use crate::mbe::macro_parser::{Error, ErrorReported, Failure, Success, TtParser};
use crate::mbe::macro_parser::{MatchedSeq, MatchedTokenTree};
use crate::mbe::macro_parser::{MatchedSeq, MatchedTokenTree, MatcherLoc};
use crate::mbe::transcribe::transcribe;

use rustc_ast as ast;
@@ -159,7 +159,7 @@ struct MacroRulesMacroExpander {
name: Ident,
span: Span,
transparency: Transparency,
lhses: Vec<mbe::TokenTree>,
lhses: Vec<Vec<MatcherLoc>>,
rhses: Vec<mbe::TokenTree>,
valid: bool,
is_local: bool,
@@ -210,7 +210,7 @@ fn generic_extension<'cx, 'tt>(
name: Ident,
transparency: Transparency,
arg: TokenStream,
lhses: &'tt [mbe::TokenTree],
lhses: &'tt [Vec<MatcherLoc>],
rhses: &'tt [mbe::TokenTree],
is_local: bool,
) -> Box<dyn MacResult + 'cx> {
@@ -245,14 +245,6 @@ fn generic_extension<'cx, 'tt>(
// this situation.)
let parser = parser_from_cx(sess, arg.clone());

// A matcher is always delimited, but the delimiters are ignored.
let delimited_inner_tts = |tt: &'tt mbe::TokenTree| -> &'tt [mbe::TokenTree] {
match tt {
mbe::TokenTree::Delimited(_, delimited) => delimited.inner_tts(),
_ => cx.span_bug(sp, "malformed macro lhs"),
}
};

// Try each arm's matchers.
let mut tt_parser = TtParser::new(name);
for (i, lhs) in lhses.iter().enumerate() {
@@ -262,13 +254,19 @@ fn generic_extension<'cx, 'tt>(
// are not recorded. On the first `Success(..)`ful matcher, the spans are merged.
let mut gated_spans_snapshot = mem::take(&mut *sess.gated_spans.spans.borrow_mut());

match tt_parser.parse_tt(&mut Cow::Borrowed(&parser), delimited_inner_tts(lhs)) {
match tt_parser.parse_tt(&mut Cow::Borrowed(&parser), lhs) {
Success(named_matches) => {
// The matcher was `Success(..)`ful.
// Merge the gated spans from parsing the matcher with the pre-existing ones.
sess.gated_spans.merge(gated_spans_snapshot);

let rhs = delimited_inner_tts(&rhses[i]).to_vec().clone();
// Ignore the delimiters on the RHS.
let rhs = match &rhses[i] {
mbe::TokenTree::Delimited(_, delimited) => {
delimited.inner_tts().to_vec().clone()
}
_ => cx.span_bug(sp, "malformed macro rhs"),
};
let arm_span = rhses[i].span();

let rhs_spans = rhs.iter().map(|t| t.span()).collect::<Vec<_>>();
@@ -346,10 +344,8 @@ fn generic_extension<'cx, 'tt>(
// Check whether there's a missing comma in this macro call, like `println!("{}" a);`
if let Some((arg, comma_span)) = arg.add_comma() {
for lhs in lhses {
if let Success(_) = tt_parser.parse_tt(
&mut Cow::Borrowed(&parser_from_cx(sess, arg.clone())),
delimited_inner_tts(lhs),
) {
let parser = parser_from_cx(sess, arg.clone());
if let Success(_) = tt_parser.parse_tt(&mut Cow::Borrowed(&parser), lhs) {
if comma_span.is_dummy() {
err.note("you might be missing a comma");
} else {
@@ -440,6 +436,8 @@ pub fn compile_declarative_macro(
}),
),
];
// Convert it into `MatcherLoc` form.
let argument_gram = mbe::macro_parser::compute_locs(&sess.parse_sess, &argument_gram);

let parser = Parser::new(&sess.parse_sess, body, true, rustc_parse::MACRO_ARGUMENTS);
let mut tt_parser = TtParser::new(def.ident);
@@ -536,6 +534,25 @@ pub fn compile_declarative_macro(
None => {}
}

// Convert the lhses into `MatcherLoc` form, which is better for doing the
// actual matching. Unless the matcher is invalid.
let lhses = if valid {
lhses
.iter()
.map(|lhs| {
// Ignore the delimiters around the matcher.
match lhs {
mbe::TokenTree::Delimited(_, delimited) => {
mbe::macro_parser::compute_locs(&sess.parse_sess, delimited.inner_tts())
}
_ => sess.parse_sess.span_diagnostic.span_bug(def.span, "malformed macro lhs"),
}
})
.collect()
} else {
vec![]
};

mk_syn_ext(Box::new(MacroRulesMacroExpander {
name: def.ident,
span: def.span,