From 0c004cc6b7062b3d1645173e53977279534cafca Mon Sep 17 00:00:00 2001 From: Pierre-Emmanuel Patry Date: Mon, 17 Nov 2025 15:01:14 +0100 Subject: [PATCH] Use tl::expected in the parser to avoid error state We made heavy use of error states within some AST nodes, which was the source of multiple errors and caused confusion with (null) pointers. This commit removes some uses of error states within our parser, in an attempt to remove those error states entirely later. gcc/rust/ChangeLog: * parse/rust-parse-impl.h (Parser::parse_inner_attributes): Change return type to avoid empty/error values that may break invariants in the AST. (Parser::parse_inner_attribute): Likewise. (Parser::parse_outer_attribute): Likewise. (Parser::parse_outer_attributes): Likewise. (Parser::parse_attribute_body): Likewise. (Parser::parse_simple_path): Likewise. (Parser::parse_macro_invocation): Likewise. (Parser::parse_visibility): Likewise. (Parser::parse_use_tree): Likewise. (Parser::parse_delim_token_tree): Likewise. (Parser::parse_identifier_or_keyword_token): Likewise. (Parser::parse_token_tree): Likewise. (Parser::parse_macro_rules_def): Likewise. (Parser::parse_decl_macro_def): Likewise. (Parser::parse_macro_invocation): Likewise. (Parser::parse_macro_rule): Likewise. (Parser::parse_macro_matcher): Likewise. (Parser::parse_type_path_segment): Likewise. (Parser::parse_path_expr_segment): Likewise. (Parser::parse_type): Likewise. (Parser::parse_type_no_bounds): Likewise. (is_simple_path_segment): Move to utility file. (token_id_matches_delims): Likewise. (is_likely_path_next): Remove unused function. (Parser::parse_attr_input): Return a structure instead of a tuple. * expand/rust-macro-builtins-offset-of.cc: Adapt call to expected. * ast/rust-ast.cc (AttributeParser::parse_path_meta_item): Use empty vector when an error is encountered. * expand/rust-macro-builtins-include.cc: Likewise. * parse/rust-parse.h: Update prototypes. * parse/rust-parse-impl-proc-macro.cc: Likewise. 
* ast/rust-ast.h: Remove error state from Visibility. * ast/rust-item.h: Use private visibility instead of error. * ast/rust-macro.h: Likewise. * expand/rust-macro-expand.cc: Likewise. * hir/rust-ast-lower.cc: Remove error case. * rust-session-manager.cc: Use private visibility * parse/rust-parse-utils.h: New file. * parse/rust-parse-error.h: New file. Signed-off-by: Pierre-Emmanuel Patry --- gcc/rust/ast/rust-ast.cc | 14 +- gcc/rust/ast/rust-ast.h | 23 +- gcc/rust/ast/rust-item.h | 16 +- gcc/rust/ast/rust-macro.h | 2 +- .../expand/rust-macro-builtins-include.cc | 3 +- .../expand/rust-macro-builtins-offset-of.cc | 4 +- gcc/rust/expand/rust-macro-expand.cc | 8 +- gcc/rust/hir/rust-ast-lower.cc | 4 +- gcc/rust/parse/rust-parse-error.h | 320 +++++++++++ gcc/rust/parse/rust-parse-impl-proc-macro.cc | 2 +- gcc/rust/parse/rust-parse-impl.h | 517 +++++++++--------- gcc/rust/parse/rust-parse-utils.h | 73 +++ gcc/rust/parse/rust-parse.h | 39 +- gcc/rust/rust-session-manager.cc | 2 +- 14 files changed, 715 insertions(+), 312 deletions(-) create mode 100644 gcc/rust/parse/rust-parse-error.h create mode 100644 gcc/rust/parse/rust-parse-utils.h diff --git a/gcc/rust/ast/rust-ast.cc b/gcc/rust/ast/rust-ast.cc index 851f7ea4b6f..f15cc3d3cba 100644 --- a/gcc/rust/ast/rust-ast.cc +++ b/gcc/rust/ast/rust-ast.cc @@ -3490,7 +3490,9 @@ Module::load_items () // we need to parse any possible inner attributes for this module inner_attrs = parser.parse_inner_attributes (); - auto parsed_items = parser.parse_items (); + auto parsed_items = parser.parse_items ().value_or ( + std::vector>{}); + for (const auto &error : parser.get_errors ()) error.emit (); @@ -3705,8 +3707,8 @@ AttributeParser::is_end_meta_item_tok (TokenId id) const std::unique_ptr AttributeParser::parse_path_meta_item () { - SimplePath path = parser->parse_simple_path (); - if (path.is_empty ()) + auto path = parser->parse_simple_path (); + if (!path) { rust_error_at (lexer->peek_token ()->get_locus (), "failed to parse 
simple path in attribute"); @@ -3721,7 +3723,7 @@ AttributeParser::parse_path_meta_item () = parse_meta_item_seq (); return std::unique_ptr ( - new MetaItemSeq (std::move (path), std::move (meta_items))); + new MetaItemSeq (std::move (path.value ()), std::move (meta_items))); } case EQUAL: { @@ -3735,12 +3737,12 @@ AttributeParser::parse_path_meta_item () return nullptr; return std::unique_ptr ( - new MetaItemPathExpr (std::move (path), std::move (expr))); + new MetaItemPathExpr (std::move (path.value ()), std::move (expr))); } case COMMA: // just simple path return std::unique_ptr ( - new MetaItemPath (std::move (path))); + new MetaItemPath (std::move (path.value ()))); default: rust_error_at (lexer->peek_token ()->get_locus (), "unrecognised token '%s' in meta item", diff --git a/gcc/rust/ast/rust-ast.h b/gcc/rust/ast/rust-ast.h index 8610ade830c..07ce5bf9fae 100644 --- a/gcc/rust/ast/rust-ast.h +++ b/gcc/rust/ast/rust-ast.h @@ -477,38 +477,21 @@ struct Visibility SimplePath in_path; location_t locus; - // should this store location info? - -public: - // Creates a Visibility - TODO make constructor protected or private? Visibility (VisType vis_type, SimplePath in_path, location_t locus) : vis_type (vis_type), in_path (std::move (in_path)), locus (locus) {} +public: VisType get_vis_type () const { return vis_type; } - // Returns whether visibility is in an error state. - bool is_error () const - { - return vis_type == PUB_IN_PATH && in_path.is_empty (); - } - // Returns whether a visibility has a path - bool has_path () const { return !is_error () && vis_type >= PUB_CRATE; } + bool has_path () const { return vis_type >= PUB_CRATE; } // Returns whether visibility is public or not. - bool is_public () const { return vis_type != PRIV && !is_error (); } + bool is_public () const { return vis_type != PRIV; } location_t get_locus () const { return locus; } - // empty? - // Creates an error visibility. 
- static Visibility create_error () - { - return Visibility (PUB_IN_PATH, SimplePath::create_empty (), - UNDEF_LOCATION); - } - // Unique pointer custom clone function /*std::unique_ptr clone_visibility() const { return std::unique_ptr(clone_visibility_impl()); diff --git a/gcc/rust/ast/rust-item.h b/gcc/rust/ast/rust-item.h index 3e3735c3ece..d04f7fad658 100644 --- a/gcc/rust/ast/rust-item.h +++ b/gcc/rust/ast/rust-item.h @@ -814,7 +814,7 @@ class Module : public VisItem // Loaded module constructor, with items Module (Identifier name, location_t locus, std::vector> items, - Visibility visibility = Visibility::create_error (), + Visibility visibility = Visibility::create_private (), Unsafety safety = Unsafety::Normal, std::vector inner_attrs = std::vector (), std::vector outer_attrs = std::vector ()) @@ -1743,7 +1743,7 @@ class StructField bool has_outer_attributes () const { return !outer_attrs.empty (); } // Returns whether struct field has a non-private (non-default) visibility. - bool has_visibility () const { return !visibility.is_error (); } + bool has_visibility () const { return true; } StructField (Identifier field_name, std::unique_ptr field_type, Visibility vis, location_t locus, @@ -1797,8 +1797,8 @@ class StructField // Creates an error state struct field. static StructField create_error () { - return StructField (std::string (""), nullptr, Visibility::create_error (), - UNDEF_LOCATION); + return StructField (std::string (""), nullptr, + Visibility::create_private (), UNDEF_LOCATION); } std::string as_string () const; @@ -1902,7 +1902,7 @@ class TupleField /* Returns whether tuple field has a non-default visibility (i.e. a public * one) */ - bool has_visibility () const { return !visibility.is_error (); } + bool has_visibility () const { return true; } // Complete constructor TupleField (std::unique_ptr field_type, Visibility vis, @@ -1952,7 +1952,7 @@ class TupleField // Creates an error state tuple field. 
static TupleField create_error () { - return TupleField (nullptr, Visibility::create_error (), UNDEF_LOCATION); + return TupleField (nullptr, Visibility::create_private (), UNDEF_LOCATION); } std::string as_string () const; @@ -3389,7 +3389,7 @@ class ExternalTypeItem : public ExternalItem bool has_outer_attrs () const { return !outer_attrs.empty (); } // Returns whether item has non-default visibility. - bool has_visibility () const { return !visibility.is_error (); } + bool has_visibility () const { return true; } location_t get_locus () const { return locus; } @@ -3481,7 +3481,7 @@ class ExternalStaticItem : public ExternalItem bool has_outer_attrs () const { return !outer_attrs.empty (); } // Returns whether item has non-default visibility. - bool has_visibility () const { return !visibility.is_error (); } + bool has_visibility () const { return true; } location_t get_locus () const { return locus; } diff --git a/gcc/rust/ast/rust-macro.h b/gcc/rust/ast/rust-macro.h index 416507501ac..2100336c71e 100644 --- a/gcc/rust/ast/rust-macro.h +++ b/gcc/rust/ast/rust-macro.h @@ -529,7 +529,7 @@ class MacroRulesDefinition : public VisItem return std::make_unique ( MacroRulesDefinition (rule_name, delim_type, rules, outer_attrs, locus, AST::MacroRulesDefinition::MacroKind::MBE, - AST::Visibility::create_error ())); + AST::Visibility::create_private ())); } static std::unique_ptr diff --git a/gcc/rust/expand/rust-macro-builtins-include.cc b/gcc/rust/expand/rust-macro-builtins-include.cc index 2ab2a3a1dbb..bae5c181fb9 100644 --- a/gcc/rust/expand/rust-macro-builtins-include.cc +++ b/gcc/rust/expand/rust-macro-builtins-include.cc @@ -248,7 +248,8 @@ MacroBuiltin::include_handler (location_t invoc_locus, std::vector> parsed_items{}; if (is_semicoloned) - parsed_items = parser.parse_items (); + parsed_items = parser.parse_items ().value_or ( + std::vector>{}); else parsed_expr = parser.parse_expr (); diff --git a/gcc/rust/expand/rust-macro-builtins-offset-of.cc 
b/gcc/rust/expand/rust-macro-builtins-offset-of.cc index 02c637bebd0..25bc290ed18 100644 --- a/gcc/rust/expand/rust-macro-builtins-offset-of.cc +++ b/gcc/rust/expand/rust-macro-builtins-offset-of.cc @@ -56,7 +56,7 @@ MacroBuiltin::offset_of_handler (location_t invoc_locus, parser.skip_token (COMMA); auto field_tok = parser.parse_identifier_or_keyword_token (); - auto invalid_field = !field_tok || !field_tok->should_have_str (); + auto invalid_field = !field_tok || !field_tok.value ()->should_have_str (); if (invalid_field) rust_error_at (invoc_locus, "could not parse field argument for %qs", @@ -65,7 +65,7 @@ MacroBuiltin::offset_of_handler (location_t invoc_locus, if (!type || invalid_field) return tl::nullopt; - auto field = Identifier (field_tok->get_str ()); + auto field = Identifier (field_tok.value ()->get_str ()); // FIXME: Do we need to do anything to handle the optional comma at the end? parser.maybe_skip_token (COMMA); diff --git a/gcc/rust/expand/rust-macro-expand.cc b/gcc/rust/expand/rust-macro-expand.cc index b47e43afd76..4504589040f 100644 --- a/gcc/rust/expand/rust-macro-expand.cc +++ b/gcc/rust/expand/rust-macro-expand.cc @@ -858,7 +858,9 @@ transcribe_many_items (Parser &parser, TokenId &delimiter) { return parse_many (parser, delimiter, [&parser] () { auto item = parser.parse_item (true); - return AST::SingleASTNode (std::move (item)); + if (!item) + return AST::SingleASTNode (std::unique_ptr (nullptr)); + return AST::SingleASTNode (std::move (item.value ())); }); } @@ -1191,9 +1193,9 @@ MacroExpander::parse_proc_macro_output (ProcMacro::TokenStream ts) while (lex.peek_token ()->get_id () != END_OF_FILE) { auto result = parser.parse_item (false); - if (result == nullptr) + if (!result) break; - nodes.emplace_back (std::move (result)); + nodes.emplace_back (std::move (result.value ())); } break; case ContextType::STMT: diff --git a/gcc/rust/hir/rust-ast-lower.cc b/gcc/rust/hir/rust-ast-lower.cc index 5b8e7c51437..758bc59f512 100644 --- 
a/gcc/rust/hir/rust-ast-lower.cc +++ b/gcc/rust/hir/rust-ast-lower.cc @@ -38,8 +38,6 @@ translate_visibility (const AST::Visibility &vis) // the AST vis is an error? // FIXME: We need to add a `create_private()` static function to the // AST::Visibility class and use it when the vis is empty in the parser... - if (vis.is_error ()) - return Visibility::create_error (); switch (vis.get_vis_type ()) { @@ -57,7 +55,7 @@ translate_visibility (const AST::Visibility &vis) break; } - return Visibility::create_error (); + rust_unreachable (); } ASTLowering::ASTLowering (AST::Crate &astCrate) : astCrate (astCrate) {} diff --git a/gcc/rust/parse/rust-parse-error.h b/gcc/rust/parse/rust-parse-error.h new file mode 100644 index 00000000000..8c0c893effb --- /dev/null +++ b/gcc/rust/parse/rust-parse-error.h @@ -0,0 +1,320 @@ +// Copyright (C) 2025 Free Software Foundation, Inc. + +// This file is part of GCC. + +// GCC is free software; you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 3, or (at your option) any later +// version. + +// GCC is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. + +// You should have received a copy of the GNU General Public License +// along with GCC; see the file COPYING3. If not see +// . 
+ +#ifndef RUST_PARSE_ERROR_H +#define RUST_PARSE_ERROR_H + +#include "expected.h" +#include "rust-ast.h" +#include "rust-parse-utils.h" + +namespace Rust { +namespace Parse { +namespace Error { + +struct Attribute +{ + static tl::expected make_malformed () + { + return tl::unexpected (Attribute (Kind::MALFORMED)); + } + + static tl::expected make_malformed_body () + { + return tl::unexpected (Attribute (Kind::MALFORMED_BODY)); + } + + static tl::expected make_unexpected_inner () + { + return tl::unexpected (Attribute (Kind::UNEXPECTED_INNER)); + } + + enum class Kind + { + MALFORMED, + MALFORMED_BODY, + UNEXPECTED_INNER, + } kind; + +private: + Attribute (Kind kind) : kind (kind) {} +}; + +struct SimplePath +{ + static tl::expected make_malformed () + { + return tl::unexpected (SimplePath (Kind::MALFORMED)); + } + + enum class Kind + { + MALFORMED, + } kind; + +private: + SimplePath (Kind kind) : kind (kind) {} +}; + +struct AttributeBody +{ + static tl::expected make_invalid_path () + { + return tl::unexpected (AttributeBody (Kind::INVALID_PATH)); + } + + static tl::expected + make_invalid_attrinput () + { + return tl::unexpected ( + AttributeBody (Kind::INVALID_ATTRINPUT)); + } + + enum class Kind + { + INVALID_PATH, + INVALID_ATTRINPUT, + } kind; + +private: + AttributeBody (Kind kind) : kind (kind) {} +}; + +struct SimplePathSegment +{ + static tl::expected + make_invalid_token_or_path_end () + { + return tl::unexpected ( + SimplePathSegment (Kind::INVALID_SIMPLE_PATH_TOKEN)); + } + + enum class Kind + { + /* Invalid token found whilst parsing a simple path segment, could be an + error or the end of the path */ + INVALID_SIMPLE_PATH_TOKEN, + } kind; + +private: + SimplePathSegment (Kind kind) : kind (kind) {} +}; + +struct PathIdentSegment +{ + static tl::expected + make_invalid_token () + { + return tl::unexpected ( + PathIdentSegment (Kind::INVALID_PATH_IDENT_TOKEN)); + } + + enum class Kind + { + INVALID_PATH_IDENT_TOKEN, + } kind; + +private: + 
PathIdentSegment (Kind kind) : kind (kind) {} +}; + +struct AttrInput +{ + static tl::expected, AttrInput> + make_malformed () + { + return tl::unexpected (AttrInput (Kind::MALFORMED)); + } + + static tl::expected, AttrInput> + make_bad_macro_invocation () + { + return tl::unexpected (AttrInput (Kind::BAD_MACRO_INVOCATION)); + } + + static tl::expected, AttrInput> + make_missing_attrinput () + { + return tl::unexpected (AttrInput (Kind::MISSING)); + } + + static tl::expected, AttrInput> + make_bad_token_tree () + { + return tl::unexpected (AttrInput (Kind::BAD_TOKEN_TREE)); + } + + enum class Kind + { + MALFORMED, + BAD_MACRO_INVOCATION, + BAD_TOKEN_TREE, + // Not a hard error in some contexts + MISSING, + } kind; + +private: + AttrInput (Kind kind) : kind (kind) {} +}; + +struct DelimTokenTree +{ + static tl::expected + make_expected_delimiter () + { + return tl::unexpected ( + DelimTokenTree (Kind::EXPECTED_DELIMITER)); + } + + static tl::expected + make_invalid_token_tree () + { + return tl::unexpected ( + DelimTokenTree (Kind::INVALID_TOKEN_TREE)); + } + + static tl::expected + make_mismatched_delimiters () + { + return tl::unexpected ( + DelimTokenTree (Kind::MISMATCHED_DELIMITERS)); + } + + enum class Kind + { + EXPECTED_DELIMITER, + INVALID_TOKEN_TREE, + MISMATCHED_DELIMITERS, + } kind; + +private: + DelimTokenTree (Kind kind) : kind (kind) {} +}; + +struct Token +{ + static tl::expected, Token> make_malformed () + { + return tl::unexpected (Token (Kind::MALFORMED)); + } + + enum class Kind + { + MALFORMED, + } kind; + +private: + Token (Kind kind) : kind (kind) {} +}; + +struct TokenTree +{ + static tl::expected, TokenTree> + make_malformed () + { + return tl::unexpected (TokenTree (Kind::MALFORMED)); + } + + static tl::expected, TokenTree> + make_malformed_delimited_token_tree () + { + return tl::unexpected ( + TokenTree (Kind::MALFORMED_DELIMITED_TOKEN_TREE)); + } + + enum class Kind + { + MALFORMED, + MALFORMED_DELIMITED_TOKEN_TREE, + } kind; + +private: +
TokenTree (Kind kind) : kind (kind) {} +}; + +struct Item +{ + static tl::expected, Item> make_end_of_file () + { + return tl::unexpected (Item (Kind::END_OF_FILE)); + } + + static tl::expected, Item> make_malformed () + { + return tl::unexpected (Item (Kind::MALFORMED)); + } + + enum class Kind + { + END_OF_FILE, + MALFORMED, + } kind; + +private: + Item (Kind kind) : kind (kind) {} +}; + +struct Items +{ + static tl::expected>, Items> + make_malformed (std::vector> items) + { + return tl::unexpected (Items (Kind::MALFORMED, std::move (items))); + } + + enum class Kind + { + MALFORMED, + } kind; + + // Should we do anything with valid items ? + std::vector> items; + +private: + Items (Kind kind, std::vector> items) + : kind (kind), items (std::move (items)) + {} +}; + +struct Visibility +{ + static tl::expected make_malformed () + { + return tl::unexpected (Visibility (Kind::MALFORMED)); + } + + static tl::expected make_missing_path () + { + return tl::unexpected (Visibility (Kind::MISSING_PATH)); + } + + enum class Kind + { + MISSING_PATH, + MALFORMED, + } kind; + +private: + Visibility (Kind kind) : kind (kind) {} +}; + +} // namespace Error +} // namespace Parse +} // namespace Rust + +#endif /* !RUST_PARSE_ERROR_H */ diff --git a/gcc/rust/parse/rust-parse-impl-proc-macro.cc b/gcc/rust/parse/rust-parse-impl-proc-macro.cc index edc484f7583..965c9146d1d 100644 --- a/gcc/rust/parse/rust-parse-impl-proc-macro.cc +++ b/gcc/rust/parse/rust-parse-impl-proc-macro.cc @@ -21,7 +21,7 @@ namespace Rust { -template std::unique_ptr +template tl::expected, Parse::Error::Item> Parser::parse_item (bool); template std::unique_ptr diff --git a/gcc/rust/parse/rust-parse-impl.h b/gcc/rust/parse/rust-parse-impl.h index afc70123095..77f2a6dd691 100644 --- a/gcc/rust/parse/rust-parse-impl.h +++ b/gcc/rust/parse/rust-parse-impl.h @@ -37,6 +37,7 @@ #include "rust-attribute-values.h" #include "rust-keyword-values.h" #include "rust-edition.h" +#include "rust-parse-error.h" #include 
"optional.h" @@ -420,7 +421,7 @@ Parser::done_end_of_file () // Parses a sequence of items within a module or the implicit top-level module // in a crate template -std::vector> +tl::expected>, Parse::Error::Items> Parser::parse_items () { std::vector> items; @@ -428,19 +429,20 @@ Parser::parse_items () const_TokenPtr t = lexer.peek_token (); while (t->get_id () != END_OF_FILE) { - std::unique_ptr item = parse_item (false); - if (item == nullptr) + auto item = parse_item (false); + if (!item) { + // TODO(patryp): Remove that generic error message, we have probably + // already thrown an error at this point with further details Error error (lexer.peek_token ()->get_locus (), "failed to parse item in crate"); add_error (std::move (error)); - // TODO: should all items be cleared? - items = std::vector> (); - break; + // Should we try parsing more items ? + return Parse::Error::Items::make_malformed (std::move (items)); } - items.push_back (std::move (item)); + items.push_back (std::move (item.value ())); t = lexer.peek_token (); } @@ -457,7 +459,8 @@ Parser::parse_crate () AST::AttrVec inner_attrs = parse_inner_attributes (); // parse items - std::vector> items = parse_items (); + auto items + = parse_items ().value_or (std::vector>{}); // emit all errors for (const auto &error : error_table) @@ -474,18 +477,22 @@ Parser::parse_inner_attributes () { AST::AttrVec inner_attributes; - // only try to parse it if it starts with "#!" 
not only "#" - while ((lexer.peek_token ()->get_id () == HASH - && lexer.peek_token (1)->get_id () == EXCLAM) - || lexer.peek_token ()->get_id () == INNER_DOC_COMMENT) + auto has_valid_inner_attribute_prefix = [&] () { + auto id = lexer.peek_token ()->get_id (); + /* Outer attribute `#[` is not allowed, only accepts `#!` */ + return (id == HASH && lexer.peek_token (1)->get_id () == EXCLAM) + || id == INNER_DOC_COMMENT; + }; + + while (has_valid_inner_attribute_prefix ()) { - AST::Attribute inner_attr = parse_inner_attribute (); + auto inner_attr = parse_inner_attribute (); /* Ensure only valid inner attributes are added to the inner_attributes * list */ - if (!inner_attr.is_empty ()) + if (inner_attr) { - inner_attributes.push_back (std::move (inner_attr)); + inner_attributes.push_back (std::move (inner_attr.value ())); } else { @@ -520,7 +527,7 @@ Parser::parse_doc_comment () // Parse a single inner attribute. template -AST::Attribute +tl::expected Parser::parse_inner_attribute () { if (lexer.peek_token ()->get_id () == INNER_DOC_COMMENT) @@ -532,15 +539,8 @@ Parser::parse_inner_attribute () return AST::Attribute (std::move (path), std::move (input), loc, true); } - if (lexer.peek_token ()->get_id () != HASH) - { - Error error (lexer.peek_token ()->get_locus (), - "BUG: token %<#%> is missing, but % " - "was invoked"); - add_error (std::move (error)); + rust_assert (lexer.peek_token ()->get_id () == HASH); - return AST::Attribute::create_empty (); - } lexer.skip_token (); if (lexer.peek_token ()->get_id () != EXCLAM) @@ -549,37 +549,38 @@ Parser::parse_inner_attribute () "expected % or %<[%> for inner attribute"); add_error (std::move (error)); - return AST::Attribute::create_empty (); + return Parse::Error::Attribute::make_malformed (); } lexer.skip_token (); if (!skip_token (LEFT_SQUARE)) - return AST::Attribute::create_empty (); + return Parse::Error::Attribute::make_malformed (); - auto values = parse_attribute_body (); + auto body_res = parse_attribute_body 
(); + if (!body_res) + return Parse::Error::Attribute::make_malformed (); + auto body = std::move (body_res.value ()); - auto path = std::move (std::get<0> (values)); - auto input = std::move (std::get<1> (values)); - auto loc = std::get<2> (values); auto actual_attribute - = AST::Attribute (std::move (path), std::move (input), loc, true); + = AST::Attribute (std::move (body.path), std::move (body.input), body.locus, + true); if (!skip_token (RIGHT_SQUARE)) - return AST::Attribute::create_empty (); + return Parse::Error::Attribute::make_malformed (); return actual_attribute; } // Parses the body of an attribute (inner or outer). template -std::tuple, location_t> +tl::expected Parser::parse_attribute_body () { location_t locus = lexer.peek_token ()->get_locus (); - AST::SimplePath attr_path = parse_simple_path (); + auto attr_path = parse_simple_path (); // ensure path is valid to parse attribute input - if (attr_path.is_empty ()) + if (!attr_path) { Error error (lexer.peek_token ()->get_locus (), "empty simple path in attribute"); @@ -587,47 +588,34 @@ Parser::parse_attribute_body () // Skip past potential further info in attribute (i.e. attr_input) skip_after_end_attribute (); - return std::make_tuple (std::move (attr_path), nullptr, UNDEF_LOCATION); + return Parse::Error::AttributeBody::make_invalid_path (); } - std::unique_ptr attr_input = parse_attr_input (); + auto attr_input = parse_attr_input (); // AttrInput is allowed to be null, so no checks here - - return std::make_tuple (std::move (attr_path), std::move (attr_input), locus); -} - -/* Determines whether token is a valid simple path segment. This does not - * include scope resolution operators. 
*/ -inline bool -is_simple_path_segment (TokenId id) -{ - switch (id) - { - case IDENTIFIER: - case SUPER: - case SELF: - case CRATE: - return true; - case DOLLAR_SIGN: - // assume that dollar sign leads to $crate - return true; - default: - return false; - } + if (attr_input) + return Parse::AttributeBody{std::move (attr_path.value ()), + std::move (attr_input.value ()), locus}; + else if (attr_input.error ().kind == Parse::Error::AttrInput::Kind::MISSING) + return Parse::AttributeBody{std::move (attr_path.value ()), nullptr, locus}; + else + return Parse::Error::AttributeBody::make_invalid_attrinput (); } // Parses a SimplePath AST node, if it exists. Does nothing otherwise. template -AST::SimplePath +tl::expected Parser::parse_simple_path () { bool has_opening_scope_resolution = false; location_t locus = UNKNOWN_LOCATION; + using Parse::Utils::is_simple_path_segment; + // don't parse anything if not a path upfront if (!is_simple_path_segment (lexer.peek_token ()->get_id ()) && !is_simple_path_segment (lexer.peek_token (1)->get_id ())) - return AST::SimplePath::create_empty (); + return Parse::Error::SimplePath::make_malformed (); /* Checks for opening scope resolution (i.e. 
global scope fully-qualified * path) */ @@ -641,42 +629,34 @@ Parser::parse_simple_path () } // Parse single required simple path segment - AST::SimplePathSegment segment = parse_simple_path_segment (); + auto segment = parse_simple_path_segment (); + + if (!segment) + return Parse::Error::SimplePath::make_malformed (); // get location if not gotten already if (locus == UNKNOWN_LOCATION) - locus = segment.get_locus (); + locus = segment->get_locus (); std::vector segments; - - // Return empty vector if first, actually required segment is an error - if (segment.is_error ()) - return AST::SimplePath::create_empty (); - - segments.push_back (std::move (segment)); + segments.push_back (std::move (segment.value ())); // Parse all other simple path segments while (lexer.peek_token ()->get_id () == SCOPE_RESOLUTION) { - AST::SimplePathSegment new_segment = parse_simple_path_segment (1); + auto new_segment = parse_simple_path_segment (1); + using Error = Parse::Error::SimplePathSegment::Kind; // Return path as currently constructed if segment in error state. - if (new_segment.is_error ()) - break; - - segments.push_back (std::move (new_segment)); - } - - // DEBUG: check for any empty segments - for (const auto &seg : segments) - { - if (seg.is_error ()) + if (!new_segment) { - rust_debug ( - "when parsing simple path, somehow empty path segment was " - "not filtered out. 
Path begins with '%s'", - segments.at (0).as_string ().c_str ()); + if (new_segment.error ().kind == Error::INVALID_SIMPLE_PATH_TOKEN) + break; /* Could be end of path */ + else /* Any other error is an hard error */ + return Parse::Error::SimplePath::make_malformed (); } + + segments.push_back (std::move (new_segment.value ())); } return AST::SimplePath (std::move (segments), has_opening_scope_resolution, @@ -689,7 +669,7 @@ Parser::parse_simple_path () * operators) * Starts parsing at an offset of base_peek */ template -AST::SimplePathSegment +tl::expected Parser::parse_simple_path_segment (int base_peek) { using namespace Values; @@ -725,20 +705,14 @@ Parser::parse_simple_path_segment (int base_peek) /* could put the rust_error_at thing here but fallthrough (from failing * $crate condition) isn't completely obvious if it is. */ - // test prevent error - return AST::SimplePathSegment::create_error (); + return Parse::Error::SimplePathSegment::make_invalid_token_or_path_end (); } rust_unreachable (); - /*rust_error_at( - t->get_locus(), "invalid token '%s' in simple path segment", - t->get_token_description());*/ - // this is not necessarily an error, e.g. end of path - // return AST::SimplePathSegment::create_error(); } // Parses a PathIdentSegment - an identifier segment of a non-SimplePath path. template -AST::PathIdentSegment +tl::expected Parser::parse_path_ident_segment () { const_TokenPtr t = lexer.peek_token (); @@ -779,15 +753,14 @@ Parser::parse_path_ident_segment () * condition) isn't completely obvious if it is. 
*/ // test prevent error - return AST::PathIdentSegment::create_error (); + return Parse::Error::PathIdentSegment::make_invalid_token (); } rust_unreachable (); - // not necessarily an error } // Parses an AttrInput AST node (polymorphic, as AttrInput is abstract) template -std::unique_ptr +tl::expected, Parse::Error::AttrInput> Parser::parse_attr_input () { const_TokenPtr t = lexer.peek_token (); @@ -797,11 +770,13 @@ Parser::parse_attr_input () case LEFT_SQUARE: case LEFT_CURLY: { + auto dtoken_tree = parse_delim_token_tree (); + if (!dtoken_tree) + return Parse::Error::AttrInput::make_bad_token_tree (); + // must be a delimited token tree, so parse that std::unique_ptr input_tree ( - new AST::DelimTokenTree (parse_delim_token_tree ())); - - // TODO: potential checks on DelimTokenTree before returning + new AST::DelimTokenTree (dtoken_tree.value ())); return input_tree; } @@ -815,13 +790,13 @@ Parser::parse_attr_input () // attempt to parse macro // TODO: macros may/may not be allowed in attributes // this is needed for "#[doc = include_str!(...)]" - if (is_simple_path_segment (t->get_id ())) + if (Parse::Utils::is_simple_path_segment (t->get_id ())) { std::unique_ptr invoke = parse_macro_invocation ({}); if (!invoke) - return nullptr; + return Parse::Error::AttrInput::make_bad_macro_invocation (); return std::unique_ptr ( new AST::AttrInputMacro (std::move (invoke))); @@ -838,7 +813,7 @@ Parser::parse_attr_input () add_error (std::move (error)); skip_after_end_attribute (); - return nullptr; + return Parse::Error::AttrInput::make_malformed (); } AST::Literal::LitType lit_type = AST::Literal::STRING; @@ -889,7 +864,7 @@ Parser::parse_attr_input () case RIGHT_CURLY: case END_OF_FILE: // means AttrInput is missing, which is allowed - return nullptr; + return Parse::Error::AttrInput::make_missing_attrinput (); default: add_error ( Error (t->get_locus (), @@ -898,46 +873,15 @@ Parser::parse_attr_input () t->get_token_description ())); skip_after_end_attribute (); - 
return nullptr; + return Parse::Error::AttrInput::make_malformed (); } rust_unreachable (); // TODO: find out how to stop gcc error on "no return value" } -/* Returns true if the token id matches the delimiter type. Note that this only - * operates for END delimiter tokens. */ -inline bool -token_id_matches_delims (TokenId token_id, AST::DelimType delim_type) -{ - return ((token_id == RIGHT_PAREN && delim_type == AST::PARENS) - || (token_id == RIGHT_SQUARE && delim_type == AST::SQUARE) - || (token_id == RIGHT_CURLY && delim_type == AST::CURLY)); -} - -/* Returns true if the likely result of parsing the next few tokens is a path. - * Not guaranteed, though, especially in the case of syntax errors. */ -inline bool -is_likely_path_next (TokenId next_token_id) -{ - switch (next_token_id) - { - case IDENTIFIER: - case SUPER: - case SELF: - case SELF_ALIAS: - case CRATE: - // maybe - maybe do extra check. But then requires another TokenId. - case DOLLAR_SIGN: - case SCOPE_RESOLUTION: - return true; - default: - return false; - } -} - // Parses a delimited token tree template -AST::DelimTokenTree +tl::expected Parser::parse_delim_token_tree () { const_TokenPtr t = lexer.peek_token (); @@ -965,7 +909,7 @@ Parser::parse_delim_token_tree () "delimited token tree)", t->get_token_description ())); - return AST::DelimTokenTree::create_empty (); + return Parse::Error::DelimTokenTree::make_expected_delimiter (); } // parse actual token tree vector - 0 or more @@ -976,12 +920,11 @@ Parser::parse_delim_token_tree () // repeat loop until finding the matching delimiter t = lexer.peek_token (); - while (!token_id_matches_delims (t->get_id (), delim_type) + while (!Parse::Utils::token_id_matches_delims (t->get_id (), delim_type) && t->get_id () != END_OF_FILE) { - std::unique_ptr tok_tree = parse_token_tree (); - - if (tok_tree == nullptr) + auto tok_tree = parse_token_tree (); + if (!tok_tree) { // TODO: is this error handling appropriate? 
Error error ( @@ -990,10 +933,10 @@ Parser::parse_delim_token_tree () t->get_token_description ()); add_error (std::move (error)); - return AST::DelimTokenTree::create_empty (); + return Parse::Error::DelimTokenTree::make_invalid_token_tree (); } - token_trees_in_tree.push_back (std::move (tok_tree)); + token_trees_in_tree.push_back (std::move (tok_tree.value ())); // lexer.skip_token(); t = lexer.peek_token (); @@ -1008,17 +951,10 @@ Parser::parse_delim_token_tree () // parse end delimiters t = lexer.peek_token (); - if (token_id_matches_delims (t->get_id (), delim_type)) + if (Parse::Utils::token_id_matches_delims (t->get_id (), delim_type)) { // tokens match opening delimiter, so skip. lexer.skip_token (); - - // DEBUG - rust_debug ("finished parsing new delim token tree - peeked token is now " - "'%s' while t is '%s'", - lexer.peek_token ()->get_token_description (), - t->get_token_description ()); - return token_tree; } else @@ -1033,15 +969,13 @@ Parser::parse_delim_token_tree () : (delim_type == AST::SQUARE ? "]" : "}"))); add_error (std::move (error)); - /* return empty token tree despite possibly parsing valid token tree - - * TODO is this a good idea? */ - return AST::DelimTokenTree::create_empty (); + return Parse::Error::DelimTokenTree::make_mismatched_delimiters (); } } // Parses an identifier/keyword as a Token template -std::unique_ptr +tl::expected, Parse::Error::Token> Parser::parse_identifier_or_keyword_token () { const_TokenPtr t = lexer.peek_token (); @@ -1054,14 +988,14 @@ Parser::parse_identifier_or_keyword_token () else { add_error (Error (t->get_locus (), "expected keyword or identifier")); - return nullptr; + return Parse::Error::Token::make_malformed (); } } /* Parses a TokenTree syntactical production. This is either a delimited token * tree or a non-delimiter token. 
*/ template -std::unique_ptr +tl::expected, Parse::Error::TokenTree> Parser::parse_token_tree () { const_TokenPtr t = lexer.peek_token (); @@ -1071,10 +1005,17 @@ Parser::parse_token_tree () case LEFT_PAREN: case LEFT_SQUARE: case LEFT_CURLY: - // Parse delimited token tree - // TODO: use move rather than copy constructor - return std::unique_ptr ( - new AST::DelimTokenTree (parse_delim_token_tree ())); + { + // Parse delimited token tree + auto delim_token_tree = parse_delim_token_tree (); + if (!delim_token_tree) + return Parse::Error::TokenTree:: + make_malformed_delimited_token_tree (); + + // TODO: use move rather than copy constructor + return std::unique_ptr ( + new AST::DelimTokenTree (delim_token_tree.value ())); + } case RIGHT_PAREN: case RIGHT_SQUARE: case RIGHT_CURLY: @@ -1086,7 +1027,7 @@ Parser::parse_token_tree () t->get_token_description ())); lexer.skip_token (); - return nullptr; + return Parse::Error::TokenTree::make_malformed (); default: // parse token itself as TokenTree lexer.skip_token (); @@ -1108,7 +1049,7 @@ Parser::is_macro_rules_def (const_TokenPtr t) // Parses a single item template -std::unique_ptr +tl::expected, Parse::Error::Item> Parser::parse_item (bool called_from_statement) { // has a "called_from_statement" parameter for better error message handling @@ -1129,7 +1070,7 @@ Parser::parse_item (bool called_from_statement) "expected item after outer attribute or doc comment"); add_error (std::move (error)); } - return nullptr; + return Parse::Error::Item::make_end_of_file (); case ASYNC: case PUB: @@ -1153,22 +1094,34 @@ Parser::parse_item (bool called_from_statement) case UNSAFE: // maybe - unsafe traits are a thing // if any of these (should be all possible VisItem prefixes), parse a // VisItem - return parse_vis_item (std::move (outer_attrs)); - break; + { + auto vis_item = parse_vis_item (std::move (outer_attrs)); + if (!vis_item) + return Parse::Error::Item::make_malformed (); + return vis_item; + } case SUPER: case SELF: case 
CRATE: case DOLLAR_SIGN: // almost certainly macro invocation semi - return parse_macro_invocation_semi (std::move (outer_attrs)); - break; + { + auto macro_invoc_semi + = parse_macro_invocation_semi (std::move (outer_attrs)); + if (!macro_invoc_semi) + return Parse::Error::Item::make_malformed (); + return macro_invoc_semi; + } // crappy hack to do union "keyword" case IDENTIFIER: // TODO: ensure std::string and literal comparison works if (t->get_str () == Values::WeakKeywords::UNION && lexer.peek_token (1)->get_id () == IDENTIFIER) { - return parse_vis_item (std::move (outer_attrs)); + auto vis_item = parse_vis_item (std::move (outer_attrs)); + if (!vis_item) + return Parse::Error::Item::make_malformed (); + return vis_item; // or should this go straight to parsing union? } else if (t->get_str () == Values::WeakKeywords::DEFAULT @@ -1177,19 +1130,26 @@ Parser::parse_item (bool called_from_statement) add_error (Error (t->get_locus (), "%qs is only allowed on items within %qs blocks", "default", "impl")); - return nullptr; + return Parse::Error::Item::make_malformed (); } else if (is_macro_rules_def (t)) { // macro_rules! macro item - return parse_macro_rules_def (std::move (outer_attrs)); + auto macro_rule_def = parse_macro_rules_def (std::move (outer_attrs)); + if (!macro_rule_def) + return Parse::Error::Item::make_malformed (); + return macro_rule_def; } else if (lexer.peek_token (1)->get_id () == SCOPE_RESOLUTION || lexer.peek_token (1)->get_id () == EXCLAM) { /* path (probably) or macro invocation, so probably a macro invocation * semi */ - return parse_macro_invocation_semi (std::move (outer_attrs)); + auto macro_invocation_semi + = parse_macro_invocation_semi (std::move (outer_attrs)); + if (!macro_invocation_semi) + return Parse::Error::Item::make_malformed (); + return macro_invocation_semi; } gcc_fallthrough (); default: @@ -1200,7 +1160,7 @@ Parser::parse_item (bool called_from_statement) called_from_statement ? 
"statement" : "item")); // skip somewhere? - return nullptr; + return Parse::Error::Item::make_malformed (); break; } } @@ -1212,19 +1172,21 @@ Parser::parse_outer_attributes () { AST::AttrVec outer_attributes; - while (lexer.peek_token ()->get_id () - == HASH /* Can also be #!, which catches errors. */ - || lexer.peek_token ()->get_id () == OUTER_DOC_COMMENT - || lexer.peek_token ()->get_id () - == INNER_DOC_COMMENT) /* For error handling. */ + auto has_valid_attribute_prefix = [&] () { + auto id = lexer.peek_token ()->get_id (); + /* We allow inner attributes `#!` and catch the error later */ + return id == HASH || id == OUTER_DOC_COMMENT || id == INNER_DOC_COMMENT; + }; + + while (has_valid_attribute_prefix ()) /* For error handling. */ { - AST::Attribute outer_attr = parse_outer_attribute (); + auto outer_attr = parse_outer_attribute (); /* Ensure only valid outer attributes are added to the outer_attributes * list */ - if (!outer_attr.is_empty ()) + if (outer_attr) { - outer_attributes.push_back (std::move (outer_attr)); + outer_attributes.push_back (std::move (outer_attr.value ())); } else { @@ -1243,7 +1205,7 @@ Parser::parse_outer_attributes () // Parse a single outer attribute. 
template -AST::Attribute +tl::expected Parser::parse_outer_attribute () { if (lexer.peek_token ()->get_id () == OUTER_DOC_COMMENT) @@ -1264,13 +1226,13 @@ Parser::parse_outer_attribute () "and before any outer attribute or doc (%<#[%>, % or %)"); add_error (std::move (error)); lexer.skip_token (); - return AST::Attribute::create_empty (); + return Parse::Error::Attribute::make_unexpected_inner (); } /* OuterAttribute -> '#' '[' Attr ']' */ if (lexer.peek_token ()->get_id () != HASH) - return AST::Attribute::create_empty (); + return Parse::Error::Attribute::make_malformed (); lexer.skip_token (); @@ -1287,20 +1249,22 @@ Parser::parse_outer_attribute () "attributes are not possible at this location"); add_error (std::move (error)); } - return AST::Attribute::create_empty (); + return Parse::Error::Attribute::make_unexpected_inner (); } lexer.skip_token (); - auto values = parse_attribute_body (); - auto path = std::move (std::get<0> (values)); - auto input = std::move (std::get<1> (values)); - auto loc = std::get<2> (values); + auto body_res = parse_attribute_body (); + if (!body_res) + return Parse::Error::Attribute::make_malformed_body (); + auto body = std::move (body_res.value ()); + auto actual_attribute - = AST::Attribute (std::move (path), std::move (input), loc, false); + = AST::Attribute (std::move (body.path), std::move (body.input), body.locus, + false); if (lexer.peek_token ()->get_id () != RIGHT_SQUARE) - return AST::Attribute::create_empty (); + return Parse::Error::Attribute::make_malformed (); lexer.skip_token (); @@ -1313,7 +1277,10 @@ std::unique_ptr Parser::parse_vis_item (AST::AttrVec outer_attrs) { // parse visibility, which may or may not exist - AST::Visibility vis = parse_visibility (); + auto vis_res = parse_visibility (); + if (!vis_res) + return nullptr; + auto vis = vis_res.value (); // select VisItem to create depending on keyword const_TokenPtr t = lexer.peek_token (); @@ -1582,7 +1549,8 @@ Parser::parse_macro_rules_def (AST::AttrVec 
outer_attrs) lexer.skip_token (); // don't parse if end of macro rules - if (token_id_matches_delims (lexer.peek_token ()->get_id (), delim_type)) + if (Parse::Utils::token_id_matches_delims (lexer.peek_token ()->get_id (), + delim_type)) { // DEBUG rust_debug ( @@ -1612,7 +1580,7 @@ Parser::parse_macro_rules_def (AST::AttrVec outer_attrs) // parse end delimiters t = lexer.peek_token (); - if (token_id_matches_delims (t->get_id (), delim_type)) + if (Parse::Utils::token_id_matches_delims (t->get_id (), delim_type)) { // tokens match opening delimiter, so skip. lexer.skip_token (); @@ -1704,8 +1672,12 @@ Parser::parse_decl_macro_def (AST::Visibility vis, } location_t transcriber_loc = lexer.peek_token ()->get_locus (); - AST::DelimTokenTree delim_tok_tree = parse_delim_token_tree (); - AST::MacroTranscriber transcriber (delim_tok_tree, transcriber_loc); + auto delim_tok_tree = parse_delim_token_tree (); + if (!delim_tok_tree) + return nullptr; + + AST::MacroTranscriber transcriber (delim_tok_tree.value (), + transcriber_loc); if (transcriber.get_token_tree ().get_delim_type () != AST::DelimType::CURLY) @@ -1765,8 +1737,8 @@ Parser::parse_decl_macro_def (AST::Visibility vis, lexer.skip_token (); // don't parse if end of macro rules - if (token_id_matches_delims (lexer.peek_token ()->get_id (), - AST::CURLY)) + if (Parse::Utils::token_id_matches_delims ( + lexer.peek_token ()->get_id (), AST::CURLY)) { break; } @@ -1818,7 +1790,9 @@ Parser::parse_macro_invocation_semi ( AST::AttrVec outer_attrs) { location_t macro_locus = lexer.peek_token ()->get_locus (); - AST::SimplePath path = parse_simple_path (); + auto path = parse_simple_path (); + if (!path) + return nullptr; if (!skip_token (EXCLAM)) { @@ -1861,12 +1835,11 @@ Parser::parse_macro_invocation_semi ( t = lexer.peek_token (); // parse token trees until the initial delimiter token is found again - while (!token_id_matches_delims (t->get_id (), delim_type) + while (!Parse::Utils::token_id_matches_delims (t->get_id 
(), delim_type) && t->get_id () != END_OF_FILE) { - std::unique_ptr tree = parse_token_tree (); - - if (tree == nullptr) + auto tree = parse_token_tree (); + if (!tree) { Error error (t->get_locus (), "failed to parse token tree for macro invocation semi " @@ -1877,7 +1850,7 @@ Parser::parse_macro_invocation_semi ( return nullptr; } - token_trees.push_back (std::move (tree)); + token_trees.push_back (std::move (tree.value ())); t = lexer.peek_token (); } @@ -1887,11 +1860,12 @@ Parser::parse_macro_invocation_semi ( AST::DelimTokenTree delim_tok_tree (delim_type, std::move (token_trees), tok_tree_locus); - AST::MacroInvocData invoc_data (std::move (path), std::move (delim_tok_tree)); + AST::MacroInvocData invoc_data (std::move (path.value ()), + std::move (delim_tok_tree)); // parse end delimiters t = lexer.peek_token (); - if (token_id_matches_delims (t->get_id (), delim_type)) + if (Parse::Utils::token_id_matches_delims (t->get_id (), delim_type)) { // tokens match opening delimiter, so skip. 
lexer.skip_token (); @@ -1942,8 +1916,8 @@ std::unique_ptr Parser::parse_macro_invocation (AST::AttrVec outer_attrs) { // parse macro path - AST::SimplePath macro_path = parse_simple_path (); - if (macro_path.is_empty ()) + auto macro_path = parse_simple_path (); + if (!macro_path) { Error error (lexer.peek_token ()->get_locus (), "failed to parse macro invocation path"); @@ -1960,12 +1934,15 @@ Parser::parse_macro_invocation (AST::AttrVec outer_attrs) } // parse internal delim token tree - AST::DelimTokenTree delim_tok_tree = parse_delim_token_tree (); + auto delim_tok_tree = parse_delim_token_tree (); + if (!delim_tok_tree) + return nullptr; - location_t macro_locus = macro_path.get_locus (); + location_t macro_locus = macro_path->get_locus (); return AST::MacroInvocation::Regular ( - AST::MacroInvocData (std::move (macro_path), std::move (delim_tok_tree)), + AST::MacroInvocData (std::move (macro_path.value ()), + std::move (delim_tok_tree.value ())), std::move (outer_attrs), macro_locus); } @@ -1990,7 +1967,11 @@ Parser::parse_macro_rule () // parse transcriber (this is just a delim token tree) location_t token_tree_loc = lexer.peek_token ()->get_locus (); - AST::MacroTranscriber transcriber (parse_delim_token_tree (), token_tree_loc); + auto delim_token_tree = parse_delim_token_tree (); + if (!delim_token_tree) + return AST::MacroRule::create_error (token_tree_loc); + + AST::MacroTranscriber transcriber (delim_token_tree.value (), token_tree_loc); return AST::MacroRule (std::move (matcher), std::move (transcriber), locus); } @@ -2039,7 +2020,7 @@ Parser::parse_macro_matcher () t = lexer.peek_token (); // parse token trees until the initial delimiter token is found again - while (!token_id_matches_delims (t->get_id (), delim_type)) + while (!Parse::Utils::token_id_matches_delims (t->get_id (), delim_type)) { std::unique_ptr match = parse_macro_match (); @@ -2092,7 +2073,7 @@ Parser::parse_macro_matcher () // parse end delimiters t = lexer.peek_token (); - if 
(token_id_matches_delims (t->get_id (), delim_type)) + if (Parse::Utils::token_id_matches_delims (t->get_id (), delim_type)) { // tokens match opening delimiter, so skip. lexer.skip_token (); @@ -2368,7 +2349,7 @@ Parser::parse_macro_match_repetition () /* Parses a visibility syntactical production (i.e. creating a non-default * visibility) */ template -AST::Visibility +tl::expected Parser::parse_visibility () { // check for no visibility @@ -2420,27 +2401,28 @@ Parser::parse_visibility () lexer.skip_token (); // parse the "in" path as well - AST::SimplePath path = parse_simple_path (); - if (path.is_empty ()) + auto path = parse_simple_path (); + if (!path) { Error error (lexer.peek_token ()->get_locus (), "missing path in pub(in path) visibility"); add_error (std::move (error)); // skip after somewhere? - return AST::Visibility::create_error (); + return Parse::Error::Visibility::make_missing_path (); } skip_token (RIGHT_PAREN); - return AST::Visibility::create_in_path (std::move (path), vis_loc); + return AST::Visibility::create_in_path (std::move (path.value ()), + vis_loc); } default: add_error (Error (t->get_locus (), "unexpected token %qs in visibility", t->get_token_description ())); lexer.skip_token (); - return AST::Visibility::create_error (); + return Parse::Error::Visibility::make_malformed (); } } @@ -2513,8 +2495,8 @@ Parser::parse_module (AST::Visibility vis, const_TokenPtr tok = lexer.peek_token (); while (tok->get_id () != RIGHT_CURLY) { - std::unique_ptr item = parse_item (false); - if (item == nullptr) + auto item = parse_item (false); + if (!item) { Error error (tok->get_locus (), "failed to parse item in module"); @@ -2523,7 +2505,7 @@ Parser::parse_module (AST::Visibility vis, return nullptr; } - items.push_back (std::move (item)); + items.push_back (std::move (item.value ())); tok = lexer.peek_token (); } @@ -2715,9 +2697,9 @@ Parser::parse_use_tree () location_t locus = lexer.peek_token ()->get_locus (); // bool has_path = false; - 
AST::SimplePath path = parse_simple_path (); + auto path = parse_simple_path (); - if (path.is_empty ()) + if (!path) { // has no path, so must be glob or nested tree UseTree type @@ -2828,14 +2810,14 @@ Parser::parse_use_tree () return std::unique_ptr ( new AST::UseTreeRebind (AST::UseTreeRebind::IDENTIFIER, - std::move (path), locus, t)); + std::move (path.value ()), locus, t)); case UNDERSCORE: // skip lexer token lexer.skip_token (); return std::unique_ptr ( new AST::UseTreeRebind (AST::UseTreeRebind::WILDCARD, - std::move (path), locus, + std::move (path.value ()), locus, {Values::Keywords::UNDERSCORE, t->get_locus ()})); default: @@ -2858,8 +2840,8 @@ Parser::parse_use_tree () case RIGHT_CURLY: // this may occur in recursive calls - assume it is ok and ignore it return std::unique_ptr ( - new AST::UseTreeRebind (AST::UseTreeRebind::NONE, std::move (path), - locus)); + new AST::UseTreeRebind (AST::UseTreeRebind::NONE, + std::move (path.value ()), locus)); case SCOPE_RESOLUTION: // keep going break; @@ -2881,7 +2863,7 @@ Parser::parse_use_tree () return std::unique_ptr ( new AST::UseTreeGlob (AST::UseTreeGlob::PATH_PREFIXED, - std::move (path), locus)); + std::move (path.value ()), locus)); case LEFT_CURLY: { // nested tree UseTree type @@ -2917,8 +2899,8 @@ Parser::parse_use_tree () return std::unique_ptr ( new AST::UseTreeList (AST::UseTreeList::PATH_PREFIXED, - std::move (path), std::move (use_trees), - locus)); + std::move (path.value ()), + std::move (use_trees), locus)); } default: add_error (Error (t->get_locus (), @@ -4544,7 +4526,9 @@ Parser::parse_struct_field () AST::AttrVec outer_attrs = parse_outer_attributes (); // parse visibility, if it exists - AST::Visibility vis = parse_visibility (); + auto vis = parse_visibility (); + if (!vis) + return AST::StructField::create_error (); location_t locus = lexer.peek_token ()->get_locus (); @@ -4578,7 +4562,8 @@ Parser::parse_struct_field () } return AST::StructField (std::move (field_name), std::move 
(field_type), - std::move (vis), locus, std::move (outer_attrs)); + std::move (vis.value ()), locus, + std::move (outer_attrs)); } // Parses tuple fields in tuple/tuple struct declarations. @@ -4647,7 +4632,9 @@ Parser::parse_tuple_field () AST::AttrVec outer_attrs = parse_outer_attributes (); // parse visibility if it exists - AST::Visibility vis = parse_visibility (); + auto visibility = parse_visibility (); + if (!visibility) + return AST::TupleField::create_error (); location_t locus = lexer.peek_token ()->get_locus (); @@ -4664,7 +4651,8 @@ Parser::parse_tuple_field () return AST::TupleField::create_error (); } - return AST::TupleField (std::move (field_type), std::move (vis), locus, + return AST::TupleField (std::move (field_type), + std::move (visibility.value ()), locus, std::move (outer_attrs)); } @@ -4802,7 +4790,10 @@ Parser::parse_enum_item () AST::AttrVec outer_attrs = parse_outer_attributes (); // parse visibility, which may or may not exist - AST::Visibility vis = parse_visibility (); + auto vis_res = parse_visibility (); + if (!vis_res) + return nullptr; + auto vis = vis_res.value (); // parse name for enum item, which is required const_TokenPtr item_name_tok = lexer.peek_token (); @@ -5163,7 +5154,11 @@ Parser::parse_trait_item () // parse outer attributes (if they exist) AST::AttrVec outer_attrs = parse_outer_attributes (); - AST::Visibility vis = parse_visibility (); + auto vis_res = parse_visibility (); + if (!vis_res) + return nullptr; + + auto vis = vis_res.value (); // lookahead to determine what type of trait item to parse const_TokenPtr tok = lexer.peek_token (); @@ -5523,7 +5518,10 @@ Parser::parse_inherent_impl_item () { // visibility, so not a macro invocation semi - must be constant, // function, or method - AST::Visibility vis = parse_visibility (); + auto vis_res = parse_visibility (); + if (!vis_res) + return nullptr; + auto vis = vis_res.value (); // TODO: is a recursive call to parse_inherent_impl_item better? 
switch (lexer.peek_token ()->get_id ()) @@ -5727,9 +5725,10 @@ Parser::parse_trait_impl_item () // parse outer attributes (if they exist) AST::AttrVec outer_attrs = parse_outer_attributes (); - auto visibility = AST::Visibility::create_private (); - if (lexer.peek_token ()->get_id () == PUB) - visibility = parse_visibility (); + auto vis_res = parse_visibility (); + if (!vis_res) + return nullptr; + auto visibility = vis_res.value (); // branch on next token: const_TokenPtr t = lexer.peek_token (); @@ -6029,7 +6028,10 @@ Parser::parse_external_item () location_t locus = lexer.peek_token ()->get_locus (); // parse optional visibility - AST::Visibility vis = parse_visibility (); + auto vis_res = parse_visibility (); + if (!vis_res) + return nullptr; + auto vis = vis_res.value (); const_TokenPtr t = lexer.peek_token (); switch (t->get_id ()) @@ -6562,12 +6564,13 @@ Parser::parse_type_path_segment () { location_t locus = lexer.peek_token ()->get_locus (); // parse ident segment part - AST::PathIdentSegment ident_segment = parse_path_ident_segment (); - if (ident_segment.is_error ()) + auto ident_segment_res = parse_path_ident_segment (); + if (!ident_segment_res) { // not necessarily an error return nullptr; } + auto ident_segment = ident_segment_res.value (); /* lookahead to determine if variants exist - only consume scope resolution * then */ @@ -6747,12 +6750,13 @@ Parser::parse_path_expr_segment () { location_t locus = lexer.peek_token ()->get_locus (); // parse ident segment - AST::PathIdentSegment ident = parse_path_ident_segment (); - if (ident.is_error ()) + auto ident_result = parse_path_ident_segment (); + if (!ident_result) { // not necessarily an error? 
return AST::PathExprSegment::create_error (); } + auto ident = ident_result.value (); // parse generic args (and turbofish), if they exist /* use lookahead to determine if they actually exist (don't want to @@ -9188,11 +9192,13 @@ Parser::parse_type (bool save_errors) lexer.skip_token (); - AST::DelimTokenTree tok_tree = parse_delim_token_tree (); + auto tok_tree = parse_delim_token_tree (); + if (!tok_tree) + return nullptr; return AST::MacroInvocation::Regular ( AST::MacroInvocData (std::move (macro_path), - std::move (tok_tree)), + std::move (tok_tree.value ())), {}, locus); } case PLUS: @@ -10053,11 +10059,13 @@ Parser::parse_type_no_bounds () lexer.skip_token (); - AST::DelimTokenTree tok_tree = parse_delim_token_tree (); + auto tok_tree = parse_delim_token_tree (); + if (!tok_tree) + return nullptr; return AST::MacroInvocation::Regular ( AST::MacroInvocData (std::move (macro_path), - std::move (tok_tree)), + std::move (tok_tree.value ())), {}, locus); } default: @@ -14271,14 +14279,17 @@ Parser::parse_macro_invocation_partial ( return nullptr; } - AST::DelimTokenTree tok_tree = parse_delim_token_tree (); + auto tok_tree = parse_delim_token_tree (); + if (!tok_tree) + return nullptr; rust_debug ("successfully parsed macro invocation (via partial)"); location_t macro_locus = converted_path.get_locus (); return AST::MacroInvocation::Regular ( - AST::MacroInvocData (std::move (converted_path), std::move (tok_tree)), + AST::MacroInvocData (std::move (converted_path), + std::move (tok_tree.value ())), std::move (outer_attrs), macro_locus); } diff --git a/gcc/rust/parse/rust-parse-utils.h b/gcc/rust/parse/rust-parse-utils.h new file mode 100644 index 00000000000..1791f6e839f --- /dev/null +++ b/gcc/rust/parse/rust-parse-utils.h @@ -0,0 +1,73 @@ + +// Copyright (C) 2025 Free Software Foundation, Inc. + +// This file is part of GCC. 
+ +// GCC is free software; you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free +// Software Foundation; either version 3, or (at your option) any later +// version. + +// GCC is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. + +// You should have received a copy of the GNU General Public License +// along with GCC; see the file COPYING3. If not see +// . + +#ifndef RUST_PARSE_UTILS_H +#define RUST_PARSE_UTILS_H + +#include "rust-ast.h" + +namespace Rust { +namespace Parse { +/* Utility structure to return members of an attribute body, was initially a + * tuple but tuples are ugly*/ +struct AttributeBody +{ + AST::SimplePath path; + std::unique_ptr input; + location_t locus; +}; + +namespace Utils { + +/* Returns true if the token id matches the delimiter type. Note that this only + * operates for END delimiter tokens. */ +inline bool +token_id_matches_delims (TokenId token_id, AST::DelimType delim_type) +{ + return ((token_id == RIGHT_PAREN && delim_type == AST::PARENS) + || (token_id == RIGHT_SQUARE && delim_type == AST::SQUARE) + || (token_id == RIGHT_CURLY && delim_type == AST::CURLY)); +} + +/* Determines whether token is a valid simple path segment. This does not + * include scope resolution operators. 
*/ +inline bool +is_simple_path_segment (TokenId id) +{ + switch (id) + { + case IDENTIFIER: + case SUPER: + case SELF: + case CRATE: + return true; + case DOLLAR_SIGN: + // assume that dollar sign leads to $crate + return true; + default: + return false; + } +} + +} // namespace Utils + +} // namespace Parse +} // namespace Rust + +#endif /* !RUST_PARSE_UTILS_H */ diff --git a/gcc/rust/parse/rust-parse.h b/gcc/rust/parse/rust-parse.h index 7b4046365dd..19b280af965 100644 --- a/gcc/rust/parse/rust-parse.h +++ b/gcc/rust/parse/rust-parse.h @@ -22,6 +22,8 @@ along with GCC; see the file COPYING3. If not see #include "rust-lex.h" #include "rust-ast-full.h" #include "rust-diagnostics.h" +#include "rust-parse-error.h" +#include "rust-parse-utils.h" #include "expected.h" @@ -180,7 +182,8 @@ template class Parser location_t loc = UNKNOWN_LOCATION); bool is_macro_rules_def (const_TokenPtr t); - std::unique_ptr parse_item (bool called_from_statement); + tl::expected, Parse::Error::Item> + parse_item (bool called_from_statement); std::unique_ptr parse_pattern (); std::unique_ptr parse_pattern_no_alt (); @@ -202,11 +205,14 @@ template class Parser std::unique_ptr parse_trait_impl_item (); AST::PathInExpression parse_path_in_expression (); std::vector> parse_lifetime_params (); - AST::Visibility parse_visibility (); + tl::expected parse_visibility (); std::unique_ptr parse_identifier_pattern (); - std::unique_ptr parse_identifier_or_keyword_token (); - std::unique_ptr parse_token_tree (); - std::tuple, location_t> + tl::expected, Parse::Error::Token> + parse_identifier_or_keyword_token (); + tl::expected, Parse::Error::TokenTree> + parse_token_tree (); + + tl::expected parse_attribute_body (); AST::AttrVec parse_inner_attributes (); std::unique_ptr @@ -232,18 +238,23 @@ template class Parser void parse_statement_seq (bool (Parser::*done) ()); // AST-related stuff - maybe move or something? 
- AST::Attribute parse_inner_attribute (); - AST::Attribute parse_outer_attribute (); - std::unique_ptr parse_attr_input (); + tl::expected + parse_inner_attribute (); + tl::expected + parse_outer_attribute (); + tl::expected, Parse::Error::AttrInput> + parse_attr_input (); std::tuple, location_t> parse_doc_comment (); // Path-related - AST::SimplePath parse_simple_path (); - AST::SimplePathSegment parse_simple_path_segment (int base_peek = 0); + tl::expected parse_simple_path (); + tl::expected + parse_simple_path_segment (int base_peek = 0); AST::TypePath parse_type_path (); std::unique_ptr parse_type_path_segment (); - AST::PathIdentSegment parse_path_ident_segment (); + tl::expected + parse_path_ident_segment (); tl::optional parse_generic_arg (); AST::GenericArgs parse_path_generic_args (); AST::GenericArgsBinding parse_generic_args_binding (); @@ -260,7 +271,8 @@ template class Parser AST::QualifiedPathInType parse_qualified_path_in_type (); // Token tree or macro related - AST::DelimTokenTree parse_delim_token_tree (); + tl::expected + parse_delim_token_tree (); std::unique_ptr parse_macro_rules_def (AST::AttrVec outer_attrs); std::unique_ptr @@ -742,7 +754,8 @@ template class Parser // Parse items without parsing an entire crate. This function is the main // parsing loop of AST::Crate::parse_crate(). - std::vector> parse_items (); + tl::expected>, Parse::Error::Items> + parse_items (); // Main entry point for parser. 
std::unique_ptr parse_crate (); diff --git a/gcc/rust/rust-session-manager.cc b/gcc/rust/rust-session-manager.cc index 64181184683..0454c986194 100644 --- a/gcc/rust/rust-session-manager.cc +++ b/gcc/rust/rust-session-manager.cc @@ -873,7 +873,7 @@ Session::injection (AST::Crate &crate) // create "extern crate" item with the name std::unique_ptr extern_crate ( - new AST::ExternCrate (*it, AST::Visibility::create_error (), + new AST::ExternCrate (*it, AST::Visibility::create_private (), {std::move (attr)}, UNKNOWN_LOCATION)); // insert at beginning