-
Notifications
You must be signed in to change notification settings - Fork 165
Description
This is the bug I was writing about in other issue. It is very bizarre. The reproduction is somewhat large but I'm happy I managed to even recreate it after hours spent of pulling code out of my project without success.
(details at the bottom)
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <boost/spirit/home/x3/support/ast/position_tagged.hpp>
#include <boost/spirit/home/x3/support/utility/lambda_visitor.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <iostream>
#include <string>
#include <vector>
namespace x3 = boost::spirit::x3;
namespace ast
{
struct identifier : x3::position_tagged
{
identifier& operator=(std::string str)
{
value = std::move(str);
return *this;
}
const std::string& get_value() const { return value; }
std::string value;
};
struct string_literal : std::string, x3::position_tagged
{
const std::string& get_value() const { return *this; }
};
struct integer_literal : x3::position_tagged
{
integer_literal& operator=(int n)
{
value = n;
return *this;
}
int get_value() const { return value; }
int value;
};
struct floating_point_literal : x3::position_tagged
{
floating_point_literal& operator=(double n)
{
value = n;
return *this;
}
double get_value() const { return value; }
double value;
};
struct literal_expression : x3::variant<floating_point_literal, integer_literal, string_literal>, x3::position_tagged
{
using base_type::base_type;
using base_type::operator=;
};
struct sequence : std::vector<literal_expression>, x3::position_tagged {};
struct constant_definition : x3::position_tagged
{
identifier name;
sequence seq;
};
} // namespace ast
BOOST_FUSION_ADAPT_STRUCT(ast::constant_definition, name, seq)
using iterator_type = const char*;
struct parse_error
{
iterator_type error_place;
iterator_type backtracking_place;
std::string what_was_expected;
};
using error_holder_type = std::vector<parse_error>;
using position_cache_type = x3::position_cache<std::vector<iterator_type>>;
struct error_holder_tag;
struct position_cache_tag;
struct annotate_on_success
{
// template <typename Iterator, typename Context, typename... Types>
// void on_success(
// const Iterator& first,
// const Iterator& last,
// x3::variant<Types...>& ast,
// const Context& context)
// {
// ast.apply_visitor(x3::make_lambda_visitor<void>([&](auto& node)
// {
// this->on_success(first, last, node, context);
// }));
// }
// template <typename T, typename Iterator, typename Context>
// void on_success(
// const Iterator& first,
// const Iterator& last,
// T& ast,
// const Context& context)
// {
// position_cache_type& positions = x3::get<position_cache_tag>(context).get();
// positions.annotate(ast, first, last);
// }
};
struct error_on_error
{
template <typename Iterator, typename Exception, typename Context>
x3::error_handler_result on_error(
Iterator& first,
const Iterator& /* last */,
const Exception& ex, // x3::expectation_failure<Iterator> or similar type
const Context& context)
{
error_holder_type& error_holder = x3::get<error_holder_tag>(context).get();
error_holder.push_back(parse_error{ex.where(), first, ex.which()});
return x3::error_handler_result::fail;
}
};
//struct identifier_class : error_on_error, annotate_on_success {};
//struct integer_literal_class : error_on_error, annotate_on_success {};
//struct floating_point_literal_class : error_on_error, annotate_on_success {};
struct string_literal_class : error_on_error, annotate_on_success {};
//struct literal_expression_class : error_on_error, annotate_on_success {};
//struct sequence_class : error_on_error, annotate_on_success {};
//struct constant_definition_class : error_on_error, annotate_on_success {};
struct identifier_class {};
struct integer_literal_class {};
struct floating_point_literal_class {};
//struct string_literal_class {};
struct literal_expression_class {};
struct sequence_class {};
struct constant_definition_class {};
using floating_point_literal_type = x3::rule<floating_point_literal_class, ast::floating_point_literal>;
BOOST_SPIRIT_DECLARE(floating_point_literal_type)
using integer_literal_type = x3::rule<integer_literal_class, ast::integer_literal>;
BOOST_SPIRIT_DECLARE(integer_literal_type)
using string_literal_type = x3::rule<string_literal_class, ast::string_literal>;
BOOST_SPIRIT_DECLARE(string_literal_type)
using literal_expression_type = x3::rule<literal_expression_class, ast::literal_expression>;
BOOST_SPIRIT_DECLARE(literal_expression_type)
using sequence_type = x3::rule<sequence_class, ast::sequence>;
BOOST_SPIRIT_DECLARE(sequence_type)
using identifier_impl_type = x3::rule<struct identifier_impl_class, std::string>;
BOOST_SPIRIT_DECLARE(identifier_impl_type)
using identifier_type = x3::rule<identifier_class, ast::identifier>;
BOOST_SPIRIT_DECLARE(identifier_type)
using constant_definition_type = x3::rule<constant_definition_class, ast::constant_definition>;
BOOST_SPIRIT_DECLARE(constant_definition_type)
const floating_point_literal_type floating_point_literal = "number (fractional)";
const auto floating_point_literal_def = x3::real_parser<double, x3::strict_real_policies<double>>{};
BOOST_SPIRIT_DEFINE(floating_point_literal)
const integer_literal_type integer_literal = "number (integer)";
const auto integer_literal_def = x3::int_;
BOOST_SPIRIT_DEFINE(integer_literal)
const string_literal_type string_literal = "string";
const auto string_literal_def = x3::lexeme['"' > *(x3::char_ - (x3::lit('"') | '\n' | '\r')) > '"'];
BOOST_SPIRIT_DEFINE(string_literal)
const literal_expression_type literal_expression = "literal";
const auto literal_expression_def = floating_point_literal | integer_literal | string_literal;
BOOST_SPIRIT_DEFINE(literal_expression)
const identifier_impl_type identifier_impl = "identifier";
const auto identifier_impl_def = x3::lexeme[(x3::alpha | x3::char_('_')) > *(x3::alnum | x3::char_('_'))];
BOOST_SPIRIT_DEFINE(identifier_impl)
const identifier_type identifier = identifier_impl.name;
const auto identifier_def = identifier_impl;
BOOST_SPIRIT_DEFINE(identifier)
const sequence_type sequence = "list of values";
const auto sequence_def = x3::skip(x3::space - x3::eol)[*literal_expression > (x3::eol | &x3::eoi)];
BOOST_SPIRIT_DEFINE(sequence)
const constant_definition_type constant_definition = "constant definiton";
const auto constant_definition_def = (identifier >> '=') > sequence;
BOOST_SPIRIT_DEFINE(constant_definition)
int main()
{
std::string input =
"abc = 123 \"abc\n" // intentionally bad input: missing "
"xyz = 456 \"xyz\"";
auto first = input.data();
const auto last = input.data() + input.size();
position_cache_type position_cache(first, last);
error_holder_type error_holder;
const auto parser = x3::with<position_cache_tag>(std::ref(position_cache))
[
x3::with<error_holder_tag>(std::ref(error_holder))[*constant_definition > x3::eoi]
];
std::vector<ast::constant_definition> ast;
if (!x3::phrase_parse(first, last, parser, x3::space, ast)) {
std::cout << "parse failed\n";
return 1;
}
if (first != last) {
std::cout << last - first << " characters left\n";
return 1;
}
std::cout << "success:\n";
for (const auto& cd : ast) {
std::cout << cd.name.value << " = {\n";
for (const auto& val : cd.seq) {
std::cout << " literal = ";
val.apply_visitor(x3::make_lambda_visitor<void>([](const auto& val){ std::cout << val.get_value(); }));
std::cout << "\n";
}
std::cout << "}\n";
}
}The code implements a simplified assignment/definition syntax from a more complex human-readable file format from my project. The basic idea of the syntax is to allow assigning multiple values to a single identifier, like this (no , and no ; and line breaks matter):
abc = 123 "abc"
xyz = 456 "xyz"
The code above has a typo in the input and I expect it to result in x3::expectation_failure. It throws this error correctly on Boost 1.76 but accepts the input in later versions.
$ git bisect bad
2db3fde0d02b9a48877147c595289934525882a0 is the first bad commit
commit 2db3fde0d02b9a48877147c595289934525882a0
Author: Nikita Kniazev <nok.raven@gmail.com>
Date: Tue Jun 1 21:56:22 2021 +0300
X3: Remove forced iterator rollback in rule
Removes inconsistency when a parser part is factorized into a rule.
Fixes regression introduced in #670.
example/x3/rexpr/rexpr_full/test/parse_rexpr_test.cpp | 6 ++++--
example/x3/rexpr/rexpr_full/test/testing.hpp | 4 ++--
include/boost/spirit/home/x3/nonterminal/detail/rule.hpp | 10 +++-------
test/x3/error_handler.cpp | 7 +++++--
4 files changed, 14 insertions(+), 13 deletions(-)
When it doesn't error, it prints the following:
success:
abc = {
literal = 123
}
xyz = {
literal = 456
literal = xyz
}
Somehow it skips over entire "abc without reporting any *_literal object or raising any parse error/exception.
What's even more bizarre, if you (un)comment which rules get error_on_error and annotate_on_success, e.g. by making all tag types empty, then the bug disappears. Somehow adding error_on_error to string_literal_class causes it to accept invalid input, even though the handler returns x3::error_handler_result::fail.