Skip to content

Returning error_handler_result::fail on expectation failure results in false continuation on the surrounding grammar #833

@Xeverous

Description

@Xeverous

This is the bug I was writing about in other issue. It is very bizarre. The reproduction is somewhat large but I'm happy I managed to even recreate it after hours spent of pulling code out of my project without success.

(details at the bottom)

#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <boost/spirit/home/x3/support/ast/position_tagged.hpp>
#include <boost/spirit/home/x3/support/utility/lambda_visitor.hpp>
#include <boost/fusion/include/adapt_struct.hpp>

#include <iostream>
#include <string>
#include <vector>

namespace x3 = boost::spirit::x3;

namespace ast
{
	struct identifier : x3::position_tagged
	{
		identifier& operator=(std::string str)
		{
			value = std::move(str);
			return *this;
		}

		const std::string& get_value() const { return value; }

		std::string value;
	};

	struct string_literal : std::string, x3::position_tagged
	{
		const std::string& get_value() const { return *this; }
	};

	struct integer_literal : x3::position_tagged
	{
		integer_literal& operator=(int n)
		{
			value = n;
			return *this;
		}

		int get_value() const { return value; }

		int value;
	};

	struct floating_point_literal : x3::position_tagged
	{
		floating_point_literal& operator=(double n)
		{
			value = n;
			return *this;
		}

		double get_value() const { return value; }

		double value;
	};

	struct literal_expression : x3::variant<floating_point_literal, integer_literal, string_literal>, x3::position_tagged
	{
		using base_type::base_type;
		using base_type::operator=;
	};

	struct sequence : std::vector<literal_expression>, x3::position_tagged {};

	struct constant_definition : x3::position_tagged
	{
		identifier name;
		sequence seq;
	};
} // namespace ast

BOOST_FUSION_ADAPT_STRUCT(ast::constant_definition, name, seq)

using iterator_type = const char*;

struct parse_error
{
	iterator_type error_place;
	iterator_type backtracking_place;
	std::string what_was_expected;
};

using error_holder_type = std::vector<parse_error>;
using position_cache_type = x3::position_cache<std::vector<iterator_type>>;

struct error_holder_tag;
struct position_cache_tag;

struct annotate_on_success
{
	// template <typename Iterator, typename Context, typename... Types>
	// void on_success(
	// 	const Iterator& first,
	// 	const Iterator& last,
	// 	x3::variant<Types...>& ast,
	// 	const Context& context)
	// {
	// 	ast.apply_visitor(x3::make_lambda_visitor<void>([&](auto& node)
	// 	{
	// 		this->on_success(first, last, node, context);
	// 	}));
	// }

	// template <typename T, typename Iterator, typename Context>
	// void on_success(
	// 	const Iterator& first,
	// 	const Iterator& last,
	// 	T& ast,
	// 	const Context& context)
	// {
	// 	position_cache_type& positions = x3::get<position_cache_tag>(context).get();
	// 	positions.annotate(ast, first, last);
	// }
};

struct error_on_error
{
	template <typename Iterator, typename Exception, typename Context>
	x3::error_handler_result on_error(
		Iterator& first,
		const Iterator& /* last */,
		const Exception& ex, // x3::expectation_failure<Iterator> or similar type
		const Context& context)
	{
		error_holder_type& error_holder = x3::get<error_holder_tag>(context).get();
		error_holder.push_back(parse_error{ex.where(), first, ex.which()});
		return x3::error_handler_result::fail;
	}
};


//struct identifier_class                      : error_on_error, annotate_on_success {};
//struct integer_literal_class                 : error_on_error, annotate_on_success {};
//struct floating_point_literal_class          : error_on_error, annotate_on_success {};
struct string_literal_class                  : error_on_error, annotate_on_success {};
//struct literal_expression_class              : error_on_error, annotate_on_success {};
//struct sequence_class                        : error_on_error, annotate_on_success {};
//struct constant_definition_class             : error_on_error, annotate_on_success {};

struct identifier_class                      {};
struct integer_literal_class                 {};
struct floating_point_literal_class          {};
//struct string_literal_class                  {};
struct literal_expression_class              {};
struct sequence_class                        {};
struct constant_definition_class             {};

using floating_point_literal_type = x3::rule<floating_point_literal_class, ast::floating_point_literal>;
BOOST_SPIRIT_DECLARE(floating_point_literal_type)

using integer_literal_type = x3::rule<integer_literal_class, ast::integer_literal>;
BOOST_SPIRIT_DECLARE(integer_literal_type)

using string_literal_type = x3::rule<string_literal_class, ast::string_literal>;
BOOST_SPIRIT_DECLARE(string_literal_type)

using literal_expression_type = x3::rule<literal_expression_class, ast::literal_expression>;
BOOST_SPIRIT_DECLARE(literal_expression_type)

using sequence_type = x3::rule<sequence_class, ast::sequence>;
BOOST_SPIRIT_DECLARE(sequence_type)

using identifier_impl_type = x3::rule<struct identifier_impl_class, std::string>;
BOOST_SPIRIT_DECLARE(identifier_impl_type)
using identifier_type = x3::rule<identifier_class, ast::identifier>;
BOOST_SPIRIT_DECLARE(identifier_type)

using constant_definition_type = x3::rule<constant_definition_class, ast::constant_definition>;
BOOST_SPIRIT_DECLARE(constant_definition_type)

const floating_point_literal_type floating_point_literal = "number (fractional)";
const auto floating_point_literal_def = x3::real_parser<double, x3::strict_real_policies<double>>{};
BOOST_SPIRIT_DEFINE(floating_point_literal)

const integer_literal_type integer_literal = "number (integer)";
const auto integer_literal_def = x3::int_;
BOOST_SPIRIT_DEFINE(integer_literal)

const string_literal_type string_literal = "string";
const auto string_literal_def = x3::lexeme['"' > *(x3::char_ - (x3::lit('"') | '\n' | '\r')) > '"'];
BOOST_SPIRIT_DEFINE(string_literal)

const literal_expression_type literal_expression = "literal";
const auto literal_expression_def = floating_point_literal | integer_literal | string_literal;
BOOST_SPIRIT_DEFINE(literal_expression)

const identifier_impl_type identifier_impl = "identifier";
const auto identifier_impl_def = x3::lexeme[(x3::alpha | x3::char_('_')) > *(x3::alnum | x3::char_('_'))];
BOOST_SPIRIT_DEFINE(identifier_impl)

const identifier_type identifier = identifier_impl.name;
const auto identifier_def = identifier_impl;
BOOST_SPIRIT_DEFINE(identifier)

const sequence_type sequence = "list of values";
const auto sequence_def = x3::skip(x3::space - x3::eol)[*literal_expression > (x3::eol | &x3::eoi)];
BOOST_SPIRIT_DEFINE(sequence)

const constant_definition_type constant_definition = "constant definiton";
const auto constant_definition_def = (identifier >> '=') > sequence;
BOOST_SPIRIT_DEFINE(constant_definition)

int main()
{
	std::string input =
		"abc = 123 \"abc\n" // intentionally bad input: missing "
		"xyz = 456 \"xyz\"";

	auto first = input.data();
	const auto last = input.data() + input.size();

	position_cache_type position_cache(first, last);
	error_holder_type error_holder;
	const auto parser = x3::with<position_cache_tag>(std::ref(position_cache))
	[
		x3::with<error_holder_tag>(std::ref(error_holder))[*constant_definition > x3::eoi]
	];

	std::vector<ast::constant_definition> ast;
	if (!x3::phrase_parse(first, last, parser, x3::space, ast)) {
		std::cout << "parse failed\n";
		return 1;
	}

	if (first != last) {
		std::cout << last - first << " characters left\n";
		return 1;
	}

	std::cout << "success:\n";

	for (const auto& cd : ast) {
		std::cout << cd.name.value << " = {\n";

		for (const auto& val : cd.seq) {
			std::cout << "    literal = ";
			val.apply_visitor(x3::make_lambda_visitor<void>([](const auto& val){ std::cout << val.get_value(); }));
			std::cout << "\n";
		}

		std::cout << "}\n";
	}
}

The code implements a simplified assignment/definition syntax from a more complex human-readable file format from my project. The basic idea of the syntax is to allow assigning multiple values to a single identifier, like this (no , and no ; and line breaks matter):

abc = 123 "abc"
xyz = 456 "xyz"

The code above has a typo in the input and I expect it to result in x3::expectation_failure. It throws this error correctly on Boost 1.76 but accepts the input in later versions.

$ git bisect bad
2db3fde0d02b9a48877147c595289934525882a0 is the first bad commit
commit 2db3fde0d02b9a48877147c595289934525882a0
Author: Nikita Kniazev <nok.raven@gmail.com>
Date:   Tue Jun 1 21:56:22 2021 +0300

    X3: Remove forced iterator rollback in rule
    
    Removes inconsistency when a parser part is factorized into a rule.
    
    Fixes regression introduced in #670.

 example/x3/rexpr/rexpr_full/test/parse_rexpr_test.cpp    |  6 ++++--
 example/x3/rexpr/rexpr_full/test/testing.hpp             |  4 ++--
 include/boost/spirit/home/x3/nonterminal/detail/rule.hpp | 10 +++-------
 test/x3/error_handler.cpp                                |  7 +++++--
 4 files changed, 14 insertions(+), 13 deletions(-)

When it doesn't error, it prints the following:

success:
abc = {
    literal = 123
}
xyz = {
    literal = 456
    literal = xyz
}

Somehow it skips over entire "abc without reporting any *_literal object or raising any parse error/exception.

What's even more bizarre, if you (un)comment which rules get error_on_error and annotate_on_success, e.g. by making all tag types empty, then the bug disappears. Somehow adding error_on_error to string_literal_class causes it to accept invalid input, even though the handler returns x3::error_handler_result::fail.

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions