Skip to content

Commit 4d67e12

Browse files
committed
🚧 WIP for #4552
1 parent 30cd44d commit 4d67e12

File tree

4 files changed

+25
-1
lines changed

4 files changed

+25
-1
lines changed

include/nlohmann/detail/output/serializer.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -529,6 +529,12 @@ class serializer
529529
// thus removing/ignoring the invalid characters
530530
bytes = bytes_after_last_accept;
531531

532+
// fix for #4552 - discussion pending
533+
if (error_handler == error_handler_t::ignore)
534+
{
535+
bytes += undumped_chars;
536+
}
537+
532538
if (error_handler == error_handler_t::replace)
533539
{
534540
// add a replacement character

single_include/nlohmann/json.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18816,6 +18816,12 @@ class serializer
1881618816
// thus removing/ignoring the invalid characters
1881718817
bytes = bytes_after_last_accept;
1881818818

18819+
// fix for #4552 - discussion pending
18820+
if (error_handler == error_handler_t::ignore)
18821+
{
18822+
bytes += undumped_chars;
18823+
}
18824+
1881918825
if (error_handler == error_handler_t::replace)
1882018826
{
1882118827
// add a replacement character

tests/src/unit-regression2.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -995,6 +995,14 @@ TEST_CASE("regression tests 2")
995995
CHECK(p.x == 1);
996996
CHECK(p.y == 2);
997997
}
998+
999+
SECTION("issue #4552 - UTF-8 invalid characters are not always ignored when dumping with error_handler_t::ignore")
1000+
{
1001+
nlohmann::json node;
1002+
node["test"] = "test\334\005";
1003+
const auto test_dump = node.dump(-1, ' ', false, nlohmann::json::error_handler_t::ignore);
1004+
CHECK(test_dump == "{\"test\":\"test\334\\u0005\"}");
1005+
}
9981006
}
9991007

10001008
DOCTEST_CLANG_SUPPRESS_WARNING_POP

tests/src/unit-serialization.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,11 @@ TEST_CASE("serialization")
107107

108108
CHECK_THROWS_WITH_AS(j.dump(), "[json.exception.type_error.316] invalid UTF-8 byte at index 5: 0x34", json::type_error&);
109109
CHECK_THROWS_AS(j.dump(1, ' ', false, json::error_handler_t::strict), json::type_error&);
110-
CHECK(j.dump(-1, ' ', false, json::error_handler_t::ignore) == "\"123456\"");
110+
111+
// see pending discussion at #4552
112+
// CHECK(j.dump(-1, ' ', false, json::error_handler_t::ignore) == "\"123456\"");
113+
CHECK(j.dump(-1, ' ', false, json::error_handler_t::ignore) == "\"123\xF1\xB0\x34\x35\x36\"");
114+
111115
CHECK(j.dump(-1, ' ', false, json::error_handler_t::replace) == "\"123\xEF\xBF\xBD\x34\x35\x36\"");
112116
CHECK(j.dump(-1, ' ', true, json::error_handler_t::replace) == "\"123\\ufffd456\"");
113117
}

0 commit comments

Comments
 (0)