Skip to content

Commit b0dcdb7

Browse files
committed
emmit value and type name when failing to deserialize json
1 parent afa7022 commit b0dcdb7

File tree

5 files changed

+91
-19
lines changed

5 files changed

+91
-19
lines changed

cpp/src/arrow/extension/fixed_shape_tensor.cc

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,8 @@ Result<std::shared_ptr<DataType>> FixedShapeTensorType::Deserialize(
122122
std::vector<int64_t> shape;
123123
for (const auto& x : document["shape"].GetArray()) {
124124
if (!x.IsInt64()) {
125-
return Status::Invalid("shape must contain integers");
125+
return Status::Invalid("shape must contain integers, got ",
126+
internal::JsonTypeName(x));
126127
}
127128
shape.emplace_back(x.GetInt64());
128129
}
@@ -131,11 +132,13 @@ Result<std::shared_ptr<DataType>> FixedShapeTensorType::Deserialize(
131132
if (document.HasMember("permutation")) {
132133
const auto& json_permutation = document["permutation"];
133134
if (!json_permutation.IsArray()) {
134-
return Status::Invalid("permutation must be an array");
135+
return Status::Invalid("permutation must be an array, got ",
136+
internal::JsonTypeName(json_permutation));
135137
}
136138
for (const auto& x : json_permutation.GetArray()) {
137139
if (!x.IsInt64()) {
138-
return Status::Invalid("permutation must contain integers");
140+
return Status::Invalid("permutation must contain integers, got ",
141+
internal::JsonTypeName(x));
139142
}
140143
permutation.emplace_back(x.GetInt64());
141144
}
@@ -148,11 +151,13 @@ Result<std::shared_ptr<DataType>> FixedShapeTensorType::Deserialize(
148151
if (document.HasMember("dim_names")) {
149152
const auto& json_dim_names = document["dim_names"];
150153
if (!json_dim_names.IsArray()) {
151-
return Status::Invalid("dim_names must be an array");
154+
return Status::Invalid("dim_names must be an array, got ",
155+
internal::JsonTypeName(json_dim_names));
152156
}
153157
for (const auto& x : json_dim_names.GetArray()) {
154158
if (!x.IsString()) {
155-
return Status::Invalid("dim_names must contain strings");
159+
return Status::Invalid("dim_names must contain strings, got ",
160+
internal::JsonTypeName(x));
156161
}
157162
dim_names.emplace_back(x.GetString());
158163
}

cpp/src/arrow/extension/tensor_extension_array_test.cc

Lines changed: 48 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -220,13 +220,18 @@ TEST_F(TestFixedShapeTensorType, MetadataSerializationRoundtrip) {
220220
R"({"shape":[3],"dim_names":["x","y"]})",
221221
"Invalid dim_names");
222222

223-
// Validate shape values must be integers
223+
// Validate shape values must be integers. Error message should include the
224+
// JSON type name of the offending value.
224225
CheckDeserializationRaises(ext_type_, storage_type, R"({"shape":[3.5,4]})",
225-
"shape must contain integers");
226+
"shape must contain integers, got Number");
226227
CheckDeserializationRaises(ext_type_, storage_type, R"({"shape":["3","4"]})",
227-
"shape must contain integers");
228+
"shape must contain integers, got String");
228229
CheckDeserializationRaises(ext_type_, storage_type, R"({"shape":[null]})",
229-
"shape must contain integers");
230+
"shape must contain integers, got Null");
231+
CheckDeserializationRaises(ext_type_, storage_type, R"({"shape":[true]})",
232+
"shape must contain integers, got True");
233+
CheckDeserializationRaises(ext_type_, storage_type, R"({"shape":[false]})",
234+
"shape must contain integers, got False");
230235

231236
// Validate shape values must be non-negative
232237
CheckDeserializationRaises(ext_type_, fixed_size_list(int64(), 1), R"({"shape":[-1]})",
@@ -239,10 +244,16 @@ TEST_F(TestFixedShapeTensorType, MetadataSerializationRoundtrip) {
239244
// Validate permutation member must be an array with integer values
240245
CheckDeserializationRaises(ext_type_, storage_type,
241246
R"({"shape":[3,4],"permutation":"invalid"})",
242-
"permutation must be an array");
247+
"permutation must be an array, got String");
248+
CheckDeserializationRaises(ext_type_, storage_type,
249+
R"({"shape":[3,4],"permutation":{"a":1}})",
250+
"permutation must be an array, got Object");
243251
CheckDeserializationRaises(ext_type_, storage_type,
244252
R"({"shape":[3,4],"permutation":[1.5,0.5]})",
245-
"permutation must contain integers");
253+
"permutation must contain integers, got Number");
254+
CheckDeserializationRaises(ext_type_, storage_type,
255+
R"({"shape":[3,4],"permutation":["a","b"]})",
256+
"permutation must contain integers, got String");
246257

247258
// Validate permutation values must be unique integers in [0, N-1]
248259
CheckDeserializationRaises(ext_type_, storage_type,
@@ -258,10 +269,13 @@ TEST_F(TestFixedShapeTensorType, MetadataSerializationRoundtrip) {
258269
// Validate dim_names member must be an array with string values
259270
CheckDeserializationRaises(ext_type_, storage_type,
260271
R"({"shape":[3,4],"dim_names":"invalid"})",
261-
"dim_names must be an array");
272+
"dim_names must be an array, got String");
262273
CheckDeserializationRaises(ext_type_, storage_type,
263274
R"({"shape":[3,4],"dim_names":[1,2]})",
264-
"dim_names must contain strings");
275+
"dim_names must contain strings, got Number");
276+
CheckDeserializationRaises(ext_type_, storage_type,
277+
R"({"shape":[3,4],"dim_names":[null,null]})",
278+
"dim_names must contain strings, got Null");
265279
}
266280

267281
TEST_F(TestFixedShapeTensorType, MakeValidatesShape) {
@@ -847,6 +861,32 @@ TEST_F(TestVariableShapeTensorType, MetadataSerializationRoundtrip) {
847861
"Invalid: permutation");
848862
CheckDeserializationRaises(ext_type_, storage_type, R"({"dim_names":["x","y"]})",
849863
"Invalid: dim_names");
864+
865+
// Validate permutation member must be an array with integer values. Error
866+
// message should include the JSON type name of the offending value.
867+
CheckDeserializationRaises(ext_type_, storage_type, R"({"permutation":"invalid"})",
868+
"permutation must be an array, got String");
869+
CheckDeserializationRaises(ext_type_, storage_type, R"({"permutation":[1.5,0.5,2.5]})",
870+
"permutation must contain integers, got Number");
871+
CheckDeserializationRaises(ext_type_, storage_type,
872+
R"({"permutation":[null,null,null]})",
873+
"permutation must contain integers, got Null");
874+
875+
// Validate dim_names member must be an array with string values
876+
CheckDeserializationRaises(ext_type_, storage_type, R"({"dim_names":"invalid"})",
877+
"dim_names must be an array, got String");
878+
CheckDeserializationRaises(ext_type_, storage_type, R"({"dim_names":[1,2,3]})",
879+
"dim_names must contain strings, got Number");
880+
881+
// Validate uniform_shape member must be an array with integer-or-null values
882+
CheckDeserializationRaises(ext_type_, storage_type, R"({"uniform_shape":"invalid"})",
883+
"uniform_shape must be an array, got String");
884+
CheckDeserializationRaises(ext_type_, storage_type,
885+
R"({"uniform_shape":[1.5,null,null]})",
886+
"uniform_shape must contain integers or nulls, got Number");
887+
CheckDeserializationRaises(ext_type_, storage_type,
888+
R"({"uniform_shape":["x",null,null]})",
889+
"uniform_shape must contain integers or nulls, got String");
850890
}
851891

852892
TEST_F(TestVariableShapeTensorType, RoundtripBatch) {

cpp/src/arrow/extension/tensor_internal.cc

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,20 @@
3030

3131
namespace arrow::internal {
3232

33+
namespace {
34+
35+
// Names indexed by rapidjson::Type enum value:
36+
// kNullType=0, kFalseType=1, kTrueType=2, kObjectType=3,
37+
// kArrayType=4, kStringType=5, kNumberType=6.
38+
constexpr const char* kJsonTypeNames[] = {"Null", "False", "True", "Object",
39+
"Array", "String", "Number"};
40+
41+
} // namespace
42+
43+
const char* JsonTypeName(const ::arrow::rapidjson::Value& v) {
44+
return kJsonTypeNames[v.GetType()];
45+
}
46+
3347
Result<int64_t> ComputeShapeProduct(std::span<const int64_t> shape) {
3448
int64_t product = 1;
3549
for (const auto dim : shape) {

cpp/src/arrow/extension/tensor_internal.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,18 @@
2121
#include <span>
2222
#include <vector>
2323

24+
#include "arrow/json/rapidjson_defs.h" // IWYU pragma: keep
2425
#include "arrow/result.h"
2526
#include "arrow/type_fwd.h"
2627

28+
#include <rapidjson/document.h>
29+
2730
namespace arrow::internal {
2831

32+
/// \brief Return the name of a RapidJSON value's type (e.g., "Null", "Array", "Number").
33+
ARROW_EXPORT
34+
const char* JsonTypeName(const ::arrow::rapidjson::Value& v);
35+
2936
/// \brief Compute the product of the given shape dimensions.
3037
///
3138
/// Returns Status::Invalid if the product would overflow int64_t.

cpp/src/arrow/extension/variable_shape_tensor.cc

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -159,12 +159,14 @@ Result<std::shared_ptr<DataType>> VariableShapeTensorType::Deserialize(
159159
if (document.HasMember("permutation")) {
160160
const auto& json_permutation = document["permutation"];
161161
if (!json_permutation.IsArray()) {
162-
return Status::Invalid("permutation must be an array");
162+
return Status::Invalid("permutation must be an array, got ",
163+
internal::JsonTypeName(json_permutation));
163164
}
164165
permutation.reserve(ndim);
165166
for (const auto& x : json_permutation.GetArray()) {
166167
if (!x.IsInt64()) {
167-
return Status::Invalid("permutation must contain integers");
168+
return Status::Invalid("permutation must contain integers, got ",
169+
internal::JsonTypeName(x));
168170
}
169171
permutation.emplace_back(x.GetInt64());
170172
}
@@ -174,12 +176,14 @@ Result<std::shared_ptr<DataType>> VariableShapeTensorType::Deserialize(
174176
if (document.HasMember("dim_names")) {
175177
const auto& json_dim_names = document["dim_names"];
176178
if (!json_dim_names.IsArray()) {
177-
return Status::Invalid("dim_names must be an array");
179+
return Status::Invalid("dim_names must be an array, got ",
180+
internal::JsonTypeName(json_dim_names));
178181
}
179182
dim_names.reserve(ndim);
180183
for (const auto& x : json_dim_names.GetArray()) {
181184
if (!x.IsString()) {
182-
return Status::Invalid("dim_names must contain strings");
185+
return Status::Invalid("dim_names must contain strings, got ",
186+
internal::JsonTypeName(x));
183187
}
184188
dim_names.emplace_back(x.GetString());
185189
}
@@ -189,7 +193,8 @@ Result<std::shared_ptr<DataType>> VariableShapeTensorType::Deserialize(
189193
if (document.HasMember("uniform_shape")) {
190194
const auto& json_uniform_shape = document["uniform_shape"];
191195
if (!json_uniform_shape.IsArray()) {
192-
return Status::Invalid("uniform_shape must be an array");
196+
return Status::Invalid("uniform_shape must be an array, got ",
197+
internal::JsonTypeName(json_uniform_shape));
193198
}
194199
uniform_shape.reserve(ndim);
195200
for (const auto& x : json_uniform_shape.GetArray()) {
@@ -198,7 +203,8 @@ Result<std::shared_ptr<DataType>> VariableShapeTensorType::Deserialize(
198203
} else if (x.IsInt64()) {
199204
uniform_shape.emplace_back(x.GetInt64());
200205
} else {
201-
return Status::Invalid("uniform_shape must contain integers or nulls");
206+
return Status::Invalid("uniform_shape must contain integers or nulls, got ",
207+
internal::JsonTypeName(x));
202208
}
203209
}
204210
}

0 commit comments

Comments
 (0)