Skip to content

Commit 3e6c5ba

Browse files
committed
work
1 parent 6321220 commit 3e6c5ba

1 file changed

Lines changed: 14 additions & 1 deletion

File tree

cpp/src/arrow/json/reader.cc

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,14 +182,27 @@ class ChunkingTransformer {
182182
}
183183

184184
private:
185+
// Strip UTF-8 BOM from the beginning of the first buffer if present
186+
static std::shared_ptr<Buffer> StripBOM(std::shared_ptr<Buffer> buffer) {
187+
// UTF-8 BOM is 0xEF 0xBB 0xBF
188+
if (buffer && buffer->size() >= 3) {
189+
const uint8_t* data = buffer->data();
190+
if (data[0] == 0xEF && data[1] == 0xBB && data[2] == 0xBF) {
191+
return SliceBuffer(buffer, 3);
192+
}
193+
}
194+
return buffer;
195+
}
196+
185197
Result<TransformFlow<ChunkedBlock>> operator()(std::shared_ptr<Buffer> next_buffer) {
186198
if (!buffer_) {
187199
if (ARROW_PREDICT_TRUE(!next_buffer)) {
188200
DCHECK_EQ(partial_, nullptr) << "Logic error: non-null partial with null buffer";
189201
return TransformFinish();
190202
}
191203
partial_ = std::make_shared<Buffer>("");
192-
buffer_ = std::move(next_buffer);
204+
// Strip BOM from the first buffer
205+
buffer_ = StripBOM(std::move(next_buffer));
193206
return TransformSkip();
194207
}
195208
DCHECK_NE(partial_, nullptr);

0 commit comments

Comments
 (0)