@@ -552,9 +552,15 @@ class TestIpcRoundTrip : public ::testing::TestWithParam<MakeRecordBatch*>,
552552 ASSERT_OK (WriteRecordBatch (*batch, buffer_offset, mmap_.get (), &metadata_length,
553553 &body_length, options_));
554554
555- ASSERT_OK_AND_ASSIGN (std::unique_ptr<Message> message ,
555+ ASSERT_OK_AND_ASSIGN (std::unique_ptr<Message> message1 ,
556556 ReadMessage (0 , metadata_length, mmap_.get ()));
557- ASSERT_EQ (expected_version, message->metadata_version ());
557+ ASSERT_EQ (expected_version, message1->metadata_version ());
558+
559+ ASSERT_OK_AND_ASSIGN (auto message2,
560+ ReadMessage (0 , metadata_length, body_length, mmap_.get ()));
561+ ASSERT_EQ (expected_version, message2->metadata_version ());
562+
563+ ASSERT_TRUE (message1->Equals (*message2));
558564 }
559565};
560566
@@ -613,6 +619,27 @@ TEST(TestReadMessage, CorruptedSmallInput) {
613619 ASSERT_EQ (nullptr , message);
614620}
615621
622+ TEST (TestReadMessage, ReadBodyWithLength) {
623+ // Test the optimized ReadMessage(offset, meta_len, body_len, file) overload: write a record batch to an in-memory stream, read it back with the body length supplied up front, then check the message's body length and Verify().
624+ std::shared_ptr<RecordBatch> batch;
625+ ASSERT_OK (MakeIntRecordBatch (&batch));
626+
627+ ASSERT_OK_AND_ASSIGN (auto stream, io::BufferOutputStream::Create (0 ));
628+ int32_t metadata_length;
629+ int64_t body_length;
630+ ASSERT_OK (WriteRecordBatch (*batch, 0 , stream.get (), &metadata_length, &body_length,
631+ IpcWriteOptions::Defaults ()));
632+
633+ ASSERT_OK_AND_ASSIGN (auto buffer, stream->Finish ());
634+ io::BufferReader reader (buffer);
635+
636+ ASSERT_OK_AND_ASSIGN (auto message,
637+ ReadMessage (0 , metadata_length, body_length, &reader));
638+
639+ ASSERT_EQ (body_length, message->body_length ());
640+ ASSERT_TRUE (message->Verify ());
641+ }
642+
616643TEST (TestMetadata, GetMetadataVersion) {
617644 ASSERT_EQ (MetadataVersion::V1, ipc::internal::GetMetadataVersion (
618645 flatbuf::MetadataVersion::MetadataVersion_V1));
@@ -1094,7 +1121,7 @@ TEST_F(RecursionLimits, ReadLimit) {
10941121 &schema));
10951122
10961123 ASSERT_OK_AND_ASSIGN (std::unique_ptr<Message> message,
1097- ReadMessage (0 , metadata_length, mmap_.get ()));
1124+ ReadMessage (0 , metadata_length, body_length, mmap_.get ()));
10981125
10991126 io::BufferReader reader (message->body ());
11001127
@@ -1119,7 +1146,7 @@ TEST_F(RecursionLimits, StressLimit) {
11191146 &schema));
11201147
11211148 ASSERT_OK_AND_ASSIGN (std::unique_ptr<Message> message,
1122- ReadMessage (0 , metadata_length, mmap_.get ()));
1149+ ReadMessage (0 , metadata_length, body_length, mmap_.get ()));
11231150
11241151 DictionaryMemo empty_memo;
11251152
@@ -3018,25 +3045,56 @@ void GetReadRecordBatchReadRanges(
30183045
30193046 auto read_ranges = tracked->get_read_ranges ();
30203047
3021- // there are 3 read IOs before reading body:
3022- // 1) read magic and footer length IO
3023- // 2) read footer IO
3024- // 3) read record batch metadata IO
3025- EXPECT_EQ (read_ranges.size (), 3 + expected_body_read_lengths.size ());
30263048 const int32_t magic_size = static_cast <int >(strlen (ipc::internal::kArrowMagicBytes ));
30273049 // read magic and footer length IO
30283050 auto file_end_size = magic_size + sizeof (int32_t );
30293051 auto footer_length_offset = buffer->size () - file_end_size;
30303052 auto footer_length = bit_util::FromLittleEndian (
30313053 util::SafeLoadAs<int32_t >(buffer->data () + footer_length_offset));
3054+
3055+ // there are at least 2 read IOs before reading body:
3056+ // 1) read magic and footer length IO
3057+ // 2) read footer IO
3058+ EXPECT_GE (read_ranges.size (), 2 );
3059+
3060+ // read magic and footer length IO
30323061 EXPECT_EQ (read_ranges[0 ].length , file_end_size);
30333062 // read footer IO
30343063 EXPECT_EQ (read_ranges[1 ].length , footer_length);
3035- // read record batch metadata. The exact size is tricky to determine but it doesn't
3036- // matter for this test and it should be smaller than the footer.
3037- EXPECT_LE (read_ranges[2 ].length , footer_length);
3038- for (uint32_t i = 0 ; i < expected_body_read_lengths.size (); i++) {
3039- EXPECT_EQ (read_ranges[3 + i].length , expected_body_read_lengths[i]);
3064+
3065+ if (included_fields.empty ()) {
3066+ // When no fields are explicitly included, the reader optimizes by
3067+ // reading metadata and the entire body in a single IO.
3068+ // Thus, there are exactly 3 read IOs in total:
3069+ // 1) magic and footer length
3070+ // 2) footer
3071+ // 3) record batch metadata + body
3072+ EXPECT_EQ (read_ranges.size (), 3 );
3073+
3074+ int64_t total_body = 0 ;
3075+ for (auto len : expected_body_read_lengths) total_body += len;
3076+
3077+ // In the optimized path (included_fields is empty), the 3rd read operation
3078+ // fetches both the message metadata (flatbuffer) and the entire message body
3079+ // in one contiguous block. Therefore, its length must be strictly greater than
3080+ // the total body length; the excess is the size of the metadata.
3081+ EXPECT_GT (read_ranges[2 ].length , total_body);
3082+ EXPECT_LE (read_ranges[2 ].length , total_body + footer_length);
3083+ } else {
3084+ // When fields are filtered, we see 3 initial reads followed by N body reads
3085+ // (one for each field/buffer range):
3086+ // 1) magic and footer length
3087+ // 2) footer
3088+ // 3) record batch metadata
3089+ // 4) individual body buffer reads
3090+ EXPECT_EQ (read_ranges.size (), 3 + expected_body_read_lengths.size ());
3091+
3092+ // read record batch metadata. The exact size is tricky to determine but it doesn't
3093+ // matter for this test and it should be smaller than the footer.
3094+ EXPECT_LE (read_ranges[2 ].length , footer_length);
3095+ for (uint32_t i = 0 ; i < expected_body_read_lengths.size (); i++) {
3096+ EXPECT_EQ (read_ranges[3 + i].length , expected_body_read_lengths[i]);
3097+ }
30403098 }
30413099}
30423100
@@ -3186,7 +3244,9 @@ class PreBufferingTest : public ::testing::TestWithParam<bool> {
31863244 metadata_reads++;
31873245 }
31883246 }
3189- ASSERT_EQ (metadata_reads, reader_->num_record_batches () - num_indices_pre_buffered);
3247+ // With the ReadMessage optimization, non-prebuffered reads fetch metadata and body
3248+ // in a single large read, so we no longer see small metadata-only reads here.
3249+ ASSERT_EQ (metadata_reads, 0 );
31903250 ASSERT_EQ (data_reads, reader_->num_record_batches ());
31913251 }
31923252
0 commit comments