Skip to content

Commit 72c050b

Browse files
committed
- Adding lazy initialization for offsets_ in read-only buffers.
- Moving to use a flag to track write buffer initialization, for efficiency.
1 parent efac4b1 commit 72c050b

3 files changed

Lines changed: 67 additions & 38 deletions

File tree

src/processing/byte_buffer.cpp

Lines changed: 44 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -42,26 +42,27 @@ ByteBuffer::ByteBuffer(
4242
tcb::span<const uint8_t> elements_span,
4343
size_t element_size)
4444
: elements_span_(elements_span),
45+
num_elements_(0),
4546
has_fixed_sized_elements_(true),
46-
element_size_(element_size) {
47-
InitializeFromSpan();
48-
}
47+
element_size_(element_size),
48+
is_initialized_from_span_(false) {}
4949

5050
// Constructor for read-only buffer with variable-size elements.
5151
ByteBuffer::ByteBuffer(
5252
tcb::span<const uint8_t> elements_span)
5353
: elements_span_(elements_span),
54+
num_elements_(0),
5455
has_fixed_sized_elements_(false),
55-
element_size_(0) {
56-
InitializeFromSpan();
57-
}
56+
element_size_(0),
57+
is_initialized_from_span_(false) {}
5858

5959
// Initializes `num_elements_` and `offsets_` from the span.
60-
void ByteBuffer::InitializeFromSpan() {
60+
void ByteBuffer::InitializeFromSpan() const {
6161
// No elements to index. Initialize with empty values.
6262
if (elements_span_.empty()) {
6363
num_elements_ = 0;
6464
offsets_.clear();
65+
is_initialized_from_span_ = true;
6566
return;
6667
}
6768

@@ -76,6 +77,7 @@ void ByteBuffer::InitializeFromSpan() {
7677
}
7778
num_elements_ = elements_span_.size() / element_size_;
7879
offsets_.clear();
80+
is_initialized_from_span_ = true;
7981
return;
8082
}
8183

@@ -96,9 +98,20 @@ void ByteBuffer::InitializeFromSpan() {
9698
}
9799
cursor += current_element_size;
98100
}
99-
100-
// Set the number of elements from parsed offsets.
101101
num_elements_ = offsets_.size();
102+
is_initialized_from_span_ = true;
103+
}
104+
105+
void ByteBuffer::EnsureInitializedFromSpan() const {
106+
// If num_elements_ and offsets_ were already initialized from the span, there is nothing to do.
107+
if (is_initialized_from_span_) {
108+
return;
109+
}
110+
// If the write buffer is initialized, we don't need to initialize from the span.
111+
if (is_write_buffer_initialized_) {
112+
return;
113+
}
114+
InitializeFromSpan();
102115
}
103116

104117
size_t ByteBuffer::EstimateOffsetsReserveCountFromSample(tcb::span<const uint8_t> bytes) {
@@ -146,6 +159,7 @@ size_t ByteBuffer::EstimateOffsetsReserveCountFromSample(tcb::span<const uint8_t
146159
// -----------------------------------------------------------------------------
147160

148161
size_t ByteBuffer::CalculateOffsetOfElement(size_t position) const {
162+
EnsureInitializedFromSpan();
149163
if (position >= num_elements_) {
150164
throw InvalidInputException("Element position out of range during CalculateOffsetOfElement");
151165
}
@@ -156,6 +170,7 @@ size_t ByteBuffer::CalculateOffsetOfElement(size_t position) const {
156170
}
157171

158172
tcb::span<const uint8_t> ByteBuffer::GetElement(size_t position) const {
173+
EnsureInitializedFromSpan();
159174
if (position >= num_elements_) {
160175
throw InvalidInputException("Element position out of range during GetElement");
161176
}
@@ -213,6 +228,7 @@ void ByteBuffer::InitializeForWriteBuffer(size_t variable_size_reserved_bytes_hi
213228
const size_t fixed_size_total_bytes = num_elements_ * element_size_;
214229
write_buffer_.clear();
215230
write_buffer_.resize(fixed_size_total_bytes, static_cast<uint8_t>(0));
231+
is_write_buffer_initialized_ = true;
216232

217233
// offsets_ are not used for fixed-size elements.
218234
offsets_.clear();
@@ -231,13 +247,14 @@ void ByteBuffer::InitializeForWriteBuffer(size_t variable_size_reserved_bytes_hi
231247
const size_t variable_size_reserved_bytes = std::max(variable_size_reserved_bytes_hint, min_required_prefix_bytes);
232248
write_buffer_.clear();
233249
write_buffer_.reserve(variable_size_reserved_bytes);
250+
is_write_buffer_initialized_ = true;
234251

235252
// offsets_ is initialized so the vector is fully allocated and has random-ish access during writes.
236253
offsets_.clear();
237254
offsets_.resize(num_elements_, kUnsetVariableElementOffset);
238255

239256
// next_expected_write_position_ is initialized to 0 for sequential write checking.
240-
next_expected_sequential_position_ = 0;
257+
next_expected_write_position_ = 0;
241258

242259
// elements_span_ is re-bound to the write buffer.
243260
RebindSpanToWriteBuffer();
@@ -248,16 +265,16 @@ void ByteBuffer::InitializeForWriteBuffer(size_t variable_size_reserved_bytes_hi
248265
// -----------------------------------------------------------------------------
249266

250267
void ByteBuffer::SetElement(size_t position, tcb::span<const uint8_t> element) {
251-
if (write_buffer_finalized_) {
268+
if (is_write_buffer_finalized_) {
252269
throw InvalidInputException("Cannot SetElement: write buffer has been finalized");
253270
}
254271

255272
if (position >= num_elements_) {
256273
throw InvalidInputException("Element position out of range during SetElement");
257274
}
258275

259-
if (write_buffer_.empty() && write_buffer_.capacity() == 0) {
260-
throw InvalidInputException("Cannot SetElement: write buffer is not initialized");
276+
if (!is_write_buffer_initialized_) {
277+
throw InvalidInputException("Cannot SetElement: write buffer is not initialized.");
261278
}
262279

263280
// For fixed-size elements, we write the element to buffer at the offset. No need to re-bind the span.
@@ -285,37 +302,37 @@ void ByteBuffer::SetElement(size_t position, tcb::span<const uint8_t> element) {
285302
append_u32_le(write_buffer_, static_cast<uint32_t>(element.size()));
286303
write_buffer_.insert(write_buffer_.end(), element.begin(), element.end()); // Appends at the end of the buffer.
287304

288-
// Update next_expected_sequential_position_ for sequential write checking.
289-
if (next_expected_sequential_position_ != kUnsetVariableElementOffset) {
290-
if (position == next_expected_sequential_position_) {
291-
next_expected_sequential_position_ += 1;
305+
// Update next_expected_write_position_ for sequential write checking.
306+
if (next_expected_write_position_ != kUnsetVariableElementOffset) {
307+
if (position == next_expected_write_position_) {
308+
next_expected_write_position_ += 1;
292309
} else {
293-
next_expected_sequential_position_ = kUnsetVariableElementOffset;
310+
next_expected_write_position_ = kUnsetVariableElementOffset;
294311
}
295312
}
296313

297314
RebindSpanToWriteBuffer();
298315
}
299316

300317
std::vector<uint8_t> ByteBuffer::FinalizeAndTakeBuffer() {
301-
if (write_buffer_finalized_) {
318+
if (is_write_buffer_finalized_) {
302319
throw InvalidInputException("FinalizeAndTakeBuffer: write buffer has already been finalized");
303320
}
304321

305-
if (write_buffer_.empty() && write_buffer_.capacity() == 0) {
322+
if (!is_write_buffer_initialized_) {
306323
throw InvalidInputException("FinalizeAndTakeBuffer: write buffer is not initialized");
307324
}
308325

309326
// Fixed-size: write_buffer_ is always in element order, transfer ownership directly.
310327
if (has_fixed_sized_elements_) {
311-
write_buffer_finalized_ = true;
328+
is_write_buffer_finalized_ = true;
312329
return std::move(write_buffer_);
313330
}
314331

315332
// For variable-size when all elements were written exactly once and in sequential order,
316333
// we can skip out-of-order or fragmentation checks. This is the fast path.
317334
// This is the most common behavior when writing elements in single threaded mode.
318-
if (next_expected_sequential_position_ == num_elements_) {
335+
if (next_expected_write_position_ == num_elements_) {
319336
if (num_elements_ > 0) {
320337
const size_t last_element_offset = offsets_[num_elements_ - 1];
321338
const size_t last_element_size = ReadSizeAt(elements_span_, last_element_offset);
@@ -324,12 +341,12 @@ std::vector<uint8_t> ByteBuffer::FinalizeAndTakeBuffer() {
324341
throw InvalidInputException("FinalizeAndTakeBuffer: trailing bytes detected beyond last element");
325342
}
326343
}
327-
write_buffer_finalized_ = true;
344+
is_write_buffer_finalized_ = true;
328345
return std::move(write_buffer_);
329346
}
330347

331348
// For variable-size, when elements are written out of order, assume the buffer is fragmented and may contain orphaned bytes.
332-
// The buffer is validated and rebuilt into a compact buffer in one pass.
349+
// The buffer is validated and rebuilt into an ordered compact buffer in one pass.
333350
std::vector<uint8_t> result;
334351
result.reserve(write_buffer_.size());
335352
for (size_t i = 0; i < num_elements_; ++i) {
@@ -356,8 +373,9 @@ std::vector<uint8_t> ByteBuffer::FinalizeAndTakeBuffer() {
356373
// Defrag path returns a new buffer; release the original fragmented write buffer.
357374
write_buffer_.clear();
358375
write_buffer_.shrink_to_fit();
359-
360-
write_buffer_finalized_ = true;
376+
is_write_buffer_initialized_ = false;
377+
is_write_buffer_finalized_ = true;
378+
361379
return result;
362380
}
363381

src/processing/byte_buffer.h

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -66,29 +66,32 @@ class ByteBuffer {
6666

6767
// Variables for span elements reading
6868
tcb::span<const uint8_t> elements_span_;
69-
size_t num_elements_;
69+
mutable size_t num_elements_;
7070
bool has_fixed_sized_elements_;
7171

7272
// Variables for determining offset of elements.
73-
size_t element_size_; // for fixed-size elements
74-
std::vector<size_t> offsets_; // for variable-size elements
73+
size_t element_size_; // for fixed-size elements
74+
mutable std::vector<size_t> offsets_; // for variable-size elements
7575

7676
// Variables for write buffer.
7777
std::vector<uint8_t> write_buffer_;
78-
bool write_buffer_finalized_ = false;
7978

8079
// Variable for sequential variable-size writes.
8180
// Tracks next expected position for sequential variable-size writes.
8281
// Value is invalidated to kUnsetVariableElementOffset once order is violated.
83-
size_t next_expected_sequential_position_ = 0;
82+
size_t next_expected_write_position_ = 0;
8483

8584
private:
86-
// Initialization methods for read-only buffer
87-
void InitializeFromSpan();
85+
// Initialization methods and flags for read-only buffer
86+
void InitializeFromSpan() const;
87+
void EnsureInitializedFromSpan() const;
88+
mutable bool is_initialized_from_span_ = false;
8889

89-
// Initialization methods for write buffer
90+
// Initialization methods and flags for write buffer
9091
void InitializeForWriteBuffer(size_t variable_size_reserved_bytes_hint);
9192
void RebindSpanToWriteBuffer();
93+
bool is_write_buffer_initialized_ = false;
94+
bool is_write_buffer_finalized_ = false;
9295
};
9396

9497
inline constexpr size_t kUnsetVariableElementOffset = std::numeric_limits<size_t>::max();

src/processing/byte_buffer_test.cpp

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,8 @@ std::vector<uint8_t> MakePayload(size_t size, uint8_t seed) {
8282
TEST(ByteBufferTest, ConstructFixedSize_ValidBuffer_InitializesExpectedState) {
8383
std::vector<uint8_t> bytes = {0x01, 0x02, 0x03, 0x04, 0x05, 0x06};
8484
ByteBufferTestProxy buffer(tcb::span<const uint8_t>(bytes), 2);
85+
// Trigger lazy span initialization.
86+
EXPECT_NO_THROW((void)buffer.GetElement(0));
8587
ExpectCommonState(buffer, 3u, true, 2u);
8688
EXPECT_TRUE(buffer.GetOffsets().empty());
8789
}
@@ -107,12 +109,14 @@ TEST(ByteBufferTest, GetElement_FixedSize_ReturnsExpectedSlices) {
107109

108110
TEST(ByteBufferTest, ConstructFixedSize_ZeroElementSize_Throws) {
109111
std::vector<uint8_t> bytes = {0x01, 0x02, 0x03, 0x04};
110-
EXPECT_THROW((void)ByteBufferTestProxy(tcb::span<const uint8_t>(bytes), 0), InvalidInputException);
112+
ByteBufferTestProxy buffer(tcb::span<const uint8_t>(bytes), 0);
113+
EXPECT_THROW((void)buffer.GetElement(0), InvalidInputException);
111114
}
112115

113116
TEST(ByteBufferTest, ConstructFixedSize_NonDivisibleSize_Throws) {
114117
std::vector<uint8_t> bytes = {0x01, 0x02, 0x03};
115-
EXPECT_THROW((void)ByteBufferTestProxy(tcb::span<const uint8_t>(bytes), 2), InvalidInputException);
118+
ByteBufferTestProxy buffer(tcb::span<const uint8_t>(bytes), 2);
119+
EXPECT_THROW((void)buffer.GetElement(0), InvalidInputException);
116120
}
117121

118122
TEST(ByteBufferTest, ConstructVariableSize_ValidEncodedBuffer_InitializesExpectedState) {
@@ -122,6 +126,8 @@ TEST(ByteBufferTest, ConstructVariableSize_ValidEncodedBuffer_InitializesExpecte
122126
0x07, 0x00, 0x00, 0x00, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37
123127
};
124128
ByteBufferTestProxy buffer{tcb::span<const uint8_t>(bytes)};
129+
// Trigger lazy variable-size index parsing.
130+
EXPECT_NO_THROW((void)buffer.GetElement(0));
125131
ExpectCommonState(buffer, 2u, false, 0u);
126132
ASSERT_EQ(buffer.GetOffsets().size(), 2u);
127133
EXPECT_EQ(buffer.GetOffsets()[0], 0u);
@@ -603,13 +609,15 @@ TEST(ByteBufferTest, ConstructVariableSize_EmptyBuffer_InitializesEmptyState) {
603609

604610
TEST(ByteBufferTest, ConstructVariableSize_TruncatedLengthPrefix_Throws) {
605611
std::vector<uint8_t> bytes = {0x01, 0x00, 0x00}; // only 3 bytes
606-
EXPECT_THROW((void)ByteBufferTestProxy(tcb::span<const uint8_t>(bytes)), InvalidInputException);
612+
ByteBufferTestProxy buffer{tcb::span<const uint8_t>(bytes)};
613+
EXPECT_THROW((void)buffer.GetElement(0), InvalidInputException);
607614
}
608615

609616
TEST(ByteBufferTest, ConstructVariableSize_TruncatedPayload_Throws) {
610617
// Declares payload length 5, but provides only 2 bytes.
611618
std::vector<uint8_t> bytes = {
612619
0x05, 0x00, 0x00, 0x00, 0xAA, 0xBB
613620
};
614-
EXPECT_THROW((void)ByteBufferTestProxy(tcb::span<const uint8_t>(bytes)), InvalidInputException);
621+
ByteBufferTestProxy buffer{tcb::span<const uint8_t>(bytes)};
622+
EXPECT_THROW((void)buffer.GetElement(0), InvalidInputException);
615623
}

0 commit comments

Comments
 (0)