Skip to content

Commit 06ac2f8

Browse files
committed
bit-wise ops
Signed-off-by: Adam Gutglick <adam@spiraldb.com>
1 parent 341039a commit 06ac2f8

5 files changed

Lines changed: 80 additions & 21 deletions

File tree

Cargo.lock

Lines changed: 0 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vortex-buffer/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ warn-copy = ["dep:tracing"]
2424

2525
[dependencies]
2626
arrow-buffer = { workspace = true }
27-
bitvec = { workspace = true }
2827
bytes = { workspace = true }
2928
itertools = { workspace = true }
3029
memmap2 = { workspace = true, optional = true }

vortex-buffer/src/bit/buf.rs

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -203,10 +203,11 @@ impl BitBuffer {
203203
unsafe { buffer.push_unchecked(packed) }
204204
}
205205

206-
buffer.truncate(len.div_ceil(8));
206+
let mut bytes = buffer.into_byte_buffer();
207+
bytes.truncate(len.div_ceil(8));
207208

208209
Self {
209-
buffer: buffer.freeze().into_byte_buffer(),
210+
buffer: bytes.freeze(),
210211
offset: 0,
211212
len,
212213
}
@@ -287,7 +288,23 @@ impl BitBuffer {
287288
assert!(end <= self.len);
288289
let len = end - start;
289290

290-
Self::new_with_offset(self.buffer.clone(), len, self.offset + start)
291+
let offset = self.offset + start;
292+
let byte_offset = offset / 8;
293+
let bit_offset = offset % 8;
294+
295+
// Trim whole bytes off the front directly rather than going through `new_with_offset`,
296+
// which would slice (and re-clone) the clone we'd have to pass it.
297+
let buffer = if byte_offset != 0 {
298+
self.buffer.slice_unaligned(byte_offset..)
299+
} else {
300+
self.buffer.clone().aligned(Alignment::none())
301+
};
302+
303+
Self {
304+
buffer,
305+
offset: bit_offset,
306+
len,
307+
}
291308
}
292309

293310
/// Slice any full bytes from the buffer, leaving the offset < 8.

vortex-buffer/src/bit/buf_mut.rs

Lines changed: 59 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
use std::ops::Not;
55

6-
use bitvec::view::BitView;
6+
use arrow_buffer::bit_mask::set_bits;
77

88
use crate::BitBuffer;
99
use crate::BufferMut;
@@ -487,24 +487,32 @@ impl BitBufferMut {
487487
let end_bit_pos = start_bit_pos + bit_len;
488488
let required_bytes = end_bit_pos.div_ceil(8);
489489

490+
// `set_bits` below ORs into the destination, so stale bits past `len` in the existing
491+
// bytes (e.g. after `truncate` or `clear`) must be cleared first. Bytes appended below
492+
// are already zeroed.
493+
let existing_bits = self.buffer.len() * 8;
494+
if existing_bits > start_bit_pos {
495+
fill_bits(
496+
self.buffer.as_mut_slice(),
497+
start_bit_pos,
498+
existing_bits.min(end_bit_pos),
499+
false,
500+
);
501+
}
502+
490503
// Ensure buffer has enough bytes
491504
if required_bytes > self.buffer.len() {
492505
self.buffer.push_n(0x00, required_bytes - self.buffer.len());
493506
}
494507

495-
// Use bitvec for efficient bit copying
496-
let self_slice = self
497-
.buffer
498-
.as_mut_slice()
499-
.view_bits_mut::<bitvec::prelude::Lsb0>();
500-
let other_slice = buffer
501-
.inner()
502-
.as_slice()
503-
.view_bits::<bitvec::prelude::Lsb0>();
504-
505-
// Copy from source buffer (accounting for its offset) to destination (accounting for our offset + len)
506-
let source_range = buffer.offset()..buffer.offset() + bit_len;
507-
self_slice[start_bit_pos..end_bit_pos].copy_from_bitslice(&other_slice[source_range]);
508+
// Word-wise bit copy that handles mismatched source/destination bit offsets.
509+
set_bits(
510+
self.buffer.as_mut_slice(),
511+
buffer.inner().as_slice(),
512+
start_bit_pos,
513+
buffer.offset(),
514+
bit_len,
515+
);
508516

509517
self.len += bit_len;
510518
}
@@ -879,7 +887,43 @@ mod tests {
879887
assert!(frozen.value(7));
880888
}
881889

882-
#[cfg_attr(miri, ignore)] // bitvec crate uses a ptr cast that Miri doesn't support
890+
#[test]
891+
fn test_append_buffer_after_truncate() {
892+
// Truncating leaves stale set bits in the last partial byte; an append after that
893+
// must overwrite them rather than OR into them.
894+
let mut buf = BitBufferMut::new_set(16);
895+
buf.truncate(3);
896+
buf.append_buffer(&crate::BitBuffer::new_unset(8));
897+
898+
let frozen = buf.freeze();
899+
assert_eq!(frozen.len(), 11);
900+
for i in 0..3 {
901+
assert!(frozen.value(i), "bit {i} should be set");
902+
}
903+
for i in 3..11 {
904+
assert!(!frozen.value(i), "bit {i} should be unset");
905+
}
906+
}
907+
908+
#[test]
909+
fn test_append_buffer_misaligned_long() {
910+
// Force mismatched source/destination bit offsets across many words.
911+
let source = crate::BitBuffer::from_iter((0..301).map(|i| i % 3 == 0));
912+
let source = source.slice(5..301);
913+
914+
let mut dest = BitBufferMut::with_capacity(512);
915+
dest.append_n(true, 3);
916+
dest.append_buffer(&source);
917+
918+
assert_eq!(dest.len(), 3 + source.len());
919+
for i in 0..3 {
920+
assert!(dest.value(i), "prefix bit {i}");
921+
}
922+
for i in 0..source.len() {
923+
assert_eq!(dest.value(3 + i), source.value(i), "bit {i}");
924+
}
925+
}
926+
883927
#[test]
884928
fn test_append_buffer_with_offsets() {
885929
// Create source buffer with offset

vortex-buffer/src/string.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ impl BufferString {
2525

2626
/// Creates an empty `BufferString`.
2727
pub fn empty() -> Self {
28-
Self(ByteBuffer::from(vec![]))
28+
Self(ByteBuffer::empty())
2929
}
3030

3131
/// Return a view of the contents of BufferString as an immutable `&str`.

0 commit comments

Comments
 (0)