@@ -98,9 +98,11 @@ using SpreadBufferUint = std::conditional_t<
9898// / stop if it finds a byte aligned value start.
9999template <int kPackedBitWidth , bool kIsProlog , typename Uint>
100100int unpack_exact (const uint8_t * in, Uint* out, int batch_size, int bit_offset) {
101+ static_assert (kPackedBitWidth > 0 );
102+
101103 // For the epilog we adapt the max spread since better alignment gives shorter spreads
102- ARROW_DCHECK (kIsProlog || bit_offset == 0 );
103- ARROW_DCHECK (bit_offset >= 0 && bit_offset < 8 );
104+ ARROW_CHECK (kIsProlog || bit_offset == 0 );
105+ ARROW_CHECK (bit_offset >= 0 && bit_offset < 8 );
104106 constexpr int kMaxSpreadBytes = kIsProlog ? PackedMaxSpreadBytes (kPackedBitWidth )
105107 : PackedMaxSpreadBytes (kPackedBitWidth , 0 );
106108 using buffer_uint = SpreadBufferUint<kMaxSpreadBytes >;
@@ -112,16 +114,17 @@ int unpack_exact(const uint8_t* in, Uint* out, int batch_size, int bit_offset) {
112114 constexpr buffer_uint kLowMask =
113115 bit_util::LeastSignificantBitMask<buffer_uint, true >(kPackedBitWidth );
114116
115- ARROW_DCHECK_GE (bit_offset, 0 );
116- ARROW_DCHECK_LE (bit_offset, 8 );
117+ ARROW_CHECK_GE (bit_offset, 0 );
118+ ARROW_CHECK_LE (bit_offset, 8 );
117119
118120 // Looping over values one by one
119121 const int start_bit_term = batch_size * kPackedBitWidth + bit_offset;
120122 int start_bit = bit_offset;
121123 while ((start_bit < start_bit_term) && (!kIsProlog || (start_bit % 8 != 0 ))) {
122124 const int start_byte = start_bit / 8 ;
123125 const int spread_bytes = ((start_bit + kPackedBitWidth - 1 ) / 8 ) - start_byte + 1 ;
124- ARROW_COMPILER_ASSUME (spread_bytes <= kMaxSpreadBytes );
126+ ARROW_CHECK_LE (spread_bytes, kMaxSpreadBytes );
127+ // ARROW_COMPILER_ASSUME(spread_bytes <= kMaxSpreadBytes);
125128
126129 // Reading the bytes for the current value.
127130 // Must be careful not to read out of input bounds.
@@ -130,8 +133,10 @@ int unpack_exact(const uint8_t* in, Uint* out, int batch_size, int bit_offset) {
130133 // We read the max possible bytes in the first pass and handle the rest after.
131134 // Even though the worst spread does not happen on all iterations we can still read
132135 // all bytes because we will mask them.
136+ // ARROW_LOG(INFO) << " > reading " << std::min(kBufferSize, spread_bytes) << " bytes from " <<reinterpret_cast<const void*>(in + start_byte);
133137 std::memcpy (&buffer, in + start_byte, std::min (kBufferSize , spread_bytes));
134138 } else {
139+ // ARROW_LOG(INFO) << " > reading " << spread_bytes << " bytes from " <<reinterpret_cast<const void*>(in + start_byte);
135140 std::memcpy (&buffer, in + start_byte, spread_bytes);
136141 }
137142
@@ -144,6 +149,7 @@ int unpack_exact(const uint8_t* in, Uint* out, int batch_size, int bit_offset) {
144149 if constexpr (kLarge ) {
145150 // The oversized bytes do not happen at all iterations
146151 if (spread_bytes > kBufferSize ) {
152+ // ARROW_LOG(INFO) << " > reading " << spread_bytes - kBufferSize << " bytes from " <<reinterpret_cast<const void*>(in + start_byte + kBufferSize);
147153 std::memcpy (&buffer, in + start_byte + kBufferSize , spread_bytes - kBufferSize );
148154 buffer = bit_util::FromLittleEndian (buffer);
149155 buffer <<= 8 * kBufferSize - bit_offset;
@@ -156,7 +162,7 @@ int unpack_exact(const uint8_t* in, Uint* out, int batch_size, int bit_offset) {
156162 start_bit += kPackedBitWidth ;
157163 }
158164
159- ARROW_DCHECK ((start_bit - bit_offset) % kPackedBitWidth == 0 );
165+ ARROW_CHECK ((start_bit - bit_offset) % kPackedBitWidth == 0 );
160166 return (start_bit - bit_offset) / kPackedBitWidth ;
161167}
162168
@@ -185,16 +191,23 @@ void unpack_width(const uint8_t* in, UnpackedUInt* out, int batch_size, int bit_
185191 bit_util::BytesForBits (batch_size * kPackedBitWidth + bit_offset));
186192 // If specified, max_read_bytes must be greater than the bytes needed to extract the
187193 // number of desired values.
188- ARROW_DCHECK (max_read_bytes < 0 || bytes_batch <= max_read_bytes);
194+ ARROW_CHECK (max_read_bytes < 0 || bytes_batch <= max_read_bytes);
189195 const uint8_t * in_end = in + (max_read_bytes >= 0 ? max_read_bytes : bytes_batch);
190196
197+ ARROW_LOG (INFO) << " ... unpack: width=" << kPackedBitWidth
198+ << " , in=" <<reinterpret_cast <const void *>(in)
199+ << " , batch_size = " << batch_size << " , bit_offset = " << bit_offset
200+ << " , max_read_bytes=" << max_read_bytes
201+ << " (reading up to" << reinterpret_cast <const void *>(in_end - 1 )
202+ << " )" ;
203+
191204 // In case of misalignment, we need to run the prolog until aligned.
192205 int extracted = unpack_exact<kPackedBitWidth , true >(in, out, batch_size, bit_offset);
193206 // We either extracted everything or found an alignment
194207 const int start_bit = extracted * kPackedBitWidth + bit_offset;
195- ARROW_DCHECK ((extracted == batch_size) || ((start_bit) % 8 == 0 ));
208+ ARROW_CHECK ((extracted == batch_size) || ((start_bit) % 8 == 0 ));
196209 batch_size -= extracted;
197- ARROW_DCHECK_GE (batch_size, 0 );
210+ ARROW_CHECK_GE (batch_size, 0 );
198211 in += start_bit / 8 ;
199212 out += extracted;
200213
@@ -221,14 +234,22 @@ void unpack_width(const uint8_t* in, UnpackedUInt* out, int batch_size, int bit_
221234 // Performance check making sure we ran the kernel loop as much as possible:
222235 // Either we ran out because we could not pack enough values, or because we would
223236 // overread.
224- ARROW_DCHECK ((batch_size < kValuesUnpacked ) || (in_end - in) < kBytesRead );
237+ ARROW_CHECK ((batch_size < kValuesUnpacked ) || (in_end - in) < kBytesRead );
225238 }
226239
227240 // Running the epilog for the remaining values that don't fit in a kernel
228- ARROW_DCHECK_GE (batch_size, 0 );
241+ const auto epilog_bytes = bit_util::BytesForBits (batch_size * kPackedBitWidth );
242+ ARROW_LOG (INFO) << " > calling unpack_exact: "
243+ << " in=" << reinterpret_cast <const void *>(in)
244+ << " , batch_size=" << batch_size
245+ << " , epilog_bytes=" << epilog_bytes
246+ << " (expecting to read up to " << reinterpret_cast <const void *>(in + (epilog_bytes - 1 ))
247+ << " )" ;
248+ ARROW_CHECK_GE (batch_size, 0 );
229249 ARROW_COMPILER_ASSUME (batch_size >= 0 );
230250 unpack_exact<kPackedBitWidth , false >(in, out, batch_size, /* bit_offset= */ 0 );
231251 }
252+ ARROW_LOG (INFO) << " > /unpack finished" ;
232253 }
233254}
234255
@@ -628,6 +649,6 @@ static void unpack_jump(const uint8_t* in, UnpackedUint* out, const UnpackOption
628649 opt.max_read_bytes );
629650 }
630651 }
631- ARROW_DCHECK (false ) << " Unsupported num_bits " << opt.bit_width ;
652+ ARROW_CHECK (false ) << " Unsupported num_bits " << opt.bit_width ;
632653}
633654} // namespace arrow::internal::bpacking
0 commit comments