Skip to content

Commit 803380c

Browse files
committed
Fix clippy lints in VarBin UTF-8 validation
The reworked `validate_utf8` tripped two clippy lints: - `cognitive_complexity`: the per-validity-variant `match` lived inside the `match_each_integer_ptype!` macro, multiplying its complexity across every integer arm. Resolve the validity mask once before the dtype dispatch and keep a single loop inside the macro. - `unnecessary_fallible_conversions`: replace `usize::try_from(..)` on the offset with the infallible `AsPrimitive::as_()` already used for the window bounds. Signed-off-by: Joe Isaacs <joe.isaacs@live.co.uk>
1 parent 71aedb3 commit 803380c

1 file changed

Lines changed: 20 additions & 23 deletions

File tree

vortex-array/src/arrays/varbin/array.rs

Lines changed: 20 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -246,39 +246,36 @@ impl VarBinData {
246246
// TODO(joe): update the created VarBin with this decompressed Array.
247247
let primitive_offsets = offsets.clone().execute::<PrimitiveArray>(&mut ctx)?;
248248

249+
// Array-backed validity is the only variant that needs an execution context: execute it into
250+
// a mask once. The constant variants resolve null-ness without one. Resolving this before
251+
// the per-type dispatch keeps the dtype loop simple.
252+
let mask = match validity {
253+
Validity::Array(_) => {
254+
Some(validity.execute_mask(primitive_offsets.len().saturating_sub(1), &mut ctx)?)
255+
}
256+
_ => None,
257+
};
258+
let all_invalid = matches!(validity, Validity::AllInvalid);
259+
249260
match_each_integer_ptype!(primitive_offsets.dtype().as_ptype(), |O| {
250261
let offsets_slice = primitive_offsets.as_slice::<O>();
251-
let windows = offsets_slice.windows(2).map(|o| (o[0].as_(), o[1].as_()));
252262

253-
let last_offset = usize::try_from(offsets_slice[offsets.len() - 1])
254-
.vortex_expect("must fit into usize");
263+
let last_offset: usize = offsets_slice[offsets_slice.len() - 1].as_();
255264
vortex_ensure!(
256265
last_offset <= bytes.len(),
257266
InvalidArgument: "Last offset {} exceeds bytes length {}",
258267
last_offset,
259268
bytes.len()
260269
);
261270

262-
match validity {
263-
// Array-backed validity is the only variant that needs an execution context:
264-
// execute it into a mask once and zip it with the offset windows, validating only
265-
// the valid (non-null) entries.
266-
Validity::Array(_) => {
267-
let mask =
268-
validity.execute_mask(offsets_slice.len().saturating_sub(1), &mut ctx)?;
269-
for (i, ((start, end), valid)) in windows.zip(mask.iter()).enumerate() {
270-
if valid {
271-
validate_at(i, start, end)?;
272-
}
273-
}
274-
}
275-
// Every entry is null, so there is nothing to validate.
276-
Validity::AllInvalid => {}
277-
// No nulls: validate every entry.
278-
Validity::NonNullable | Validity::AllValid => {
279-
for (i, (start, end)) in windows.enumerate() {
280-
validate_at(i, start, end)?;
281-
}
271+
for (i, (start, end)) in offsets_slice
272+
.windows(2)
273+
.map(|o| (o[0].as_(), o[1].as_()))
274+
.enumerate()
275+
{
276+
let valid = mask.as_ref().map_or(!all_invalid, |mask| mask.value(i));
277+
if valid {
278+
validate_at(i, start, end)?;
282279
}
283280
}
284281
});

0 commit comments

Comments
 (0)