Skip to content

Commit d65fb86

Browse files
authored
Remove unnecessary bit counting code from spark bit_count (#18841)
## Which issue does this PR close?

- Follow-up to #18225 and PR #18322

## Rationale for this change

Spark's `bit_count` function always operates on 64-bit values, while the original `bit_count` implementation in `datafusion_spark` operated on the native size of the input value. To fix this, a custom bit-counting implementation was ported over from the Java Spark implementation. That custom implementation isn't really necessary, though: widening signed integers to `i64` (which sign-extends the value) and then calling `i64::count_ones` produces exactly the same result and is less obscure.

## What changes are included in this PR?

Remove the custom `bit_count` logic and use `i64::count_ones` instead.

## Are these changes tested?

Yes. They are covered by the existing tests that were added for #18225.

## Are there any user-facing changes?

No.
1 parent 195bd5c commit d65fb86

File tree

1 file changed

+6
-19
lines changed

1 file changed

+6
-19
lines changed

datafusion/spark/src/function/bitwise/bit_count.rs

Lines changed: 6 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -102,24 +102,25 @@ fn spark_bit_count(value_array: &[ArrayRef]) -> Result<ArrayRef> {
102102
DataType::Int8 => {
103103
let result: Int32Array = value_array
104104
.as_primitive::<Int8Type>()
105-
.unary(|v| bit_count(v.into()));
105+
.unary(|v| (v as i64).count_ones() as i32);
106106
Ok(Arc::new(result))
107107
}
108108
DataType::Int16 => {
109109
let result: Int32Array = value_array
110110
.as_primitive::<Int16Type>()
111-
.unary(|v| bit_count(v.into()));
111+
.unary(|v| (v as i64).count_ones() as i32);
112112
Ok(Arc::new(result))
113113
}
114114
DataType::Int32 => {
115115
let result: Int32Array = value_array
116116
.as_primitive::<Int32Type>()
117-
.unary(|v| bit_count(v.into()));
117+
.unary(|v| (v as i64).count_ones() as i32);
118118
Ok(Arc::new(result))
119119
}
120120
DataType::Int64 => {
121-
let result: Int32Array =
122-
value_array.as_primitive::<Int64Type>().unary(bit_count);
121+
let result: Int32Array = value_array
122+
.as_primitive::<Int64Type>()
123+
.unary(|v| v.count_ones() as i32);
123124
Ok(Arc::new(result))
124125
}
125126
DataType::UInt8 => {
@@ -155,20 +156,6 @@ fn spark_bit_count(value_array: &[ArrayRef]) -> Result<ArrayRef> {
155156
}
156157
}
157158

158-
// Here’s the equivalent Rust implementation of the bitCount function (similar to Apache Spark's bitCount for LongType)
159-
// Spark: https://github.com/apache/spark/blob/ac717dd7aec665de578d7c6b0070e8fcdde3cea9/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala#L243
160-
// Java impl: https://github.com/openjdk/jdk/blob/d226023643f90027a8980d161ec6d423887ae3ce/src/java.base/share/classes/java/lang/Long.java#L1584
161-
fn bit_count(i: i64) -> i32 {
162-
let mut u = i as u64;
163-
u = u - ((u >> 1) & 0x5555555555555555);
164-
u = (u & 0x3333333333333333) + ((u >> 2) & 0x3333333333333333);
165-
u = (u + (u >> 4)) & 0x0f0f0f0f0f0f0f0f;
166-
u = u + (u >> 8);
167-
u = u + (u >> 16);
168-
u = u + (u >> 32);
169-
(u as i32) & 0x7f
170-
}
171-
172159
#[cfg(test)]
173160
mod tests {
174161
use super::*;

0 commit comments

Comments
 (0)