Remove unnecessary bit counting code from spark bit_count (#18841)

pepijnve · web-flow · commit d65fb860d2a6 · 2025-11-21T14:31:50.000Z
## Which issue does this PR close? - Follow-up to #18225 and PR #18322 ## Rationale for this change Spark's `bit_count` function always operates on 64-bit values, while the original `bit_count` implementation in `datafusion_spark` operated on the native size of the input value. In order to fix this, a custom bit counting implementation was ported over from the Java Spark implementation. This isn't really necessary though. Widening signed integers to `i64` and then using `i64::count_ones` will get you the exact same result and is less obscure. ## What changes are included in this PR? Remove custom `bitcount` logic and use `i64::count_ones` instead. ## Are these changes tested? Covered by existing tests that were added for #18225. ## Are there any user-facing changes? No
diff --git a/datafusion/spark/src/function/bitwise/bit_count.rs b/datafusion/spark/src/function/bitwise/bit_count.rs
@@ -102,24 +102,25 @@ fn spark_bit_count(value_array: &[ArrayRef]) -> Result<ArrayRef> {
         DataType::Int8 => {
             let result: Int32Array = value_array
                 .as_primitive::<Int8Type>()
-                .unary(|v| bit_count(v.into()));
+                .unary(|v| (v as i64).count_ones() as i32);
             Ok(Arc::new(result))
         }
         DataType::Int16 => {
             let result: Int32Array = value_array
                 .as_primitive::<Int16Type>()
-                .unary(|v| bit_count(v.into()));
+                .unary(|v| (v as i64).count_ones() as i32);
             Ok(Arc::new(result))
         }
         DataType::Int32 => {
             let result: Int32Array = value_array
                 .as_primitive::<Int32Type>()
-                .unary(|v| bit_count(v.into()));
+                .unary(|v| (v as i64).count_ones() as i32);
             Ok(Arc::new(result))
         }
         DataType::Int64 => {
-            let result: Int32Array =
-                value_array.as_primitive::<Int64Type>().unary(bit_count);
+            let result: Int32Array = value_array
+                .as_primitive::<Int64Type>()
+                .unary(|v| v.count_ones() as i32);
             Ok(Arc::new(result))
         }
         DataType::UInt8 => {
@@ -155,20 +156,6 @@ fn spark_bit_count(value_array: &[ArrayRef]) -> Result<ArrayRef> {
     }
 }
 
-// Here’s the equivalent Rust implementation of the bitCount function (similar to Apache Spark's bitCount for LongType)
-// Spark: https://github.com/apache/spark/blob/ac717dd7aec665de578d7c6b0070e8fcdde3cea9/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala#L243
-// Java impl: https://github.com/openjdk/jdk/blob/d226023643f90027a8980d161ec6d423887ae3ce/src/java.base/share/classes/java/lang/Long.java#L1584
-fn bit_count(i: i64) -> i32 {
-    let mut u = i as u64;
-    u = u - ((u >> 1) & 0x5555555555555555);
-    u = (u & 0x3333333333333333) + ((u >> 2) & 0x3333333333333333);
-    u = (u + (u >> 4)) & 0x0f0f0f0f0f0f0f0f;
-    u = u + (u >> 8);
-    u = u + (u >> 16);
-    u = u + (u >> 32);
-    (u as i32) & 0x7f
-}
-
 #[cfg(test)]
 mod tests {
     use super::*;