diff --git a/datafusion/spark/src/function/hash/sha1.rs b/datafusion/spark/src/function/hash/sha1.rs index 3250c8ca848c4..605d2a9567c49 100644 --- a/datafusion/spark/src/function/hash/sha1.rs +++ b/datafusion/spark/src/function/hash/sha1.rs @@ -16,7 +16,6 @@ // under the License. use std::any::Any; -use std::fmt::Write; use std::sync::Arc; use arrow::array::{ArrayRef, StringArray}; @@ -95,11 +94,16 @@ impl ScalarUDFImpl for SparkSha1 { } } +/// Hex encoding lookup table for fast byte-to-hex conversion +const HEX_CHARS_LOWER: &[u8; 16] = b"0123456789abcdef"; + +#[inline] fn spark_sha1_digest(value: &[u8]) -> String { let result = Sha1::digest(value); let mut s = String::with_capacity(result.len() * 2); - for b in result.as_slice() { - write!(&mut s, "{b:02x}").unwrap(); + for &b in result.as_slice() { + s.push(HEX_CHARS_LOWER[(b >> 4) as usize] as char); + s.push(HEX_CHARS_LOWER[(b & 0x0f) as usize] as char); } s } diff --git a/datafusion/spark/src/function/math/hex.rs b/datafusion/spark/src/function/math/hex.rs index dbbea17db5bfa..ef62b08fb03d2 100644 --- a/datafusion/spark/src/function/math/hex.rs +++ b/datafusion/spark/src/function/math/hex.rs @@ -37,8 +37,6 @@ use datafusion_expr::{ Coercion, ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignature, TypeSignatureClass, Volatility, }; -use std::fmt::Write; - /// #[derive(Debug, PartialEq, Eq, Hash)] pub struct SparkHex { @@ -116,19 +114,22 @@ fn hex_int64(num: i64) -> String { format!("{num:X}") } -#[inline(always)] +/// Hex encoding lookup tables for fast byte-to-hex conversion +const HEX_CHARS_LOWER: &[u8; 16] = b"0123456789abcdef"; +const HEX_CHARS_UPPER: &[u8; 16] = b"0123456789ABCDEF"; + +#[inline] fn hex_encode<T: AsRef<[u8]>>(data: T, lower_case: bool) -> String { - let mut s = String::with_capacity(data.as_ref().len() * 2); - if lower_case { - for b in data.as_ref() { - // Writing to a string never errors, so we can unwrap here. 
- write!(&mut s, "{b:02x}").unwrap(); - } + let bytes = data.as_ref(); + let mut s = String::with_capacity(bytes.len() * 2); + let hex_chars = if lower_case { + HEX_CHARS_LOWER } else { - for b in data.as_ref() { - // Writing to a string never errors, so we can unwrap here. - write!(&mut s, "{b:02X}").unwrap(); - } + HEX_CHARS_UPPER + }; + for &b in bytes { + s.push(hex_chars[(b >> 4) as usize] as char); + s.push(hex_chars[(b & 0x0f) as usize] as char); } s }