@@ -37,7 +37,43 @@ use datafusion_expr::{
3737 Coercion , ColumnarValue , ScalarFunctionArgs , ScalarUDFImpl , Signature , TypeSignature ,
3838 TypeSignatureClass , Volatility ,
3939} ;
40- use std:: fmt:: Write ;
/// Uppercase hex lookup table covering every byte value (0-255 -> "00"-"FF").
///
/// The two ASCII digits for byte `b` live at indices `b * 2` (high nibble)
/// and `b * 2 + 1` (low nibble), so the table is 256 * 2 = 512 bytes long.
const HEX_UPPER: &[u8; 512] = {
    // Built at compile time from the 16 digits rather than typed out by hand.
    const PAIRS: [u8; 512] = {
        let digits = b"0123456789ABCDEF";
        let mut pairs = [0u8; 512];
        let mut byte = 0usize;
        while byte < 256 {
            pairs[byte * 2] = digits[byte >> 4];
            pairs[byte * 2 + 1] = digits[byte & 0x0F];
            byte += 1;
        }
        pairs
    };
    &PAIRS
};
58+
/// Lowercase hex lookup table covering every byte value (0-255 -> "00"-"ff").
///
/// The two ASCII digits for byte `b` live at indices `b * 2` (high nibble)
/// and `b * 2 + 1` (low nibble), so the table is 256 * 2 = 512 bytes long.
const HEX_LOWER: &[u8; 512] = {
    // Built at compile time from the 16 digits rather than typed out by hand.
    const PAIRS: [u8; 512] = {
        let digits = b"0123456789abcdef";
        let mut pairs = [0u8; 512];
        let mut byte = 0usize;
        while byte < 256 {
            pairs[byte * 2] = digits[byte >> 4];
            pairs[byte * 2 + 1] = digits[byte & 0x0F];
            byte += 1;
        }
        pairs
    };
    &PAIRS
};
4177
4278/// <https://spark.apache.org/docs/latest/api/sql/index.html#hex>
4379#[ derive( Debug , PartialEq , Eq , Hash ) ]
@@ -116,21 +152,24 @@ fn hex_int64(num: i64) -> String {
116152 format ! ( "{num:X}" )
117153}
118154
155+ /// Fast hex encoding using a lookup table.
156+ /// Each byte maps to 2 characters in the lookup table at index `byte * 2`.
119157#[ inline( always) ]
120158fn hex_encode < T : AsRef < [ u8 ] > > ( data : T , lower_case : bool ) -> String {
121- let mut s = String :: with_capacity ( data. as_ref ( ) . len ( ) * 2 ) ;
122- if lower_case {
123- for b in data. as_ref ( ) {
124- // Writing to a string never errors, so we can unwrap here.
125- write ! ( & mut s, "{b:02x}" ) . unwrap ( ) ;
126- }
127- } else {
128- for b in data. as_ref ( ) {
129- // Writing to a string never errors, so we can unwrap here.
130- write ! ( & mut s, "{b:02X}" ) . unwrap ( ) ;
131- }
159+ let bytes = data. as_ref ( ) ;
160+ let table = if lower_case { HEX_LOWER } else { HEX_UPPER } ;
161+
162+ // Pre-allocate the exact size needed
163+ let mut result = vec ! [ 0u8 ; bytes. len( ) * 2 ] ;
164+
165+ for ( i, & byte) in bytes. iter ( ) . enumerate ( ) {
166+ let idx = ( byte as usize ) * 2 ;
167+ result[ i * 2 ] = table[ idx] ;
168+ result[ i * 2 + 1 ] = table[ idx + 1 ] ;
132169 }
133- s
170+
171+ // SAFETY: The lookup table contains only valid ASCII hex characters
172+ unsafe { String :: from_utf8_unchecked ( result) }
134173}
135174
136175#[ inline( always) ]
0 commit comments