-
Notifications
You must be signed in to change notification settings - Fork 0
19369: Feat: Allow pow with negative & non-integer exponent on decimals #130
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -112,12 +112,14 @@ impl PowerFunc { | |
| /// 2.5 is represented as 25 with scale 1 | ||
| /// The unscaled result is 25^4 = 390625 | ||
| /// Scale it back to 1: 390625 / 10^4 = 39 | ||
| /// | ||
| /// Returns error if base is invalid | ||
| fn pow_decimal_int<T>(base: T, scale: i8, exp: i64) -> Result<T, ArrowError> | ||
| where | ||
| T: From<i32> + ArrowNativeTypeOp, | ||
| { | ||
| if exp < 0 { | ||
| return pow_decimal_float(base, scale, exp as f64); | ||
| } | ||
|
|
||
| let scale: u32 = scale.try_into().map_err(|_| { | ||
| ArrowError::NotYetImplemented(format!( | ||
| "Negative scale is not yet supported value: {scale}" | ||
|
|
@@ -149,22 +151,112 @@ where | |
|
|
||
| /// Binary function to calculate a math power to float exponent | ||
| /// for scaled integer types. | ||
| /// Returns error if exponent is negative or non-integer, or base invalid | ||
| fn pow_decimal_float<T>(base: T, scale: i8, exp: f64) -> Result<T, ArrowError> | ||
| where | ||
| T: From<i32> + ArrowNativeTypeOp, | ||
| { | ||
| if !exp.is_finite() || exp.trunc() != exp { | ||
| if exp.is_finite() && exp.trunc() == exp && exp >= 0f64 && exp < u32::MAX as f64 { | ||
| return pow_decimal_int(base, scale, exp as i64); | ||
| } | ||
|
|
||
| if !exp.is_finite() { | ||
| return Err(ArrowError::ComputeError(format!( | ||
| "Cannot use non-integer exp: {exp}" | ||
| "Cannot use non-finite exp: {exp}" | ||
| ))); | ||
| } | ||
|
|
||
| pow_decimal_float_fallback(base, scale, exp) | ||
| } | ||
|
|
||
| /// Fallback implementation using f64 for negative or non-integer exponents. | ||
| /// This handles cases that cannot be computed using integer arithmetic. | ||
| fn pow_decimal_float_fallback<T>(base: T, scale: i8, exp: f64) -> Result<T, ArrowError> | ||
| where | ||
| T: From<i32> + ArrowNativeTypeOp, | ||
| { | ||
| let scale_factor = 10f64.powi(scale as i32); | ||
| let base_f64 = format!("{base:?}") | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Converting 🤖 Was this useful? React with 👍 or 👎
Owner
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. value:annoying; category:bug; feedback:The Augment AI reviewer is not correct! arrow-rs project provides the arrow-buffer crate that defines the i256 type. It's Debug and Display implementations are the same! And it also provides FromStr trait implementation that is used by str::parse::() - https://github.com/apache/arrow-rs/blob/6b290d192e2372f4bcbf7da91b4bd85dac43e4d5/arrow-buffer/src/bigint/mod.rs#L67-L79. There are unit tests confirming this - https://github.com/apache/arrow-rs/blob/6b290d192e2372f4bcbf7da91b4bd85dac43e4d5/arrow-buffer/src/bigint/mod.rs#L1332-L1381 |
||
| .parse::<f64>() | ||
| .map(|v| v / scale_factor) | ||
| .map_err(|_| { | ||
| ArrowError::ComputeError(format!("Cannot convert base {base:?} to f64")) | ||
| })?; | ||
|
Comment on lines
+178
to
+183
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🏁 Script executed: # Find and read the power.rs file
find . -name "power.rs" -path "*/datafusion/*" -type f | head -5Repository: martin-augment/datafusion Length of output: 109 🏁 Script executed: cat -n ./datafusion/functions/src/math/power.rs | sed -n '165,210p'Repository: martin-augment/datafusion Length of output: 1744 🏁 Script executed: # Search for ArrowNativeTypeOp trait definition to understand available methods
rg -n "trait ArrowNativeTypeOp" --type=rust -A 20 | head -80Repository: martin-augment/datafusion Length of output: 51 🏁 Script executed: # Search for ArrowNativeTypeOp import or trait definition more broadly
rg "ArrowNativeTypeOp" --type=rust -B 3 -A 10 | head -100Repository: martin-augment/datafusion Length of output: 8812 🏁 Script executed: # Check for i32, i64, i128 Debug output to understand what the format!("{base:?}") produces
# Also look for as_i128 or similar conversion methods in the codebase
rg "as_i128|to_i128|into.*i128" --type=rust datafusion/functions/src/math/ -B 2 -A 2 | head -50Repository: martin-augment/datafusion Length of output: 1261 🏁 Script executed: # Look at the utils module to see what conversion functions are available
rg "decimal128_to_i128|decimal.*to_i128" --type=rust -B 5 -A 10 | head -80Repository: martin-augment/datafusion Length of output: 5235 🏁 Script executed: # Look at the full pow_decimal_float_fallback function and surrounding code to understand the context better
cat -n ./datafusion/functions/src/math/power.rs | sed -n '140,230p'Repository: martin-augment/datafusion Length of output: 3541 🏁 Script executed: # Check what types are passed to pow_decimal_float_fallback by looking at where it's called
rg "pow_decimal_float" --type=rust -B 5 -A 2 datafusion/functions/src/math/power.rs | head -100Repository: martin-augment/datafusion Length of output: 3423 🏁 Script executed: # Check i256 type and how it's handled in Arrow
rg "i256" --type=rust -B 2 -A 5 datafusion/functions/src/math/power.rs | head -50Repository: martin-augment/datafusion Length of output: 51 🏁 Script executed: # Search for i256 in the codebase to understand how it's used
rg "i256" --type=rust -B 2 -A 2 | head -80Repository: martin-augment/datafusion Length of output: 6872 🏁 Script executed: # Check how i256 is converted to/from other types
rg "from_i128|to_i128" --type=rust -B 2 -A 2 | grep -A 3 -B 3 "i256" | head -60Repository: martin-augment/datafusion Length of output: 5403 🏁 Script executed: # Let's check what debug output actually looks like for these types by examining test cases
cat -n ./datafusion/functions/src/math/power.rs | sed -n '240,300p'Repository: martin-augment/datafusion Length of output: 2272 🏁 Script executed: # Test what Debug output looks like for i128 and i32
cat > /tmp/test_debug.rs << 'EOF'
fn main() {
let vals: &[i128] = &[123, 456, -789, 0, i128::MAX];
for v in vals {
println!("Debug: {v:?}, Parsed: {}", format!("{v:?}").parse::<f64>().unwrap_or(f64::NAN));
}
}
EOF
rustc /tmp/test_debug.rs -o /tmp/test_debug 2>/dev/null && /tmp/test_debug || echo "Rust not available"Repository: martin-augment/datafusion Length of output: 87 🏁 Script executed: # Look at the complete decimal_from_i128 function and understand the flow
cat -n ./datafusion/functions/src/math/power.rs | sed -n '195,260p'Repository: martin-augment/datafusion Length of output: 2415 🏁 Script executed: # Final check: Look at what types are supported and how i256 is used
cat -n ./datafusion/functions/src/math/power.rs | sed -n '1,50p'Repository: martin-augment/datafusion Length of output: 2406 Critical: Overflow check uses hardcoded i128::MAX for all decimal types. Line 196 checks For example:
The overflow check should be type-aware. The downstream Consider removing the premature overflow check at line 196 and relying on the type-specific overflow handling in
Owner
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. value:useful; category:bug; feedback:The CodeRabbit AI reviewer is correct! The implementation supports values up to i128::MAX and this makes Decimal256 as powerful as Decimal128. This should be either improved or documented as a known limitation. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Bug: Decimal256 fallback fails due to Debug format parsingThe
Owner
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. value:annoying; category:bug; feedback:The Bugbot AI reviewer is not correct! arrow-rs project provides the arrow-buffer crate that defines the i256 type. It's Debug and Display implementations are the same! And it also provides FromStr trait implementation that is used by str::parse::() - https://github.com/apache/arrow-rs/blob/6b290d192e2372f4bcbf7da91b4bd85dac43e4d5/arrow-buffer/src/bigint/mod.rs#L67-L79. There are unit tests confirming this - https://github.com/apache/arrow-rs/blob/6b290d192e2372f4bcbf7da91b4bd85dac43e4d5/arrow-buffer/src/bigint/mod.rs#L1332-L1381 |
||
|
|
||
| let result_f64 = base_f64.powf(exp); | ||
|
|
||
| if !result_f64.is_finite() { | ||
| return Err(ArrowError::ArithmeticOverflow(format!( | ||
| "Result of {base_f64}^{exp} is not finite" | ||
| ))); | ||
| } | ||
| if exp < 0f64 || exp >= u32::MAX as f64 { | ||
|
|
||
| let result_scaled = result_f64 * scale_factor; | ||
| let result_rounded = result_scaled.round(); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The fallback rescales with 🤖 Was this useful? React with 👍 or 👎
Owner
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. value:good-to-have; category:bug; feedback:The Augment AI reviewer is correct! The rounding loses precision and it should not be applied. |
||
|
|
||
| if result_rounded.abs() > i128::MAX as f64 { | ||
| return Err(ArrowError::ArithmeticOverflow(format!( | ||
| "Unsupported exp value: {exp}" | ||
| "Result {result_rounded} is too large for the target decimal type" | ||
| ))); | ||
| } | ||
|
Comment on lines
+196
to
200
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Major: Overflow check doesn't account for target decimal type size. The overflow check compares against
This means:
Apply this pattern to check against the actual type bounds: - if result_rounded.abs() > i128::MAX as f64 {
+ // Check against the actual bounds of type T
+ // This requires knowing T's range; consider adding a trait method
+ // or using type-specific validation
+ let result_i128 = result_rounded as i128;
+
+ // Try the conversion and let decimal_from_i128 handle overflow
+ decimal_from_i128::<T>(result_i128)Alternatively, rely on Verify the native type sizes and bounds: #!/bin/bash
# Find the native type definitions for decimal types
ast-grep --pattern 'type Native = $_;' | rg -C2 'Decimal(32|64|128|256)'
Owner
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. value:useful; category:bug; feedback:The CodeRabbit AI reviewer is correct! The implementation supports values up to i128::MAX and this makes Decimal256 as powerful as Decimal128. This should be either improved or documented as a known limitation. |
||
| pow_decimal_int(base, scale, exp as i64) | ||
|
|
||
| decimal_from_i128::<T>(result_rounded as i128) | ||
| } | ||
|
|
||
| fn decimal_from_i128<T>(value: i128) -> Result<T, ArrowError> | ||
| where | ||
| T: From<i32> + ArrowNativeTypeOp, | ||
| { | ||
| if value == 0 { | ||
| return Ok(T::from(0)); | ||
| } | ||
|
|
||
| if value >= i32::MIN as i128 && value <= i32::MAX as i128 { | ||
| return Ok(T::from(value as i32)); | ||
| } | ||
|
|
||
| let is_negative = value < 0; | ||
| let abs_value = value.unsigned_abs(); | ||
|
|
||
| let billion = 1_000_000_000u128; | ||
| let mut result = T::from(0); | ||
| let mut multiplier = T::from(1); | ||
| let billion_t = T::from(1_000_000_000); | ||
|
|
||
| let mut remaining = abs_value; | ||
| while remaining > 0 { | ||
| let chunk = (remaining % billion) as i32; | ||
| remaining /= billion; | ||
|
|
||
| let chunk_value = T::from(chunk).mul_checked(multiplier).map_err(|_| { | ||
| ArrowError::ArithmeticOverflow(format!( | ||
| "Overflow while converting {value} to decimal type" | ||
| )) | ||
| })?; | ||
|
|
||
| result = result.add_checked(chunk_value).map_err(|_| { | ||
| ArrowError::ArithmeticOverflow(format!( | ||
| "Overflow while converting {value} to decimal type" | ||
| )) | ||
| })?; | ||
|
|
||
| if remaining > 0 { | ||
| multiplier = multiplier.mul_checked(billion_t).map_err(|_| { | ||
| ArrowError::ArithmeticOverflow(format!( | ||
| "Overflow while converting {value} to decimal type" | ||
| )) | ||
| })?; | ||
| } | ||
| } | ||
|
|
||
| if is_negative { | ||
| result = T::from(0).sub_checked(result).map_err(|_| { | ||
| ArrowError::ArithmeticOverflow(format!( | ||
| "Overflow while negating {value} in decimal type" | ||
| )) | ||
| })?; | ||
| } | ||
|
|
||
| Ok(result) | ||
| } | ||
|
|
||
| impl ScalarUDFImpl for PowerFunc { | ||
|
|
@@ -392,4 +484,38 @@ mod tests { | |
| "Not yet implemented: Negative scale is not yet supported value: -1" | ||
| ); | ||
| } | ||
|
|
||
| #[test] | ||
| fn test_pow_decimal_float_fallback() { | ||
| // Test negative exponent: 4^(-1) = 0.25 | ||
| // 4 with scale 2 = 400, result should be 25 (0.25 with scale 2) | ||
| let result: i128 = pow_decimal_float(400i128, 2, -1.0).unwrap(); | ||
| assert_eq!(result, 25); | ||
|
|
||
| // Test non-integer exponent: 4^0.5 = 2 | ||
| // 4 with scale 2 = 400, result should be 200 (2.0 with scale 2) | ||
| let result: i128 = pow_decimal_float(400i128, 2, 0.5).unwrap(); | ||
| assert_eq!(result, 200); | ||
|
|
||
| // Test 8^(1/3) = 2 (cube root) | ||
| // 8 with scale 1 = 80, result should be 20 (2.0 with scale 1) | ||
| let result: i128 = pow_decimal_float(80i128, 1, 1.0 / 3.0).unwrap(); | ||
| assert_eq!(result, 20); | ||
|
|
||
| // Test negative base with integer exponent still works | ||
| // (-2)^3 = -8 | ||
| // -2 with scale 1 = -20, result should be -80 (-8.0 with scale 1) | ||
| let result: i128 = pow_decimal_float(-20i128, 1, 3.0).unwrap(); | ||
| assert_eq!(result, -80); | ||
|
|
||
| // Test positive integer exponent goes through fast path | ||
| // 2.5^4 = 39.0625 | ||
| // 25 with scale 1, result should be 390 (39.0 with scale 1) - truncated | ||
| let result: i128 = pow_decimal_float(25i128, 1, 4.0).unwrap(); | ||
| assert_eq!(result, 390); // Uses integer path | ||
|
|
||
| // Test non-finite exponent returns error | ||
| assert!(pow_decimal_float(100i128, 2, f64::NAN).is_err()); | ||
| assert!(pow_decimal_float(100i128, 2, f64::INFINITY).is_err()); | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
pow_decimal_intstill treats negativescaleas not supported, butpow_decimal_float_fallbackwill currently proceed with negativescale(since it usespowi(scale as i32)). That can makepowersucceed for some negative-scale decimals where other exponent paths still error, so it’s worth confirming the intended consistency.🤖 Was this useful? React with 👍 or 👎
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
value:useful; category:bug; feedback:The Augment AI reviewer is correct! The Pull Request description and new unit/SLT tests talk only about negative exponent support but not about negative scale. A check for negative scale should be either added or new unit tests should be added.