Skip to content

Commit 61ea76f

Browse files
committed
pp_{add,subtract,multiply}: use __builtin_{add,sub,mul}_overflow if available
This will hopefully make the code faster and smaller, and allow more cases to be handled as "simple common cases". Note that this change uses the HAS_BUILTIN_{ADD,SUB,MUL}_OVERFLOW macros, which were already defined in config.h but do not appear to have been used by existing code. t/op/64bitint.t: Add tests to exercise "simple common cases". Note that these tests should pass even before this change. Thanks to @tonycoz for advice on making this patch work better on LLP64 platforms, especially Win32 x86-64.
1 parent 052a79a commit 61ea76f

File tree

5 files changed

+229
-85
lines changed

5 files changed

+229
-85
lines changed

inline.h

Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3413,6 +3413,192 @@ S_lossless_NV_to_IV(const NV nv, IV *ivp)
34133413
return FALSE;
34143414
}
34153415

3416+
/*
3417+
* S_iv_{add,sub,mul}_may_overflow(a, b, p) virtually compute "a <op> b"
3418+
* (where <op> is +, -, or *) in infinite precision, and, if the result
3419+
* is not (or may not be) representable as an IV, return true.
3420+
* Otherwise (no overflow), store the result to *p and return false.
3421+
* These functions allow false positives (so their names contain "may")
3422+
* to speed up simple common cases.
3423+
*/
3424+
3425+
/* Define IV_*_OVERFLOW_IS_EXPENSIVE below to a nonzero value
3426+
* if strict overflow checks are too expensive
3427+
* (for example, for CPUs that have no hardware overflow detection flags).
3428+
* If these macros have a nonzero value, or overflow-checking compiler intrinsics
3429+
* are not available, good old heuristics (with some false positives)
3430+
* will be used. */
3431+
# ifndef IV_ADD_SUB_OVERFLOW_IS_EXPENSIVE
3432+
# define IV_ADD_SUB_OVERFLOW_IS_EXPENSIVE 0
3433+
# endif
3434+
# ifndef IV_MUL_OVERFLOW_IS_EXPENSIVE
3435+
/* Strict overflow check for IV multiplication is generally expensive
3436+
* when IV is a multi-word integer.
3437+
* We assume that PTRSIZE matches the platform word size; LONGSIZE might not
3438+
* match for LLP64 platforms such as Win32 x86-64. */
3439+
# define IV_MUL_OVERFLOW_IS_EXPENSIVE (IVSIZE > PTRSIZE)
3440+
# endif
3441+
3442+
# if defined(I_STDCKDINT) && !IV_ADD_SUB_OVERFLOW_IS_EXPENSIVE
3443+
/* XXX Preparation for upcoming C23, but I_STDCKDINT is not yet tested */
3444+
# define S_iv_add_may_overflow(il, ir, result) ckd_add(result, il, ir)
3445+
# elif defined(HAS_BUILTIN_ADD_OVERFLOW) && !IV_ADD_SUB_OVERFLOW_IS_EXPENSIVE
3446+
# define S_iv_add_may_overflow __builtin_add_overflow
3447+
# else
3448+
/* Heuristic fallback for IV addition, compiled only when the preprocessor
 * conditions above select neither ckd_add nor __builtin_add_overflow.
 *
 * il, ir  - the operands
 * result  - receives il + ir, but only when false is returned
 *
 * Returns false when the sum is known to fit in an IV (and stores it).
 * Returns true when the addition may overflow; *result is then left
 * unwritten. Only the top two bits of each operand are inspected, so
 * true can be a false positive. */
PERL_STATIC_INLINE bool
3449+
S_iv_add_may_overflow (IV il, IV ir, IV *const result)
3450+
{
3451+
/* topl and topr hold only the top 2 bits of il and ir respectively */
3452+
PERL_UINT_FAST8_T const topl = ((UV)il) >> (UVSIZE * 8 - 2);
3453+
PERL_UINT_FAST8_T const topr = ((UV)ir) >> (UVSIZE * 8 - 2);
3454+
3455+
/* if both are in a range that can't under/overflow, do a simple integer
3456+
* add: if the top of both numbers are 00 or 11, then it's safe.
* Adding 1 maps 00->001, 01->010, 10->011, 11->100, so bit 1 (value 2)
* is set exactly for the unsafe patterns 01 and 10. */
3457+
if (!( ((topl+1) | (topr+1)) & 2)) {
3458+
*result = il + ir;
3459+
return false;
3460+
}
3461+
return true; /* addition may overflow */
3462+
}
3463+
# endif
3464+
3465+
/*
3466+
* S_uv_{add,sub,mul}_overflow(a, b, p) are similar, but the results are UV
3467+
* and they should perform strict overflow check (no false positives).
3468+
*/
3469+
3470+
# if defined(I_STDCKDINT)
3471+
/* XXX Preparation for upcoming C23, but I_STDCKDINT is not yet tested */
3472+
# define S_uv_add_overflow(auv, buv, result) ckd_add(result, auv, buv)
3473+
# elif defined(HAS_BUILTIN_ADD_OVERFLOW)
3474+
# define S_uv_add_overflow __builtin_add_overflow
3475+
# else
3476+
/* Strict overflow check for UV addition, compiled only when the
 * preprocessor conditions above select neither ckd_add nor
 * __builtin_add_overflow.
 *
 * auv, buv - the operands
 * result   - always receives auv + buv (the wrapped value on overflow)
 *
 * Returns true if the addition wrapped around, false otherwise. */
PERL_STATIC_INLINE bool
3477+
S_uv_add_overflow (UV auv, UV buv, UV *const result)
3478+
{
3479+
/* (auv + buv) < auv means that the addition wrapped around,
3480+
i.e. overflowed. Note that unsigned integer overflow is well-defined
3481+
in standard C to wrap around, in contrast to signed integer overflow
3482+
whose behaviour is undefined. */
3483+
return (*result = auv + buv) < auv;
3484+
}
3485+
# endif
3486+
3487+
# if defined(I_STDCKDINT) && !IV_ADD_SUB_OVERFLOW_IS_EXPENSIVE
3488+
/* XXX Preparation for upcoming C23, but I_STDCKDINT is not yet tested */
3489+
# define S_iv_sub_may_overflow(il, ir, result) ckd_sub(result, il, ir)
3490+
# elif defined(HAS_BUILTIN_SUB_OVERFLOW) && !IV_ADD_SUB_OVERFLOW_IS_EXPENSIVE
3491+
# define S_iv_sub_may_overflow __builtin_sub_overflow
3492+
# else
3493+
/* Heuristic fallback for IV subtraction, compiled only when the
 * preprocessor conditions above select neither ckd_sub nor
 * __builtin_sub_overflow.
 *
 * il, ir  - the operands (computes il - ir)
 * result  - receives il - ir, but only when false is returned
 *
 * Returns false when the difference is known to fit in an IV (and stores
 * it). Returns true when the subtraction may overflow; *result is then
 * left unwritten. Only the top two bits of each operand are inspected,
 * so true can be a false positive. */
PERL_STATIC_INLINE bool
3494+
S_iv_sub_may_overflow (IV il, IV ir, IV *const result)
3495+
{
3496+
/* topl and topr hold only the top 2 bits of il and ir respectively */
PERL_UINT_FAST8_T const topl = ((UV)il) >> (UVSIZE * 8 - 2);
3497+
PERL_UINT_FAST8_T const topr = ((UV)ir) >> (UVSIZE * 8 - 2);
3498+
3499+
/* if both are in a range that can't under/overflow, do a simple integer
3500+
* subtract: if the top of both numbers are 00 or 11, then it's safe.
* Adding 1 maps 00->001, 01->010, 10->011, 11->100, so bit 1 (value 2)
* is set exactly for the unsafe patterns 01 and 10. */
3501+
if (!( ((topl+1) | (topr+1)) & 2)) {
3502+
*result = il - ir;
3503+
return false;
3504+
}
3505+
return true; /* subtraction may overflow */
3506+
}
3507+
# endif
3508+
3509+
# if defined(I_STDCKDINT)
3510+
/* XXX Preparation for upcoming C23, but I_STDCKDINT is not yet tested */
3511+
# define S_uv_sub_overflow(auv, buv, result) ckd_sub(result, auv, buv)
3512+
# elif defined(HAS_BUILTIN_SUB_OVERFLOW)
3513+
# define S_uv_sub_overflow __builtin_sub_overflow
3514+
# else
3515+
/* Strict overflow check for UV subtraction, compiled only when the
 * preprocessor conditions above select neither ckd_sub nor
 * __builtin_sub_overflow.
 *
 * auv, buv - the operands (computes auv - buv)
 * result   - always receives auv - buv (the wrapped value on overflow)
 *
 * Returns true if the subtraction wrapped around, false otherwise. */
PERL_STATIC_INLINE bool
3516+
S_uv_sub_overflow (UV auv, UV buv, UV *const result)
3517+
{
3518+
/* (auv - buv) > auv means that the unsigned subtraction wrapped around,
   which happens exactly when buv is larger than auv. */
return (*result = auv - buv) > auv;
3519+
}
3520+
# endif
3521+
3522+
# if defined(I_STDCKDINT) && !IV_MUL_OVERFLOW_IS_EXPENSIVE
3523+
/* XXX Preparation for upcoming C23, but I_STDCKDINT is not yet tested */
3524+
# define S_iv_mul_may_overflow(il, ir, result) ckd_mul(result, il, ir)
3525+
# elif defined(HAS_BUILTIN_MUL_OVERFLOW) && !IV_MUL_OVERFLOW_IS_EXPENSIVE
3526+
# define S_iv_mul_may_overflow __builtin_mul_overflow
3527+
# else
3528+
/* Heuristic fallback for IV multiplication, compiled only when the
 * preprocessor conditions above select neither ckd_mul nor
 * __builtin_mul_overflow (including when IV_MUL_OVERFLOW_IS_EXPENSIVE
 * is nonzero).
 *
 * il, ir  - the operands
 * result  - receives il * ir, but only when false is returned
 *
 * Returns false when the product is known to fit in an IV (and stores
 * it). Returns true when the multiplication may overflow; *result is
 * then left unwritten. Only the upper bits of each operand are
 * inspected, so true can be a false positive. */
PERL_STATIC_INLINE bool
3529+
S_iv_mul_may_overflow (IV il, IV ir, IV *const result)
3530+
{
3531+
/* topl and topr keep the top (UVSIZE * 4 + 1) bits of each operand */
UV const topl = ((UV)il) >> (UVSIZE * 4 - 1);
3532+
UV const topr = ((UV)ir) >> (UVSIZE * 4 - 1);
3533+
3534+
/* if both are in a range that can't under/overflow, do a simple integer
3535+
* multiply: if the top halves(*) of both numbers are 00...00 or 11...11,
3536+
* then it's safe.
3537+
* (*) for 32-bits, the "top half" is the top 17 bits,
3538+
* for 64-bits, it's 33 bits.
* Adding 1 turns 00...00 into 1 and 11...11 into a single bit just above
* the mask below, so only those two patterns leave the masked bits clear. */
3539+
if (!(
3540+
((topl+1) | (topr+1))
3541+
& ( (((UV)1) << (UVSIZE * 4 + 1)) - 2) /* 11..110 */
3542+
)) {
3543+
*result = il * ir;
3544+
return false;
3545+
}
3546+
return true; /* multiplication may overflow */
3547+
}
3548+
# endif
3549+
3550+
# if defined(I_STDCKDINT)
3551+
/* XXX Preparation for upcoming C23, but I_STDCKDINT is not yet tested */
3552+
# define S_uv_mul_overflow(auv, buv, result) ckd_mul(result, auv, buv)
3553+
# elif defined(HAS_BUILTIN_MUL_OVERFLOW)
3554+
# define S_uv_mul_overflow __builtin_mul_overflow
3555+
# else
3556+
/* Strict overflow check for UV multiplication, compiled only when the
 * preprocessor conditions above select neither ckd_mul nor
 * __builtin_mul_overflow.
 *
 * auv, buv - the operands
 * result   - receives the product via the final S_uv_add_overflow call
 *
 * Returns true if auv * buv does not fit in a UV, false otherwise.
 * Each operand is split into a high and a low part at bit
 * 4 * sizeof (UV), and the product is assembled from partial products.
 * On the two early "return true" paths *result is not written. */
PERL_STATIC_INLINE bool
3557+
S_uv_mul_overflow (UV auv, UV buv, UV *const result)
3558+
{
3559+
/* topmask selects the bits at and above position 4 * sizeof (UV);
   botmask selects the bits below it. */
const UV topmask = (~ (UV)0) << (4 * sizeof (UV));
3560+
const UV botmask = ~topmask;
3561+
3562+
# if UVSIZE > LONGSIZE && UVSIZE <= 2 * LONGSIZE
3563+
/* If UV is a double-word integer, declare these variables as single-word
3564+
integers to help the compiler avoid double-word multiplication. */
3565+
unsigned long alow, ahigh, blow, bhigh;
3566+
# else
3567+
UV alow, ahigh, blow, bhigh;
3568+
# endif
3569+
3570+
/* If this does sign extension on unsigned it's time for plan B */
3571+
ahigh = auv >> (4 * sizeof (UV));
3572+
alow = auv & botmask;
3573+
bhigh = buv >> (4 * sizeof (UV));
3574+
blow = buv & botmask;
3575+
3576+
if (ahigh && bhigh)
3577+
/* eg 32 bit is at least 0x10000 * 0x10000 == 0x100000000
3578+
which is overflow. */
3579+
return true;
3580+
3581+
UV product_middle = 0;
3582+
if (ahigh || bhigh) {
3583+
/* One operand is large, 1 small */
3584+
/* Either ahigh or bhigh is zero here, so the addition below
3585+
can't overflow. */
3586+
product_middle = (UV)ahigh * blow + (UV)alow * bhigh;
3587+
if (product_middle & topmask)
3588+
return true;
3589+
/* OK, product_middle won't lose bits when we shift it. */
3590+
product_middle <<= 4 * sizeof (UV);
3591+
}
3592+
/* else: eg 32 bit is at most 0xFFFF * 0xFFFF == 0xFFFE0001
3593+
so the unsigned multiply cannot overflow. */
3594+
3595+
/* (UV) cast below is necessary to force the multiplication to produce
3596+
a UV result, as alow and blow might be narrower than UV */
3597+
UV product_low = (UV)alow * blow;
/* The only remaining way to overflow is the final addition; its check
   also stores the sum into *result. */
3598+
return S_uv_add_overflow(product_middle, product_low, result);
3599+
}
3600+
# endif
3601+
34163602
#endif
34173603

34183604
/* ------------------ pp.c, regcomp.c, toke.c, universal.c ------------ */

pod/perldelta.pod

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,12 @@ There may well be none in a stable release.
110110

111111
=item *
112112

113+
Simple (non-overflowing) addition (C<+>), subtraction (C<->) and
114+
multiplication (C<*>) of IVs are slightly sped up, as long as
115+
sufficient underlying C compiler support is available.
116+
117+
=item *
118+
113119
XXX
114120

115121
=back

pp.c

Lines changed: 8 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -1336,23 +1336,12 @@ PP(pp_multiply)
13361336
U32 flags = (svl->sv_flags & svr->sv_flags);
13371337
if (flags & SVf_IOK) {
13381338
/* both args are simple IVs */
1339-
UV topl, topr;
1339+
IV result;
13401340
il = SvIVX(svl);
13411341
ir = SvIVX(svr);
13421342
do_iv:
1343-
topl = ((UV)il) >> (UVSIZE * 4 - 1);
1344-
topr = ((UV)ir) >> (UVSIZE * 4 - 1);
1345-
1346-
/* if both are in a range that can't under/overflow, do a
1347-
* simple integer multiply: if the top halves(*) of both numbers
1348-
* are 00...00 or 11...11, then it's safe.
1349-
* (*) for 32-bits, the "top half" is the top 17 bits,
1350-
* for 64-bits, its 33 bits */
1351-
if (!(
1352-
((topl+1) | (topr+1))
1353-
& ( (((UV)1) << (UVSIZE * 4 + 1)) - 2) /* 11..110 */
1354-
)) {
1355-
TARGi(il * ir, 0); /* args not GMG, so can't be tainted */
1343+
if (!S_iv_mul_may_overflow(il, ir, &result)) {
1344+
TARGi(result, 0); /* args not GMG, so can't be tainted */
13561345
goto ret;
13571346
}
13581347
goto generic;
@@ -1388,12 +1377,9 @@ PP(pp_multiply)
13881377
if (SvIV_please_nomg(svl)) {
13891378
bool auvok = SvUOK(svl);
13901379
bool buvok = SvUOK(svr);
1391-
const UV topmask = (~ (UV)0) << (4 * sizeof (UV));
1392-
const UV botmask = ~((~ (UV)0) << (4 * sizeof (UV)));
13931380
UV alow;
1394-
UV ahigh;
13951381
UV blow;
1396-
UV bhigh;
1382+
UV product;
13971383

13981384
if (auvok) {
13991385
alow = SvUVX(svl);
@@ -1420,19 +1406,7 @@ PP(pp_multiply)
14201406
}
14211407
}
14221408

1423-
/* If this does sign extension on unsigned it's time for plan B */
1424-
ahigh = alow >> (4 * sizeof (UV));
1425-
alow &= botmask;
1426-
bhigh = blow >> (4 * sizeof (UV));
1427-
blow &= botmask;
1428-
if (ahigh && bhigh) {
1429-
NOOP;
1430-
/* eg 32 bit is at least 0x10000 * 0x10000 == 0x100000000
1431-
which is overflow. Drop to NVs below. */
1432-
} else if (!ahigh && !bhigh) {
1433-
/* eg 32 bit is at most 0xFFFF * 0xFFFF == 0xFFFE0001
1434-
so the unsigned multiply cannot overflow. */
1435-
const UV product = alow * blow;
1409+
if (!S_uv_mul_overflow(alow, blow, &product)) {
14361410
if (auvok == buvok) {
14371411
/* -ve * -ve or +ve * +ve gives a +ve result. */
14381412
TARGu(product, 1);
@@ -1442,42 +1416,6 @@ PP(pp_multiply)
14421416
TARGi(NEGATE_2IV(product), 1);
14431417
goto ret;
14441418
} /* else drop to NVs below. */
1445-
} else {
1446-
/* One operand is large, 1 small */
1447-
UV product_middle;
1448-
if (bhigh) {
1449-
/* swap the operands */
1450-
ahigh = bhigh;
1451-
bhigh = blow; /* bhigh now the temp var for the swap */
1452-
blow = alow;
1453-
alow = bhigh;
1454-
}
1455-
/* now, ((ahigh * blow) << half_UV_len) + (alow * blow)
1456-
multiplies can't overflow. shift can, add can, -ve can. */
1457-
product_middle = ahigh * blow;
1458-
if (!(product_middle & topmask)) {
1459-
/* OK, (ahigh * blow) won't lose bits when we shift it. */
1460-
UV product_low;
1461-
product_middle <<= (4 * sizeof (UV));
1462-
product_low = alow * blow;
1463-
1464-
/* as for pp_add, UV + something mustn't get smaller.
1465-
IIRC ANSI mandates this wrapping *behaviour* for
1466-
unsigned whatever the actual representation*/
1467-
product_low += product_middle;
1468-
if (product_low >= product_middle) {
1469-
/* didn't overflow */
1470-
if (auvok == buvok) {
1471-
/* -ve * -ve or +ve * +ve gives a +ve result. */
1472-
TARGu(product_low, 1);
1473-
goto ret;
1474-
} else if (product_low <= ABS_IV_MIN) {
1475-
/* -ve result, which could overflow an IV */
1476-
TARGi(NEGATE_2IV(product_low), 1);
1477-
goto ret;
1478-
} /* else drop to NVs below. */
1479-
}
1480-
} /* product_middle too large */
14811419
} /* !S_uv_mul_overflow */
14821420
} /* SvIOK(svl) */
14831421
} /* SvIOK(svr) */
@@ -1929,18 +1867,12 @@ PP(pp_subtract)
19291867
U32 flags = (svl->sv_flags & svr->sv_flags);
19301868
if (flags & SVf_IOK) {
19311869
/* both args are simple IVs */
1932-
UV topl, topr;
1870+
IV result;
19331871
il = SvIVX(svl);
19341872
ir = SvIVX(svr);
19351873
do_iv:
1936-
topl = ((UV)il) >> (UVSIZE * 8 - 2);
1937-
topr = ((UV)ir) >> (UVSIZE * 8 - 2);
1938-
1939-
/* if both are in a range that can't under/overflow, do a
1940-
* simple integer subtract: if the top of both numbers
1941-
* are 00 or 11, then it's safe */
1942-
if (!( ((topl+1) | (topr+1)) & 2)) {
1943-
TARGi(il - ir, 0); /* args not GMG, so can't be tainted */
1874+
if (!S_iv_sub_may_overflow(il, ir, &result)) {
1875+
TARGi(result, 0); /* args not GMG, so can't be tainted */
19441876
goto ret;
19451877
}
19461878
goto generic;

pp_hot.c

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1827,18 +1827,12 @@ PP(pp_add)
18271827
U32 flags = (svl->sv_flags & svr->sv_flags);
18281828
if (flags & SVf_IOK) {
18291829
/* both args are simple IVs */
1830-
UV topl, topr;
1830+
IV result;
18311831
il = SvIVX(svl);
18321832
ir = SvIVX(svr);
18331833
do_iv:
1834-
topl = ((UV)il) >> (UVSIZE * 8 - 2);
1835-
topr = ((UV)ir) >> (UVSIZE * 8 - 2);
1836-
1837-
/* if both are in a range that can't under/overflow, do a
1838-
* simple integer add: if the top of both numbers
1839-
* are 00 or 11, then it's safe */
1840-
if (!( ((topl+1) | (topr+1)) & 2)) {
1841-
TARGi(il + ir, 0); /* args not GMG, so can't be tainted */
1834+
if (!S_iv_add_may_overflow(il, ir, &result)) {
1835+
TARGi(result, 0); /* args not GMG, so can't be tainted */
18421836
goto ret;
18431837
}
18441838
goto generic;

t/op/64bitint.t

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -469,4 +469,30 @@ cmp_ok 0x3ffffffffffffffe % -0xc000000000000000, '==', -0x8000000000000002, 'mo
469469
cmp_ok 0x3fffffffffffffff % -0xc000000000000000, '==', -0x8000000000000001, 'modulo is (IV_MIN-1)';
470470
cmp_ok 0x4000000000000000 % -0xc000000000000000, '==', -0x8000000000000000, 'modulo is IV_MIN';
471471

472+
# Arithmetic close to IV overflow
473+
474+
# These used to be handled by the generic (slower) code, but are now handled
475+
# in the fast path (as "simple common cases"). Either way, these tests should pass.
476+
$q = 9223372036854775800;
477+
cmp_ok 5 + $q, '==', 9223372036854775805, "5 + $q";
478+
cmp_ok $q - -5, '==', 9223372036854775805, "$q - -5";
479+
$q = 1111111111111111111;
480+
cmp_ok $q * 5, '==', 5555555555555555555, "$q * 5";
481+
482+
# IV <op> IV -> UV/NV promotion
483+
484+
$q = 7777777777777777777;
485+
$r = 2222222222222222223;
486+
# Note 10000000000000000000 can be represented accurately in both
487+
# IEEE double (binary64; 0x1.158e460913dp+63) and decimal format (1e+19)
488+
cmp_ok $q + $r, '==', 10000000000000000000, 'IV + IV promotes to UV';
489+
cmp_ok -$q + -$r, '==', -10000000000000000000, 'IV + IV promotes to NV';
490+
cmp_ok $q - -$r, '==', 10000000000000000000, 'IV - IV promotes to UV';
491+
cmp_ok -$q - $r, '==', -10000000000000000000, 'IV - IV promotes to NV';
492+
$q = 3000000000;
493+
$r = 4000000000;
494+
cmp_ok $q * $r, '==', 12000000000000000000, 'IV * IV promotes to UV';
495+
cmp_ok $q * -$r, '==', -12000000000000000000, 'IV * IV promotes to UV then NV';
496+
cmp_ok +($q * 2) * $r, '==', 24000000000000000000, 'IV * IV promotes to NV';
497+
472498
done_testing();

0 commit comments

Comments
 (0)