From 2c2df0365b791fd861630d8cbbd6d7998e564f89 Mon Sep 17 00:00:00 2001 From: Adriano Meligrana <68152031+ameligrana@users.noreply.github.com> Date: Sat, 18 Apr 2026 21:17:21 +0200 Subject: [PATCH 1/3] Bound better the loop size for the estimator to increase the global shift --- src/WeightVectors.jl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/WeightVectors.jl b/src/WeightVectors.jl index c8b470a8..ff1be543 100644 --- a/src/WeightVectors.jl +++ b/src/WeightVectors.jl @@ -484,9 +484,10 @@ function set_global_shift_increase!(m::Memory{UInt64}, m5) offset = 2m2+2093+2 checkbounds(m, offset-65*2:offset-2) - # TODO for perf, we can get away with shaving 1 to 10 off of these operations. # This can underflow from significand sums into weights, but that underflow is safe because it can only happen if all the latter weights are zero. Be careful about this when re-arranging the memory layout! - Base.Cartesian.@nexprs 65 i -> (@inbounds x += m[offset - 2i] >> (i - 2)) + for i in 1:(Base.top_set_bit(m[4])+1) + @inbounds x += m[offset - 2i] >> (i - 2) + end # x is computed by rounding down at a certain level and then summing (and adding 1) # m[5] will be computed by rounding up at a more precise level and then summing From 66d58ba64e809327f14db2e7d4be095811554846 Mon Sep 17 00:00:00 2001 From: Adriano Meligrana <68152031+ameligrana@users.noreply.github.com> Date: Sat, 18 Apr 2026 21:30:08 +0200 Subject: [PATCH 2/3] Update WeightVectors.jl --- src/WeightVectors.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/WeightVectors.jl b/src/WeightVectors.jl index ff1be543..19782ef0 100644 --- a/src/WeightVectors.jl +++ b/src/WeightVectors.jl @@ -485,7 +485,7 @@ function set_global_shift_increase!(m::Memory{UInt64}, m5) checkbounds(m, offset-65*2:offset-2) # This can underflow from significand sums into weights, but that underflow is safe because it can only happen if all the latter weights are zero. Be careful about this when re-arranging the memory layout! - for i in 1:(Base.top_set_bit(m[4])+1) + for i in (Base.top_set_bit(m[4])+1):-1:1 @inbounds x += m[offset - 2i] >> (i - 2) end From f5370142c5c9e189a59f535f5b5c7e529fcae5a5 Mon Sep 17 00:00:00 2001 From: Adriano Meligrana <68152031+ameligrana@users.noreply.github.com> Date: Sat, 18 Apr 2026 21:40:52 +0200 Subject: [PATCH 3/3] Update WeightVectors.jl --- src/WeightVectors.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/WeightVectors.jl b/src/WeightVectors.jl index 19782ef0..ff1be543 100644 --- a/src/WeightVectors.jl +++ b/src/WeightVectors.jl @@ -485,7 +485,7 @@ function set_global_shift_increase!(m::Memory{UInt64}, m5) checkbounds(m, offset-65*2:offset-2) # This can underflow from significand sums into weights, but that underflow is safe because it can only happen if all the latter weights are zero. Be careful about this when re-arranging the memory layout! - for i in (Base.top_set_bit(m[4])+1):-1:1 + for i in 1:(Base.top_set_bit(m[4])+1) @inbounds x += m[offset - 2i] >> (i - 2) end