Skip to content

Commit bab712b

Browse files
committed
Optimized standard path calculation
1 parent 75f32b8 commit bab712b

File tree

3 files changed

+147
-42
lines changed

3 files changed

+147
-42
lines changed

ext/bcmath/libbcmath/src/div.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -429,3 +429,19 @@ bool bc_divide(bc_num numerator, bc_num divisor, bc_num *quot, size_t scale)
429429
*quot = bc_copy_num(BCG(_zero_));
430430
return true;
431431
}
432+
433+
void bc_divide_vector(
434+
BC_VECTOR *numerator_vectors, size_t numerator_arr_size,
435+
const BC_VECTOR *divisor_vectors, size_t divisor_arr_size, size_t divisor_size,
436+
BC_VECTOR *quot_vectors, size_t quot_arr_size
437+
) {
438+
ZEND_ASSERT(divisor_vectors[divisor_arr_size - 1] != 0);
439+
ZEND_ASSERT(quot_arr_size == numerator_arr_size - divisor_arr_size + 1);
440+
441+
/* Do the division */
442+
if (divisor_arr_size == 1) {
443+
bc_fast_div(numerator_vectors, numerator_arr_size, divisor_vectors[0], quot_vectors, quot_arr_size);
444+
} else {
445+
bc_standard_div(numerator_vectors, numerator_arr_size, divisor_vectors, divisor_arr_size, divisor_size, quot_vectors, quot_arr_size);
446+
}
447+
}

ext/bcmath/libbcmath/src/private.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,10 @@ bc_num _bc_do_sub (bc_num n1, bc_num n2);
8787
void bc_multiply_vector(
8888
const BC_VECTOR *n1_vector, size_t n1_arr_size, const BC_VECTOR *n2_vector, size_t n2_arr_size,
8989
BC_VECTOR *prod_vector, size_t prod_arr_size);
90+
void bc_divide_vector(
91+
BC_VECTOR *numerator_vectors, size_t numerator_arr_size,
92+
const BC_VECTOR *divisor_vectors, size_t divisor_arr_size, size_t divisor_size,
93+
BC_VECTOR *quot_vectors, size_t quot_arr_size);
9094
void _bc_rm_leading_zeros (bc_num num);
9195

9296
#endif

ext/bcmath/libbcmath/src/sqrt.c

Lines changed: 127 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
*************************************************************************/
3131

3232
#include "bcmath.h"
33+
#include "convert.h"
3334
#include <stdbool.h>
3435
#include <stddef.h>
3536
#include "private.h"
@@ -101,57 +102,142 @@ static inline void bc_fast_sqrt(bc_num *num, size_t scale, size_t num_calc_full_
101102
*num = ret;
102103
}
103104

104-
static inline void bc_standard_sqrt(bc_num *num, size_t scale, bcmath_compare_result num_cmp_one)
105+
static inline void bc_standard_sqrt(bc_num *num, size_t scale, size_t num_calc_full_len, size_t num_use_full_len, size_t leading_zeros)
105106
{
106-
bc_num guess;
107-
size_t cscale;
108-
/* Calculate the initial guess. */
109-
if (num_cmp_one == BCMATH_RIGHT_GREATER) {
110-
/* The number is between 0 and 1. Guess should start at 1. */
111-
guess = bc_copy_num(BCG(_one_));
112-
cscale = (*num)->n_scale;
107+
/* allocate memory */
108+
size_t n_arr_size = BC_ARR_SIZE_FROM_LEN(num_calc_full_len);
109+
110+
size_t guess_full_len = (num_calc_full_len + 1) / 2;
111+
/* Since add the old guess and the new guess together during the calculation,
112+
* there is a chance of overflow, so allocate an extra size. */
113+
size_t guess_arr_size = BC_ARR_SIZE_FROM_LEN(guess_full_len) + 1;
114+
115+
/* On 64-bit systems, the array size is calculated by dividing the number of digits by 8, so overflow does not occur.
116+
* (While an overflow could theoretically occur inside safe_emalloc, it does not need to be checked at this point.)
117+
* On 32-bit systems, however, there is a possibility that size_t overflows at this point. */
118+
#if SIZEOF_SIZE_T == 8
119+
size_t allocate_size = n_arr_size * 2 + guess_arr_size * 3;
120+
BC_VECTOR *buf = safe_emalloc(allocate_size, sizeof(BC_VECTOR), 0);
121+
#else
122+
/* By doubling the size and storing the remainder in offset, allocate the same amount of memory while preventing a size_t overflow.
123+
* (In the end, safe_emalloc will still produce an error, but this approach eliminates the need for custom error handling.) */
124+
size_t guess_allocate_size = guess_arr_size * 3;
125+
BC_VECTOR *buf = safe_emalloc(n_arr_size + guess_allocate_size / 2, sizeof(BC_VECTOR) * 2, guess_allocate_size % 2);
126+
#endif
127+
128+
BC_VECTOR *n_vector = buf;
129+
/* In division by successive approximation, the numerator is modified during the computation,
130+
* so it must be copied each time. */
131+
BC_VECTOR *n_vector_copy = n_vector + n_arr_size;
132+
BC_VECTOR *guess_vector = n_vector_copy + n_arr_size;
133+
BC_VECTOR *guess1_vector = guess_vector + guess_arr_size;
134+
BC_VECTOR *tmp_div_ret_vector = guess1_vector + guess_arr_size;
135+
136+
/* convert num to n_vector */
137+
const char *nend = (*num)->n_value + leading_zeros + num_use_full_len - 1;
138+
bc_convert_to_vector_with_zero_pad(n_vector, nend, num_use_full_len, num_calc_full_len - num_use_full_len);
139+
140+
/* Prepare guess_vector (Temporary implementation) */
141+
for (size_t i = 0; i < guess_arr_size - 2; i++) {
142+
guess_vector[i] = BC_VECTOR_BOUNDARY_NUM - 1;
143+
}
144+
if (guess_full_len % BC_VECTOR_SIZE == 0) {
145+
guess_vector[guess_arr_size - 2] = BC_VECTOR_BOUNDARY_NUM - 1;
113146
} else {
114-
/* The number is greater than 1. Guess should start at 10^(exp/2). */
115-
bc_init_num(&guess);
116-
bc_int2num(&guess, 10);
117-
118-
bc_int2num(&guess1, (*num)->n_len);
119-
bc_multiply_ex(guess1, point5, &guess1, 0);
120-
guess1->n_scale = 0;
121-
bc_raise_bc_exponent(guess, guess1, &guess, 0);
122-
bc_free_num (&guess1);
123-
cscale = 3;
147+
guess_vector[guess_arr_size - 2] = 0;
148+
for (size_t i = 0; i < guess_full_len % BC_VECTOR_SIZE; i++) {
149+
guess_vector[guess_arr_size - 2] *= BASE;
150+
guess_vector[guess_arr_size - 2] += 9;
151+
}
124152
}
153+
guess_vector[guess_arr_size - 1] = 0;
125154

126-
bc_num guess1 = NULL;
127-
bc_num point5 = bc_new_num (1, 1);
128-
point5->n_value[1] = 5;
129-
bc_num diff = NULL;
155+
size_t quot_size = n_arr_size - (guess_arr_size - 1) + 1;
130156

157+
BC_VECTOR two[1] = { 2 };
158+
159+
/**
160+
* Newton's algorithm. Iterative expression is `x_{n+1} = (x_n + a / x_n) / 2`
161+
* If break down the calculation into detailed steps, it looks like this:
162+
* 1. quot = a / x_n
163+
* 2. add = x_n + quot1
164+
* 3. x_{n+1} = add / 2
165+
* 4. repeat until the difference between the `x_n` and `x_{n+1}` is less than or equal to 1.
166+
*/
131167
bool done = false;
132-
while (!done) {
133-
bc_free_num (&guess1);
134-
guess1 = bc_copy_num(guess);
135-
bc_divide(*num, guess, &guess, cscale);
136-
bc_add_ex(guess, guess1, &guess, 0);
137-
bc_multiply_ex(guess, point5, &guess, cscale);
138-
bc_sub_ex(guess, guess1, &diff, cscale + 1);
139-
if (bc_is_near_zero(diff, cscale)) {
140-
if (cscale < scale + 1) {
141-
cscale = MIN (cscale * 3, scale + 1);
168+
do {
169+
/* Since the value changes during division by successive approximation, use a copied version of it. */
170+
memcpy(n_vector_copy, n_vector, n_arr_size * sizeof(BC_VECTOR));
171+
172+
/* 1. quot = a / x_n */
173+
bc_divide_vector(
174+
n_vector_copy, n_arr_size,
175+
guess_vector, guess_arr_size - 1, guess_full_len,
176+
tmp_div_ret_vector, quot_size
177+
);
178+
179+
BC_VECTOR *tmp_vptr = guess1_vector;
180+
guess1_vector = guess_vector;
181+
guess_vector = tmp_vptr;
182+
183+
/* 2. add = x_n + quot1 */
184+
int carry = 0;
185+
for (size_t i = 0; i < guess_arr_size - 1; i++) {
186+
guess_vector[i] = guess1_vector[i] + tmp_div_ret_vector[i] + carry;
187+
if (guess_vector[i] >= BC_VECTOR_BOUNDARY_NUM) {
188+
guess_vector[i] -= BC_VECTOR_BOUNDARY_NUM;
189+
carry = 1;
142190
} else {
143-
done = true;
191+
carry = 0;
144192
}
145193
}
194+
guess_vector[guess_arr_size - 1] = tmp_div_ret_vector[guess_arr_size - 1] + carry;
195+
196+
/* 3. x_{n+1} = add / 2 */
197+
bc_divide_vector(
198+
guess_vector, guess_arr_size,
199+
two, 1, 1,
200+
tmp_div_ret_vector, guess_arr_size
201+
);
202+
203+
memcpy(guess_vector, tmp_div_ret_vector, guess_arr_size * sizeof(BC_VECTOR));
204+
205+
/* 4. repeat until the difference between the `x_n` and `x_{n+1}` is less than or equal to 1. */
206+
size_t diff = guess_vector[0] > guess1_vector[0] ? guess_vector[0] - guess1_vector[0] : guess1_vector[0] - guess_vector[0];
207+
if (diff <= 1) {
208+
bool is_same = true;
209+
for (size_t i = 1; i < guess_arr_size - 1; i++) {
210+
if (guess_vector[i] != guess1_vector[i]) {
211+
is_same = false;
212+
break;
213+
}
214+
}
215+
done = is_same;
216+
}
217+
} while (!done);
218+
219+
size_t guess_len;
220+
size_t guess_leading_zeros = 0;
221+
if (leading_zeros > 0) {
222+
guess_len = 1; /* for int zero */
223+
guess_leading_zeros = (leading_zeros + 1) / 2;
224+
} else {
225+
guess_len = ((*num)->n_len + 1) / 2;
146226
}
227+
bc_num ret = bc_new_num_nonzeroed(guess_len, scale + 1);
228+
char *rptr = ret->n_value;
229+
char *rend = rptr + guess_len + scale + 1 - 1;
230+
231+
for (size_t i = 0; i < guess_leading_zeros; i++) {
232+
*rptr++ = 0;
233+
}
234+
bc_convert_vector_to_char(guess_vector, rptr, rend, guess_arr_size - 1);
235+
ret->n_scale = scale;
147236

148-
/* Assign the number and clean up. */
149-
bc_free_num (num);
150-
bc_divide(guess, BCG(_one_), num, scale);
151-
bc_free_num (&guess);
152-
bc_free_num (&guess1);
153-
bc_free_num (&point5);
154-
bc_free_num (&diff);
237+
bc_free_num(num);
238+
*num = ret;
239+
240+
efree(buf);
155241
}
156242

157243
bool bc_sqrt(bc_num *num, size_t scale)
@@ -198,8 +284,7 @@ bool bc_sqrt(bc_num *num, size_t scale)
198284
if (num_calc_full_len < MAX_LENGTH_OF_LONG) {
199285
bc_fast_sqrt(num, scale, num_calc_full_len, num_use_full_len, leading_zeros);
200286
} else {
201-
bc_standard_sqrt(num, scale, num_cmp_one);
287+
bc_standard_sqrt(num, scale, num_calc_full_len, num_use_full_len, leading_zeros);
202288
}
203-
204289
return true;
205290
}

0 commit comments

Comments
 (0)