9 #include <botan/internal/mp_core.h>
10 #include <botan/internal/mp_asmi.h>
11 #include <botan/mem_ops.h>
17 const size_t KARATSUBA_MULTIPLY_THRESHOLD = 32;
18 const size_t KARATSUBA_SQUARE_THRESHOLD = 32;
23 void basecase_mul(word z[],
24 const word x[],
size_t x_size,
25 const word y[],
size_t y_size)
27 const size_t x_size_8 = x_size - (x_size % 8);
31 for(
size_t i = 0; i != y_size; ++i)
33 const word y_i = y[i];
37 for(
size_t j = 0; j != x_size_8; j += 8)
40 for(
size_t j = x_size_8; j != x_size; ++j)
41 z[i+j] =
word_madd3(x[j], y_i, z[i+j], &carry);
50 void karatsuba_mul(word z[],
const word x[],
const word y[],
size_t N,
53 if(N < KARATSUBA_MULTIPLY_THRESHOLD || N % 2)
62 return basecase_mul(z, x, N, y, N);
65 const size_t N2 = N / 2;
68 const word* x1 = x + N2;
70 const word* y1 = y + N2;
74 const int32_t cmp0 =
bigint_cmp(x0, N2, x1, N2);
75 const int32_t cmp1 =
bigint_cmp(y1, N2, y0, N2);
100 karatsuba_mul(workspace, z0, z1, N2, workspace+N);
103 karatsuba_mul(z0, x0, y0, N2, workspace+N);
104 karatsuba_mul(z1, x1, y1, N2, workspace+N);
106 const word ws_carry =
bigint_add3_nc(workspace + N, z0, N, z1, N);
112 if((cmp0 == cmp1) || (cmp0 == 0) || (cmp1 == 0))
121 void karatsuba_sqr(word z[],
const word x[],
size_t N, word workspace[])
123 if(N < KARATSUBA_SQUARE_THRESHOLD || N % 2)
132 return basecase_mul(z, x, N, x, N);
135 const size_t N2 = N / 2;
138 const word* x1 = x + N2;
142 const int32_t cmp =
bigint_cmp(x0, N2, x1, N2);
155 karatsuba_sqr(workspace, z0, N2, workspace+N);
158 karatsuba_sqr(z0, x0, N2, workspace+N);
159 karatsuba_sqr(z1, x1, N2, workspace+N);
161 const word ws_carry =
bigint_add3_nc(workspace + N, z0, N, z1, N);
178 size_t karatsuba_size(
size_t z_size,
179 size_t x_size,
size_t x_sw,
180 size_t y_size,
size_t y_sw)
182 if(x_sw > x_size || x_sw > y_size || y_sw > x_size || y_sw > y_size)
185 if(((x_size == x_sw) && (x_size % 2)) ||
186 ((y_size == y_sw) && (y_size % 2)))
189 const size_t start = (x_sw > y_sw) ? x_sw : y_sw;
190 const size_t end = (x_size < y_size) ? x_size : y_size;
199 for(
size_t j = start; j <= end; ++j)
207 if(x_sw <= j && j <= x_size && y_sw <= j && j <= y_size)
210 (j+2) <= x_size && (j+2) <= y_size && 2*(j+2) <= z_size)
222 size_t karatsuba_size(
size_t z_size,
size_t x_size,
size_t x_sw)
231 for(
size_t j = x_sw; j <= x_size; ++j)
239 if(j % 4 == 2 && (j+2) <= x_size && 2*(j+2) <= z_size)
254 const size_t x_sig_words = x.
sig_words();
255 const size_t y_sig_words = y.
sig_words();
263 else if(y_sig_words == 1)
267 else if(x_sig_words <= 4 && x.
size() >= 4 &&
268 y_sig_words <= 4 && y.
size() >= 4 && z.
size() >= 8)
272 else if(x_sig_words <= 6 && x.
size() >= 6 &&
273 y_sig_words <= 6 && y.
size() >= 6 && z.
size() >= 12)
277 else if(x_sig_words <= 8 && x.
size() >= 8 &&
278 y_sig_words <= 8 && y.
size() >= 8 && z.
size() >= 16)
282 else if(x_sig_words <= 9 && x.
size() >= 9 &&
283 y_sig_words <= 9 && y.
size() >= 9 && z.
size() >= 18)
287 else if(x_sig_words <= 16 && x.
size() >= 16 &&
288 y_sig_words <= 16 && y.
size() >= 16 && z.
size() >= 32)
292 else if(x_sig_words < KARATSUBA_MULTIPLY_THRESHOLD ||
293 y_sig_words < KARATSUBA_MULTIPLY_THRESHOLD ||
300 const size_t N = karatsuba_size(z.
size(), x.
size(), x_sig_words, y.
size(), y_sig_words);
313 const word x[],
size_t x_size,
size_t x_sw)
315 BOTAN_ASSERT(z_size/2 >= x_sw,
"Output size is sufficient");
321 else if(x_sw <= 4 && x_size >= 4 && z_size >= 8)
325 else if(x_sw <= 6 && x_size >= 6 && z_size >= 12)
329 else if(x_sw <= 8 && x_size >= 8 && z_size >= 16)
333 else if(x_sw == 9 && x_size >= 9 && z_size >= 18)
337 else if(x_sw <= 16 && x_size >= 16 && z_size >= 32)
341 else if(x_size < KARATSUBA_SQUARE_THRESHOLD || !workspace)
343 basecase_mul(z, x, x_sw, x, x_sw);
347 const size_t N = karatsuba_size(z_size, x_size, x_sw);
350 karatsuba_sqr(z, x, N, workspace);
352 basecase_mul(z, x, x_sw, x, x_sw);
int32_t bigint_cmp(const word x[], size_t x_size, const word y[], size_t y_size)
void clear_mem(T *ptr, size_t n)
word bigint_sub2(word x[], size_t x_size, const word y[], size_t y_size)
word bigint_add3_nc(word z[], const word x[], size_t x_size, const word y[], size_t y_size)
word word_madd3(word a, word b, word c, word *d)
void bigint_comba_mul4(word z[8], const word x[4], const word y[4])
word bigint_sub3(word z[], const word x[], size_t x_size, const word y[], size_t y_size)
void bigint_comba_mul9(word z[18], const word x[9], const word y[9])
#define BOTAN_ASSERT(expr, assertion_made)
void bigint_linmul3(word z[], const word x[], size_t x_size, word y)
void bigint_comba_sqr16(word z[32], const word x[16])
word word8_madd3(word z[8], const word x[8], word y, word carry)
void bigint_sqr(word z[], size_t z_size, word workspace[], const word x[], size_t x_size, size_t x_sw)
const word * data() const
word bigint_add2_nc(word x[], size_t x_size, const word y[], size_t y_size)
void bigint_comba_sqr9(word z[18], const word x[9])
void bigint_comba_mul8(word z[16], const word x[8], const word y[8])
void bigint_comba_sqr8(word z[16], const word x[8])
void bigint_add2(word x[], size_t x_size, const word y[], size_t y_size)
void bigint_comba_mul16(word z[32], const word x[16], const word y[16])
void bigint_mul(BigInt &z, const BigInt &x, const BigInt &y, word workspace[])
void bigint_comba_mul6(word z[12], const word x[6], const word y[6])
void bigint_comba_sqr4(word z[8], const word x[4])
void bigint_comba_sqr6(word z[12], const word x[6])