9 #ifndef BOTAN_MP_ASM_INTERNAL_H_
10 #define BOTAN_MP_ASM_INTERNAL_H_
12 #include <botan/internal/mp_madd.h>
16 #if defined(BOTAN_MP_USE_X86_32_ASM)
/*
* ADDSUB2_OP (32-bit): one word of an in-place carry-chained add/subtract.
*
* OPERATION is a string literal naming the instruction to apply; the call
* sites in this file pair this macro with "adcl" and "sbbl" via DO_8_TIMES.
* INDEX is the word index, stringified into a 4*INDEX byte offset.
*
* Emits two instructions:
*   1. load the word at 4*INDEX from [y] into the [carry] operand, which
*      serves as scratch here;
*   2. apply OPERATION with that value as source to the word at 4*INDEX
*      from [x], so the result lands in x.
*
* NOTE(review): the last line ends in a line continuation, so whatever
* source line follows is spliced into this macro; it is expected to be
* blank - confirm.
*/
18 #define ADDSUB2_OP(OPERATION, INDEX) \
19 ASM("movl 4*" #INDEX "(%[y]), %[carry]") \
20 ASM(OPERATION " %[carry], 4*" #INDEX "(%[x])") \
/*
* ADDSUB3_OP (32-bit): one word of a three-operand carry-chained
* add/subtract that writes its result to [z].
*
* OPERATION is a string literal naming the instruction to apply; the call
* sites in this file pair this macro with "adcl" and "sbbl" via DO_8_TIMES.
* INDEX is the word index, stringified into a 4*INDEX byte offset.
*
* Emits three instructions:
*   1. load the word at 4*INDEX from [x] into the [carry] operand (scratch);
*   2. apply OPERATION with the word at 4*INDEX from [y] as source and
*      [carry] as destination;
*   3. store [carry] to the word at 4*INDEX from [z].
*
* NOTE(review): the last line ends in a line continuation, so whatever
* source line follows is spliced into this macro; it is expected to be
* blank - confirm.
*/
22 #define ADDSUB3_OP(OPERATION, INDEX) \
23 ASM("movl 4*" #INDEX "(%[x]), %[carry]") \
24 ASM(OPERATION " 4*" #INDEX "(%[y]), %[carry]") \
25 ASM("movl %[carry], 4*" #INDEX "(%[z])") \
/*
* LINMUL_OP (32-bit): one word of a linear multiply with carry.
*
* WRITE_TO is a string literal naming the destination operand; the call
* sites in this file pass "x" and "z". INDEX is the word index,
* stringified into a 4*INDEX byte offset.
*
* Instructions emitted by the lines below, in order:
*   - load the word at 4*INDEX from [x] into %eax;
*   - add [carry] into %eax;
*   - fold the resulting carry flag into %edx;
*   - copy %edx into [carry] for the next step;
*   - store %eax to the word at 4*INDEX from the WRITE_TO operand.
*
* NOTE(review): nothing in these lines writes %edx before the adcl or
* references %[y], although the call sites supply a [y] operand and
* clobber both registers. A widening multiply of %eax by %[y] presumably
* belongs between the load and the add - confirm.
*/
27 #define LINMUL_OP(WRITE_TO, INDEX) \
28 ASM("movl 4*" #INDEX "(%[x]),%%eax") \
30 ASM("addl %[carry],%%eax") \
31 ASM("adcl $0,%%edx") \
32 ASM("movl %%edx,%[carry]") \
33 ASM("movl %%eax, 4*" #INDEX "(%[" WRITE_TO "])")
/*
* MULADD_OP (32-bit): one word of a multiply-accumulate into [z].
*
* IGNORED is not referenced by the body; the call sites in this file pass
* an empty string for it. INDEX is the word index, stringified into a
* 4*INDEX byte offset.
*
* Instructions emitted by the lines below, in order:
*   - load the word at 4*INDEX from [x] into %eax;
*   - add [carry] into %eax and fold the carry flag into %edx;
*   - add the word at 4*INDEX from [z] into %eax and fold the carry flag
*     into %edx again;
*   - copy %edx into [carry] for the next step;
*   - store %eax back to the word at 4*INDEX from [z].
*
* NOTE(review): nothing in these lines writes %edx before the first adcl
* or references %[y], although the call sites supply a [y] operand and
* clobber both registers. A widening multiply of %eax by %[y] presumably
* belongs between the load and the first add - confirm.
*/
35 #define MULADD_OP(IGNORED, INDEX) \
36 ASM("movl 4*" #INDEX "(%[x]),%%eax") \
38 ASM("addl %[carry],%%eax") \
39 ASM("adcl $0,%%edx") \
40 ASM("addl 4*" #INDEX "(%[z]),%%eax") \
41 ASM("adcl $0,%%edx") \
42 ASM("movl %%edx,%[carry]") \
43 ASM("movl %%eax, 4*" #INDEX " (%[z])")
/*
* ADD_OR_SUBTRACT (32-bit): wrapper that call sites in this file put
* around a carry-chained instruction sequence (a single adcl/sbbl, or
* DO_8_TIMES of ADDSUB2_OP / ADDSUB3_OP).
*
* Instructions emitted by the lines below:
*   - "rorl %[carry]" rotates [carry] right by one bit, which moves its
*     low bit into the CPU carry flag;
*   - "sbbl %[carry],%[carry]" subtracts the operand from itself with
*     borrow, leaving 0 when the carry flag is clear and all ones when set.
*
* NOTE(review): CORE_CODE is not referenced in these lines and the last
* line ends in a line continuation, so the definition looks incomplete
* here. CORE_CODE presumably sits between the two instructions and the
* body presumably continues past the sbbl - confirm.
*/
45 #define ADD_OR_SUBTRACT(CORE_CODE) \
46 ASM("rorl %[carry]") \
48 ASM("sbbl %[carry],%[carry]") \
51 #elif defined(BOTAN_MP_USE_X86_64_ASM)
/*
* ADDSUB2_OP (64-bit): one word of an in-place carry-chained add/subtract.
*
* OPERATION is a string literal naming the instruction to apply; the call
* sites in this file pair this macro with "adcq" and "sbbq" via DO_8_TIMES.
* INDEX is the word index, stringified into an 8*INDEX byte offset.
*
* Emits two instructions:
*   1. load the word at 8*INDEX from [y] into the [carry] operand, which
*      serves as scratch here;
*   2. apply OPERATION with that value as source to the word at 8*INDEX
*      from [x], so the result lands in x.
*
* NOTE(review): the last line ends in a line continuation, so whatever
* source line follows is spliced into this macro; it is expected to be
* blank - confirm.
*/
53 #define ADDSUB2_OP(OPERATION, INDEX) \
54 ASM("movq 8*" #INDEX "(%[y]), %[carry]") \
55 ASM(OPERATION " %[carry], 8*" #INDEX "(%[x])") \
/*
* ADDSUB3_OP (64-bit): one word of a three-operand carry-chained
* add/subtract that writes its result to [z].
*
* OPERATION is a string literal naming the instruction to apply; the call
* sites in this file pair this macro with "adcq" and "sbbq" via DO_8_TIMES.
* INDEX is the word index, stringified into an 8*INDEX byte offset.
*
* Emits three instructions:
*   1. load the word at 8*INDEX from [x] into the [carry] operand (scratch);
*   2. apply OPERATION with the word at 8*INDEX from [y] as source and
*      [carry] as destination;
*   3. store [carry] to the word at 8*INDEX from [z].
*
* NOTE(review): the last line ends in a line continuation, so whatever
* source line follows is spliced into this macro; it is expected to be
* blank - confirm.
*/
57 #define ADDSUB3_OP(OPERATION, INDEX) \
58 ASM("movq 8*" #INDEX "(%[x]), %[carry]") \
59 ASM(OPERATION " 8*" #INDEX "(%[y]), %[carry]") \
60 ASM("movq %[carry], 8*" #INDEX "(%[z])") \
/*
* LINMUL_OP (64-bit): one word of a linear multiply with carry.
*
* WRITE_TO is a string literal naming the destination operand; the call
* sites in this file pass "x" and "z". INDEX is the word index,
* stringified into an 8*INDEX byte offset.
*
* Instructions emitted by the lines below, in order:
*   - load the word at 8*INDEX from [x] into %rax;
*   - add [carry] into %rax;
*   - fold the resulting carry flag into %rdx;
*   - copy %rdx into [carry] for the next step;
*   - store %rax to the word at 8*INDEX from the WRITE_TO operand.
*
* NOTE(review): nothing in these lines writes %rdx before the adcq or
* references %[y], although the call sites supply a [y] operand and
* clobber both registers. A widening multiply of %rax by %[y] presumably
* belongs between the load and the add - confirm.
*/
62 #define LINMUL_OP(WRITE_TO, INDEX) \
63 ASM("movq 8*" #INDEX "(%[x]),%%rax") \
65 ASM("addq %[carry],%%rax") \
66 ASM("adcq $0,%%rdx") \
67 ASM("movq %%rdx,%[carry]") \
68 ASM("movq %%rax, 8*" #INDEX "(%[" WRITE_TO "])")
/*
* MULADD_OP (64-bit): one word of a multiply-accumulate into [z].
*
* IGNORED is not referenced by the body; the call sites in this file pass
* an empty string for it. INDEX is the word index, stringified into an
* 8*INDEX byte offset.
*
* Instructions emitted by the lines below, in order:
*   - load the word at 8*INDEX from [x] into %rax;
*   - add [carry] into %rax and fold the carry flag into %rdx;
*   - add the word at 8*INDEX from [z] into %rax and fold the carry flag
*     into %rdx again;
*   - copy %rdx into [carry] for the next step;
*   - store %rax back to the word at 8*INDEX from [z].
*
* NOTE(review): nothing in these lines writes %rdx before the first adcq
* or references %[y], although the call sites supply a [y] operand and
* clobber both registers. A widening multiply of %rax by %[y] presumably
* belongs between the load and the first add - confirm.
*/
70 #define MULADD_OP(IGNORED, INDEX) \
71 ASM("movq 8*" #INDEX "(%[x]),%%rax") \
73 ASM("addq %[carry],%%rax") \
74 ASM("adcq $0,%%rdx") \
75 ASM("addq 8*" #INDEX "(%[z]),%%rax") \
76 ASM("adcq $0,%%rdx") \
77 ASM("movq %%rdx,%[carry]") \
78 ASM("movq %%rax, 8*" #INDEX " (%[z])")
/*
* ADD_OR_SUBTRACT (64-bit): wrapper that call sites in this file put
* around a carry-chained instruction sequence (a single adcq/sbbq, or
* DO_8_TIMES of ADDSUB2_OP / ADDSUB3_OP).
*
* Instructions emitted by the lines below:
*   - "rorq %[carry]" rotates [carry] right by one bit, which moves its
*     low bit into the CPU carry flag;
*   - "sbbq %[carry],%[carry]" subtracts the operand from itself with
*     borrow, leaving 0 when the carry flag is clear and all ones when set.
*
* NOTE(review): CORE_CODE is not referenced in these lines and the last
* line ends in a line continuation, so the definition looks incomplete
* here. CORE_CODE presumably sits between the two instructions and the
* body presumably continues past the sbbq - confirm.
*/
80 #define ADD_OR_SUBTRACT(CORE_CODE) \
81 ASM("rorq %[carry]") \
83 ASM("sbbq %[carry],%[carry]") \
88 #if defined(ADD_OR_SUBTRACT)
/*
* ASM(x): appends "\n\t" to the string literal x by adjacent-literal
* concatenation, so a run of ASM(...) uses forms one assembler string
* with each instruction on its own line.
*/
90 #define ASM(x) x "\n\t"
92 #define DO_8_TIMES(MACRO, ARG) \
109 #if defined(BOTAN_MP_USE_X86_32_ASM)
111 ADD_OR_SUBTRACT(ASM(
"adcl %[y],%[x]"))
112 : [x]
"=r"(x), [carry]
"=r"(*carry)
113 :
"0"(x), [y]
"rm"(y),
"1"(*carry)
117 #elif defined(BOTAN_MP_USE_X86_64_ASM)
120 ADD_OR_SUBTRACT(ASM(
"adcq %[y],%[x]"))
121 : [x]
"=r"(x), [carry]
"=r"(*carry)
122 :
"0"(x), [y]
"rm"(y),
"1"(*carry)
130 *carry = c1 | (z < *
carry);
140 #if defined(BOTAN_MP_USE_X86_32_ASM)
142 ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP,
"adcl"))
144 : [x]
"r"(x), [y]
"r"(y),
"0"(carry)
148 #elif defined(BOTAN_MP_USE_X86_64_ASM)
151 ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP,
"adcq"))
153 : [x]
"r"(x), [y]
"r"(y),
"0"(carry)
158 x[0] =
word_add(x[0], y[0], &carry);
159 x[1] =
word_add(x[1], y[1], &carry);
160 x[2] =
word_add(x[2], y[2], &carry);
161 x[3] =
word_add(x[3], y[3], &carry);
162 x[4] =
word_add(x[4], y[4], &carry);
163 x[5] =
word_add(x[5], y[5], &carry);
164 x[6] =
word_add(x[6], y[6], &carry);
165 x[7] =
word_add(x[7], y[7], &carry);
174 const word y[8], word
carry)
176 #if defined(BOTAN_MP_USE_X86_32_ASM)
178 ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP,
"adcl"))
180 : [x]
"r"(x), [y]
"r"(y), [z]
"r"(z),
"0"(
carry)
184 #elif defined(BOTAN_MP_USE_X86_64_ASM)
187 ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP,
"adcq"))
189 : [x]
"r"(x), [y]
"r"(y), [z]
"r"(z),
"0"(
carry)
194 z[0] =
word_add(x[0], y[0], &carry);
195 z[1] =
word_add(x[1], y[1], &carry);
196 z[2] =
word_add(x[2], y[2], &carry);
197 z[3] =
word_add(x[3], y[3], &carry);
198 z[4] =
word_add(x[4], y[4], &carry);
199 z[5] =
word_add(x[5], y[5], &carry);
200 z[6] =
word_add(x[6], y[6], &carry);
201 z[7] =
word_add(x[7], y[7], &carry);
211 #if defined(BOTAN_MP_USE_X86_32_ASM)
213 ADD_OR_SUBTRACT(ASM(
"sbbl %[y],%[x]"))
214 : [x]
"=r"(x), [carry]
"=r"(*carry)
215 :
"0"(x), [y]
"rm"(y),
"1"(*carry)
219 #elif defined(BOTAN_MP_USE_X86_64_ASM)
222 ADD_OR_SUBTRACT(ASM(
"sbbq %[y],%[x]"))
223 : [x]
"=r"(x), [carry]
"=r"(*carry)
224 :
"0"(x), [y]
"rm"(y),
"1"(*carry)
231 word z = t0 - *
carry;
232 *carry = c1 | (z > t0);
242 #if defined(BOTAN_MP_USE_X86_32_ASM)
244 ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP,
"sbbl"))
246 : [x]
"r"(x), [y]
"r"(y),
"0"(carry)
250 #elif defined(BOTAN_MP_USE_X86_64_ASM)
253 ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB2_OP,
"sbbq"))
255 : [x]
"r"(x), [y]
"r"(y),
"0"(carry)
260 x[0] =
word_sub(x[0], y[0], &carry);
261 x[1] =
word_sub(x[1], y[1], &carry);
262 x[2] =
word_sub(x[2], y[2], &carry);
263 x[3] =
word_sub(x[3], y[3], &carry);
264 x[4] =
word_sub(x[4], y[4], &carry);
265 x[5] =
word_sub(x[5], y[5], &carry);
266 x[6] =
word_sub(x[6], y[6], &carry);
267 x[7] =
word_sub(x[7], y[7], &carry);
277 #if defined(BOTAN_MP_USE_X86_32_ASM)
279 ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP,
"sbbl"))
281 : [x]
"r"(y), [y]
"r"(x), [z]
"r"(x),
"0"(
carry)
285 #elif defined(BOTAN_MP_USE_X86_64_ASM)
288 ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP,
"sbbq"))
290 : [x]
"r"(y), [y]
"r"(x), [z]
"r"(x),
"0"(
carry)
295 x[0] =
word_sub(y[0], x[0], &carry);
296 x[1] =
word_sub(y[1], x[1], &carry);
297 x[2] =
word_sub(y[2], x[2], &carry);
298 x[3] =
word_sub(y[3], x[3], &carry);
299 x[4] =
word_sub(y[4], x[4], &carry);
300 x[5] =
word_sub(y[5], x[5], &carry);
301 x[6] =
word_sub(y[6], x[6], &carry);
302 x[7] =
word_sub(y[7], x[7], &carry);
311 const word y[8], word
carry)
313 #if defined(BOTAN_MP_USE_X86_32_ASM)
315 ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP,
"sbbl"))
317 : [x]
"r"(x), [y]
"r"(y), [z]
"r"(z),
"0"(
carry)
321 #elif defined(BOTAN_MP_USE_X86_64_ASM)
324 ADD_OR_SUBTRACT(DO_8_TIMES(ADDSUB3_OP,
"sbbq"))
326 : [x]
"r"(x), [y]
"r"(y), [z]
"r"(z),
"0"(
carry)
331 z[0] =
word_sub(x[0], y[0], &carry);
332 z[1] =
word_sub(x[1], y[1], &carry);
333 z[2] =
word_sub(x[2], y[2], &carry);
334 z[3] =
word_sub(x[3], y[3], &carry);
335 z[4] =
word_sub(x[4], y[4], &carry);
336 z[5] =
word_sub(x[5], y[5], &carry);
337 z[6] =
word_sub(x[6], y[6], &carry);
338 z[7] =
word_sub(x[7], y[7], &carry);
348 #if defined(BOTAN_MP_USE_X86_32_ASM)
350 DO_8_TIMES(LINMUL_OP,
"x")
352 : [x]
"r"(x), [y]
"rm"(y),
"0"(carry)
353 :
"cc",
"%eax",
"%edx");
356 #elif defined(BOTAN_MP_USE_X86_64_ASM)
359 DO_8_TIMES(LINMUL_OP,
"x")
361 : [x]
"r"(x), [y]
"rm"(y),
"0"(carry)
362 :
"cc",
"%rax",
"%rdx");
383 #if defined(BOTAN_MP_USE_X86_32_ASM)
385 DO_8_TIMES(LINMUL_OP,
"z")
387 : [z]
"r"(z), [x]
"r"(x), [y]
"rm"(y),
"0"(
carry)
388 :
"cc",
"%eax",
"%edx");
391 #elif defined(BOTAN_MP_USE_X86_64_ASM)
393 DO_8_TIMES(LINMUL_OP,
"z")
395 : [z]
"r"(z), [x]
"r"(x), [y]
"rm"(y),
"0"(
carry)
396 :
"cc",
"%rax",
"%rdx");
417 #if defined(BOTAN_MP_USE_X86_32_ASM)
419 DO_8_TIMES(MULADD_OP,
"")
421 : [z]
"r"(z), [x]
"r"(x), [y]
"rm"(y),
"0"(
carry)
422 :
"cc",
"%eax",
"%edx");
425 #elif defined(BOTAN_MP_USE_X86_64_ASM)
428 DO_8_TIMES(MULADD_OP,
"")
430 : [z]
"r"(z), [x]
"r"(x), [y]
"rm"(y),
"0"(
carry)
431 :
"cc",
"%rax",
"%rdx");
453 #if defined(BOTAN_MP_USE_X86_32_ASM)
466 : [w0]"=r"(*w0), [w1]
"=r"(*w1), [w2]
"=r"(*w2)
467 : [z0]
"r"(z0), [z1]
"r"(z1),
"0"(*w0),
"1"(*w1),
"2"(*w2)
470 #elif defined(BOTAN_MP_USE_X86_64_ASM)
484 : [w0]"=r"(*w0), [w1]
"=r"(*w1), [w2]
"=r"(*w2)
485 : [z0]
"r"(z0), [z1]
"r"(z1),
"0"(*w0),
"1"(*w1),
"2"(*w2)
492 *w2 += (*w1 <
carry);
500 inline void word3_add(word* w2, word* w1, word* w0, word x)
502 #if defined(BOTAN_MP_USE_X86_32_ASM)
508 : [w0]"=r"(*w0), [w1]
"=r"(*w1), [w2]
"=r"(*w2)
509 : [x]
"r"(x),
"0"(*w0),
"1"(*w1),
"2"(*w2)
512 #elif defined(BOTAN_MP_USE_X86_64_ASM)
519 : [w0]"=r"(*w0), [w1]
"=r"(*w1), [w2]
"=r"(*w2)
520 : [x]
"r"(x),
"0"(*w0),
"1"(*w1),
"2"(*w2)
527 word c2 = (*w1 < c1);
538 #if defined(BOTAN_MP_USE_X86_32_ASM)
556 : [w0]"=r"(*w0), [w1]
"=r"(*w1), [w2]
"=r"(*w2)
557 : [z0]
"r"(z0), [z1]
"r"(z1),
"0"(*w0),
"1"(*w1),
"2"(*w2)
560 #elif defined(BOTAN_MP_USE_X86_64_ASM)
578 : [w0]"=r"(*w0), [w1]
"=r"(*w1), [w2]
"=r"(*w2)
579 : [z0]
"r"(z0), [z1]
"r"(z1),
"0"(*w0),
"1"(*w1),
"2"(*w2)
587 word top = (y >> (BOTAN_MP_WORD_BITS-1));
589 y |= (x >> (BOTAN_MP_WORD_BITS-1));
602 #undef ADD_OR_SUBTRACT
word word8_sub2_rev(word x[8], const word y[8], word carry)
void word3_muladd(word *w2, word *w1, word *w0, word x, word y)
word word8_add2(word x[8], const word y[8], word carry)
word word8_linmul3(word z[8], const word x[8], word y, word carry)
void carry(int64_t &h0, int64_t &h1)
word word_madd3(word a, word b, word c, word *d)
word word8_sub2(word x[8], const word y[8], word carry)
word word_madd2(word a, word b, word *c)
word word8_madd3(word z[8], const word x[8], word y, word carry)
void word3_add(word *w2, word *w1, word *w0, word x)
word word8_linmul2(word x[8], word y, word carry)
word word8_add3(word z[8], const word x[8], const word y[8], word carry)
void word3_muladd_2(word *w2, word *w1, word *w0, word x, word y)
word word_sub(word x, word y, word *carry)
word word_add(word x, word y, word *carry)
word word8_sub3(word z[8], const word x[8], const word y[8], word carry)