8 #ifndef BOTAN_UTIL_MUL128_H__
9 #define BOTAN_UTIL_MUL128_H__
11 #include <botan/types.h>
/*
* Detect a compiler-provided 128-bit integer type.
*
* When the compiler advertises one (__SIZEOF_INT128__) and the build
* configuration marks the target as having native 64-bit arithmetic,
* BOTAN_TARGET_HAS_NATIVE_UINT128 is defined and the type is made available
* under the name uint128_t.
*/
#if defined(__SIZEOF_INT128__) && defined(BOTAN_TARGET_CPU_HAS_NATIVE_64BIT)
   #define BOTAN_TARGET_HAS_NATIVE_UINT128

   // Request the 16-byte integer through its machine mode (TI) instead of
   // spelling __int128, which GCC diagnoses when building with -pedantic.
   typedef unsigned int uint128_t __attribute__((mode(TI)));
#endif
/*
* BOTAN_FAST_64X64_MUL(a, b, lo, hi)
*
* Statement-like macro computing the full 128-bit product of the 64-bit
* values a and b. The low 64 bits are stored through the pointer lo and the
* high 64 bits through the pointer hi.
*
* The macro is only defined when one of the branches below matches the
* compiler/target; users must test defined(BOTAN_FAST_64X64_MUL) and provide
* their own fallback otherwise.
*
* Every variant is wrapped in do { ... } while(0) so that the expansion is a
* single statement and can be followed by a semicolon in any context.
* Arguments may be evaluated more than once; pass side-effect free operands.
*/
#if defined(BOTAN_TARGET_HAS_NATIVE_UINT128)

// Compiler has a 128-bit integer: widen one operand and let it do the work.
#define BOTAN_FAST_64X64_MUL(a,b,lo,hi)                         \
   do {                                                         \
      const uint128_t r = static_cast<uint128_t>(a) * (b);      \
      *(hi) = (r >> 64) & 0xFFFFFFFFFFFFFFFF;                   \
      *(lo) = (r      ) & 0xFFFFFFFFFFFFFFFF;                   \
   } while(0)

#elif defined(BOTAN_BUILD_COMPILER_IS_MSVC) && defined(BOTAN_TARGET_CPU_HAS_NATIVE_64BIT)

// _umul128 is declared in <intrin.h>; it returns the low half and writes the
// high half through its third argument.
#include <intrin.h>
#pragma intrinsic(_umul128)

#define BOTAN_FAST_64X64_MUL(a,b,lo,hi) \
   do { *(lo) = _umul128((a), (b), (hi)); } while(0)

#elif defined(BOTAN_USE_GCC_INLINE_ASM)

#if defined(BOTAN_TARGET_ARCH_IS_X86_64)

// mulq takes one factor in rax ("a") and leaves the product in rdx:rax,
// which the output constraints map to *hi ("=d") and *lo ("=a").
#define BOTAN_FAST_64X64_MUL(a,b,lo,hi) do {                                 \
   asm("mulq %3" : "=d" (*(hi)), "=a" (*(lo)) : "a" (a), "rm" (b) : "cc");   \
   } while(0)

#elif defined(BOTAN_TARGET_ARCH_IS_ALPHA)

// The asm statement produces only the high half; the low half is the
// ordinary (wrapping) 64-bit product.
#define BOTAN_FAST_64X64_MUL(a,b,lo,hi) do {                    \
   asm("umulh %1,%2,%0" : "=r" (*(hi)) : "r" (a), "r" (b));     \
   *(lo) = (a) * (b);                                           \
   } while(0)

#elif defined(BOTAN_TARGET_ARCH_IS_IA64)

// The asm statement produces only the high half; the low half is the
// ordinary (wrapping) 64-bit product.
#define BOTAN_FAST_64X64_MUL(a,b,lo,hi) do {                    \
   asm("xmpy.hu %0=%1,%2" : "=f" (*(hi)) : "f" (a), "f" (b));   \
   *(lo) = (a) * (b);                                           \
   } while(0)

#elif defined(BOTAN_TARGET_ARCH_IS_PPC64)

// The asm statement produces only the high half; the low half is the
// ordinary (wrapping) 64-bit product.
#define BOTAN_FAST_64X64_MUL(a,b,lo,hi) do {                            \
   asm("mulhdu %0,%1,%2" : "=r" (*(hi)) : "r" (a), "r" (b) : "cc");     \
   *(lo) = (a) * (b);                                                   \
   } while(0)

#endif

#endif
/**
* Perform a 64x64->128 bit multiplication
*
* @param a first factor
* @param b second factor
* @param lo receives the low 64 bits of a * b; dereferenced unconditionally
* @param hi receives the high 64 bits of a * b; dereferenced unconditionally
*/
inline void mul64x64_128(uint64_t a, uint64_t b, uint64_t* lo, uint64_t* hi)
   {
#if defined(BOTAN_FAST_64X64_MUL)
   BOTAN_FAST_64X64_MUL(a, b, lo, hi);
#else

   /*
   * Portable path: split each operand into 32-bit halves and combine the
   * four 32x32->64 partial products
   *
   *    a * b = x0 * 2^64 + (x1 + x2) * 2^32 + x3
   */
   const size_t HWORD_BITS = 32;
   const uint32_t HWORD_MASK = 0xFFFFFFFF;

   const uint32_t a_hi = static_cast<uint32_t>(a >> HWORD_BITS);
   const uint32_t a_lo = static_cast<uint32_t>(a  & HWORD_MASK);
   const uint32_t b_hi = static_cast<uint32_t>(b >> HWORD_BITS);
   const uint32_t b_lo = static_cast<uint32_t>(b  & HWORD_MASK);

   uint64_t x0 = static_cast<uint64_t>(a_hi) * b_hi;
   const uint64_t x1 = static_cast<uint64_t>(a_lo) * b_hi;
   uint64_t x2 = static_cast<uint64_t>(a_hi) * b_lo;
   const uint64_t x3 = static_cast<uint64_t>(a_lo) * b_lo;

   // Fold in the upper half of the low product. This cannot overflow since
   // (2^32-1)^2 + (2^32-1) < 2^64.
   x2 += x3 >> HWORD_BITS;

   // Accumulate the second cross term. This addition can wrap around ...
   x2 += x1;

   // ... and if it did (the sum is now smaller than the addend), the lost
   // carry is worth 2^64 in the middle column, i.e. 2^32 in the high word.
   x0 += static_cast<uint64_t>(x2 < x1) << HWORD_BITS;

   *hi = x0 + (x2 >> HWORD_BITS);
   *lo = ((x2 & HWORD_MASK) << HWORD_BITS) + (x3 & HWORD_MASK);
#endif
   }
void mul64x64_128(uint64_t a, uint64_t b, uint64_t *lo, uint64_t *hi)