7 #ifndef BOTAN_SIMD_AVX2_H_
8 #define BOTAN_SIMD_AVX2_H_
10 #include <botan/types.h>
11 #include <immintrin.h>
// NOTE(review): detached member-function bodies of the SIMD_8x32 wrapper
// class (enclosing declarations/braces are outside this view); code tokens
// left byte-identical, comments only added.

// Default constructor body: zero all eight 32-bit lanes.
28 m_avx2 = _mm256_setzero_si256();
// Construct from an array of 8 uint32_t via unaligned 256-bit load.
34 m_avx2 = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(B));
// Construct from eight explicit words. _mm256_set_epi32 takes its
// arguments highest-lane-first, so reversing B7..B0 here places B0 in
// lane 0 (little-endian lane order).
38 explicit
SIMD_8x32(uint32_t B0, uint32_t B1, uint32_t B2, uint32_t B3,
39 uint32_t B4, uint32_t B5, uint32_t B6, uint32_t B7)
41 m_avx2 = _mm256_set_epi32(B7, B6, B5, B4, B3, B2, B1, B0);
// Body of load_le (cf. detached declaration later in this chunk):
// unaligned load of 32 little-endian bytes into a SIMD_8x32.
53 return SIMD_8x32(_mm256_loadu_si256(reinterpret_cast<const __m256i*>(in)));
// Body of store_le: unaligned store of the 32 bytes back to memory.
65 _mm256_storeu_si256(reinterpret_cast<__m256i*>(out), m_avx2);
// rotl<ROT>(): rotate each 32-bit lane left by the compile-time constant
// ROT. The static_assert restricts ROT to [1,31], which also guarantees
// the shift counts in the generic path below are never 0 or 32.
78 static_assert(ROT > 0 && ROT < 32,
"Invalid rotation constant");
// AVX-512VL provides a native per-lane 32-bit rotate instruction.
80 #if defined(__AVX512VL__)
81 return SIMD_8x32(_mm256_rol_epi32(m_avx2, ROT));
// ROT == 8 fast path (guard `if` not visible in this view — presumably a
// BOTAN_IF_CONSTEXPR check): a pshufb byte permutation rotating each
// dword's bytes left by one position is cheaper than shift+or.
85 const __m256i shuf_rotl_8 = _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3,
86 14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3);
88 return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_8));
// ROT == 16 fast path: the same pshufb trick, swapping the two halfwords
// of each dword.
92 const __m256i shuf_rotl_16 = _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2,
93 13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2);
95 return SIMD_8x32(_mm256_shuffle_epi8(m_avx2, shuf_rotl_16));
// Generic rotate: (x << ROT) | (x >> (32 - ROT)).
99 return SIMD_8x32(_mm256_or_si256(_mm256_slli_epi32(m_avx2, static_cast<int>(ROT)),
100 _mm256_srli_epi32(m_avx2, static_cast<int>(32-ROT))));
// rotr<ROT>(): rotate right, expressed as a left rotate by 32-ROT so all
// of the fast paths above are reused.
109 return this->
rotl<32-ROT>();
// rho<ROT1,ROT2,ROT3>(): XOR of three right-rotations of this value.
// NOTE(review): the shape matches the sigma/Sigma functions of SHA-256
// style compression — confirm against the callers (cf. the detached
// SHACAL2 declaration later in this chunk) before documenting further.
112 template<
size_t ROT1,
size_t ROT2,
size_t ROT3>
117 const SIMD_8x32 rot1 = this->rotr<ROT1>();
118 const SIMD_8x32 rot2 = this->rotr<ROT2>();
119 const SIMD_8x32 rot3 = this->rotr<ROT3>();
121 return rot1 ^ rot2 ^ rot3;
// Detached operator bodies, one statement each.

// operator+= body: lane-wise wrapping 32-bit add.
167 m_avx2 = _mm256_add_epi32(m_avx2, other.m_avx2);
// operator-= body: lane-wise 32-bit subtract.
173 m_avx2 = _mm256_sub_epi32(m_avx2, other.m_avx2);
// operator^= body: bitwise XOR.
179 m_avx2 = _mm256_xor_si256(m_avx2, other.m_avx2);
// operator|= body: bitwise OR.
185 m_avx2 = _mm256_or_si256(m_avx2, other.m_avx2);
// operator&= body: bitwise AND.
191 m_avx2 = _mm256_and_si256(m_avx2, other.m_avx2);
// shl<SHIFT>() body: logical left shift of each 32-bit lane.
196 return SIMD_8x32(_mm256_slli_epi32(m_avx2, SHIFT));
// shr<SHIFT>() body: logical right shift of each 32-bit lane.
201 return SIMD_8x32(_mm256_srli_epi32(m_avx2, SHIFT));
// operator~ body: XOR against all-ones.
// NOTE(review): 0xFFFFFFFF does not fit in the int parameter of
// _mm256_set_epi32's scalar form; -1 (or an explicit cast) would avoid
// the implicit narrowing warning — verify against project warning flags.
207 return SIMD_8x32(_mm256_xor_si256(m_avx2, _mm256_set1_epi32(0xFFFFFFFF)));
// andc body: (~this) & other — maps directly onto vpandn's
// (NOT a) AND b operand order.
214 return SIMD_8x32(_mm256_andnot_si256(m_avx2, other.m_avx2));
// bswap fragment: reverse the byte order of each 32-bit lane via pshufb.
// NOTE(review): the mask initializer is truncated in this view — only the
// first 4 of 32 entries are visible; the full table presumably repeats
// the 3,2,1,0 byte-reversal pattern for every dword. Confirm upstream.
220 const uint8_t BSWAP_MASK[32] = { 3, 2, 1, 0,
// Load the shuffle mask (unaligned) and apply it to the register.
229 const __m256i bswap = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(BSWAP_MASK));
231 const __m256i output = _mm256_shuffle_epi8(m_avx2, bswap);
// transpose(B0..B3) body: 4x4 transpose of 32-bit words, carried out
// independently in each 128-bit half of the four vectors (unpack
// intrinsics never cross the 128-bit lane boundary).

// Step 1: interleave the low and high dword pairs of B0/B1 and B2/B3.
240 const __m256i T0 = _mm256_unpacklo_epi32(B0.m_avx2, B1.m_avx2);
241 const __m256i T1 = _mm256_unpacklo_epi32(B2.m_avx2, B3.m_avx2);
242 const __m256i T2 = _mm256_unpackhi_epi32(B0.m_avx2, B1.m_avx2);
243 const __m256i T3 = _mm256_unpackhi_epi32(B2.m_avx2, B3.m_avx2);
// Step 2: interleave 64-bit pairs to complete the transpose in place.
245 B0.m_avx2 = _mm256_unpacklo_epi64(T0, T1);
246 B1.m_avx2 = _mm256_unpackhi_epi64(T0, T1);
247 B2.m_avx2 = _mm256_unpacklo_epi64(T2, T3);
248 B3.m_avx2 = _mm256_unpackhi_epi64(T2, T3);
// 128-bit lane recombination fragment: with selector low-nibble/high-nibble
// encoding, 0 + (2 << 4) gathers the LOW 128-bit halves of A and B into T0,
// and 1 + (3 << 4) gathers the HIGH halves into T1.
288 SIMD_8x32 T0 = _mm256_permute2x128_si256(A.handle(), B.handle(), 0 + (2 << 4));
289 SIMD_8x32 T1 = _mm256_permute2x128_si256(A.handle(), B.handle(), 1 + (3 << 4));
// NOTE(review): shuffled dump of detached declaration lines belonging to
// the SIMD_8x32 class (their bodies appear earlier in this chunk or are
// outside this view). Left byte-identical; annotations only.
void store_le(uint8_t out[]) const
BOTAN_FORCE_INLINE SIMD_8x32()
// NOTE(review): garbled declaration — plausibly a static SHACAL2 round
// helper taking SIMD_8x32 references; confirm against the upstream file.
void BOTAN_FUNC_ISA("avx2") SHACAL2 SIMD_8x32 A
#define BOTAN_IF_CONSTEXPR
__m256i BOTAN_FUNC_ISA("avx2") handle() const
static SIMD_8x32 splat(uint32_t B)
#define BOTAN_FORCE_INLINE
// NOTE(review): this line appears foreign to this header — it reads like
// an OpenSSL-style digest-callback function-pointer field (CTX is not
// declared anywhere in view). Verify provenance and remove if misplaced.
int(* final)(unsigned char *, CTX *)
BOTAN_FUNC_ISA("avx2") SIMD_8x32 shl() const
static SIMD_8x32 load_le(const uint8_t *in)
static void zero_registers()
static void reset_registers()
BOTAN_FUNC_ISA("avx2") SIMD_8x32 shr() const
SIMD_8x32 andc(const SIMD_8x32 &other) const
void store_be(uint8_t out[]) const
static void transpose(SIMD_8x32 &B0, SIMD_8x32 &B1, SIMD_8x32 &B2, SIMD_8x32 &B3)
SIMD_8x32 BOTAN_FUNC_ISA("avx2") rho() const
SIMD_8x32 & operator=(const SIMD_8x32 &other)=default
static SIMD_8x32 load_be(const uint8_t *in)