Botan  2.1.0
Crypto and TLS for C++11
simd_32.h
Go to the documentation of this file.
1 /*
2 * Lightweight wrappers for SIMD operations
3 * (C) 2009,2011,2016,2017 Jack Lloyd
4 *
5 * Botan is released under the Simplified BSD License (see license.txt)
6 */
7 
8 #ifndef BOTAN_SIMD_32_H__
9 #define BOTAN_SIMD_32_H__
10 
11 #include <botan/types.h>
12 #include <botan/loadstor.h>
13 #include <botan/bswap.h>
14 #include <botan/cpuid.h>
15 
16 #if defined(BOTAN_TARGET_SUPPORTS_SSE2)
17  #include <emmintrin.h>
18  #define BOTAN_SIMD_USE_SSE2
19 
20 #elif defined(BOTAN_TARGET_SUPPORTS_ALTIVEC)
21  #include <altivec.h>
22  #undef vector
23  #undef bool
24  #define BOTAN_SIMD_USE_ALTIVEC
25 
26 #elif defined(BOTAN_TARGET_SUPPORTS_NEON)
27  #include <arm_neon.h>
28  #define BOTAN_SIMD_USE_NEON
29 #endif
30 
31 namespace Botan {
32 
33 /**
34 * 4x32 bit SIMD register
35 *
36 * This class is not a general purpose SIMD type, and only offers
37 * instructions needed for evaluation of specific crypto primitives.
38 * For example it does not currently have equality operators of any
39 * kind.
40 *
41 * Implemented for SSE2, VMX (Altivec), and NEON.
42 */
43 class SIMD_4x32 final
44  {
45  public:
46 
47  SIMD_4x32& operator=(const SIMD_4x32& other) = default;
48  SIMD_4x32(const SIMD_4x32& other) = default;
49 
50 #if !defined(BOTAN_BUILD_COMPILER_IS_MSVC_2013)
51  SIMD_4x32& operator=(SIMD_4x32&& other) = default;
52  SIMD_4x32(SIMD_4x32&& other) = default;
53 #endif
54 
55  /**
56  * Zero initialize SIMD register with 4 32-bit elements
57  */
58  SIMD_4x32() // zero initialized
59  {
60 #if defined(BOTAN_SIMD_USE_SSE2)
61  ::memset(&m_sse, 0, sizeof(m_sse));
62 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
63  m_vmx = vec_splat_u32(0);
64 #elif defined(BOTAN_SIMD_USE_NEON)
65  m_neon = vdupq_n_u32(0);
66 #else
67  ::memset(m_scalar, 0, sizeof(m_scalar));
68 #endif
69  }
70 
71  /**
72  * Load SIMD register with 4 32-bit elements
73  */
74  explicit SIMD_4x32(const uint32_t B[4])
75  {
76 #if defined(BOTAN_SIMD_USE_SSE2)
77  m_sse = _mm_loadu_si128(reinterpret_cast<const __m128i*>(B));
78 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
79  m_vmx = (__vector unsigned int){B[0], B[1], B[2], B[3]};
80 #elif defined(BOTAN_SIMD_USE_NEON)
81  m_neon = vld1q_u32(B);
82 #else
83  m_scalar[0] = B[0];
84  m_scalar[1] = B[1];
85  m_scalar[2] = B[2];
86  m_scalar[3] = B[3];
87 #endif
88  }
89 
90  /**
91  * Load SIMD register with 4 32-bit elements
92  */
93  SIMD_4x32(uint32_t B0, uint32_t B1, uint32_t B2, uint32_t B3)
94  {
95 #if defined(BOTAN_SIMD_USE_SSE2)
96  m_sse = _mm_set_epi32(B3, B2, B1, B0);
97 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
98  m_vmx = (__vector unsigned int){B0, B1, B2, B3};
99 #elif defined(BOTAN_SIMD_USE_NEON)
100  // Better way to do this?
101  const uint32_t B[4] = { B0, B1, B2, B3 };
102  m_neon = vld1q_u32(B);
103 #else
104  m_scalar[0] = B0;
105  m_scalar[1] = B1;
106  m_scalar[2] = B2;
107  m_scalar[3] = B3;
108 #endif
109  }
110 
111  /**
112  * Load SIMD register with one 32-bit element repeated
113  */
114  static SIMD_4x32 splat(uint32_t B)
115  {
116 #if defined(BOTAN_SIMD_USE_SSE2)
117  return SIMD_4x32(_mm_set1_epi32(B));
118 #elif defined(BOTAN_SIMD_USE_ARM)
119  return SIMD_4x32(vdupq_n_u32(B));
120 #else
121  return SIMD_4x32(B, B, B, B);
122 #endif
123  }
124 
125  /**
126  * Load a SIMD register with little-endian convention
127  */
128  static SIMD_4x32 load_le(const void* in)
129  {
130 #if defined(BOTAN_SIMD_USE_SSE2)
131  return SIMD_4x32(_mm_loadu_si128(reinterpret_cast<const __m128i*>(in)));
132 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
133  const uint32_t* in_32 = static_cast<const uint32_t*>(in);
134 
135  __vector unsigned int R0 = vec_ld(0, in_32);
136  __vector unsigned int R1 = vec_ld(12, in_32);
137 
138  __vector unsigned char perm = vec_lvsl(0, in_32);
139 
141  {
142  perm = vec_xor(perm, vec_splat_u8(3)); // bswap vector
143  }
144 
145  R0 = vec_perm(R0, R1, perm);
146 
147  return SIMD_4x32(R0);
148 #elif defined(BOTAN_SIMD_USE_NEON)
149 
150  uint32_t in32[4];
151  std::memcpy(in32, in, 16);
153  {
154  bswap_4(in32);
155  }
156  return SIMD_4x32(vld1q_u32(in32));
157 
158 #else
159  SIMD_4x32 out;
160  Botan::load_le(out.m_scalar, static_cast<const uint8_t*>(in), 4);
161  return out;
162 #endif
163  }
164 
165  /**
166  * Load a SIMD register with big-endian convention
167  */
168  static SIMD_4x32 load_be(const void* in)
169  {
170 #if defined(BOTAN_SIMD_USE_SSE2)
171 
172  return load_le(in).bswap();
173 
174 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
175 
176  const uint32_t* in_32 = static_cast<const uint32_t*>(in);
177  __vector unsigned int R0 = vec_ld(0, in_32);
178  __vector unsigned int R1 = vec_ld(12, in_32);
179  __vector unsigned char perm = vec_lvsl(0, in_32);
180 
182  {
183  perm = vec_xor(perm, vec_splat_u8(3)); // bswap vector
184  }
185 
186  R0 = vec_perm(R0, R1, perm);
187  return SIMD_4x32(R0);
188 
189 #elif defined(BOTAN_SIMD_USE_NEON)
190 
191  uint32_t in32[4];
192  std::memcpy(in32, in, 16);
194  {
195  bswap_4(in32);
196  }
197  return SIMD_4x32(vld1q_u32(in32));
198 
199 #else
200  SIMD_4x32 out;
201  Botan::load_be(out.m_scalar, static_cast<const uint8_t*>(in), 4);
202  return out;
203 #endif
204  }
205 
206  /**
207  * Load a SIMD register with little-endian convention
208  */
209  void store_le(uint8_t out[]) const
210  {
211 #if defined(BOTAN_SIMD_USE_SSE2)
212 
213  _mm_storeu_si128(reinterpret_cast<__m128i*>(out), m_sse);
214 
215 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
216 
217  __vector unsigned char perm = vec_lvsl(0, static_cast<uint32_t*>(nullptr));
219  {
220  perm = vec_xor(perm, vec_splat_u8(3)); // bswap vector
221  }
222 
223  union {
224  __vector unsigned int V;
225  uint32_t R[4];
226  } vec;
227  vec.V = vec_perm(m_vmx, m_vmx, perm);
228  Botan::store_be(out, vec.R[0], vec.R[1], vec.R[2], vec.R[3]);
229 
230 #elif defined(BOTAN_SIMD_USE_NEON)
231 
233  {
234  SIMD_4x32 swap = bswap();
235  swap.store_be(out);
236  }
237  else
238  {
239  uint32_t out32[4] = { 0 };
240  vst1q_u32(out32, m_neon);
241  copy_out_le(out, 16, out32);
242  }
243 #else
244  Botan::store_le(out, m_scalar[0], m_scalar[1], m_scalar[2], m_scalar[3]);
245 #endif
246  }
247 
248  /**
249  * Load a SIMD register with big-endian convention
250  */
251  void store_be(uint8_t out[]) const
252  {
253 #if defined(BOTAN_SIMD_USE_SSE2)
254 
255  bswap().store_le(out);
256 
257 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
258 
259  union {
260  __vector unsigned int V;
261  uint32_t R[4];
262  } vec;
263  vec.V = m_vmx;
264  Botan::store_be(out, vec.R[0], vec.R[1], vec.R[2], vec.R[3]);
265 
266 #elif defined(BOTAN_SIMD_USE_NEON)
267 
269  {
270  SIMD_4x32 swap = bswap();
271  swap.store_le(out);
272  }
273  else
274  {
275  uint32_t out32[4] = { 0 };
276  vst1q_u32(out32, m_neon);
277  copy_out_be(out, 16, out32);
278  }
279 
280 #else
281  Botan::store_be(out, m_scalar[0], m_scalar[1], m_scalar[2], m_scalar[3]);
282 #endif
283  }
284 
285  /**
286  * Rotate each element of SIMD register n bits left
287  */
288  void rotate_left(size_t rot)
289  {
290 #if defined(BOTAN_SIMD_USE_SSE2)
291 
292  m_sse = _mm_or_si128(_mm_slli_epi32(m_sse, static_cast<int>(rot)),
293  _mm_srli_epi32(m_sse, static_cast<int>(32-rot)));
294 
295 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
296 
297  const unsigned int r = static_cast<unsigned int>(rot);
298  m_vmx = vec_rl(m_vmx, (__vector unsigned int){r, r, r, r});
299 
300 #elif defined(BOTAN_SIMD_USE_NEON)
301  m_neon = vorrq_u32(vshlq_n_u32(m_neon, static_cast<int>(rot)),
302  vshrq_n_u32(m_neon, static_cast<int>(32-rot)));
303 
304 #else
305  m_scalar[0] = Botan::rotate_left(m_scalar[0], rot);
306  m_scalar[1] = Botan::rotate_left(m_scalar[1], rot);
307  m_scalar[2] = Botan::rotate_left(m_scalar[2], rot);
308  m_scalar[3] = Botan::rotate_left(m_scalar[3], rot);
309 #endif
310  }
311 
312  /**
313  * Rotate each element of SIMD register n bits right
314  */
315  void rotate_right(size_t rot)
316  {
317  rotate_left(32 - rot);
318  }
319 
320  /**
321  * Add elements of a SIMD vector
322  */
323  SIMD_4x32 operator+(const SIMD_4x32& other) const
324  {
325  SIMD_4x32 retval(*this);
326  retval += other;
327  return retval;
328  }
329 
330  /**
331  * Subtract elements of a SIMD vector
332  */
333  SIMD_4x32 operator-(const SIMD_4x32& other) const
334  {
335  SIMD_4x32 retval(*this);
336  retval -= other;
337  return retval;
338  }
339 
340  /**
341  * XOR elements of a SIMD vector
342  */
343  SIMD_4x32 operator^(const SIMD_4x32& other) const
344  {
345  SIMD_4x32 retval(*this);
346  retval ^= other;
347  return retval;
348  }
349 
350  /**
351  * Binary OR elements of a SIMD vector
352  */
353  SIMD_4x32 operator|(const SIMD_4x32& other) const
354  {
355  SIMD_4x32 retval(*this);
356  retval |= other;
357  return retval;
358  }
359 
360  /**
361  * Binary AND elements of a SIMD vector
362  */
363  SIMD_4x32 operator&(const SIMD_4x32& other) const
364  {
365  SIMD_4x32 retval(*this);
366  retval &= other;
367  return retval;
368  }
369 
370  void operator+=(const SIMD_4x32& other)
371  {
372 #if defined(BOTAN_SIMD_USE_SSE2)
373  m_sse = _mm_add_epi32(m_sse, other.m_sse);
374 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
375  m_vmx = vec_add(m_vmx, other.m_vmx);
376 #elif defined(BOTAN_SIMD_USE_NEON)
377  m_neon = vaddq_u32(m_neon, other.m_neon);
378 #else
379  m_scalar[0] += other.m_scalar[0];
380  m_scalar[1] += other.m_scalar[1];
381  m_scalar[2] += other.m_scalar[2];
382  m_scalar[3] += other.m_scalar[3];
383 #endif
384  }
385 
386  void operator-=(const SIMD_4x32& other)
387  {
388 #if defined(BOTAN_SIMD_USE_SSE2)
389  m_sse = _mm_sub_epi32(m_sse, other.m_sse);
390 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
391  m_vmx = vec_sub(m_vmx, other.m_vmx);
392 #elif defined(BOTAN_SIMD_USE_NEON)
393  m_neon = vsubq_u32(m_neon, other.m_neon);
394 #else
395  m_scalar[0] -= other.m_scalar[0];
396  m_scalar[1] -= other.m_scalar[1];
397  m_scalar[2] -= other.m_scalar[2];
398  m_scalar[3] -= other.m_scalar[3];
399 #endif
400  }
401 
402  void operator^=(const SIMD_4x32& other)
403  {
404 #if defined(BOTAN_SIMD_USE_SSE2)
405  m_sse = _mm_xor_si128(m_sse, other.m_sse);
406 
407 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
408  m_vmx = vec_xor(m_vmx, other.m_vmx);
409 #elif defined(BOTAN_SIMD_USE_NEON)
410  m_neon = veorq_u32(m_neon, other.m_neon);
411 #else
412  m_scalar[0] ^= other.m_scalar[0];
413  m_scalar[1] ^= other.m_scalar[1];
414  m_scalar[2] ^= other.m_scalar[2];
415  m_scalar[3] ^= other.m_scalar[3];
416 #endif
417  }
418 
419  void operator|=(const SIMD_4x32& other)
420  {
421 #if defined(BOTAN_SIMD_USE_SSE2)
422  m_sse = _mm_or_si128(m_sse, other.m_sse);
423 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
424  m_vmx = vec_or(m_vmx, other.m_vmx);
425 #elif defined(BOTAN_SIMD_USE_NEON)
426  m_neon = vorrq_u32(m_neon, other.m_neon);
427 #else
428  m_scalar[0] |= other.m_scalar[0];
429  m_scalar[1] |= other.m_scalar[1];
430  m_scalar[2] |= other.m_scalar[2];
431  m_scalar[3] |= other.m_scalar[3];
432 #endif
433  }
434 
435  void operator&=(const SIMD_4x32& other)
436  {
437 #if defined(BOTAN_SIMD_USE_SSE2)
438  m_sse = _mm_and_si128(m_sse, other.m_sse);
439 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
440  m_vmx = vec_and(m_vmx, other.m_vmx);
441 #elif defined(BOTAN_SIMD_USE_NEON)
442  m_neon = vandq_u32(m_neon, other.m_neon);
443 #else
444  m_scalar[0] &= other.m_scalar[0];
445  m_scalar[1] &= other.m_scalar[1];
446  m_scalar[2] &= other.m_scalar[2];
447  m_scalar[3] &= other.m_scalar[3];
448 #endif
449  }
450 
451  SIMD_4x32 operator<<(size_t shift) const
452  {
453 #if defined(BOTAN_SIMD_USE_SSE2)
454  return SIMD_4x32(_mm_slli_epi32(m_sse, static_cast<int>(shift)));
455 
456 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
457  const unsigned int s = static_cast<unsigned int>(shift);
458  return SIMD_4x32(vec_sl(m_vmx, (__vector unsigned int){s, s, s, s}));
459 #elif defined(BOTAN_SIMD_USE_NEON)
460  return SIMD_4x32(vshlq_n_u32(m_neon, static_cast<int>(shift)));
461 #else
462  return SIMD_4x32(m_scalar[0] << shift,
463  m_scalar[1] << shift,
464  m_scalar[2] << shift,
465  m_scalar[3] << shift);
466 #endif
467  }
468 
469  SIMD_4x32 operator>>(size_t shift) const
470  {
471 #if defined(BOTAN_SIMD_USE_SSE2)
472  return SIMD_4x32(_mm_srli_epi32(m_sse, static_cast<int>(shift)));
473 
474 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
475  const unsigned int s = static_cast<unsigned int>(shift);
476  return SIMD_4x32(vec_sr(m_vmx, (__vector unsigned int){s, s, s, s}));
477 #elif defined(BOTAN_SIMD_USE_NEON)
478  return SIMD_4x32(vshrq_n_u32(m_neon, static_cast<int>(shift)));
479 #else
480  return SIMD_4x32(m_scalar[0] >> shift, m_scalar[1] >> shift,
481  m_scalar[2] >> shift, m_scalar[3] >> shift);
482 
483 #endif
484  }
485 
487  {
488 #if defined(BOTAN_SIMD_USE_SSE2)
489  return SIMD_4x32(_mm_xor_si128(m_sse, _mm_set1_epi32(0xFFFFFFFF)));
490 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
491  return SIMD_4x32(vec_nor(m_vmx, m_vmx));
492 #elif defined(BOTAN_SIMD_USE_NEON)
493  return SIMD_4x32(vmvnq_u32(m_neon));
494 #else
495  return SIMD_4x32(~m_scalar[0], ~m_scalar[1], ~m_scalar[2], ~m_scalar[3]);
496 #endif
497  }
498 
499  // (~reg) & other
500  SIMD_4x32 andc(const SIMD_4x32& other) const
501  {
502 #if defined(BOTAN_SIMD_USE_SSE2)
503  return SIMD_4x32(_mm_andnot_si128(m_sse, other.m_sse));
504 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
505  /*
506  AltiVec does arg1 & ~arg2 rather than SSE's ~arg1 & arg2
507  so swap the arguments
508  */
509  return SIMD_4x32(vec_andc(other.m_vmx, m_vmx));
510 #elif defined(BOTAN_SIMD_USE_NEON)
511  // NEON is also a & ~b
512  return SIMD_4x32(vbicq_u32(other.m_neon, m_neon));
513 #else
514  return SIMD_4x32((~m_scalar[0]) & other.m_scalar[0],
515  (~m_scalar[1]) & other.m_scalar[1],
516  (~m_scalar[2]) & other.m_scalar[2],
517  (~m_scalar[3]) & other.m_scalar[3]);
518 #endif
519  }
520 
521  /**
522  * Return copy *this with each word byte swapped
523  */
524  SIMD_4x32 bswap() const
525  {
526 #if defined(BOTAN_SIMD_USE_SSE2)
527 
528  __m128i T = m_sse;
529  T = _mm_shufflehi_epi16(T, _MM_SHUFFLE(2, 3, 0, 1));
530  T = _mm_shufflelo_epi16(T, _MM_SHUFFLE(2, 3, 0, 1));
531  return SIMD_4x32(_mm_or_si128(_mm_srli_epi16(T, 8), _mm_slli_epi16(T, 8)));
532 
533 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
534 
535  __vector unsigned char perm = vec_lvsl(0, static_cast<uint32_t*>(nullptr));
536  perm = vec_xor(perm, vec_splat_u8(3));
537  return SIMD_4x32(vec_perm(m_vmx, m_vmx, perm));
538 
539 #elif defined(BOTAN_SIMD_USE_NEON)
540 
541  //return SIMD_4x32(vrev64q_u32(m_neon));
542 
543  // FIXME this is really slow
544  SIMD_4x32 ror8(m_neon);
545  ror8.rotate_right(8);
546  SIMD_4x32 rol8(m_neon);
547  rol8.rotate_left(8);
548 
549  SIMD_4x32 mask1 = SIMD_4x32::splat(0xFF00FF00);
550  SIMD_4x32 mask2 = SIMD_4x32::splat(0x00FF00FF);
551  return (ror8 & mask1) | (rol8 & mask2);
552 #else
553  // scalar
554  return SIMD_4x32(reverse_bytes(m_scalar[0]),
555  reverse_bytes(m_scalar[1]),
556  reverse_bytes(m_scalar[2]),
557  reverse_bytes(m_scalar[3]));
558 #endif
559  }
560 
561  /**
562  * 4x4 Transposition on SIMD registers
563  */
564  static void transpose(SIMD_4x32& B0, SIMD_4x32& B1,
565  SIMD_4x32& B2, SIMD_4x32& B3)
566  {
567 #if defined(BOTAN_SIMD_USE_SSE2)
568  const __m128i T0 = _mm_unpacklo_epi32(B0.m_sse, B1.m_sse);
569  const __m128i T1 = _mm_unpacklo_epi32(B2.m_sse, B3.m_sse);
570  const __m128i T2 = _mm_unpackhi_epi32(B0.m_sse, B1.m_sse);
571  const __m128i T3 = _mm_unpackhi_epi32(B2.m_sse, B3.m_sse);
572 
573  B0.m_sse = _mm_unpacklo_epi64(T0, T1);
574  B1.m_sse = _mm_unpackhi_epi64(T0, T1);
575  B2.m_sse = _mm_unpacklo_epi64(T2, T3);
576  B3.m_sse = _mm_unpackhi_epi64(T2, T3);
577 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
578  const __vector unsigned int T0 = vec_mergeh(B0.m_vmx, B2.m_vmx);
579  const __vector unsigned int T1 = vec_mergeh(B1.m_vmx, B3.m_vmx);
580  const __vector unsigned int T2 = vec_mergel(B0.m_vmx, B2.m_vmx);
581  const __vector unsigned int T3 = vec_mergel(B1.m_vmx, B3.m_vmx);
582 
583  B0.m_vmx = vec_mergeh(T0, T1);
584  B1.m_vmx = vec_mergel(T0, T1);
585  B2.m_vmx = vec_mergeh(T2, T3);
586  B3.m_vmx = vec_mergel(T2, T3);
587 #elif defined(BOTAN_SIMD_USE_NEON)
588 
589 #if defined(BOTAN_TARGET_ARCH_IS_ARM32)
590 
591  const uint32x4x2_t T0 = vzipq_u32(B0.m_neon, B2.m_neon);
592  const uint32x4x2_t T1 = vzipq_u32(B1.m_neon, B3.m_neon);
593  const uint32x4x2_t O0 = vzipq_u32(T0.val[0], T1.val[0]);
594  const uint32x4x2_t O1 = vzipq_u32(T0.val[1], T1.val[1]);
595 
596  B0.m_neon = O0.val[0];
597  B1.m_neon = O0.val[1];
598  B2.m_neon = O1.val[0];
599  B3.m_neon = O1.val[1];
600 
601 #elif defined(BOTAN_TARGET_ARCH_IS_ARM64)
602  const uint32x4_t T0 = vzip1q_u32(B0.m_neon, B2.m_neon);
603  const uint32x4_t T2 = vzip2q_u32(B0.m_neon, B2.m_neon);
604 
605  const uint32x4_t T1 = vzip1q_u32(B1.m_neon, B3.m_neon);
606  const uint32x4_t T3 = vzip2q_u32(B1.m_neon, B3.m_neon);
607 
608  B0.m_neon = vzip1q_u32(T0, T1);
609  B1.m_neon = vzip2q_u32(T0, T1);
610 
611  B2.m_neon = vzip1q_u32(T2, T3);
612  B3.m_neon = vzip2q_u32(T2, T3);
613 #endif
614 
615 #else
616  // scalar
617  SIMD_4x32 T0(B0.m_scalar[0], B1.m_scalar[0], B2.m_scalar[0], B3.m_scalar[0]);
618  SIMD_4x32 T1(B0.m_scalar[1], B1.m_scalar[1], B2.m_scalar[1], B3.m_scalar[1]);
619  SIMD_4x32 T2(B0.m_scalar[2], B1.m_scalar[2], B2.m_scalar[2], B3.m_scalar[2]);
620  SIMD_4x32 T3(B0.m_scalar[3], B1.m_scalar[3], B2.m_scalar[3], B3.m_scalar[3]);
621 
622  B0 = T0;
623  B1 = T1;
624  B2 = T2;
625  B3 = T3;
626 #endif
627  }
628 
629  private:
630 
631 #if defined(BOTAN_SIMD_USE_SSE2)
632  explicit SIMD_4x32(__m128i in) : m_sse(in) {}
633 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
634  explicit SIMD_4x32(__vector unsigned int in) : m_vmx(in) {}
635 #elif defined(BOTAN_SIMD_USE_NEON)
636  explicit SIMD_4x32(uint32x4_t in) : m_neon(in) {}
637 #endif
638 
639 #if defined(BOTAN_SIMD_USE_SSE2)
640  __m128i m_sse;
641 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
642  __vector unsigned int m_vmx;
643 #elif defined(BOTAN_SIMD_USE_NEON)
644  uint32x4_t m_neon;
645 #else
646  uint32_t m_scalar[4];
647 #endif
648  };
649 
651 
652 }
653 
654 #endif
SIMD_4x32 bswap() const
Definition: simd_32.h:524
SIMD_4x32(const uint32_t B[4])
Definition: simd_32.h:74
SIMD_4x32 operator~() const
Definition: simd_32.h:486
SIMD_4x32 operator&(const SIMD_4x32 &other) const
Definition: simd_32.h:363
void store_be(uint16_t in, uint8_t out[2])
Definition: loadstor.h:441
void copy_out_le(uint8_t out[], size_t out_bytes, const T in[])
Definition: loadstor.h:682
static SIMD_4x32 load_le(const void *in)
Definition: simd_32.h:128
T rotate_left(T input, size_t rot)
Definition: rotate.h:21
void rotate_right(size_t rot)
Definition: simd_32.h:315
void operator^=(const SIMD_4x32 &other)
Definition: simd_32.h:402
SIMD_4x32 operator^(const SIMD_4x32 &other) const
Definition: simd_32.h:343
static void transpose(SIMD_4x32 &B0, SIMD_4x32 &B1, SIMD_4x32 &B2, SIMD_4x32 &B3)
Definition: simd_32.h:564
void operator+=(const SIMD_4x32 &other)
Definition: simd_32.h:370
T load_be(const uint8_t in[], size_t off)
Definition: loadstor.h:113
SIMD_4x32 operator>>(size_t shift) const
Definition: simd_32.h:469
SIMD_4x32 andc(const SIMD_4x32 &other) const
Definition: simd_32.h:500
static SIMD_4x32 load_be(const void *in)
Definition: simd_32.h:168
void store_le(uint8_t out[]) const
Definition: simd_32.h:209
SIMD_4x32 operator-(const SIMD_4x32 &other) const
Definition: simd_32.h:333
T load_le(const uint8_t in[], size_t off)
Definition: loadstor.h:129
static bool is_little_endian()
Definition: cpuid.h:74
Definition: alg_id.cpp:13
void bswap_4(T x[4])
Definition: bswap.h:112
uint16_t reverse_bytes(uint16_t val)
Definition: bswap.h:24
static SIMD_4x32 splat(uint32_t B)
Definition: simd_32.h:114
SIMD_4x32 operator|(const SIMD_4x32 &other) const
Definition: simd_32.h:353
SIMD_4x32 SIMD_32
Definition: simd_32.h:650
SIMD_4x32 operator+(const SIMD_4x32 &other) const
Definition: simd_32.h:323
void operator|=(const SIMD_4x32 &other)
Definition: simd_32.h:419
SIMD_4x32 operator<<(size_t shift) const
Definition: simd_32.h:451
void operator-=(const SIMD_4x32 &other)
Definition: simd_32.h:386
SIMD_4x32 & operator=(const SIMD_4x32 &other)=default
SIMD_4x32(uint32_t B0, uint32_t B1, uint32_t B2, uint32_t B3)
Definition: simd_32.h:93
static bool is_big_endian()
Definition: cpuid.h:83
void operator&=(const SIMD_4x32 &other)
Definition: simd_32.h:435
void rotate_left(size_t rot)
Definition: simd_32.h:288
void store_be(uint8_t out[]) const
Definition: simd_32.h:251
void copy_out_be(uint8_t out[], size_t out_bytes, const T in[])
Definition: loadstor.h:661
void store_le(uint16_t in, uint8_t out[2])
Definition: loadstor.h:457