Botan  2.1.0
Crypto and TLS for C++11
Public Member Functions | Static Public Member Functions | List of all members
Botan::SIMD_4x32 Class Reference final

#include <simd_32.h>

Public Member Functions

SIMD_4x32 andc (const SIMD_4x32 &other) const
 
SIMD_4x32 bswap () const
 
SIMD_4x32 operator& (const SIMD_4x32 &other) const
 
void operator&= (const SIMD_4x32 &other)
 
SIMD_4x32 operator+ (const SIMD_4x32 &other) const
 
void operator+= (const SIMD_4x32 &other)
 
SIMD_4x32 operator- (const SIMD_4x32 &other) const
 
void operator-= (const SIMD_4x32 &other)
 
SIMD_4x32 operator<< (size_t shift) const
 
SIMD_4x32 & operator= (const SIMD_4x32 &other)=default
 
SIMD_4x32 & operator= (SIMD_4x32 &&other)=default
 
SIMD_4x32 operator>> (size_t shift) const
 
SIMD_4x32 operator^ (const SIMD_4x32 &other) const
 
void operator^= (const SIMD_4x32 &other)
 
SIMD_4x32 operator| (const SIMD_4x32 &other) const
 
void operator|= (const SIMD_4x32 &other)
 
SIMD_4x32 operator~ () const
 
void rotate_left (size_t rot)
 
void rotate_right (size_t rot)
 
 SIMD_4x32 (const SIMD_4x32 &other)=default
 
 SIMD_4x32 (SIMD_4x32 &&other)=default
 
 SIMD_4x32 ()
 
 SIMD_4x32 (const uint32_t B[4])
 
 SIMD_4x32 (uint32_t B0, uint32_t B1, uint32_t B2, uint32_t B3)
 
void store_be (uint8_t out[]) const
 
void store_le (uint8_t out[]) const
 

Static Public Member Functions

static SIMD_4x32 load_be (const void *in)
 
static SIMD_4x32 load_le (const void *in)
 
static SIMD_4x32 splat (uint32_t B)
 
static void transpose (SIMD_4x32 &B0, SIMD_4x32 &B1, SIMD_4x32 &B2, SIMD_4x32 &B3)
 

Detailed Description

4x32 bit SIMD register

This class is not a general purpose SIMD type, and only offers instructions needed for evaluation of specific crypto primitives. For example it does not currently have equality operators of any kind.

Implemented for SSE2, VMX (Altivec), and NEON.

Definition at line 43 of file simd_32.h.

Constructor & Destructor Documentation

Botan::SIMD_4x32::SIMD_4x32 ( const SIMD_4x32 &  other)
default
Botan::SIMD_4x32::SIMD_4x32 ( SIMD_4x32 &&  other)
default
Botan::SIMD_4x32::SIMD_4x32 ( )
inline

Zero initialize SIMD register with 4 32-bit elements

Definition at line 58 of file simd_32.h.

Referenced by andc(), bswap(), load_be(), load_le(), operator<<(), operator>>(), operator~(), and splat().

59  {
60 #if defined(BOTAN_SIMD_USE_SSE2)
61  ::memset(&m_sse, 0, sizeof(m_sse));
62 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
63  m_vmx = vec_splat_u32(0);
64 #elif defined(BOTAN_SIMD_USE_NEON)
65  m_neon = vdupq_n_u32(0);
66 #else
67  ::memset(m_scalar, 0, sizeof(m_scalar));
68 #endif
69  }
Botan::SIMD_4x32::SIMD_4x32 ( const uint32_t  B[4])
inline explicit

Load SIMD register with 4 32-bit elements

Definition at line 74 of file simd_32.h.

75  {
76 #if defined(BOTAN_SIMD_USE_SSE2)
77  m_sse = _mm_loadu_si128(reinterpret_cast<const __m128i*>(B));
78 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
79  m_vmx = (__vector unsigned int){B[0], B[1], B[2], B[3]};
80 #elif defined(BOTAN_SIMD_USE_NEON)
81  m_neon = vld1q_u32(B);
82 #else
83  m_scalar[0] = B[0];
84  m_scalar[1] = B[1];
85  m_scalar[2] = B[2];
86  m_scalar[3] = B[3];
87 #endif
88  }
Botan::SIMD_4x32::SIMD_4x32 ( uint32_t  B0,
uint32_t  B1,
uint32_t  B2,
uint32_t  B3 
)
inline

Load SIMD register with 4 32-bit elements

Definition at line 93 of file simd_32.h.

94  {
95 #if defined(BOTAN_SIMD_USE_SSE2)
96  m_sse = _mm_set_epi32(B3, B2, B1, B0);
97 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
98  m_vmx = (__vector unsigned int){B0, B1, B2, B3};
99 #elif defined(BOTAN_SIMD_USE_NEON)
100  // Better way to do this?
101  const uint32_t B[4] = { B0, B1, B2, B3 };
102  m_neon = vld1q_u32(B);
103 #else
104  m_scalar[0] = B0;
105  m_scalar[1] = B1;
106  m_scalar[2] = B2;
107  m_scalar[3] = B3;
108 #endif
109  }

Member Function Documentation

SIMD_4x32 Botan::SIMD_4x32::andc ( const SIMD_4x32 &  other) const
inline

Definition at line 500 of file simd_32.h.

References SIMD_4x32().

501  {
502 #if defined(BOTAN_SIMD_USE_SSE2)
503  return SIMD_4x32(_mm_andnot_si128(m_sse, other.m_sse));
504 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
505  /*
506  AltiVec does arg1 & ~arg2 rather than SSE's ~arg1 & arg2
507  so swap the arguments
508  */
509  return SIMD_4x32(vec_andc(other.m_vmx, m_vmx));
510 #elif defined(BOTAN_SIMD_USE_NEON)
511  // NEON is also a & ~b
512  return SIMD_4x32(vbicq_u32(other.m_neon, m_neon));
513 #else
514  return SIMD_4x32((~m_scalar[0]) & other.m_scalar[0],
515  (~m_scalar[1]) & other.m_scalar[1],
516  (~m_scalar[2]) & other.m_scalar[2],
517  (~m_scalar[3]) & other.m_scalar[3]);
518 #endif
519  }
SIMD_4x32 Botan::SIMD_4x32::bswap ( ) const
inline

Return a copy of *this with each 32-bit word byte-swapped

Definition at line 524 of file simd_32.h.

References Botan::reverse_bytes(), rotate_left(), rotate_right(), SIMD_4x32(), and splat().

Referenced by load_be(), store_be(), and store_le().

525  {
526 #if defined(BOTAN_SIMD_USE_SSE2)
527 
528  __m128i T = m_sse;
529  T = _mm_shufflehi_epi16(T, _MM_SHUFFLE(2, 3, 0, 1));
530  T = _mm_shufflelo_epi16(T, _MM_SHUFFLE(2, 3, 0, 1));
531  return SIMD_4x32(_mm_or_si128(_mm_srli_epi16(T, 8), _mm_slli_epi16(T, 8)));
532 
533 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
534 
535  __vector unsigned char perm = vec_lvsl(0, static_cast<uint32_t*>(nullptr));
536  perm = vec_xor(perm, vec_splat_u8(3));
537  return SIMD_4x32(vec_perm(m_vmx, m_vmx, perm));
538 
539 #elif defined(BOTAN_SIMD_USE_NEON)
540 
541  //return SIMD_4x32(vrev64q_u32(m_neon));
542 
543  // FIXME this is really slow
544  SIMD_4x32 ror8(m_neon);
545  ror8.rotate_right(8);
546  SIMD_4x32 rol8(m_neon);
547  rol8.rotate_left(8);
548 
549  SIMD_4x32 mask1 = SIMD_4x32::splat(0xFF00FF00);
550  SIMD_4x32 mask2 = SIMD_4x32::splat(0x00FF00FF);
551  return (ror8 & mask1) | (rol8 & mask2);
552 #else
553  // scalar
554  return SIMD_4x32(reverse_bytes(m_scalar[0]),
555  reverse_bytes(m_scalar[1]),
556  reverse_bytes(m_scalar[2]),
557  reverse_bytes(m_scalar[3]));
558 #endif
559  }
uint16_t reverse_bytes(uint16_t val)
Definition: bswap.h:24
static SIMD_4x32 splat(uint32_t B)
Definition: simd_32.h:114
static SIMD_4x32 Botan::SIMD_4x32::load_be ( const void *  in)
inline static

Load a SIMD register with big-endian convention

Definition at line 168 of file simd_32.h.

References bswap(), Botan::bswap_4(), Botan::CPUID::is_little_endian(), Botan::load_be(), load_le(), and SIMD_4x32().

169  {
170 #if defined(BOTAN_SIMD_USE_SSE2)
171 
172  return load_le(in).bswap();
173 
174 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
175 
176  const uint32_t* in_32 = static_cast<const uint32_t*>(in);
177  __vector unsigned int R0 = vec_ld(0, in_32);
178  __vector unsigned int R1 = vec_ld(12, in_32);
179  __vector unsigned char perm = vec_lvsl(0, in_32);
180 
181  if(CPUID::is_little_endian())
182  {
183  perm = vec_xor(perm, vec_splat_u8(3)); // bswap vector
184  }
185 
186  R0 = vec_perm(R0, R1, perm);
187  return SIMD_4x32(R0);
188 
189 #elif defined(BOTAN_SIMD_USE_NEON)
190 
191  uint32_t in32[4];
192  std::memcpy(in32, in, 16);
193  if(CPUID::is_little_endian())
194  {
195  bswap_4(in32);
196  }
197  return SIMD_4x32(vld1q_u32(in32));
198 
199 #else
200  SIMD_4x32 out;
201  Botan::load_be(out.m_scalar, static_cast<const uint8_t*>(in), 4);
202  return out;
203 #endif
204  }
SIMD_4x32 bswap() const
Definition: simd_32.h:524
static SIMD_4x32 load_le(const void *in)
Definition: simd_32.h:128
T load_be(const uint8_t in[], size_t off)
Definition: loadstor.h:113
static bool is_little_endian()
Definition: cpuid.h:74
void bswap_4(T x[4])
Definition: bswap.h:112
static SIMD_4x32 Botan::SIMD_4x32::load_le ( const void *  in)
inline static

Load a SIMD register with little-endian convention

Definition at line 128 of file simd_32.h.

References Botan::bswap_4(), Botan::CPUID::is_big_endian(), Botan::load_le(), and SIMD_4x32().

Referenced by load_be(), Botan::Serpent::simd_decrypt_4(), and Botan::Serpent::simd_encrypt_4().

129  {
130 #if defined(BOTAN_SIMD_USE_SSE2)
131  return SIMD_4x32(_mm_loadu_si128(reinterpret_cast<const __m128i*>(in)));
132 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
133  const uint32_t* in_32 = static_cast<const uint32_t*>(in);
134 
135  __vector unsigned int R0 = vec_ld(0, in_32);
136  __vector unsigned int R1 = vec_ld(12, in_32);
137 
138  __vector unsigned char perm = vec_lvsl(0, in_32);
139 
140  if(CPUID::is_big_endian())
141  {
142  perm = vec_xor(perm, vec_splat_u8(3)); // bswap vector
143  }
144 
145  R0 = vec_perm(R0, R1, perm);
146 
147  return SIMD_4x32(R0);
148 #elif defined(BOTAN_SIMD_USE_NEON)
149 
150  uint32_t in32[4];
151  std::memcpy(in32, in, 16);
152  if(CPUID::is_big_endian())
153  {
154  bswap_4(in32);
155  }
156  return SIMD_4x32(vld1q_u32(in32));
157 
158 #else
159  SIMD_4x32 out;
160  Botan::load_le(out.m_scalar, static_cast<const uint8_t*>(in), 4);
161  return out;
162 #endif
163  }
T load_le(const uint8_t in[], size_t off)
Definition: loadstor.h:129
void bswap_4(T x[4])
Definition: bswap.h:112
static bool is_big_endian()
Definition: cpuid.h:83
SIMD_4x32 Botan::SIMD_4x32::operator& ( const SIMD_4x32 &  other) const
inline

Binary AND elements of a SIMD vector

Definition at line 363 of file simd_32.h.

364  {
365  SIMD_4x32 retval(*this);
366  retval &= other;
367  return retval;
368  }
void Botan::SIMD_4x32::operator&= ( const SIMD_4x32 &  other)
inline

Definition at line 435 of file simd_32.h.

436  {
437 #if defined(BOTAN_SIMD_USE_SSE2)
438  m_sse = _mm_and_si128(m_sse, other.m_sse);
439 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
440  m_vmx = vec_and(m_vmx, other.m_vmx);
441 #elif defined(BOTAN_SIMD_USE_NEON)
442  m_neon = vandq_u32(m_neon, other.m_neon);
443 #else
444  m_scalar[0] &= other.m_scalar[0];
445  m_scalar[1] &= other.m_scalar[1];
446  m_scalar[2] &= other.m_scalar[2];
447  m_scalar[3] &= other.m_scalar[3];
448 #endif
449  }
SIMD_4x32 Botan::SIMD_4x32::operator+ ( const SIMD_4x32 &  other) const
inline

Add elements of a SIMD vector

Definition at line 323 of file simd_32.h.

324  {
325  SIMD_4x32 retval(*this);
326  retval += other;
327  return retval;
328  }
void Botan::SIMD_4x32::operator+= ( const SIMD_4x32 &  other)
inline

Definition at line 370 of file simd_32.h.

371  {
372 #if defined(BOTAN_SIMD_USE_SSE2)
373  m_sse = _mm_add_epi32(m_sse, other.m_sse);
374 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
375  m_vmx = vec_add(m_vmx, other.m_vmx);
376 #elif defined(BOTAN_SIMD_USE_NEON)
377  m_neon = vaddq_u32(m_neon, other.m_neon);
378 #else
379  m_scalar[0] += other.m_scalar[0];
380  m_scalar[1] += other.m_scalar[1];
381  m_scalar[2] += other.m_scalar[2];
382  m_scalar[3] += other.m_scalar[3];
383 #endif
384  }
SIMD_4x32 Botan::SIMD_4x32::operator- ( const SIMD_4x32 &  other) const
inline

Subtract elements of a SIMD vector

Definition at line 333 of file simd_32.h.

334  {
335  SIMD_4x32 retval(*this);
336  retval -= other;
337  return retval;
338  }
void Botan::SIMD_4x32::operator-= ( const SIMD_4x32 &  other)
inline

Definition at line 386 of file simd_32.h.

387  {
388 #if defined(BOTAN_SIMD_USE_SSE2)
389  m_sse = _mm_sub_epi32(m_sse, other.m_sse);
390 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
391  m_vmx = vec_sub(m_vmx, other.m_vmx);
392 #elif defined(BOTAN_SIMD_USE_NEON)
393  m_neon = vsubq_u32(m_neon, other.m_neon);
394 #else
395  m_scalar[0] -= other.m_scalar[0];
396  m_scalar[1] -= other.m_scalar[1];
397  m_scalar[2] -= other.m_scalar[2];
398  m_scalar[3] -= other.m_scalar[3];
399 #endif
400  }
SIMD_4x32 Botan::SIMD_4x32::operator<< ( size_t  shift) const
inline

Definition at line 451 of file simd_32.h.

References SIMD_4x32().

452  {
453 #if defined(BOTAN_SIMD_USE_SSE2)
454  return SIMD_4x32(_mm_slli_epi32(m_sse, static_cast<int>(shift)));
455 
456 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
457  const unsigned int s = static_cast<unsigned int>(shift);
458  return SIMD_4x32(vec_sl(m_vmx, (__vector unsigned int){s, s, s, s}));
459 #elif defined(BOTAN_SIMD_USE_NEON)
460  return SIMD_4x32(vshlq_n_u32(m_neon, static_cast<int>(shift)));
461 #else
462  return SIMD_4x32(m_scalar[0] << shift,
463  m_scalar[1] << shift,
464  m_scalar[2] << shift,
465  m_scalar[3] << shift);
466 #endif
467  }
SIMD_4x32& Botan::SIMD_4x32::operator= ( const SIMD_4x32 &  other)
default
SIMD_4x32& Botan::SIMD_4x32::operator= ( SIMD_4x32 &&  other)
default
SIMD_4x32 Botan::SIMD_4x32::operator>> ( size_t  shift) const
inline

Definition at line 469 of file simd_32.h.

References SIMD_4x32().

470  {
471 #if defined(BOTAN_SIMD_USE_SSE2)
472  return SIMD_4x32(_mm_srli_epi32(m_sse, static_cast<int>(shift)));
473 
474 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
475  const unsigned int s = static_cast<unsigned int>(shift);
476  return SIMD_4x32(vec_sr(m_vmx, (__vector unsigned int){s, s, s, s}));
477 #elif defined(BOTAN_SIMD_USE_NEON)
478  return SIMD_4x32(vshrq_n_u32(m_neon, static_cast<int>(shift)));
479 #else
480  return SIMD_4x32(m_scalar[0] >> shift, m_scalar[1] >> shift,
481  m_scalar[2] >> shift, m_scalar[3] >> shift);
482 
483 #endif
484  }
SIMD_4x32 Botan::SIMD_4x32::operator^ ( const SIMD_4x32 &  other) const
inline

XOR elements of a SIMD vector

Definition at line 343 of file simd_32.h.

344  {
345  SIMD_4x32 retval(*this);
346  retval ^= other;
347  return retval;
348  }
void Botan::SIMD_4x32::operator^= ( const SIMD_4x32 &  other)
inline

Definition at line 402 of file simd_32.h.

403  {
404 #if defined(BOTAN_SIMD_USE_SSE2)
405  m_sse = _mm_xor_si128(m_sse, other.m_sse);
406 
407 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
408  m_vmx = vec_xor(m_vmx, other.m_vmx);
409 #elif defined(BOTAN_SIMD_USE_NEON)
410  m_neon = veorq_u32(m_neon, other.m_neon);
411 #else
412  m_scalar[0] ^= other.m_scalar[0];
413  m_scalar[1] ^= other.m_scalar[1];
414  m_scalar[2] ^= other.m_scalar[2];
415  m_scalar[3] ^= other.m_scalar[3];
416 #endif
417  }
SIMD_4x32 Botan::SIMD_4x32::operator| ( const SIMD_4x32 &  other) const
inline

Binary OR elements of a SIMD vector

Definition at line 353 of file simd_32.h.

354  {
355  SIMD_4x32 retval(*this);
356  retval |= other;
357  return retval;
358  }
void Botan::SIMD_4x32::operator|= ( const SIMD_4x32 &  other)
inline

Definition at line 419 of file simd_32.h.

420  {
421 #if defined(BOTAN_SIMD_USE_SSE2)
422  m_sse = _mm_or_si128(m_sse, other.m_sse);
423 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
424  m_vmx = vec_or(m_vmx, other.m_vmx);
425 #elif defined(BOTAN_SIMD_USE_NEON)
426  m_neon = vorrq_u32(m_neon, other.m_neon);
427 #else
428  m_scalar[0] |= other.m_scalar[0];
429  m_scalar[1] |= other.m_scalar[1];
430  m_scalar[2] |= other.m_scalar[2];
431  m_scalar[3] |= other.m_scalar[3];
432 #endif
433  }
SIMD_4x32 Botan::SIMD_4x32::operator~ ( ) const
inline

Definition at line 486 of file simd_32.h.

References SIMD_4x32().

487  {
488 #if defined(BOTAN_SIMD_USE_SSE2)
489  return SIMD_4x32(_mm_xor_si128(m_sse, _mm_set1_epi32(0xFFFFFFFF)));
490 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
491  return SIMD_4x32(vec_nor(m_vmx, m_vmx));
492 #elif defined(BOTAN_SIMD_USE_NEON)
493  return SIMD_4x32(vmvnq_u32(m_neon));
494 #else
495  return SIMD_4x32(~m_scalar[0], ~m_scalar[1], ~m_scalar[2], ~m_scalar[3]);
496 #endif
497  }
void Botan::SIMD_4x32::rotate_left ( size_t  rot)
inline

Rotate each 32-bit element of the SIMD register left by rot bits

Definition at line 288 of file simd_32.h.

References Botan::rotate_left().

Referenced by bswap(), and rotate_right().

289  {
290 #if defined(BOTAN_SIMD_USE_SSE2)
291 
292  m_sse = _mm_or_si128(_mm_slli_epi32(m_sse, static_cast<int>(rot)),
293  _mm_srli_epi32(m_sse, static_cast<int>(32-rot)));
294 
295 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
296 
297  const unsigned int r = static_cast<unsigned int>(rot);
298  m_vmx = vec_rl(m_vmx, (__vector unsigned int){r, r, r, r});
299 
300 #elif defined(BOTAN_SIMD_USE_NEON)
301  m_neon = vorrq_u32(vshlq_n_u32(m_neon, static_cast<int>(rot)),
302  vshrq_n_u32(m_neon, static_cast<int>(32-rot)));
303 
304 #else
305  m_scalar[0] = Botan::rotate_left(m_scalar[0], rot);
306  m_scalar[1] = Botan::rotate_left(m_scalar[1], rot);
307  m_scalar[2] = Botan::rotate_left(m_scalar[2], rot);
308  m_scalar[3] = Botan::rotate_left(m_scalar[3], rot);
309 #endif
310  }
T rotate_left(T input, size_t rot)
Definition: rotate.h:21
void Botan::SIMD_4x32::rotate_right ( size_t  rot)
inline

Rotate each 32-bit element of the SIMD register right by rot bits

Definition at line 315 of file simd_32.h.

References rotate_left().

Referenced by bswap().

316  {
317  rotate_left(32 - rot);
318  }
void rotate_left(size_t rot)
Definition: simd_32.h:288
static SIMD_4x32 Botan::SIMD_4x32::splat ( uint32_t  B)
inline static

Load SIMD register with one 32-bit element repeated

Definition at line 114 of file simd_32.h.

References SIMD_4x32().

Referenced by bswap().

115  {
116 #if defined(BOTAN_SIMD_USE_SSE2)
117  return SIMD_4x32(_mm_set1_epi32(B));
118 #elif defined(BOTAN_SIMD_USE_ARM)
119  return SIMD_4x32(vdupq_n_u32(B));
120 #else
121  return SIMD_4x32(B, B, B, B);
122 #endif
123  }
void Botan::SIMD_4x32::store_be ( uint8_t  out[]) const
inline

Store a SIMD register with big-endian convention

Definition at line 251 of file simd_32.h.

References bswap(), Botan::copy_out_be(), Botan::CPUID::is_little_endian(), Botan::store_be(), and store_le().

Referenced by store_le().

252  {
253 #if defined(BOTAN_SIMD_USE_SSE2)
254 
255  bswap().store_le(out);
256 
257 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
258 
259  union {
260  __vector unsigned int V;
261  uint32_t R[4];
262  } vec;
263  vec.V = m_vmx;
264  Botan::store_be(out, vec.R[0], vec.R[1], vec.R[2], vec.R[3]);
265 
266 #elif defined(BOTAN_SIMD_USE_NEON)
267 
268  if(CPUID::is_little_endian())
269  {
270  SIMD_4x32 swap = bswap();
271  swap.store_le(out);
272  }
273  else
274  {
275  uint32_t out32[4] = { 0 };
276  vst1q_u32(out32, m_neon);
277  copy_out_be(out, 16, out32);
278  }
279 
280 #else
281  Botan::store_be(out, m_scalar[0], m_scalar[1], m_scalar[2], m_scalar[3]);
282 #endif
283  }
SIMD_4x32 bswap() const
Definition: simd_32.h:524
void store_be(uint16_t in, uint8_t out[2])
Definition: loadstor.h:441
void store_le(uint8_t out[]) const
Definition: simd_32.h:209
static bool is_little_endian()
Definition: cpuid.h:74
void copy_out_be(uint8_t out[], size_t out_bytes, const T in[])
Definition: loadstor.h:661
void Botan::SIMD_4x32::store_le ( uint8_t  out[]) const
inline

Store a SIMD register with little-endian convention

Definition at line 209 of file simd_32.h.

References bswap(), Botan::copy_out_le(), Botan::CPUID::is_big_endian(), store_be(), Botan::store_be(), and Botan::store_le().

Referenced by Botan::Serpent::simd_decrypt_4(), Botan::Serpent::simd_encrypt_4(), and store_be().

210  {
211 #if defined(BOTAN_SIMD_USE_SSE2)
212 
213  _mm_storeu_si128(reinterpret_cast<__m128i*>(out), m_sse);
214 
215 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
216 
217  __vector unsigned char perm = vec_lvsl(0, static_cast<uint32_t*>(nullptr));
218  if(CPUID::is_big_endian())
219  {
220  perm = vec_xor(perm, vec_splat_u8(3)); // bswap vector
221  }
222 
223  union {
224  __vector unsigned int V;
225  uint32_t R[4];
226  } vec;
227  vec.V = vec_perm(m_vmx, m_vmx, perm);
228  Botan::store_be(out, vec.R[0], vec.R[1], vec.R[2], vec.R[3]);
229 
230 #elif defined(BOTAN_SIMD_USE_NEON)
231 
232  if(CPUID::is_big_endian())
233  {
234  SIMD_4x32 swap = bswap();
235  swap.store_be(out);
236  }
237  else
238  {
239  uint32_t out32[4] = { 0 };
240  vst1q_u32(out32, m_neon);
241  copy_out_le(out, 16, out32);
242  }
243 #else
244  Botan::store_le(out, m_scalar[0], m_scalar[1], m_scalar[2], m_scalar[3]);
245 #endif
246  }
SIMD_4x32 bswap() const
Definition: simd_32.h:524
void store_be(uint16_t in, uint8_t out[2])
Definition: loadstor.h:441
void copy_out_le(uint8_t out[], size_t out_bytes, const T in[])
Definition: loadstor.h:682
static bool is_big_endian()
Definition: cpuid.h:83
void store_le(uint16_t in, uint8_t out[2])
Definition: loadstor.h:457
static void Botan::SIMD_4x32::transpose ( SIMD_4x32 &  B0,
SIMD_4x32 &  B1,
SIMD_4x32 &  B2,
SIMD_4x32 &  B3 
)
inline static

4x4 Transposition on SIMD registers

Definition at line 564 of file simd_32.h.

Referenced by Botan::Serpent::simd_decrypt_4(), and Botan::Serpent::simd_encrypt_4().

566  {
567 #if defined(BOTAN_SIMD_USE_SSE2)
568  const __m128i T0 = _mm_unpacklo_epi32(B0.m_sse, B1.m_sse);
569  const __m128i T1 = _mm_unpacklo_epi32(B2.m_sse, B3.m_sse);
570  const __m128i T2 = _mm_unpackhi_epi32(B0.m_sse, B1.m_sse);
571  const __m128i T3 = _mm_unpackhi_epi32(B2.m_sse, B3.m_sse);
572 
573  B0.m_sse = _mm_unpacklo_epi64(T0, T1);
574  B1.m_sse = _mm_unpackhi_epi64(T0, T1);
575  B2.m_sse = _mm_unpacklo_epi64(T2, T3);
576  B3.m_sse = _mm_unpackhi_epi64(T2, T3);
577 #elif defined(BOTAN_SIMD_USE_ALTIVEC)
578  const __vector unsigned int T0 = vec_mergeh(B0.m_vmx, B2.m_vmx);
579  const __vector unsigned int T1 = vec_mergeh(B1.m_vmx, B3.m_vmx);
580  const __vector unsigned int T2 = vec_mergel(B0.m_vmx, B2.m_vmx);
581  const __vector unsigned int T3 = vec_mergel(B1.m_vmx, B3.m_vmx);
582 
583  B0.m_vmx = vec_mergeh(T0, T1);
584  B1.m_vmx = vec_mergel(T0, T1);
585  B2.m_vmx = vec_mergeh(T2, T3);
586  B3.m_vmx = vec_mergel(T2, T3);
587 #elif defined(BOTAN_SIMD_USE_NEON)
588 
589 #if defined(BOTAN_TARGET_ARCH_IS_ARM32)
590 
591  const uint32x4x2_t T0 = vzipq_u32(B0.m_neon, B2.m_neon);
592  const uint32x4x2_t T1 = vzipq_u32(B1.m_neon, B3.m_neon);
593  const uint32x4x2_t O0 = vzipq_u32(T0.val[0], T1.val[0]);
594  const uint32x4x2_t O1 = vzipq_u32(T0.val[1], T1.val[1]);
595 
596  B0.m_neon = O0.val[0];
597  B1.m_neon = O0.val[1];
598  B2.m_neon = O1.val[0];
599  B3.m_neon = O1.val[1];
600 
601 #elif defined(BOTAN_TARGET_ARCH_IS_ARM64)
602  const uint32x4_t T0 = vzip1q_u32(B0.m_neon, B2.m_neon);
603  const uint32x4_t T2 = vzip2q_u32(B0.m_neon, B2.m_neon);
604 
605  const uint32x4_t T1 = vzip1q_u32(B1.m_neon, B3.m_neon);
606  const uint32x4_t T3 = vzip2q_u32(B1.m_neon, B3.m_neon);
607 
608  B0.m_neon = vzip1q_u32(T0, T1);
609  B1.m_neon = vzip2q_u32(T0, T1);
610 
611  B2.m_neon = vzip1q_u32(T2, T3);
612  B3.m_neon = vzip2q_u32(T2, T3);
613 #endif
614 
615 #else
616  // scalar
617  SIMD_4x32 T0(B0.m_scalar[0], B1.m_scalar[0], B2.m_scalar[0], B3.m_scalar[0]);
618  SIMD_4x32 T1(B0.m_scalar[1], B1.m_scalar[1], B2.m_scalar[1], B3.m_scalar[1]);
619  SIMD_4x32 T2(B0.m_scalar[2], B1.m_scalar[2], B2.m_scalar[2], B3.m_scalar[2]);
620  SIMD_4x32 T3(B0.m_scalar[3], B1.m_scalar[3], B2.m_scalar[3], B3.m_scalar[3]);
621 
622  B0 = T0;
623  B1 = T1;
624  B2 = T2;
625  B3 = T3;
626 #endif
627  }

The documentation for this class was generated from the following file: