Botan  2.19.1
Crypto and TLS for C++11
chacha_avx2.cpp
Go to the documentation of this file.
1 /*
2 * (C) 2018 Jack Lloyd
3 *
4 * Botan is released under the Simplified BSD License (see license.txt)
5 */
6 
7 #include <botan/chacha.h>
8 #include <botan/internal/simd_avx2.h>
9 
10 namespace Botan {
11 
12 //static
13 BOTAN_FUNC_ISA("avx2")
14 void ChaCha::chacha_avx2_x8(uint8_t output[64*8], uint32_t state[16], size_t rounds)
15  {
17 
18  BOTAN_ASSERT(rounds % 2 == 0, "Valid rounds");
19  const SIMD_8x32 CTR0 = SIMD_8x32(0, 1, 2, 3, 4, 5, 6, 7);
20 
21  const uint32_t C = 0xFFFFFFFF - state[12];
22  const SIMD_8x32 CTR1 = SIMD_8x32(0, C < 1, C < 2, C < 3, C < 4, C < 5, C < 6, C < 7);
23 
24  SIMD_8x32 R00 = SIMD_8x32::splat(state[ 0]);
25  SIMD_8x32 R01 = SIMD_8x32::splat(state[ 1]);
26  SIMD_8x32 R02 = SIMD_8x32::splat(state[ 2]);
27  SIMD_8x32 R03 = SIMD_8x32::splat(state[ 3]);
28  SIMD_8x32 R04 = SIMD_8x32::splat(state[ 4]);
29  SIMD_8x32 R05 = SIMD_8x32::splat(state[ 5]);
30  SIMD_8x32 R06 = SIMD_8x32::splat(state[ 6]);
31  SIMD_8x32 R07 = SIMD_8x32::splat(state[ 7]);
32  SIMD_8x32 R08 = SIMD_8x32::splat(state[ 8]);
33  SIMD_8x32 R09 = SIMD_8x32::splat(state[ 9]);
34  SIMD_8x32 R10 = SIMD_8x32::splat(state[10]);
35  SIMD_8x32 R11 = SIMD_8x32::splat(state[11]);
36  SIMD_8x32 R12 = SIMD_8x32::splat(state[12]) + CTR0;
37  SIMD_8x32 R13 = SIMD_8x32::splat(state[13]) + CTR1;
38  SIMD_8x32 R14 = SIMD_8x32::splat(state[14]);
39  SIMD_8x32 R15 = SIMD_8x32::splat(state[15]);
40 
41  for(size_t r = 0; r != rounds / 2; ++r)
42  {
43  R00 += R04;
44  R01 += R05;
45  R02 += R06;
46  R03 += R07;
47 
48  R12 ^= R00;
49  R13 ^= R01;
50  R14 ^= R02;
51  R15 ^= R03;
52 
53  R12 = R12.rotl<16>();
54  R13 = R13.rotl<16>();
55  R14 = R14.rotl<16>();
56  R15 = R15.rotl<16>();
57 
58  R08 += R12;
59  R09 += R13;
60  R10 += R14;
61  R11 += R15;
62 
63  R04 ^= R08;
64  R05 ^= R09;
65  R06 ^= R10;
66  R07 ^= R11;
67 
68  R04 = R04.rotl<12>();
69  R05 = R05.rotl<12>();
70  R06 = R06.rotl<12>();
71  R07 = R07.rotl<12>();
72 
73  R00 += R04;
74  R01 += R05;
75  R02 += R06;
76  R03 += R07;
77 
78  R12 ^= R00;
79  R13 ^= R01;
80  R14 ^= R02;
81  R15 ^= R03;
82 
83  R12 = R12.rotl<8>();
84  R13 = R13.rotl<8>();
85  R14 = R14.rotl<8>();
86  R15 = R15.rotl<8>();
87 
88  R08 += R12;
89  R09 += R13;
90  R10 += R14;
91  R11 += R15;
92 
93  R04 ^= R08;
94  R05 ^= R09;
95  R06 ^= R10;
96  R07 ^= R11;
97 
98  R04 = R04.rotl<7>();
99  R05 = R05.rotl<7>();
100  R06 = R06.rotl<7>();
101  R07 = R07.rotl<7>();
102 
103  R00 += R05;
104  R01 += R06;
105  R02 += R07;
106  R03 += R04;
107 
108  R15 ^= R00;
109  R12 ^= R01;
110  R13 ^= R02;
111  R14 ^= R03;
112 
113  R15 = R15.rotl<16>();
114  R12 = R12.rotl<16>();
115  R13 = R13.rotl<16>();
116  R14 = R14.rotl<16>();
117 
118  R10 += R15;
119  R11 += R12;
120  R08 += R13;
121  R09 += R14;
122 
123  R05 ^= R10;
124  R06 ^= R11;
125  R07 ^= R08;
126  R04 ^= R09;
127 
128  R05 = R05.rotl<12>();
129  R06 = R06.rotl<12>();
130  R07 = R07.rotl<12>();
131  R04 = R04.rotl<12>();
132 
133  R00 += R05;
134  R01 += R06;
135  R02 += R07;
136  R03 += R04;
137 
138  R15 ^= R00;
139  R12 ^= R01;
140  R13 ^= R02;
141  R14 ^= R03;
142 
143  R15 = R15.rotl<8>();
144  R12 = R12.rotl<8>();
145  R13 = R13.rotl<8>();
146  R14 = R14.rotl<8>();
147 
148  R10 += R15;
149  R11 += R12;
150  R08 += R13;
151  R09 += R14;
152 
153  R05 ^= R10;
154  R06 ^= R11;
155  R07 ^= R08;
156  R04 ^= R09;
157 
158  R05 = R05.rotl<7>();
159  R06 = R06.rotl<7>();
160  R07 = R07.rotl<7>();
161  R04 = R04.rotl<7>();
162  }
163 
164  R00 += SIMD_8x32::splat(state[0]);
165  R01 += SIMD_8x32::splat(state[1]);
166  R02 += SIMD_8x32::splat(state[2]);
167  R03 += SIMD_8x32::splat(state[3]);
168  R04 += SIMD_8x32::splat(state[4]);
169  R05 += SIMD_8x32::splat(state[5]);
170  R06 += SIMD_8x32::splat(state[6]);
171  R07 += SIMD_8x32::splat(state[7]);
172  R08 += SIMD_8x32::splat(state[8]);
173  R09 += SIMD_8x32::splat(state[9]);
174  R10 += SIMD_8x32::splat(state[10]);
175  R11 += SIMD_8x32::splat(state[11]);
176  R12 += SIMD_8x32::splat(state[12]) + CTR0;
177  R13 += SIMD_8x32::splat(state[13]) + CTR1;
178  R14 += SIMD_8x32::splat(state[14]);
179  R15 += SIMD_8x32::splat(state[15]);
180 
181  SIMD_8x32::transpose(R00, R01, R02, R03, R04, R05, R06, R07);
182  SIMD_8x32::transpose(R08, R09, R10, R11, R12, R13, R14, R15);
183 
184  R00.store_le(output);
185  R08.store_le(output + 32*1);
186  R01.store_le(output + 32*2);
187  R09.store_le(output + 32*3);
188  R02.store_le(output + 32*4);
189  R10.store_le(output + 32*5);
190  R03.store_le(output + 32*6);
191  R11.store_le(output + 32*7);
192  R04.store_le(output + 32*8);
193  R12.store_le(output + 32*9);
194  R05.store_le(output + 32*10);
195  R13.store_le(output + 32*11);
196  R06.store_le(output + 32*12);
197  R14.store_le(output + 32*13);
198  R07.store_le(output + 32*14);
199  R15.store_le(output + 32*15);
200 
202 
203  state[12] += 8;
204  if(state[12] < 8)
205  state[13]++;
206  }
207 }
static SIMD_8x32 splat(uint32_t B)
Definition: simd_avx2.h:45
static void zero_registers()
Definition: simd_avx2.h:273
static void reset_registers()
Definition: simd_avx2.h:267
#define BOTAN_ASSERT(expr, assertion_made)
Definition: assert.h:55
#define BOTAN_FUNC_ISA(isa)
Definition: compiler.h:77
Definition: alg_id.cpp:13
static void transpose(SIMD_8x32 &B0, SIMD_8x32 &B1, SIMD_8x32 &B2, SIMD_8x32 &B3)
Definition: simd_avx2.h:237
SIMD_8x32 C