Botan  2.19.1
Crypto and TLS for C++11
chacha_simd32.cpp
Go to the documentation of this file.
1 /*
2 * (C) 2018 Jack Lloyd
3 *
4 * Botan is released under the Simplified BSD License (see license.txt)
5 */
6 
7 #include <botan/chacha.h>
8 #include <botan/internal/simd_32.h>
9 
10 namespace Botan {
11 
12 //static
13 void ChaCha::chacha_simd32_x4(uint8_t output[64*4], uint32_t state[16], size_t rounds)
14  {
15  BOTAN_ASSERT(rounds % 2 == 0, "Valid rounds");
16  const SIMD_4x32 CTR0 = SIMD_4x32(0, 1, 2, 3);
17 
18  const uint32_t C = 0xFFFFFFFF - state[12];
19  const SIMD_4x32 CTR1 = SIMD_4x32(0, C < 1, C < 2, C < 3);
20 
21  SIMD_4x32 R00 = SIMD_4x32::splat(state[ 0]);
22  SIMD_4x32 R01 = SIMD_4x32::splat(state[ 1]);
23  SIMD_4x32 R02 = SIMD_4x32::splat(state[ 2]);
24  SIMD_4x32 R03 = SIMD_4x32::splat(state[ 3]);
25  SIMD_4x32 R04 = SIMD_4x32::splat(state[ 4]);
26  SIMD_4x32 R05 = SIMD_4x32::splat(state[ 5]);
27  SIMD_4x32 R06 = SIMD_4x32::splat(state[ 6]);
28  SIMD_4x32 R07 = SIMD_4x32::splat(state[ 7]);
29  SIMD_4x32 R08 = SIMD_4x32::splat(state[ 8]);
30  SIMD_4x32 R09 = SIMD_4x32::splat(state[ 9]);
31  SIMD_4x32 R10 = SIMD_4x32::splat(state[10]);
32  SIMD_4x32 R11 = SIMD_4x32::splat(state[11]);
33  SIMD_4x32 R12 = SIMD_4x32::splat(state[12]) + CTR0;
34  SIMD_4x32 R13 = SIMD_4x32::splat(state[13]) + CTR1;
35  SIMD_4x32 R14 = SIMD_4x32::splat(state[14]);
36  SIMD_4x32 R15 = SIMD_4x32::splat(state[15]);
37 
38  for(size_t r = 0; r != rounds / 2; ++r)
39  {
40  R00 += R04;
41  R01 += R05;
42  R02 += R06;
43  R03 += R07;
44 
45  R12 ^= R00;
46  R13 ^= R01;
47  R14 ^= R02;
48  R15 ^= R03;
49 
50  R12 = R12.rotl<16>();
51  R13 = R13.rotl<16>();
52  R14 = R14.rotl<16>();
53  R15 = R15.rotl<16>();
54 
55  R08 += R12;
56  R09 += R13;
57  R10 += R14;
58  R11 += R15;
59 
60  R04 ^= R08;
61  R05 ^= R09;
62  R06 ^= R10;
63  R07 ^= R11;
64 
65  R04 = R04.rotl<12>();
66  R05 = R05.rotl<12>();
67  R06 = R06.rotl<12>();
68  R07 = R07.rotl<12>();
69 
70  R00 += R04;
71  R01 += R05;
72  R02 += R06;
73  R03 += R07;
74 
75  R12 ^= R00;
76  R13 ^= R01;
77  R14 ^= R02;
78  R15 ^= R03;
79 
80  R12 = R12.rotl<8>();
81  R13 = R13.rotl<8>();
82  R14 = R14.rotl<8>();
83  R15 = R15.rotl<8>();
84 
85  R08 += R12;
86  R09 += R13;
87  R10 += R14;
88  R11 += R15;
89 
90  R04 ^= R08;
91  R05 ^= R09;
92  R06 ^= R10;
93  R07 ^= R11;
94 
95  R04 = R04.rotl<7>();
96  R05 = R05.rotl<7>();
97  R06 = R06.rotl<7>();
98  R07 = R07.rotl<7>();
99 
100  R00 += R05;
101  R01 += R06;
102  R02 += R07;
103  R03 += R04;
104 
105  R15 ^= R00;
106  R12 ^= R01;
107  R13 ^= R02;
108  R14 ^= R03;
109 
110  R15 = R15.rotl<16>();
111  R12 = R12.rotl<16>();
112  R13 = R13.rotl<16>();
113  R14 = R14.rotl<16>();
114 
115  R10 += R15;
116  R11 += R12;
117  R08 += R13;
118  R09 += R14;
119 
120  R05 ^= R10;
121  R06 ^= R11;
122  R07 ^= R08;
123  R04 ^= R09;
124 
125  R05 = R05.rotl<12>();
126  R06 = R06.rotl<12>();
127  R07 = R07.rotl<12>();
128  R04 = R04.rotl<12>();
129 
130  R00 += R05;
131  R01 += R06;
132  R02 += R07;
133  R03 += R04;
134 
135  R15 ^= R00;
136  R12 ^= R01;
137  R13 ^= R02;
138  R14 ^= R03;
139 
140  R15 = R15.rotl<8>();
141  R12 = R12.rotl<8>();
142  R13 = R13.rotl<8>();
143  R14 = R14.rotl<8>();
144 
145  R10 += R15;
146  R11 += R12;
147  R08 += R13;
148  R09 += R14;
149 
150  R05 ^= R10;
151  R06 ^= R11;
152  R07 ^= R08;
153  R04 ^= R09;
154 
155  R05 = R05.rotl<7>();
156  R06 = R06.rotl<7>();
157  R07 = R07.rotl<7>();
158  R04 = R04.rotl<7>();
159  }
160 
161  R00 += SIMD_4x32::splat(state[0]);
162  R01 += SIMD_4x32::splat(state[1]);
163  R02 += SIMD_4x32::splat(state[2]);
164  R03 += SIMD_4x32::splat(state[3]);
165  R04 += SIMD_4x32::splat(state[4]);
166  R05 += SIMD_4x32::splat(state[5]);
167  R06 += SIMD_4x32::splat(state[6]);
168  R07 += SIMD_4x32::splat(state[7]);
169  R08 += SIMD_4x32::splat(state[8]);
170  R09 += SIMD_4x32::splat(state[9]);
171  R10 += SIMD_4x32::splat(state[10]);
172  R11 += SIMD_4x32::splat(state[11]);
173  R12 += SIMD_4x32::splat(state[12]) + CTR0;
174  R13 += SIMD_4x32::splat(state[13]) + CTR1;
175  R14 += SIMD_4x32::splat(state[14]);
176  R15 += SIMD_4x32::splat(state[15]);
177 
178  SIMD_4x32::transpose(R00, R01, R02, R03);
179  SIMD_4x32::transpose(R04, R05, R06, R07);
180  SIMD_4x32::transpose(R08, R09, R10, R11);
181  SIMD_4x32::transpose(R12, R13, R14, R15);
182 
183  R00.store_le(output + 0*16);
184  R04.store_le(output + 1*16);
185  R08.store_le(output + 2*16);
186  R12.store_le(output + 3*16);
187  R01.store_le(output + 4*16);
188  R05.store_le(output + 5*16);
189  R09.store_le(output + 6*16);
190  R13.store_le(output + 7*16);
191  R02.store_le(output + 8*16);
192  R06.store_le(output + 9*16);
193  R10.store_le(output + 10*16);
194  R14.store_le(output + 11*16);
195  R03.store_le(output + 12*16);
196  R07.store_le(output + 13*16);
197  R11.store_le(output + 14*16);
198  R15.store_le(output + 15*16);
199 
200  state[12] += 4;
201  if(state[12] < 4)
202  state[13]++;
203  }
204 
205 }
static void transpose(SIMD_4x32 &B0, SIMD_4x32 &B1, SIMD_4x32 &B2, SIMD_4x32 &B3)
Definition: simd_32.h:564
#define BOTAN_ASSERT(expr, assertion_made)
Definition: assert.h:55
Definition: alg_id.cpp:13
static SIMD_4x32 splat(uint32_t B)
Definition: simd_32.h:131
SIMD_8x32 C