Botan 2.19.1
Crypto and TLS for C++11
aes_armv8.cpp
/*
* AES using ARMv8
* Contributed by Jeffrey Walton
*
* Further changes
* (C) 2017,2018 Jack Lloyd
*
* Botan is released under the Simplified BSD License (see license.txt)
*/

#include <botan/aes.h>
#include <botan/loadstor.h>
#include <arm_neon.h>

namespace Botan {

// Each macro advances four independent blocks (B0..B3) through one AES
// round with round key K, keeping four AESE/AESMC dependency chains in
// flight per iteration.

#define AES_ENC_4_ROUNDS(K)                \
   do                                      \
      {                                    \
      B0 = vaesmcq_u8(vaeseq_u8(B0, K));   \
      B1 = vaesmcq_u8(vaeseq_u8(B1, K));   \
      B2 = vaesmcq_u8(vaeseq_u8(B2, K));   \
      B3 = vaesmcq_u8(vaeseq_u8(B3, K));   \
      } while(0)

// The final AES round omits MixColumns, so the last round key is XORed in
// directly rather than followed by another AESMC/AESIMC.

#define AES_ENC_4_LAST_ROUNDS(K, K2)       \
   do                                      \
      {                                    \
      B0 = veorq_u8(vaeseq_u8(B0, K), K2); \
      B1 = veorq_u8(vaeseq_u8(B1, K), K2); \
      B2 = veorq_u8(vaeseq_u8(B2, K), K2); \
      B3 = veorq_u8(vaeseq_u8(B3, K), K2); \
      } while(0)

#define AES_DEC_4_ROUNDS(K)                \
   do                                      \
      {                                    \
      B0 = vaesimcq_u8(vaesdq_u8(B0, K));  \
      B1 = vaesimcq_u8(vaesdq_u8(B1, K));  \
      B2 = vaesimcq_u8(vaesdq_u8(B2, K));  \
      B3 = vaesimcq_u8(vaesdq_u8(B3, K));  \
      } while(0)

#define AES_DEC_4_LAST_ROUNDS(K, K2)       \
   do                                      \
      {                                    \
      B0 = veorq_u8(vaesdq_u8(B0, K), K2); \
      B1 = veorq_u8(vaesdq_u8(B1, K), K2); \
      B2 = veorq_u8(vaesdq_u8(B2, K), K2); \
      B3 = veorq_u8(vaesdq_u8(B3, K), K2); \
      } while(0)

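The ARMv8 crypto instructions split a textbook AES round between two operations: vaeseq_u8 performs AddRoundKey, SubBytes and ShiftRows (the round key is XORed in before the S-box), and vaesmcq_u8 performs MixColumns; vaesdq_u8 and vaesimcq_u8 are the corresponding inverses used for decryption, which presumably requires m_DK to hold the round keys in the usual equivalent-inverse-cipher arrangement (reversed order, with InvMixColumns applied to the interior keys). As a single-block sketch, not part of this file, one ordinary round and the final round look like this, assuming a compiler targeting ARMv8 with the crypto extension (e.g. -march=armv8-a+crypto):

#include <arm_neon.h>

// One full AES encryption round: AddRoundKey + SubBytes + ShiftRows (AESE),
// then MixColumns (AESMC).
static inline uint8x16_t aes_enc_round(uint8x16_t state, uint8x16_t rk)
   {
   return vaesmcq_u8(vaeseq_u8(state, rk));
   }

// The last round has no MixColumns, so the final round key is simply XORed
// in after AESE.
static inline uint8x16_t aes_enc_final_round(uint8x16_t state,
                                             uint8x16_t rk,
                                             uint8x16_t last_rk)
   {
   return veorq_u8(vaeseq_u8(state, rk), last_rk);
   }

The four-way macros above apply the same two steps to B0..B3 so that the latencies of the four AESE/AESMC chains overlap.
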
/*
* AES-128 Encryption
*/
BOTAN_FUNC_ISA("+crypto")
void AES_128::hw_aes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
   {
   const uint8_t *skey = reinterpret_cast<const uint8_t*>(m_EK.data());

   // Load the 11 round keys of the AES-128 encryption schedule
   const uint8x16_t K0 = vld1q_u8(skey + 0*16);
   const uint8x16_t K1 = vld1q_u8(skey + 1*16);
   const uint8x16_t K2 = vld1q_u8(skey + 2*16);
   const uint8x16_t K3 = vld1q_u8(skey + 3*16);
   const uint8x16_t K4 = vld1q_u8(skey + 4*16);
   const uint8x16_t K5 = vld1q_u8(skey + 5*16);
   const uint8x16_t K6 = vld1q_u8(skey + 6*16);
   const uint8x16_t K7 = vld1q_u8(skey + 7*16);
   const uint8x16_t K8 = vld1q_u8(skey + 8*16);
   const uint8x16_t K9 = vld1q_u8(skey + 9*16);
   const uint8x16_t K10 = vld1q_u8(skey + 10*16);

   // Process four blocks per iteration while at least four remain
   while(blocks >= 4)
      {
      uint8x16_t B0 = vld1q_u8(in);
      uint8x16_t B1 = vld1q_u8(in+16);
      uint8x16_t B2 = vld1q_u8(in+32);
      uint8x16_t B3 = vld1q_u8(in+48);

      AES_ENC_4_ROUNDS(K0);
      AES_ENC_4_ROUNDS(K1);
      AES_ENC_4_ROUNDS(K2);
      AES_ENC_4_ROUNDS(K3);
      AES_ENC_4_ROUNDS(K4);
      AES_ENC_4_ROUNDS(K5);
      AES_ENC_4_ROUNDS(K6);
      AES_ENC_4_ROUNDS(K7);
      AES_ENC_4_ROUNDS(K8);
      AES_ENC_4_LAST_ROUNDS(K9, K10);

      vst1q_u8(out, B0);
      vst1q_u8(out+16, B1);
      vst1q_u8(out+32, B2);
      vst1q_u8(out+48, B3);

      in += 16*4;
      out += 16*4;
      blocks -= 4;
      }

   // Handle any remaining blocks one at a time
   for(size_t i = 0; i != blocks; ++i)
      {
      uint8x16_t B = vld1q_u8(in+16*i);
      B = vaesmcq_u8(vaeseq_u8(B, K0));
      B = vaesmcq_u8(vaeseq_u8(B, K1));
      B = vaesmcq_u8(vaeseq_u8(B, K2));
      B = vaesmcq_u8(vaeseq_u8(B, K3));
      B = vaesmcq_u8(vaeseq_u8(B, K4));
      B = vaesmcq_u8(vaeseq_u8(B, K5));
      B = vaesmcq_u8(vaeseq_u8(B, K6));
      B = vaesmcq_u8(vaeseq_u8(B, K7));
      B = vaesmcq_u8(vaeseq_u8(B, K8));
      B = veorq_u8(vaeseq_u8(B, K9), K10);
      vst1q_u8(out+16*i, B);
      }
   }

/*
* AES-128 Decryption
*/
BOTAN_FUNC_ISA("+crypto")
void AES_128::hw_aes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
   {
   const uint8_t *skey = reinterpret_cast<const uint8_t*>(m_DK.data());

   // Load the 11 round keys of the AES-128 decryption schedule
   const uint8x16_t K0 = vld1q_u8(skey + 0*16);
   const uint8x16_t K1 = vld1q_u8(skey + 1*16);
   const uint8x16_t K2 = vld1q_u8(skey + 2*16);
   const uint8x16_t K3 = vld1q_u8(skey + 3*16);
   const uint8x16_t K4 = vld1q_u8(skey + 4*16);
   const uint8x16_t K5 = vld1q_u8(skey + 5*16);
   const uint8x16_t K6 = vld1q_u8(skey + 6*16);
   const uint8x16_t K7 = vld1q_u8(skey + 7*16);
   const uint8x16_t K8 = vld1q_u8(skey + 8*16);
   const uint8x16_t K9 = vld1q_u8(skey + 9*16);
   const uint8x16_t K10 = vld1q_u8(skey + 10*16);

   // Process four blocks per iteration while at least four remain
   while(blocks >= 4)
      {
      uint8x16_t B0 = vld1q_u8(in);
      uint8x16_t B1 = vld1q_u8(in+16);
      uint8x16_t B2 = vld1q_u8(in+32);
      uint8x16_t B3 = vld1q_u8(in+48);

      AES_DEC_4_ROUNDS(K0);
      AES_DEC_4_ROUNDS(K1);
      AES_DEC_4_ROUNDS(K2);
      AES_DEC_4_ROUNDS(K3);
      AES_DEC_4_ROUNDS(K4);
      AES_DEC_4_ROUNDS(K5);
      AES_DEC_4_ROUNDS(K6);
      AES_DEC_4_ROUNDS(K7);
      AES_DEC_4_ROUNDS(K8);
      AES_DEC_4_LAST_ROUNDS(K9, K10);

      vst1q_u8(out, B0);
      vst1q_u8(out+16, B1);
      vst1q_u8(out+32, B2);
      vst1q_u8(out+48, B3);

      in += 16*4;
      out += 16*4;
      blocks -= 4;
      }

   // Handle any remaining blocks one at a time
   for(size_t i = 0; i != blocks; ++i)
      {
      uint8x16_t B = vld1q_u8(in+16*i);
      B = vaesimcq_u8(vaesdq_u8(B, K0));
      B = vaesimcq_u8(vaesdq_u8(B, K1));
      B = vaesimcq_u8(vaesdq_u8(B, K2));
      B = vaesimcq_u8(vaesdq_u8(B, K3));
      B = vaesimcq_u8(vaesdq_u8(B, K4));
      B = vaesimcq_u8(vaesdq_u8(B, K5));
      B = vaesimcq_u8(vaesdq_u8(B, K6));
      B = vaesimcq_u8(vaesdq_u8(B, K7));
      B = vaesimcq_u8(vaesdq_u8(B, K8));
      B = veorq_u8(vaesdq_u8(B, K9), K10);
      vst1q_u8(out+16*i, B);
      }
   }

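These member functions are not called directly by applications; they are normally reached through Botan's public BlockCipher interface once the library selects the hardware AES path at runtime. A minimal round-trip check against the FIPS-197 Appendix C.1 test vector, written against the public API (a usage sketch, assuming a build with AES enabled), might look like:

#include <botan/block_cipher.h>
#include <botan/hex.h>
#include <cassert>
#include <vector>

int main()
   {
   auto aes = Botan::BlockCipher::create_or_throw("AES-128");

   const std::vector<uint8_t> key = Botan::hex_decode("000102030405060708090A0B0C0D0E0F");
   std::vector<uint8_t> block = Botan::hex_decode("00112233445566778899AABBCCDDEEFF");

   aes->set_key(key);
   aes->encrypt(block);   // one 16-byte block, encrypted in place
   assert(Botan::hex_encode(block) == "69C4E0D86A7B0430D8CDB78070B4C55A");

   aes->decrypt(block);   // round-trip back to the original plaintext
   assert(Botan::hex_encode(block) == "00112233445566778899AABBCCDDEEFF");
   return 0;
   }
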
/*
* AES-192 Encryption
*/
BOTAN_FUNC_ISA("+crypto")
void AES_192::hw_aes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
   {
   const uint8_t *skey = reinterpret_cast<const uint8_t*>(m_EK.data());

   const uint8x16_t K0 = vld1q_u8(skey + 0*16);
   const uint8x16_t K1 = vld1q_u8(skey + 1*16);
   const uint8x16_t K2 = vld1q_u8(skey + 2*16);
   const uint8x16_t K3 = vld1q_u8(skey + 3*16);
   const uint8x16_t K4 = vld1q_u8(skey + 4*16);
   const uint8x16_t K5 = vld1q_u8(skey + 5*16);
   const uint8x16_t K6 = vld1q_u8(skey + 6*16);
   const uint8x16_t K7 = vld1q_u8(skey + 7*16);
   const uint8x16_t K8 = vld1q_u8(skey + 8*16);
   const uint8x16_t K9 = vld1q_u8(skey + 9*16);
   const uint8x16_t K10 = vld1q_u8(skey + 10*16);
   const uint8x16_t K11 = vld1q_u8(skey + 11*16);
   const uint8x16_t K12 = vld1q_u8(skey + 12*16);

   while(blocks >= 4)
      {
      uint8x16_t B0 = vld1q_u8(in);
      uint8x16_t B1 = vld1q_u8(in+16);
      uint8x16_t B2 = vld1q_u8(in+32);
      uint8x16_t B3 = vld1q_u8(in+48);

      AES_ENC_4_ROUNDS(K0);
      AES_ENC_4_ROUNDS(K1);
      AES_ENC_4_ROUNDS(K2);
      AES_ENC_4_ROUNDS(K3);
      AES_ENC_4_ROUNDS(K4);
      AES_ENC_4_ROUNDS(K5);
      AES_ENC_4_ROUNDS(K6);
      AES_ENC_4_ROUNDS(K7);
      AES_ENC_4_ROUNDS(K8);
      AES_ENC_4_ROUNDS(K9);
      AES_ENC_4_ROUNDS(K10);
      AES_ENC_4_LAST_ROUNDS(K11, K12);

      vst1q_u8(out, B0);
      vst1q_u8(out+16, B1);
      vst1q_u8(out+32, B2);
      vst1q_u8(out+48, B3);

      in += 16*4;
      out += 16*4;
      blocks -= 4;
      }

   for(size_t i = 0; i != blocks; ++i)
      {
      uint8x16_t B = vld1q_u8(in+16*i);
      B = vaesmcq_u8(vaeseq_u8(B, K0));
      B = vaesmcq_u8(vaeseq_u8(B, K1));
      B = vaesmcq_u8(vaeseq_u8(B, K2));
      B = vaesmcq_u8(vaeseq_u8(B, K3));
      B = vaesmcq_u8(vaeseq_u8(B, K4));
      B = vaesmcq_u8(vaeseq_u8(B, K5));
      B = vaesmcq_u8(vaeseq_u8(B, K6));
      B = vaesmcq_u8(vaeseq_u8(B, K7));
      B = vaesmcq_u8(vaeseq_u8(B, K8));
      B = vaesmcq_u8(vaeseq_u8(B, K9));
      B = vaesmcq_u8(vaeseq_u8(B, K10));
      B = veorq_u8(vaeseq_u8(B, K11), K12);
      vst1q_u8(out+16*i, B);
      }
   }

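The only difference between the three key sizes in this file is how many round keys are loaded and how many rounds are applied: AES uses Nr = Nk + 6 rounds, where Nk is the key length in 32-bit words, and the expanded schedule holds Nr + 1 sixteen-byte round keys. A small standalone sketch of that arithmetic (not part of this file):

#include <cstddef>

constexpr std::size_t aes_rounds(std::size_t key_bits)     { return key_bits / 32 + 6; }
constexpr std::size_t aes_round_keys(std::size_t key_bits) { return aes_rounds(key_bits) + 1; }

static_assert(aes_round_keys(128) == 11, "AES-128 uses K0..K10");
static_assert(aes_round_keys(192) == 13, "AES-192 uses K0..K12");
static_assert(aes_round_keys(256) == 15, "AES-256 uses K0..K14");
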
/*
* AES-192 Decryption
*/
BOTAN_FUNC_ISA("+crypto")
void AES_192::hw_aes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
   {
   const uint8_t *skey = reinterpret_cast<const uint8_t*>(m_DK.data());

   const uint8x16_t K0 = vld1q_u8(skey + 0*16);
   const uint8x16_t K1 = vld1q_u8(skey + 1*16);
   const uint8x16_t K2 = vld1q_u8(skey + 2*16);
   const uint8x16_t K3 = vld1q_u8(skey + 3*16);
   const uint8x16_t K4 = vld1q_u8(skey + 4*16);
   const uint8x16_t K5 = vld1q_u8(skey + 5*16);
   const uint8x16_t K6 = vld1q_u8(skey + 6*16);
   const uint8x16_t K7 = vld1q_u8(skey + 7*16);
   const uint8x16_t K8 = vld1q_u8(skey + 8*16);
   const uint8x16_t K9 = vld1q_u8(skey + 9*16);
   const uint8x16_t K10 = vld1q_u8(skey + 10*16);
   const uint8x16_t K11 = vld1q_u8(skey + 11*16);
   const uint8x16_t K12 = vld1q_u8(skey + 12*16);

   while(blocks >= 4)
      {
      uint8x16_t B0 = vld1q_u8(in);
      uint8x16_t B1 = vld1q_u8(in+16);
      uint8x16_t B2 = vld1q_u8(in+32);
      uint8x16_t B3 = vld1q_u8(in+48);

      AES_DEC_4_ROUNDS(K0);
      AES_DEC_4_ROUNDS(K1);
      AES_DEC_4_ROUNDS(K2);
      AES_DEC_4_ROUNDS(K3);
      AES_DEC_4_ROUNDS(K4);
      AES_DEC_4_ROUNDS(K5);
      AES_DEC_4_ROUNDS(K6);
      AES_DEC_4_ROUNDS(K7);
      AES_DEC_4_ROUNDS(K8);
      AES_DEC_4_ROUNDS(K9);
      AES_DEC_4_ROUNDS(K10);
      AES_DEC_4_LAST_ROUNDS(K11, K12);

      vst1q_u8(out, B0);
      vst1q_u8(out+16, B1);
      vst1q_u8(out+32, B2);
      vst1q_u8(out+48, B3);

      in += 16*4;
      out += 16*4;
      blocks -= 4;
      }

   for(size_t i = 0; i != blocks; ++i)
      {
      uint8x16_t B = vld1q_u8(in+16*i);
      B = vaesimcq_u8(vaesdq_u8(B, K0));
      B = vaesimcq_u8(vaesdq_u8(B, K1));
      B = vaesimcq_u8(vaesdq_u8(B, K2));
      B = vaesimcq_u8(vaesdq_u8(B, K3));
      B = vaesimcq_u8(vaesdq_u8(B, K4));
      B = vaesimcq_u8(vaesdq_u8(B, K5));
      B = vaesimcq_u8(vaesdq_u8(B, K6));
      B = vaesimcq_u8(vaesdq_u8(B, K7));
      B = vaesimcq_u8(vaesdq_u8(B, K8));
      B = vaesimcq_u8(vaesdq_u8(B, K9));
      B = vaesimcq_u8(vaesdq_u8(B, K10));
      B = veorq_u8(vaesdq_u8(B, K11), K12);
      vst1q_u8(out+16*i, B);
      }
   }

/*
* AES-256 Encryption
*/
BOTAN_FUNC_ISA("+crypto")
void AES_256::hw_aes_encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
   {
   const uint8_t *skey = reinterpret_cast<const uint8_t*>(m_EK.data());

   const uint8x16_t K0 = vld1q_u8(skey + 0*16);
   const uint8x16_t K1 = vld1q_u8(skey + 1*16);
   const uint8x16_t K2 = vld1q_u8(skey + 2*16);
   const uint8x16_t K3 = vld1q_u8(skey + 3*16);
   const uint8x16_t K4 = vld1q_u8(skey + 4*16);
   const uint8x16_t K5 = vld1q_u8(skey + 5*16);
   const uint8x16_t K6 = vld1q_u8(skey + 6*16);
   const uint8x16_t K7 = vld1q_u8(skey + 7*16);
   const uint8x16_t K8 = vld1q_u8(skey + 8*16);
   const uint8x16_t K9 = vld1q_u8(skey + 9*16);
   const uint8x16_t K10 = vld1q_u8(skey + 10*16);
   const uint8x16_t K11 = vld1q_u8(skey + 11*16);
   const uint8x16_t K12 = vld1q_u8(skey + 12*16);
   const uint8x16_t K13 = vld1q_u8(skey + 13*16);
   const uint8x16_t K14 = vld1q_u8(skey + 14*16);

   while(blocks >= 4)
      {
      uint8x16_t B0 = vld1q_u8(in);
      uint8x16_t B1 = vld1q_u8(in+16);
      uint8x16_t B2 = vld1q_u8(in+32);
      uint8x16_t B3 = vld1q_u8(in+48);

      AES_ENC_4_ROUNDS(K0);
      AES_ENC_4_ROUNDS(K1);
      AES_ENC_4_ROUNDS(K2);
      AES_ENC_4_ROUNDS(K3);
      AES_ENC_4_ROUNDS(K4);
      AES_ENC_4_ROUNDS(K5);
      AES_ENC_4_ROUNDS(K6);
      AES_ENC_4_ROUNDS(K7);
      AES_ENC_4_ROUNDS(K8);
      AES_ENC_4_ROUNDS(K9);
      AES_ENC_4_ROUNDS(K10);
      AES_ENC_4_ROUNDS(K11);
      AES_ENC_4_ROUNDS(K12);
      AES_ENC_4_LAST_ROUNDS(K13, K14);

      vst1q_u8(out, B0);
      vst1q_u8(out+16, B1);
      vst1q_u8(out+32, B2);
      vst1q_u8(out+48, B3);

      in += 16*4;
      out += 16*4;
      blocks -= 4;
      }

   for(size_t i = 0; i != blocks; ++i)
      {
      uint8x16_t B = vld1q_u8(in+16*i);
      B = vaesmcq_u8(vaeseq_u8(B, K0));
      B = vaesmcq_u8(vaeseq_u8(B, K1));
      B = vaesmcq_u8(vaeseq_u8(B, K2));
      B = vaesmcq_u8(vaeseq_u8(B, K3));
      B = vaesmcq_u8(vaeseq_u8(B, K4));
      B = vaesmcq_u8(vaeseq_u8(B, K5));
      B = vaesmcq_u8(vaeseq_u8(B, K6));
      B = vaesmcq_u8(vaeseq_u8(B, K7));
      B = vaesmcq_u8(vaeseq_u8(B, K8));
      B = vaesmcq_u8(vaeseq_u8(B, K9));
      B = vaesmcq_u8(vaeseq_u8(B, K10));
      B = vaesmcq_u8(vaeseq_u8(B, K11));
      B = vaesmcq_u8(vaeseq_u8(B, K12));
      B = veorq_u8(vaeseq_u8(B, K13), K14);
      vst1q_u8(out+16*i, B);
      }
   }

/*
* AES-256 Decryption
*/
BOTAN_FUNC_ISA("+crypto")
void AES_256::hw_aes_decrypt_n(const uint8_t in[], uint8_t out[], size_t blocks) const
   {
   const uint8_t *skey = reinterpret_cast<const uint8_t*>(m_DK.data());

   const uint8x16_t K0 = vld1q_u8(skey + 0*16);
   const uint8x16_t K1 = vld1q_u8(skey + 1*16);
   const uint8x16_t K2 = vld1q_u8(skey + 2*16);
   const uint8x16_t K3 = vld1q_u8(skey + 3*16);
   const uint8x16_t K4 = vld1q_u8(skey + 4*16);
   const uint8x16_t K5 = vld1q_u8(skey + 5*16);
   const uint8x16_t K6 = vld1q_u8(skey + 6*16);
   const uint8x16_t K7 = vld1q_u8(skey + 7*16);
   const uint8x16_t K8 = vld1q_u8(skey + 8*16);
   const uint8x16_t K9 = vld1q_u8(skey + 9*16);
   const uint8x16_t K10 = vld1q_u8(skey + 10*16);
   const uint8x16_t K11 = vld1q_u8(skey + 11*16);
   const uint8x16_t K12 = vld1q_u8(skey + 12*16);
   const uint8x16_t K13 = vld1q_u8(skey + 13*16);
   const uint8x16_t K14 = vld1q_u8(skey + 14*16);

   while(blocks >= 4)
      {
      uint8x16_t B0 = vld1q_u8(in);
      uint8x16_t B1 = vld1q_u8(in+16);
      uint8x16_t B2 = vld1q_u8(in+32);
      uint8x16_t B3 = vld1q_u8(in+48);

      AES_DEC_4_ROUNDS(K0);
      AES_DEC_4_ROUNDS(K1);
      AES_DEC_4_ROUNDS(K2);
      AES_DEC_4_ROUNDS(K3);
      AES_DEC_4_ROUNDS(K4);
      AES_DEC_4_ROUNDS(K5);
      AES_DEC_4_ROUNDS(K6);
      AES_DEC_4_ROUNDS(K7);
      AES_DEC_4_ROUNDS(K8);
      AES_DEC_4_ROUNDS(K9);
      AES_DEC_4_ROUNDS(K10);
      AES_DEC_4_ROUNDS(K11);
      AES_DEC_4_ROUNDS(K12);
      AES_DEC_4_LAST_ROUNDS(K13, K14);

      vst1q_u8(out, B0);
      vst1q_u8(out+16, B1);
      vst1q_u8(out+32, B2);
      vst1q_u8(out+48, B3);

      in += 16*4;
      out += 16*4;
      blocks -= 4;
      }

   for(size_t i = 0; i != blocks; ++i)
      {
      uint8x16_t B = vld1q_u8(in+16*i);
      B = vaesimcq_u8(vaesdq_u8(B, K0));
      B = vaesimcq_u8(vaesdq_u8(B, K1));
      B = vaesimcq_u8(vaesdq_u8(B, K2));
      B = vaesimcq_u8(vaesdq_u8(B, K3));
      B = vaesimcq_u8(vaesdq_u8(B, K4));
      B = vaesimcq_u8(vaesdq_u8(B, K5));
      B = vaesimcq_u8(vaesdq_u8(B, K6));
      B = vaesimcq_u8(vaesdq_u8(B, K7));
      B = vaesimcq_u8(vaesdq_u8(B, K8));
      B = vaesimcq_u8(vaesdq_u8(B, K9));
      B = vaesimcq_u8(vaesdq_u8(B, K10));
      B = vaesimcq_u8(vaesdq_u8(B, K11));
      B = vaesimcq_u8(vaesdq_u8(B, K12));
      B = veorq_u8(vaesdq_u8(B, K13), K14);
      vst1q_u8(out+16*i, B);
      }
   }

#undef AES_ENC_4_ROUNDS
#undef AES_ENC_4_LAST_ROUNDS
#undef AES_DEC_4_ROUNDS
#undef AES_DEC_4_LAST_ROUNDS

}
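
BOTAN_FUNC_ISA("+crypto") (defined in Botan's compiler.h) marks only these functions for compilation with the ARMv8 crypto extension, so the rest of the library can keep its baseline target while this translation unit uses the AES instructions. On GCC and recent Clang for AArch64 it is expected to expand to a function-level target attribute roughly like the following sketch (illustrative only, not the library's exact definition):

#include <arm_neon.h>

// The attribute permits the crypto instructions inside this one function
// even if the file is otherwise built for plain armv8-a.
__attribute__((target("+crypto")))
uint8x16_t one_aes_round(uint8x16_t b, uint8x16_t k)
   {
   return vaesmcq_u8(vaeseq_u8(b, k));
   }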