14 inline uint32x4_t qswap_32(uint32x4_t B)
16 static const uint8x16_t tbl = (uint8x16_t){12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3 };
17 return vreinterpretq_u32_u8(vqtbl1q_u8(vreinterpretq_u8_u32(B), tbl));
20 inline uint32x4_t bswap_32(uint32x4_t B)
22 return vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(B)));
29 inline uint32x4_t bqswap_32(uint32x4_t B)
31 static const uint8x16_t tbl = (uint8x16_t){15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
32 return vreinterpretq_u32_u8(vqtbl1q_u8(vreinterpretq_u8_u32(B), tbl));
/*
* Apply vsm4eq_u32 with the round-key vector K to each of the four block
* vectors B0..B3, overwriting them in place.
*
* The body is wrapped in do { ... } while(0) so that the macro expands to
* exactly one statement and is safe inside an unbraced if/else; the caller
* supplies the trailing semicolon. Every argument is expanded more than once,
* so pass plain variables without side effects.
*/
#define SM4_E(B0, B1, B2, B3, K)       \
   do {                                \
      (B0) = vsm4eq_u32((B0), (K));    \
      (B1) = vsm4eq_u32((B1), (K));    \
      (B2) = vsm4eq_u32((B2), (K));    \
      (B3) = vsm4eq_u32((B3), (K));    \
   } while(0)
45 void SM4::sm4_armv8_encrypt(const uint8_t input8[], uint8_t output8[],
size_t blocks)
const
47 const uint32x4_t K0 = vld1q_u32(&m_RK[ 0]);
48 const uint32x4_t K1 = vld1q_u32(&m_RK[ 4]);
49 const uint32x4_t K2 = vld1q_u32(&m_RK[ 8]);
50 const uint32x4_t K3 = vld1q_u32(&m_RK[12]);
51 const uint32x4_t K4 = vld1q_u32(&m_RK[16]);
52 const uint32x4_t K5 = vld1q_u32(&m_RK[20]);
53 const uint32x4_t K6 = vld1q_u32(&m_RK[24]);
54 const uint32x4_t K7 = vld1q_u32(&m_RK[28]);
56 const uint32_t* input32 =
reinterpret_cast<const uint32_t*
>(
reinterpret_cast<const void*
>(input8));
57 uint32_t* output32 =
reinterpret_cast<uint32_t*
>(
reinterpret_cast<void*
>(output8));
61 uint32x4_t B0 = bswap_32(vld1q_u32(input32));
62 uint32x4_t B1 = bswap_32(vld1q_u32(input32+4));
63 uint32x4_t B2 = bswap_32(vld1q_u32(input32+8));
64 uint32x4_t B3 = bswap_32(vld1q_u32(input32+12));
66 SM4_E(B0, B1, B2, B3, K0);
67 SM4_E(B0, B1, B2, B3, K1);
68 SM4_E(B0, B1, B2, B3, K2);
69 SM4_E(B0, B1, B2, B3, K3);
70 SM4_E(B0, B1, B2, B3, K4);
71 SM4_E(B0, B1, B2, B3, K5);
72 SM4_E(B0, B1, B2, B3, K6);
73 SM4_E(B0, B1, B2, B3, K7);
75 vst1q_u32(output32 , bqswap_32(B0));
76 vst1q_u32(output32+ 4, bqswap_32(B1));
77 vst1q_u32(output32+ 8, bqswap_32(B2));
78 vst1q_u32(output32+12, bqswap_32(B3));
85 for(
size_t i = 0; i != blocks; ++i)
87 uint32x4_t B = bswap_32(vld1q_u32(input32));
89 B = vsm4eq_u32(B, K0);
90 B = vsm4eq_u32(B, K1);
91 B = vsm4eq_u32(B, K2);
92 B = vsm4eq_u32(B, K3);
93 B = vsm4eq_u32(B, K4);
94 B = vsm4eq_u32(B, K5);
95 B = vsm4eq_u32(B, K6);
96 B = vsm4eq_u32(B, K7);
98 vst1q_u32(output32, bqswap_32(B));
106 void SM4::sm4_armv8_decrypt(const uint8_t input8[], uint8_t output8[],
size_t blocks)
const
108 const uint32x4_t K0 = qswap_32(vld1q_u32(&m_RK[ 0]));
109 const uint32x4_t K1 = qswap_32(vld1q_u32(&m_RK[ 4]));
110 const uint32x4_t K2 = qswap_32(vld1q_u32(&m_RK[ 8]));
111 const uint32x4_t K3 = qswap_32(vld1q_u32(&m_RK[12]));
112 const uint32x4_t K4 = qswap_32(vld1q_u32(&m_RK[16]));
113 const uint32x4_t K5 = qswap_32(vld1q_u32(&m_RK[20]));
114 const uint32x4_t K6 = qswap_32(vld1q_u32(&m_RK[24]));
115 const uint32x4_t K7 = qswap_32(vld1q_u32(&m_RK[28]));
117 const uint32_t* input32 =
reinterpret_cast<const uint32_t*
>(
reinterpret_cast<const void*
>(input8));
118 uint32_t* output32 =
reinterpret_cast<uint32_t*
>(
reinterpret_cast<void*
>(output8));
122 uint32x4_t B0 = bswap_32(vld1q_u32(input32));
123 uint32x4_t B1 = bswap_32(vld1q_u32(input32+4));
124 uint32x4_t B2 = bswap_32(vld1q_u32(input32+8));
125 uint32x4_t B3 = bswap_32(vld1q_u32(input32+12));
127 SM4_E(B0, B1, B2, B3, K7);
128 SM4_E(B0, B1, B2, B3, K6);
129 SM4_E(B0, B1, B2, B3, K5);
130 SM4_E(B0, B1, B2, B3, K4);
131 SM4_E(B0, B1, B2, B3, K3);
132 SM4_E(B0, B1, B2, B3, K2);
133 SM4_E(B0, B1, B2, B3, K1);
134 SM4_E(B0, B1, B2, B3, K0);
136 vst1q_u32(output32 , bqswap_32(B0));
137 vst1q_u32(output32+ 4, bqswap_32(B1));
138 vst1q_u32(output32+ 8, bqswap_32(B2));
139 vst1q_u32(output32+12, bqswap_32(B3));
146 for(
size_t i = 0; i != blocks; ++i)
148 uint32x4_t B = bswap_32(vld1q_u32(input32));
150 B = vsm4eq_u32(B, K7);
151 B = vsm4eq_u32(B, K6);
152 B = vsm4eq_u32(B, K5);
153 B = vsm4eq_u32(B, K4);
154 B = vsm4eq_u32(B, K3);
155 B = vsm4eq_u32(B, K2);
156 B = vsm4eq_u32(B, K1);
157 B = vsm4eq_u32(B, K0);
159 vst1q_u32(output32, bqswap_32(B));
// NOTE(review): these empty fallback definitions come after every visible use
// and look like residue from a documentation listing -- confirm whether they
// are needed at all. They are guarded so they can never replace a real
// definition made earlier in the translation unit; in particular an
// unconditional empty SM4_E would conflict with the SM4_E macro defined above.
#ifndef BOTAN_FUNC_ISA
   #define BOTAN_FUNC_ISA(isa)
#endif

#ifndef SM4_E
   #define SM4_E(B0, B1, B2, B3, K)
#endif