]> git.proxmox.com Git - ceph.git/blob - ceph/src/spdk/intel-ipsec-mb/no-aesni/aesni_emu.c
import 15.2.0 Octopus source
[ceph.git] / ceph / src / spdk / intel-ipsec-mb / no-aesni / aesni_emu.c
1 /*******************************************************************************
2 Copyright (c) 2018, Intel Corporation
3
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions are met:
6
7 * Redistributions of source code must retain the above copyright notice,
8 this list of conditions and the following disclaimer.
9 * Redistributions in binary form must reproduce the above copyright
10 notice, this list of conditions and the following disclaimer in the
11 documentation and/or other materials provided with the distribution.
12 * Neither the name of Intel Corporation nor the names of its contributors
13 may be used to endorse or promote products derived from this software
14 without specific prior written permission.
15
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 *******************************************************************************/
27
28 /* ========================================================================== */
29 /* AESNI emulation API and helper functions */
30 /* ========================================================================== */
31
32 #include "intel-ipsec-mb.h"
33 #include "aesni_emu.h"
34
/*
 * Overlay of one 32-bit word and its four constituent bytes, so that a
 * dword can be processed one byte at a time and read back as a whole.
 */
typedef union {
        uint32_t i;        /* the storage viewed as a single 32-bit word */
        uint8_t byte[4];   /* the same storage viewed as four bytes */
} byte_split_t;
39
/*
 * AES forward substitution table, stored as a 16x16 grid.
 * The first index is the high nibble of the byte being substituted and
 * the second index is its low nibble.
 */
static const uint8_t aes_sbox[16][16] = {
        /* high nibble 0x0 */
        { 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5,
          0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 },
        /* high nibble 0x1 */
        { 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0,
          0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 },
        /* high nibble 0x2 */
        { 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc,
          0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 },
        /* high nibble 0x3 */
        { 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a,
          0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 },
        /* high nibble 0x4 */
        { 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0,
          0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 },
        /* high nibble 0x5 */
        { 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b,
          0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf },
        /* high nibble 0x6 */
        { 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85,
          0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 },
        /* high nibble 0x7 */
        { 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5,
          0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 },
        /* high nibble 0x8 */
        { 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17,
          0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 },
        /* high nibble 0x9 */
        { 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88,
          0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb },
        /* high nibble 0xa */
        { 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c,
          0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 },
        /* high nibble 0xb */
        { 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9,
          0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 },
        /* high nibble 0xc */
        { 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6,
          0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a },
        /* high nibble 0xd */
        { 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e,
          0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e },
        /* high nibble 0xe */
        { 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94,
          0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf },
        /* high nibble 0xf */
        { 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68,
          0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 }
};
74
/*
 * AES inverse substitution table, stored as a 16x16 grid.
 * The first index is the high nibble of the byte being substituted and
 * the second index is its low nibble.
 */
static const uint8_t aes_isbox[16][16] = {
        /* high nibble 0x0 */
        { 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38,
          0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb },
        /* high nibble 0x1 */
        { 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87,
          0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb },
        /* high nibble 0x2 */
        { 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d,
          0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e },
        /* high nibble 0x3 */
        { 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2,
          0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 },
        /* high nibble 0x4 */
        { 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16,
          0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 },
        /* high nibble 0x5 */
        { 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda,
          0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 },
        /* high nibble 0x6 */
        { 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a,
          0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 },
        /* high nibble 0x7 */
        { 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02,
          0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b },
        /* high nibble 0x8 */
        { 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea,
          0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 },
        /* high nibble 0x9 */
        { 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85,
          0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e },
        /* high nibble 0xa */
        { 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89,
          0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b },
        /* high nibble 0xb */
        { 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20,
          0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 },
        /* high nibble 0xc */
        { 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31,
          0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f },
        /* high nibble 0xd */
        { 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d,
          0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef },
        /* high nibble 0xe */
        { 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0,
          0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 },
        /* high nibble 0xf */
        { 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26,
          0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d }
};
109
110 /* ========================================================================== */
111 /* Emulation API helper functions */
112 /* ========================================================================== */
113
114 static uint8_t aes_get_sbox(const uint32_t x)
115 {
116 uint32_t i = (x>>4) & 0xF;
117 uint32_t j = x&0xF;
118
119 return aes_sbox[i][j];
120 }
121
122 static uint8_t aes_get_isbox(const uint32_t x)
123 {
124 uint32_t i = (x>>4) & 0xF;
125 uint32_t j = x&0xF;
126
127 return aes_isbox[i][j];
128 }
129
130 static void xor_xmm(union xmm_reg *d,
131 const union xmm_reg *s1,
132 const union xmm_reg *s2)
133 {
134 uint32_t i;
135
136 for (i = 0; i < MAX_QWORDS_PER_XMM; i++)
137 d->qword[i] = s1->qword[i] ^ s2->qword[i];
138 }
139
/*
 * Rotate a 32-bit word right by 8 bits: the lowest byte moves to the top
 * byte position and the other three bytes each move down by one.
 */
static uint32_t rot(const uint32_t x)
{
        const uint32_t lowest_byte = x & 0xFF;

        return (lowest_byte << 24) | (x >> 8);
}
146
147 static uint32_t sbox4(const uint32_t x)
148 {
149 uint32_t i;
150 byte_split_t b, o;
151
152 b.i = x;
153
154 for (i = 0; i < 4; i++)
155 o.byte[i] = aes_get_sbox(b.byte[i]);
156
157 return o.i;
158 }
159
160 static void substitute_bytes(union xmm_reg *dst, const union xmm_reg *src)
161 {
162 uint32_t i;
163
164 for (i = 0; i < MAX_BYTES_PER_XMM; i++)
165 dst->byte[i] = aes_get_sbox(src->byte[i]);
166 }
167
168 static void inverse_substitute_bytes(union xmm_reg *dst,
169 const union xmm_reg *src)
170 {
171 uint32_t i;
172
173 for (i = 0; i < MAX_BYTES_PER_XMM; i++)
174 dst->byte[i] = aes_get_isbox(src->byte[i]);
175 }
176
/*
 * Multiply x by y in GF(2^8), reducing with the constant 0x1B.
 *
 * Shift-and-add scheme: 'multiplier' starts as y and is doubled (the
 * "xtimes" step) once per bit position; whenever the matching bit of x
 * is set, the current multiplier is XORed into the result.
 *
 * All eight bits of x are examined. Stopping after bit 6 would silently
 * drop the contribution of bit 7 and return a wrong product for any
 * x >= 0x80.
 */
static uint8_t gfmul(const uint8_t x, const uint8_t y)
{
        uint32_t i;
        uint8_t multiplier = y;
        uint8_t out = 0;

        for (i = 0; i < 8; i++) {
                if (i >= 1) {
                        /* GFMUL by 2. "xtimes" operation from FIPS document */
                        /* lop off the high bit */
                        uint8_t t = (uint8_t)(multiplier << 1);

                        if (multiplier >> 7) /* look at the old high bit */
                                multiplier = t ^ 0x1B; /* polynomial division */
                        else
                                multiplier = t;
                }
                if ((x >> i) & 1)
                        out = out ^ multiplier;
        }

        return out;
}
199
200 static void mix_columns(union xmm_reg *dst, const union xmm_reg *src)
201 {
202 uint32_t c;
203
204 for (c = 0; c < MAX_DWORDS_PER_XMM; c++) {
205 uint8_t s0c = src->byte[c*4+0];
206 uint8_t s1c = src->byte[c*4+1];
207 uint8_t s2c = src->byte[c*4+2];
208 uint8_t s3c = src->byte[c*4+3];
209
210 dst->byte[c*4+0] = gfmul(2, s0c) ^ gfmul(3, s1c) ^ s2c ^ s3c;
211 dst->byte[c*4+1] = s0c ^ gfmul(2, s1c) ^ gfmul(3, s2c) ^ s3c;
212 dst->byte[c*4+2] = s0c ^ s1c ^ gfmul(2, s2c) ^ gfmul(3, s3c);
213 dst->byte[c*4+3] = gfmul(3, s0c) ^ s1c ^ s2c ^ gfmul(2, s3c);
214 }
215 }
216
217 static void inverse_mix_columns(union xmm_reg *dst,
218 const union xmm_reg *src)
219 {
220 uint32_t c;
221
222 for (c = 0; c < MAX_DWORDS_PER_XMM; c++) {
223 uint8_t s0c = src->byte[c*4+0];
224 uint8_t s1c = src->byte[c*4+1];
225 uint8_t s2c = src->byte[c*4+2];
226 uint8_t s3c = src->byte[c*4+3];
227
228 dst->byte[c*4+0] = gfmul(0xe, s0c) ^ gfmul(0xb, s1c) ^
229 gfmul(0xd, s2c) ^ gfmul(0x9, s3c);
230 dst->byte[c*4+1] = gfmul(0x9, s0c) ^ gfmul(0xe, s1c) ^
231 gfmul(0xb, s2c) ^ gfmul(0xd, s3c);
232 dst->byte[c*4+2] = gfmul(0xd, s0c) ^ gfmul(0x9, s1c) ^
233 gfmul(0xe, s2c) ^ gfmul(0xb, s3c);
234 dst->byte[c*4+3] = gfmul(0xb, s0c) ^ gfmul(0xd, s1c) ^
235 gfmul(0x9, s2c) ^ gfmul(0xe, s3c);
236 }
237 }
238
/*
 * Fold a possibly negative column index back into 0..3.
 * Only a single correction of +4 is applied, so the result is in range
 * for inputs from -4 to 3; non-negative inputs are returned unchanged.
 */
static uint32_t wrap_neg(const int x)
{
        if (x < 0)
                return x + 4;

        return x;
}
244
/*
 * Fold a column index that may have run past 3 back into 0..3.
 * Only a single correction of -4 is applied, so the result is in range
 * for inputs from 0 to 7; inputs of 3 or less are returned unchanged.
 */
static uint32_t wrap_pos(const int x)
{
        if (x > 3)
                return x - 4;

        return x;
}
250
251 static void shift_rows(union xmm_reg *dst, const union xmm_reg *src)
252 {
253 /* cyclic shift last 3 rows of the input */
254 int j;
255 union xmm_reg tmp = *src;
256
257 /* bytes to matrix:
258 0 1 2 3 < columns (i)
259 ----------+
260 0 4 8 C | 0 < rows (j)
261 1 5 9 D | 1
262 2 6 A E | 2
263 3 7 B F | 3
264
265 THIS IS THE KEY: progressively move elements to HIGHER
266 numbered columnar values within a row.
267
268 Each dword is a column with the MSB as the bottom element
269 i is the column index, selects the dword
270 j is the row index,
271 we shift row zero by zero, row 1 by 1 and row 2 by 2 and
272 row 3 by 3, cyclically */
273 for (j = 0; j < MAX_DWORDS_PER_XMM; j++) {
274 int i;
275
276 for (i = 0; i < MAX_DWORDS_PER_XMM; i++)
277 dst->byte[i*4+j] = tmp.byte[wrap_pos(i+j)*4+j];
278 }
279
280 }
281
282 static void inverse_shift_rows(union xmm_reg *dst, const union xmm_reg *src)
283 {
284 uint32_t j;
285 union xmm_reg tmp = *src;
286
287 /* THIS IS THE KEY: progressively move elements to LOWER
288 numbered columnar values within a row.
289
290 Each dword is a column with the MSB as the bottom element
291 i is the column index, selects the dword
292 j is the row index,
293 we shift row zero by zero, row 1 by 1 and row 2 by 2 and
294 row 3 by 3, cyclically */
295 for (j = 0; j < MAX_DWORDS_PER_XMM; j++) {
296 uint32_t i;
297
298 for (i = 0; i < MAX_DWORDS_PER_XMM; i++)
299 dst->byte[i*4+j] = tmp.byte[wrap_neg(i - j) * 4 + j];
300 }
301 }
302
303 /* ========================================================================== */
304 /* AESNI emulation functions */
305 /* ========================================================================== */
306
307 IMB_DLL_LOCAL void emulate_AESKEYGENASSIST(union xmm_reg *dst,
308 const union xmm_reg *src,
309 const uint32_t imm8)
310 {
311 union xmm_reg tmp = *src;
312 uint32_t rcon = (imm8 & 0xFF);
313
314 dst->dword[3] = rot(sbox4(tmp.dword[3])) ^ rcon;
315 dst->dword[2] = sbox4(tmp.dword[3]);
316 dst->dword[1] = rot(sbox4(tmp.dword[1])) ^ rcon;
317 dst->dword[0] = sbox4(tmp.dword[1]);
318 }
319
320 IMB_DLL_LOCAL void emulate_AESENC(union xmm_reg *dst,
321 const union xmm_reg *src)
322 {
323 union xmm_reg tmp = *dst;
324
325 shift_rows(&tmp, &tmp);
326 substitute_bytes(&tmp, &tmp);
327 mix_columns(&tmp, &tmp);
328 xor_xmm(dst, &tmp, src);
329 }
330
331 IMB_DLL_LOCAL void emulate_AESENCLAST(union xmm_reg *dst,
332 const union xmm_reg *src)
333 {
334 union xmm_reg tmp = *dst;
335
336 shift_rows(&tmp, &tmp);
337 substitute_bytes(&tmp, &tmp);
338 xor_xmm(dst, &tmp, src);
339 }
340
341 IMB_DLL_LOCAL void emulate_AESDEC(union xmm_reg *dst,
342 const union xmm_reg *src)
343 {
344 union xmm_reg tmp = *dst;
345
346 inverse_shift_rows(&tmp, &tmp);
347 inverse_substitute_bytes(&tmp, &tmp);
348 inverse_mix_columns(&tmp, &tmp);
349 xor_xmm(dst, &tmp, src);
350 }
351
352 IMB_DLL_LOCAL void emulate_AESDECLAST(union xmm_reg *dst,
353 const union xmm_reg *src)
354 {
355 union xmm_reg tmp = *dst;
356
357 inverse_shift_rows(&tmp, &tmp);
358 inverse_substitute_bytes(&tmp, &tmp);
359 xor_xmm(dst, &tmp, src);
360 }
361
362 IMB_DLL_LOCAL void emulate_AESIMC(union xmm_reg *dst,
363 const union xmm_reg *src)
364 {
365 inverse_mix_columns(dst, src);
366 }