/* Origin: ceph.git (git.proxmox.com) - ceph/src/spdk/intel-ipsec-mb/no-aesni/aesni_emu.c */
/*******************************************************************************
  Copyright (c) 2018, Intel Corporation

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

      * Redistributions of source code must retain the above copyright notice,
        this list of conditions and the following disclaimer.
      * Redistributions in binary form must reproduce the above copyright
        notice, this list of conditions and the following disclaimer in the
        documentation and/or other materials provided with the distribution.
      * Neither the name of Intel Corporation nor the names of its contributors
        may be used to endorse or promote products derived from this software
        without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*******************************************************************************/
/* ========================================================================== */
/* AESNI emulation API and helper functions */
/* ========================================================================== */
#include "intel-ipsec-mb.h"
#include "aesni_emu.h"
/*
 * AES forward substitution table (S-box).
 * The first index is the high nibble of the input byte and the second
 * index is the low nibble.
 */
static const uint8_t aes_sbox[16][16] = {
        { 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5,
          0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 },
        { 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0,
          0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 },
        { 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc,
          0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 },
        { 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a,
          0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 },
        { 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0,
          0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 },
        { 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b,
          0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf },
        { 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85,
          0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 },
        { 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5,
          0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 },
        { 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17,
          0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 },
        { 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88,
          0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb },
        { 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c,
          0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 },
        { 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9,
          0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 },
        { 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6,
          0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a },
        { 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e,
          0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e },
        { 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94,
          0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf },
        { 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68,
          0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 }
};
/*
 * AES inverse substitution table (inverse S-box).
 * The first index is the high nibble of the input byte and the second
 * index is the low nibble.
 */
static const uint8_t aes_isbox[16][16] = {
        { 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38,
          0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb },
        { 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87,
          0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb },
        { 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d,
          0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e },
        { 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2,
          0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 },
        { 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16,
          0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 },
        { 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda,
          0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 },
        { 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a,
          0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 },
        { 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02,
          0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b },
        { 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea,
          0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 },
        { 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85,
          0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e },
        { 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89,
          0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b },
        { 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20,
          0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 },
        { 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31,
          0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f },
        { 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d,
          0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef },
        { 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0,
          0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 },
        { 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26,
          0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d }
};
/* ========================================================================== */
/* Emulation API helper functions */
/* ========================================================================== */
114 static uint8_t aes_get_sbox(const uint32_t x
)
116 uint32_t i
= (x
>>4) & 0xF;
119 return aes_sbox
[i
][j
];
122 static uint8_t aes_get_isbox(const uint32_t x
)
124 uint32_t i
= (x
>>4) & 0xF;
127 return aes_isbox
[i
][j
];
130 static void xor_xmm(union xmm_reg
*d
,
131 const union xmm_reg
*s1
,
132 const union xmm_reg
*s2
)
136 for (i
= 0; i
< MAX_QWORDS_PER_XMM
; i
++)
137 d
->qword
[i
] = s1
->qword
[i
] ^ s2
->qword
[i
];
/*
 * Rotate a 32-bit word right by 8 bits: the lowest byte of x becomes the
 * highest byte of the result.
 */
static uint32_t rot(const uint32_t x)
{
        return (x >> 8) | (x << 24);
}
/*
 * Apply the forward S-box to each of the four bytes of a 32-bit word.
 *
 * x: input word
 *
 * Returns the word whose bytes are the substituted bytes of x, each byte
 * staying in its original position. The word is split through a union, so
 * byte[n] follows the host's in-memory byte order; because every byte is
 * written back to the slot it was read from, the result does not depend on
 * that order.
 */
static uint32_t sbox4(const uint32_t x)
{
        union {
                uint32_t word;
                uint8_t byte[4];
        } in, out;
        uint32_t n;

        in.word = x;
        for (n = 0; n < 4; n++) {
                out.byte[n] = aes_get_sbox(in.byte[n]);
        }

        return out.word;
}
160 static void substitute_bytes(union xmm_reg
*dst
, const union xmm_reg
*src
)
164 for (i
= 0; i
< MAX_BYTES_PER_XMM
; i
++)
165 dst
->byte
[i
] = aes_get_sbox(src
->byte
[i
]);
168 static void inverse_substitute_bytes(union xmm_reg
*dst
,
169 const union xmm_reg
*src
)
173 for (i
= 0; i
< MAX_BYTES_PER_XMM
; i
++)
174 dst
->byte
[i
] = aes_get_isbox(src
->byte
[i
]);
/*
 * Multiply two elements of GF(2^8) using the AES reduction constant 0x1B.
 *
 * x: first factor; its bits select which doublings of y are accumulated
 * y: second factor; doubled ("xtime") once per bit position of x
 *
 * Returns the product x * y in the field.
 *
 * All eight bits of x are examined, so the function is a complete field
 * multiply. Products for factors below 0x80 are the same as a scan that
 * stops after bit 6.
 */
static uint8_t gfmul(const uint8_t x, const uint8_t y)
{
        uint8_t multiplier = y;
        uint8_t out = 0;
        uint32_t bit;

        for (bit = 0; bit < 8; bit++) {
                uint8_t carry;

                /* accumulate y * 2^bit when that bit of x is set */
                if ((x >> bit) & 1) {
                        out ^= multiplier;
                }

                /* GFMUL by 2. "xtimes" operation from FIPS document */
                carry = multiplier >> 7;                 /* old high bit */
                multiplier = (uint8_t)(multiplier << 1); /* lop off high bit */
                if (carry) {
                        multiplier ^= 0x1B;              /* polynomial reduction */
                }
        }

        return out;
}
200 static void mix_columns(union xmm_reg
*dst
, const union xmm_reg
*src
)
204 for (c
= 0; c
< MAX_DWORDS_PER_XMM
; c
++) {
205 uint8_t s0c
= src
->byte
[c
*4+0];
206 uint8_t s1c
= src
->byte
[c
*4+1];
207 uint8_t s2c
= src
->byte
[c
*4+2];
208 uint8_t s3c
= src
->byte
[c
*4+3];
210 dst
->byte
[c
*4+0] = gfmul(2, s0c
) ^ gfmul(3, s1c
) ^ s2c
^ s3c
;
211 dst
->byte
[c
*4+1] = s0c
^ gfmul(2, s1c
) ^ gfmul(3, s2c
) ^ s3c
;
212 dst
->byte
[c
*4+2] = s0c
^ s1c
^ gfmul(2, s2c
) ^ gfmul(3, s3c
);
213 dst
->byte
[c
*4+3] = gfmul(3, s0c
) ^ s1c
^ s2c
^ gfmul(2, s3c
);
217 static void inverse_mix_columns(union xmm_reg
*dst
,
218 const union xmm_reg
*src
)
222 for (c
= 0; c
< MAX_DWORDS_PER_XMM
; c
++) {
223 uint8_t s0c
= src
->byte
[c
*4+0];
224 uint8_t s1c
= src
->byte
[c
*4+1];
225 uint8_t s2c
= src
->byte
[c
*4+2];
226 uint8_t s3c
= src
->byte
[c
*4+3];
228 dst
->byte
[c
*4+0] = gfmul(0xe, s0c
) ^ gfmul(0xb, s1c
) ^
229 gfmul(0xd, s2c
) ^ gfmul(0x9, s3c
);
230 dst
->byte
[c
*4+1] = gfmul(0x9, s0c
) ^ gfmul(0xe, s1c
) ^
231 gfmul(0xb, s2c
) ^ gfmul(0xd, s3c
);
232 dst
->byte
[c
*4+2] = gfmul(0xd, s0c
) ^ gfmul(0x9, s1c
) ^
233 gfmul(0xe, s2c
) ^ gfmul(0xb, s3c
);
234 dst
->byte
[c
*4+3] = gfmul(0xb, s0c
) ^ gfmul(0xd, s1c
) ^
235 gfmul(0x9, s2c
) ^ gfmul(0xe, s3c
);
/*
 * Wrap a column index that may have gone below zero.
 *
 * x: index; a negative value has 4 added to it, a non-negative value is
 *    returned unchanged
 *
 * The result lies in 0..3 when x is in -4..3.
 */
static uint32_t wrap_neg(const int x)
{
        /* make sure we stay in 0..3 */
        if (x < 0) {
                return (uint32_t)(x + 4);
        }

        return (uint32_t)x;
}
/*
 * Wrap a column index that may have gone above three.
 *
 * x: index; a value greater than 3 has 4 subtracted from it, any other
 *    value is returned unchanged
 *
 * The result lies in 0..3 when x is in 0..7.
 */
static uint32_t wrap_pos(const int x)
{
        /* make sure we stay in 0..3 */
        if (x > 3) {
                return (uint32_t)(x - 4);
        }

        return (uint32_t)x;
}
251 static void shift_rows(union xmm_reg
*dst
, const union xmm_reg
*src
)
253 /* cyclic shift last 3 rows of the input */
255 union xmm_reg tmp
= *src
;
258 0 1 2 3 < columns (i)
260 0 4 8 C | 0 < rows (j)
265 THIS IS THE KEY: progressively move elements to HIGHER
266 numbered columnar values within a row.
268 Each dword is a column with the MSB as the bottom element
269 i is the column index, selects the dword
271 we shift row zero by zero, row 1 by 1 and row 2 by 2 and
272 row 3 by 3, cyclically */
273 for (j
= 0; j
< MAX_DWORDS_PER_XMM
; j
++) {
276 for (i
= 0; i
< MAX_DWORDS_PER_XMM
; i
++)
277 dst
->byte
[i
*4+j
] = tmp
.byte
[wrap_pos(i
+j
)*4+j
];
282 static void inverse_shift_rows(union xmm_reg
*dst
, const union xmm_reg
*src
)
285 union xmm_reg tmp
= *src
;
287 /* THIS IS THE KEY: progressively move elements to LOWER
288 numbered columnar values within a row.
290 Each dword is a column with the MSB as the bottom element
291 i is the column index, selects the dword
293 we shift row zero by zero, row 1 by 1 and row 2 by 2 and
294 row 3 by 3, cyclically */
295 for (j
= 0; j
< MAX_DWORDS_PER_XMM
; j
++) {
298 for (i
= 0; i
< MAX_DWORDS_PER_XMM
; i
++)
299 dst
->byte
[i
*4+j
] = tmp
.byte
[wrap_neg(i
- j
) * 4 + j
];
/* ========================================================================== */
/* AESNI emulation functions */
/* ========================================================================== */
307 IMB_DLL_LOCAL
void emulate_AESKEYGENASSIST(union xmm_reg
*dst
,
308 const union xmm_reg
*src
,
311 union xmm_reg tmp
= *src
;
312 uint32_t rcon
= (imm8
& 0xFF);
314 dst
->dword
[3] = rot(sbox4(tmp
.dword
[3])) ^ rcon
;
315 dst
->dword
[2] = sbox4(tmp
.dword
[3]);
316 dst
->dword
[1] = rot(sbox4(tmp
.dword
[1])) ^ rcon
;
317 dst
->dword
[0] = sbox4(tmp
.dword
[1]);
320 IMB_DLL_LOCAL
void emulate_AESENC(union xmm_reg
*dst
,
321 const union xmm_reg
*src
)
323 union xmm_reg tmp
= *dst
;
325 shift_rows(&tmp
, &tmp
);
326 substitute_bytes(&tmp
, &tmp
);
327 mix_columns(&tmp
, &tmp
);
328 xor_xmm(dst
, &tmp
, src
);
331 IMB_DLL_LOCAL
void emulate_AESENCLAST(union xmm_reg
*dst
,
332 const union xmm_reg
*src
)
334 union xmm_reg tmp
= *dst
;
336 shift_rows(&tmp
, &tmp
);
337 substitute_bytes(&tmp
, &tmp
);
338 xor_xmm(dst
, &tmp
, src
);
341 IMB_DLL_LOCAL
void emulate_AESDEC(union xmm_reg
*dst
,
342 const union xmm_reg
*src
)
344 union xmm_reg tmp
= *dst
;
346 inverse_shift_rows(&tmp
, &tmp
);
347 inverse_substitute_bytes(&tmp
, &tmp
);
348 inverse_mix_columns(&tmp
, &tmp
);
349 xor_xmm(dst
, &tmp
, src
);
352 IMB_DLL_LOCAL
void emulate_AESDECLAST(union xmm_reg
*dst
,
353 const union xmm_reg
*src
)
355 union xmm_reg tmp
= *dst
;
357 inverse_shift_rows(&tmp
, &tmp
);
358 inverse_substitute_bytes(&tmp
, &tmp
);
359 xor_xmm(dst
, &tmp
, src
);
362 IMB_DLL_LOCAL
void emulate_AESIMC(union xmm_reg
*dst
,
363 const union xmm_reg
*src
)
365 inverse_mix_columns(dst
, src
);