/* Source: arch/arm64/crypto/aes-neon.S (mirror_ubuntu-artful-kernel, git.proxmox.com mirror) */
1 /*
2 * linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON
3 *
4 * Copyright (C) 2013 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11 #include <linux/linkage.h>
12 #include <asm/assembler.h>
13
/*
 * Expose the generic AES mode routines (from aes-modes.S, included below)
 * under a "neon_" symbol prefix, so this pure-NEON implementation can
 * coexist with other AES drivers that include the same modes file.
 */
#define AES_ENTRY(func)		ENTRY(neon_ ## func)
#define AES_ENDPROC(func)	ENDPROC(neon_ ## func)
16
	/*
	 * Multiply each byte by the polynomial 'x' in GF(2^8): shift left
	 * by one and conditionally reduce.  \const must hold 0x1b (the AES
	 * reduction polynomial) in every byte.  sshr #7 replicates each
	 * byte's top bit into a 0x00/0xff mask, which gates whether 0x1b
	 * is xored into the shifted result.
	 */
	.macro		mul_by_x, out, in, temp, const
	sshr		\temp, \in, #7
	shl		\out, \in, #1
	and		\temp, \temp, \const
	eor		\out, \out, \temp
	.endm
24
	/*
	 * Multiply each byte by 'x^2' in GF(2^8): shift left by two and
	 * reduce.  ushr #6 extracts the two bits shifted out of each byte;
	 * the carryless multiply (pmul) of that 2-bit value by \const
	 * (0x1b per byte) produces the required reduction term in a single
	 * instruction instead of two mask-and-xor steps.
	 */
	.macro		mul_by_x2, out, in, temp, const
	ushr		\temp, \in, #6
	shl		\out, \in, #2
	pmul		\temp, \temp, \const
	eor		\out, \out, \temp
	.endm
32
	/*
	 * Preload the entire 256-byte Sbox and the constant vectors used by
	 * the round macros.  Fixed register allocation from here on:
	 *   v12      - 0x1b in each byte (GF(2^8) reduction polynomial)
	 *   v13      - ShiftRows tbl permutation vector
	 *   v14      - .Lror32by8 tbl vector (rotate each 32-bit word by 8)
	 *   v16-v31  - the 256-byte Sbox, 16 bytes per register
	 * v15 is NOT set here: the do_block loops load round keys / #0x40
	 * into it.  \temp is a scratch general-purpose register.
	 */
	.macro		prepare, sbox, shiftrows, temp
	adr		\temp, \sbox
	movi		v12.16b, #0x1b
	ldr		q13, \shiftrows
	ldr		q14, .Lror32by8
	ld1		{v16.16b-v19.16b}, [\temp], #64
	ld1		{v20.16b-v23.16b}, [\temp], #64
	ld1		{v24.16b-v27.16b}, [\temp], #64
	ld1		{v28.16b-v31.16b}, [\temp]
	.endm
44
	/*
	 * Do the preload for encryption: forward Sbox and forward ShiftRows.
	 * ignore0/ignore1 exist only to match the signature expected by
	 * aes-modes.S (other implementations take rounds/key arguments).
	 */
	.macro		enc_prepare, ignore0, ignore1, temp
	prepare		.LForward_Sbox, .LForward_ShiftRows, \temp
	.endm
49
	/*
	 * Key switch hook required by aes-modes.S.  This implementation
	 * keeps no key material in registers between blocks (round keys are
	 * reloaded from memory each round), so nothing needs to happen here.
	 */
	.macro		enc_switch_key, ignore0, ignore1, temp
	/* do nothing */
	.endm
53
	/*
	 * Do the preload for decryption: inverse Sbox and inverse ShiftRows.
	 * ignore0/ignore1 exist only to match the signature expected by
	 * aes-modes.S.
	 */
	.macro		dec_prepare, ignore0, ignore1, temp
	prepare		.LReverse_Sbox, .LReverse_ShiftRows, \temp
	.endm
58
	/*
	 * Apply the SubBytes transformation using the preloaded Sbox.
	 * tbl/tbx can index at most 64 table bytes at once, so the 256-byte
	 * Sbox is consumed in four 64-byte slices: out-of-range indices make
	 * tbl write zero and tbx leave the destination byte untouched, so
	 * exactly one slice contributes per byte.  v15 must hold #0x40 in
	 * every byte (set by the caller); it is subtracted to rebase the
	 * index for each successive slice.  Clobbers v9-v11.
	 */
	.macro		sub_bytes, in
	sub		v9.16b, \in\().16b, v15.16b
	tbl		\in\().16b, {v16.16b-v19.16b}, \in\().16b
	sub		v10.16b, v9.16b, v15.16b
	tbx		\in\().16b, {v20.16b-v23.16b}, v9.16b
	sub		v11.16b, v10.16b, v15.16b
	tbx		\in\().16b, {v24.16b-v27.16b}, v10.16b
	tbx		\in\().16b, {v28.16b-v31.16b}, v11.16b
	.endm
69
	/*
	 * Apply the MixColumns transformation (or its inverse when
	 * \enc == 0).  The forward transform is computed from byte
	 * rotations of the column: rev32 on .8h swaps 16-bit pairs
	 * (a rotate by 16 within each 32-bit word), and tbl with v14
	 * rotates each 32-bit word right by 8 bits.
	 * For decryption the state is first multiplied by { 5, 0, 4, 0 };
	 * combined with the forward MixColumns multiplication by
	 * { 2, 1, 1, 3 } this yields the inverse MixColumns coefficients
	 * { 0xe, 0xb, 0xd, 0x9 } — NOTE(review): coefficient identity taken
	 * from the upstream commit message, confirm against FIPS-197.
	 * Clobbers v8 and v9; expects v12/v14 as set up by 'prepare'.
	 */
	.macro		mix_columns, in, enc
	.if		\enc == 0
	/* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
	mul_by_x2	v8.16b, \in\().16b, v9.16b, v12.16b
	eor		\in\().16b, \in\().16b, v8.16b
	rev32		v8.8h, v8.8h
	eor		\in\().16b, \in\().16b, v8.16b
	.endif

	mul_by_x	v9.16b, \in\().16b, v8.16b, v12.16b
	rev32		v8.8h, \in\().8h
	eor		v8.16b, v8.16b, v9.16b
	eor		\in\().16b, \in\().16b, v8.16b
	tbl		\in\().16b, {\in\().16b}, v14.16b
	eor		\in\().16b, \in\().16b, v8.16b
	.endm
87
	/*
	 * Run one complete AES block operation on the state in \in.
	 *   \enc     - 1 for encryption, 0 for decryption (selects the
	 *              mix_columns variant; Sbox/ShiftRows tables were
	 *              already chosen by enc_prepare/dec_prepare)
	 *   \rounds  - number of rounds
	 *   \rk      - pointer to the round key schedule (preserved)
	 *   \rkp, \i - scratch GP registers: running key pointer, counter
	 * v15 does double duty: it holds the current round key for the
	 * AddRoundKey eor, then is immediately reloaded with #0x40 for
	 * sub_bytes.  ShiftRows (tbl via v13) is applied before SubBytes;
	 * the order is interchangeable because SubBytes acts byte-wise.
	 * The final round skips MixColumns, as the algorithm requires.
	 */
	.macro		do_block, enc, in, rounds, rk, rkp, i
	ld1		{v15.4s}, [\rk]
	add		\rkp, \rk, #16
	mov		\i, \rounds
1111:	eor		\in\().16b, \in\().16b, v15.16b		/* ^round key */
	movi		v15.16b, #0x40
	tbl		\in\().16b, {\in\().16b}, v13.16b	/* ShiftRows */
	sub_bytes	\in
	subs		\i, \i, #1
	ld1		{v15.4s}, [\rkp], #16
	beq		2222f
	mix_columns	\in, \enc
	b		1111b
2222:	eor		\in\().16b, \in\().16b, v15.16b		/* ^round key */
	.endm
103
	/* Encrypt a single block: do_block with \enc == 1. */
	.macro		encrypt_block, in, rounds, rk, rkp, i
	do_block	1, \in, \rounds, \rk, \rkp, \i
	.endm
107
	/* Decrypt a single block: do_block with \enc == 0. */
	.macro		decrypt_block, in, rounds, rk, rkp, i
	do_block	0, \in, \rounds, \rk, \rkp, \i
	.endm
111
112 /*
113 * Interleaved versions: functionally equivalent to the
114 * ones above, but applied to 2 or 4 AES states in parallel.
115 */
116
	/*
	 * SubBytes on two AES states at once.  Same four-slice tbl/tbx
	 * scheme as sub_bytes, with the instructions for the two states
	 * interleaved (presumably to hide tbl/tbx latency — the file only
	 * states they run "in parallel").  v15 must hold #0x40 per byte.
	 * Clobbers v8-v11 (v8/v10 index state 0, v9/v11 index state 1).
	 */
	.macro		sub_bytes_2x, in0, in1
	sub		v8.16b, \in0\().16b, v15.16b
	tbl		\in0\().16b, {v16.16b-v19.16b}, \in0\().16b
	sub		v9.16b, \in1\().16b, v15.16b
	tbl		\in1\().16b, {v16.16b-v19.16b}, \in1\().16b
	sub		v10.16b, v8.16b, v15.16b
	tbx		\in0\().16b, {v20.16b-v23.16b}, v8.16b
	sub		v11.16b, v9.16b, v15.16b
	tbx		\in1\().16b, {v20.16b-v23.16b}, v9.16b
	sub		v8.16b, v10.16b, v15.16b
	tbx		\in0\().16b, {v24.16b-v27.16b}, v10.16b
	sub		v9.16b, v11.16b, v15.16b
	tbx		\in1\().16b, {v24.16b-v27.16b}, v11.16b
	tbx		\in0\().16b, {v28.16b-v31.16b}, v8.16b
	tbx		\in1\().16b, {v28.16b-v31.16b}, v9.16b
	.endm
133
	/*
	 * SubBytes on four AES states at once.  With only v8-v11 available
	 * as scratch, each state gets one index register (v8..v11 for
	 * in0..in3) which is rebased IN PLACE (sub vN, vN, v15) between
	 * table slices, rather than cascading into fresh registers as the
	 * 1x/2x variants do.  v15 must hold #0x40 per byte.
	 * Clobbers v8-v11.
	 */
	.macro		sub_bytes_4x, in0, in1, in2, in3
	sub		v8.16b, \in0\().16b, v15.16b
	tbl		\in0\().16b, {v16.16b-v19.16b}, \in0\().16b
	sub		v9.16b, \in1\().16b, v15.16b
	tbl		\in1\().16b, {v16.16b-v19.16b}, \in1\().16b
	sub		v10.16b, \in2\().16b, v15.16b
	tbl		\in2\().16b, {v16.16b-v19.16b}, \in2\().16b
	sub		v11.16b, \in3\().16b, v15.16b
	tbl		\in3\().16b, {v16.16b-v19.16b}, \in3\().16b
	tbx		\in0\().16b, {v20.16b-v23.16b}, v8.16b
	tbx		\in1\().16b, {v20.16b-v23.16b}, v9.16b
	sub		v8.16b, v8.16b, v15.16b
	tbx		\in2\().16b, {v20.16b-v23.16b}, v10.16b
	sub		v9.16b, v9.16b, v15.16b
	tbx		\in3\().16b, {v20.16b-v23.16b}, v11.16b
	sub		v10.16b, v10.16b, v15.16b
	tbx		\in0\().16b, {v24.16b-v27.16b}, v8.16b
	sub		v11.16b, v11.16b, v15.16b
	tbx		\in1\().16b, {v24.16b-v27.16b}, v9.16b
	sub		v8.16b, v8.16b, v15.16b
	tbx		\in2\().16b, {v24.16b-v27.16b}, v10.16b
	sub		v9.16b, v9.16b, v15.16b
	tbx		\in3\().16b, {v24.16b-v27.16b}, v11.16b
	sub		v10.16b, v10.16b, v15.16b
	tbx		\in0\().16b, {v28.16b-v31.16b}, v8.16b
	sub		v11.16b, v11.16b, v15.16b
	tbx		\in1\().16b, {v28.16b-v31.16b}, v9.16b
	tbx		\in2\().16b, {v28.16b-v31.16b}, v10.16b
	tbx		\in3\().16b, {v28.16b-v31.16b}, v11.16b
	.endm
164
	/*
	 * Two-state version of mul_by_x: multiply each byte of \in0 and
	 * \in1 by 'x' in GF(2^8), instructions interleaved.  \const must
	 * hold 0x1b in every byte; \tmp0/\tmp1 are clobbered.
	 */
	.macro		mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const
	sshr		\tmp0\().16b, \in0\().16b, #7
	shl		\out0\().16b, \in0\().16b, #1
	sshr		\tmp1\().16b, \in1\().16b, #7
	and		\tmp0\().16b, \tmp0\().16b, \const\().16b
	shl		\out1\().16b, \in1\().16b, #1
	and		\tmp1\().16b, \tmp1\().16b, \const\().16b
	eor		\out0\().16b, \out0\().16b, \tmp0\().16b
	eor		\out1\().16b, \out1\().16b, \tmp1\().16b
	.endm
175
	/*
	 * Two-state version of mul_by_x2: multiply each byte of \in0 and
	 * \in1 by 'x^2' in GF(2^8), reducing the two shifted-out bits via
	 * pmul with \const (0x1b per byte).  \tmp0/\tmp1 are clobbered.
	 */
	.macro		mul_by_x2_2x, out0, out1, in0, in1, tmp0, tmp1, const
	ushr		\tmp0\().16b, \in0\().16b, #6
	shl		\out0\().16b, \in0\().16b, #2
	ushr		\tmp1\().16b, \in1\().16b, #6
	pmul		\tmp0\().16b, \tmp0\().16b, \const\().16b
	shl		\out1\().16b, \in1\().16b, #2
	pmul		\tmp1\().16b, \tmp1\().16b, \const\().16b
	eor		\out0\().16b, \out0\().16b, \tmp0\().16b
	eor		\out1\().16b, \out1\().16b, \tmp1\().16b
	.endm
186
	/*
	 * (Inverse)MixColumns on two AES states at once; same algorithm as
	 * mix_columns (including the { 5, 0, 4, 0 } pre-multiply for
	 * decryption), interleaved across \in0 and \in1.
	 * Clobbers v8-v11; expects v12/v14 as set up by 'prepare'.
	 */
	.macro		mix_columns_2x, in0, in1, enc
	.if		\enc == 0
	/* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
	mul_by_x2_2x	v8, v9, \in0, \in1, v10, v11, v12
	eor		\in0\().16b, \in0\().16b, v8.16b
	rev32		v8.8h, v8.8h
	eor		\in1\().16b, \in1\().16b, v9.16b
	rev32		v9.8h, v9.8h
	eor		\in0\().16b, \in0\().16b, v8.16b
	eor		\in1\().16b, \in1\().16b, v9.16b
	.endif

	mul_by_x_2x	v8, v9, \in0, \in1, v10, v11, v12
	rev32		v10.8h, \in0\().8h
	rev32		v11.8h, \in1\().8h
	eor		v10.16b, v10.16b, v8.16b
	eor		v11.16b, v11.16b, v9.16b
	eor		\in0\().16b, \in0\().16b, v10.16b
	eor		\in1\().16b, \in1\().16b, v11.16b
	tbl		\in0\().16b, {\in0\().16b}, v14.16b
	tbl		\in1\().16b, {\in1\().16b}, v14.16b
	eor		\in0\().16b, \in0\().16b, v10.16b
	eor		\in1\().16b, \in1\().16b, v11.16b
	.endm
211
	/*
	 * Two-block version of do_block: encrypt (\enc == 1) or decrypt
	 * (\enc == 0) the independent states \in0 and \in1 with the same
	 * key schedule.  Register usage and loop structure mirror do_block:
	 * v15 alternates between round key and the #0x40 sub_bytes
	 * constant; the final round skips MixColumns.
	 */
	.macro		do_block_2x, enc, in0, in1, rounds, rk, rkp, i
	ld1		{v15.4s}, [\rk]
	add		\rkp, \rk, #16
	mov		\i, \rounds
1111:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
	eor		\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
	movi		v15.16b, #0x40
	tbl		\in0\().16b, {\in0\().16b}, v13.16b	/* ShiftRows */
	tbl		\in1\().16b, {\in1\().16b}, v13.16b	/* ShiftRows */
	sub_bytes_2x	\in0, \in1
	subs		\i, \i, #1
	ld1		{v15.4s}, [\rkp], #16
	beq		2222f
	mix_columns_2x	\in0, \in1, \enc
	b		1111b
2222:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
	eor		\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
	.endm
230
	/*
	 * Four-block version of do_block: process the independent states
	 * \in0..\in3 with the same key schedule.  SubBytes uses the 4-way
	 * interleaved sub_bytes_4x, but MixColumns is done as two 2-way
	 * calls (there is no mix_columns_4x — v8-v11 scratch is exhausted
	 * by two states).  v15 alternates between round key and #0x40.
	 */
	.macro		do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
	ld1		{v15.4s}, [\rk]
	add		\rkp, \rk, #16
	mov		\i, \rounds
1111:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
	eor		\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
	eor		\in2\().16b, \in2\().16b, v15.16b	/* ^round key */
	eor		\in3\().16b, \in3\().16b, v15.16b	/* ^round key */
	movi		v15.16b, #0x40
	tbl		\in0\().16b, {\in0\().16b}, v13.16b	/* ShiftRows */
	tbl		\in1\().16b, {\in1\().16b}, v13.16b	/* ShiftRows */
	tbl		\in2\().16b, {\in2\().16b}, v13.16b	/* ShiftRows */
	tbl		\in3\().16b, {\in3\().16b}, v13.16b	/* ShiftRows */
	sub_bytes_4x	\in0, \in1, \in2, \in3
	subs		\i, \i, #1
	ld1		{v15.4s}, [\rkp], #16
	beq		2222f
	mix_columns_2x	\in0, \in1, \enc
	mix_columns_2x	\in2, \in3, \enc
	b		1111b
2222:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
	eor		\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
	eor		\in2\().16b, \in2\().16b, v15.16b	/* ^round key */
	eor		\in3\().16b, \in3\().16b, v15.16b	/* ^round key */
	.endm
256
	/* Encrypt two blocks in parallel: do_block_2x with \enc == 1. */
	.macro		encrypt_block2x, in0, in1, rounds, rk, rkp, i
	do_block_2x	1, \in0, \in1, \rounds, \rk, \rkp, \i
	.endm
260
	/* Decrypt two blocks in parallel: do_block_2x with \enc == 0. */
	.macro		decrypt_block2x, in0, in1, rounds, rk, rkp, i
	do_block_2x	0, \in0, \in1, \rounds, \rk, \rkp, \i
	.endm
264
	/* Encrypt four blocks in parallel: do_block_4x with \enc == 1. */
	.macro		encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
	do_block_4x	1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
	.endm
268
	/* Decrypt four blocks in parallel: do_block_4x with \enc == 0. */
	.macro		decrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
	do_block_4x	0, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
	.endm
272
273 #include "aes-modes.S"
274
	.text
	.align		6
	/*
	 * Forward AES Sbox, 256 bytes, loaded into v16-v31 by 'prepare'.
	 * Aligned to 64 bytes (.align 6) so the four ld1 quad-register
	 * loads are cache-line aligned.
	 */
.LForward_Sbox:
	.byte		0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
	.byte		0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
	.byte		0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
	.byte		0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
	.byte		0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
	.byte		0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
	.byte		0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
	.byte		0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
	.byte		0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
	.byte		0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
	.byte		0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
	.byte		0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
	.byte		0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
	.byte		0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
	.byte		0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
	.byte		0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
	.byte		0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
	.byte		0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
	.byte		0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
	.byte		0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
	.byte		0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
	.byte		0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
	.byte		0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
	.byte		0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
	.byte		0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
	.byte		0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
	.byte		0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
	.byte		0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
	.byte		0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
	.byte		0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
	.byte		0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
	.byte		0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
310
	/*
	 * Inverse AES Sbox, 256 bytes, loaded into v16-v31 by 'prepare'
	 * when dec_prepare is used.
	 */
.LReverse_Sbox:
	.byte		0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
	.byte		0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
	.byte		0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
	.byte		0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
	.byte		0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
	.byte		0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
	.byte		0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
	.byte		0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
	.byte		0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
	.byte		0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
	.byte		0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
	.byte		0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
	.byte		0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
	.byte		0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
	.byte		0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
	.byte		0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
	.byte		0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
	.byte		0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
	.byte		0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
	.byte		0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
	.byte		0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
	.byte		0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
	.byte		0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
	.byte		0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
	.byte		0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
	.byte		0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
	.byte		0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
	.byte		0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
	.byte		0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
	.byte		0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
	.byte		0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
	.byte		0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
344
	/*
	 * 128-bit tbl index vectors (byte 0 is the low byte of the .octa
	 * literal): forward/inverse ShiftRows permutations (loaded into
	 * v13 by 'prepare') and the rotate-each-32-bit-word-by-8 pattern
	 * used by the MixColumns macros (loaded into v14).
	 */
.LForward_ShiftRows:
	.octa		0x0b06010c07020d08030e09040f0a0500

.LReverse_ShiftRows:
	.octa		0x0306090c0f0205080b0e0104070a0d00

.Lror32by8:
	.octa		0x0c0f0e0d080b0a090407060500030201