]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - arch/arm64/crypto/aes-modes.S
Merge branches 'iommu/fixes', 'arm/exynos', 'arm/renesas', 'arm/smmu', 'arm/mediatek...
[mirror_ubuntu-artful-kernel.git] / arch / arm64 / crypto / aes-modes.S
CommitLineData
49788fe2
AB
1/*
2 * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
3 *
4 * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11/* included by aes-ce.S and aes-neon.S */
12
13 .text
14 .align 4
15
16/*
17 * There are several ways to instantiate this code:
18 * - no interleave, all inline
19 * - 2-way interleave, 2x calls out of line (-DINTERLEAVE=2)
20 * - 2-way interleave, all inline (-DINTERLEAVE=2 -DINTERLEAVE_INLINE)
21 * - 4-way interleave, 4x calls out of line (-DINTERLEAVE=4)
22 * - 4-way interleave, all inline (-DINTERLEAVE=4 -DINTERLEAVE_INLINE)
23 *
24 * Macros imported by this code:
25 * - enc_prepare - setup NEON registers for encryption
26 * - dec_prepare - setup NEON registers for decryption
27 * - enc_switch_key - change to new key after having prepared for encryption
28 * - encrypt_block - encrypt a single block
29 * - decrypt_block - decrypt a single block
30 * - encrypt_block2x - encrypt 2 blocks in parallel (if INTERLEAVE == 2)
31 * - decrypt_block2x - decrypt 2 blocks in parallel (if INTERLEAVE == 2)
32 * - encrypt_block4x - encrypt 4 blocks in parallel (if INTERLEAVE == 4)
33 * - decrypt_block4x - decrypt 4 blocks in parallel (if INTERLEAVE == 4)
34 */
35
/*
 * Interleave configuration.
 *
 * INTERLEAVE defined without INTERLEAVE_INLINE: the N-way cipher macros are
 * emitted once as local subroutines and the do_* macros reach them with 'bl'.
 * Because 'bl' overwrites x30, FRAME_PUSH/FRAME_POP save and restore x29/x30
 * on the stack around the body of each caller.
 *
 * Otherwise: the do_* macros expand the cipher macros in place and
 * FRAME_PUSH/FRAME_POP expand to nothing.
 *
 * In both variants the helpers work on a fixed register set: the blocks live
 * in v0-v1 (2x) or v0-v3 (4x), and w3, x2, x6, w7 are passed through to the
 * imported cipher macros.
 */
36#if defined(INTERLEAVE) && !defined(INTERLEAVE_INLINE)
37#define FRAME_PUSH stp x29, x30, [sp,#-16]! ; mov x29, sp
38#define FRAME_POP ldp x29, x30, [sp],#16
39
/* Only the subroutine pair matching the selected INTERLEAVE is emitted. */
40#if INTERLEAVE == 2
41
42aes_encrypt_block2x:
43 encrypt_block2x v0, v1, w3, x2, x6, w7
44 ret
45ENDPROC(aes_encrypt_block2x)
46
47aes_decrypt_block2x:
48 decrypt_block2x v0, v1, w3, x2, x6, w7
49 ret
50ENDPROC(aes_decrypt_block2x)
51
52#elif INTERLEAVE == 4
53
54aes_encrypt_block4x:
55 encrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7
56 ret
57ENDPROC(aes_encrypt_block4x)
58
59aes_decrypt_block4x:
60 decrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7
61 ret
62ENDPROC(aes_decrypt_block4x)
63
64#else
65#error INTERLEAVE should equal 2 or 4
66#endif
67
/*
 * All four call macros are defined regardless of the INTERLEAVE value; only
 * the ones whose target subroutine exists above can actually be used.
 */
68 .macro do_encrypt_block2x
69 bl aes_encrypt_block2x
70 .endm
71
72 .macro do_decrypt_block2x
73 bl aes_decrypt_block2x
74 .endm
75
76 .macro do_encrypt_block4x
77 bl aes_encrypt_block4x
78 .endm
79
80 .macro do_decrypt_block4x
81 bl aes_decrypt_block4x
82 .endm
83
84#else
/* Inline variant: no calls are made, so no frame needs to be set up. */
85#define FRAME_PUSH
86#define FRAME_POP
87
88 .macro do_encrypt_block2x
89 encrypt_block2x v0, v1, w3, x2, x6, w7
90 .endm
91
92 .macro do_decrypt_block2x
93 decrypt_block2x v0, v1, w3, x2, x6, w7
94 .endm
95
96 .macro do_encrypt_block4x
97 encrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7
98 .endm
99
100 .macro do_decrypt_block4x
101 decrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7
102 .endm
103
104#endif
105
106 /*
107 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
108 * int blocks, int first)
109 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
110 * int blocks, int first)
111 */
112
/*
 * ECB encryption.
 *
 * Registers, following the prototype comment and AAPCS64 argument order:
 *   x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = blocks, w5 = first
 *
 * Blocks are consumed INTERLEAVE at a time while at least that many remain,
 * then one at a time. x0 and x1 are post-incremented past the data handled.
 */
113AES_ENTRY(aes_ecb_encrypt)
114 FRAME_PUSH
/* first == 0: skip the key setup below */
115 cbz w5, .LecbencloopNx
116
/* w5 has been tested already; x5 (and later w6) are handed to the cipher
 * macros, presumably as scratch registers - TODO confirm in aes-ce.S/aes-neon.S */
117 enc_prepare w3, x2, x5
118
119.LecbencloopNx:
120#if INTERLEAVE >= 2
/* negative result: fewer than INTERLEAVE blocks are left */
121 subs w4, w4, #INTERLEAVE
122 bmi .Lecbenc1x
123#if INTERLEAVE == 2
124 ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 pt blocks */
125 do_encrypt_block2x
126 st1 {v0.16b-v1.16b}, [x0], #32
127#else
128 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
129 do_encrypt_block4x
130 st1 {v0.16b-v3.16b}, [x0], #64
131#endif
132 b .LecbencloopNx
133.Lecbenc1x:
/* undo the subtraction; a zero result means nothing remains */
134 adds w4, w4, #INTERLEAVE
135 beq .Lecbencout
136#endif
/*
 * Single-block loop. NOTE(review): when INTERLEAVE is not >= 2 this loop is
 * entered without testing w4, so blocks == 0 is not handled on that path;
 * callers presumably never pass 0 - verify.
 */
137.Lecbencloop:
138 ld1 {v0.16b}, [x1], #16 /* get next pt block */
139 encrypt_block v0, w3, x2, x5, w6
140 st1 {v0.16b}, [x0], #16
141 subs w4, w4, #1
142 bne .Lecbencloop
143.Lecbencout:
144 FRAME_POP
145 ret
146AES_ENDPROC(aes_ecb_encrypt)
147
148
/*
 * ECB decryption.
 *
 * Registers, following the prototype comment and AAPCS64 argument order:
 *   x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = blocks, w5 = first
 *
 * Same loop structure as the ECB encryption routine, using the decrypt
 * macros: INTERLEAVE blocks per iteration while enough remain, then single
 * blocks.
 */
149AES_ENTRY(aes_ecb_decrypt)
150 FRAME_PUSH
/* first == 0: skip the key setup below */
151 cbz w5, .LecbdecloopNx
152
153 dec_prepare w3, x2, x5
154
155.LecbdecloopNx:
156#if INTERLEAVE >= 2
/* negative result: fewer than INTERLEAVE blocks are left */
157 subs w4, w4, #INTERLEAVE
158 bmi .Lecbdec1x
159#if INTERLEAVE == 2
160 ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 ct blocks */
161 do_decrypt_block2x
162 st1 {v0.16b-v1.16b}, [x0], #32
163#else
164 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
165 do_decrypt_block4x
166 st1 {v0.16b-v3.16b}, [x0], #64
167#endif
168 b .LecbdecloopNx
169.Lecbdec1x:
/* undo the subtraction; a zero result means nothing remains */
170 adds w4, w4, #INTERLEAVE
171 beq .Lecbdecout
172#endif
/* single-block loop; w4 is not tested on entry when INTERLEAVE is not >= 2 */
173.Lecbdecloop:
174 ld1 {v0.16b}, [x1], #16 /* get next ct block */
175 decrypt_block v0, w3, x2, x5, w6
176 st1 {v0.16b}, [x0], #16
177 subs w4, w4, #1
178 bne .Lecbdecloop
179.Lecbdecout:
180 FRAME_POP
181 ret
182AES_ENDPROC(aes_ecb_decrypt)
183
184
185 /*
186 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
187 * int blocks, u8 iv[], int first)
188 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
189 * int blocks, u8 iv[], int first)
190 */
191
/*
 * CBC encryption.
 *
 * Registers, following the prototype comment and AAPCS64 argument order:
 *   x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = blocks, x5 = iv, w6 = first
 *
 * v0 carries the chaining value: the IV initially, afterwards the ciphertext
 * block just produced. Only the single-block cipher macro is used here, and
 * no FRAME_PUSH/FRAME_POP is issued. The loop does not test w4 before the
 * first block.
 */
192AES_ENTRY(aes_cbc_encrypt)
/*
 * first == 0: skip both the IV load and the key setup. v0 is then used as
 * it stands - presumably still holding the chaining value from an earlier
 * call; verify against the caller.
 */
193 cbz w6, .Lcbcencloop
194
195 ld1 {v0.16b}, [x5] /* get iv */
11e3b725 196 enc_prepare w3, x2, x6
49788fe2
AB
197
198.Lcbcencloop:
199 ld1 {v1.16b}, [x1], #16 /* get next pt block */
200 eor v0.16b, v0.16b, v1.16b /* ..and xor with iv */
11e3b725 201 encrypt_block v0, w3, x2, x6, w7
49788fe2
AB
202 st1 {v0.16b}, [x0], #16
203 subs w4, w4, #1
204 bne .Lcbcencloop
/* the last ciphertext block is written back through x5 */
11e3b725 205 st1 {v0.16b}, [x5] /* return iv */
49788fe2
AB
206 ret
207AES_ENDPROC(aes_cbc_encrypt)
208
209
/*
 * CBC decryption.
 *
 * Registers, following the prototype comment and AAPCS64 argument order:
 *   x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = blocks, x5 = iv, w6 = first
 *
 * v7 carries the chaining value (the IV, then the most recent ciphertext
 * block). Each ciphertext block has to be kept until the following block has
 * been XORed with it, hence the copies made before the cipher macro runs.
 */
210AES_ENTRY(aes_cbc_decrypt)
211 FRAME_PUSH
/* first == 0: skip the IV load and key setup; v7 is used as it stands */
212 cbz w6, .LcbcdecloopNx
213
214 ld1 {v7.16b}, [x5] /* get iv */
11e3b725 215 dec_prepare w3, x2, x6
49788fe2
AB
216
217.LcbcdecloopNx:
218#if INTERLEAVE >= 2
/* negative result: fewer than INTERLEAVE blocks are left */
219 subs w4, w4, #INTERLEAVE
220 bmi .Lcbcdec1x
221#if INTERLEAVE == 2
222 ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 ct blocks */
/* keep both ciphertext blocks: v2 chains into block 1, v3 becomes the next iv */
223 mov v2.16b, v0.16b
224 mov v3.16b, v1.16b
225 do_decrypt_block2x
226 eor v0.16b, v0.16b, v7.16b
227 eor v1.16b, v1.16b, v2.16b
228 mov v7.16b, v3.16b
229 st1 {v0.16b-v1.16b}, [x0], #32
230#else
231 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
/* only the first three ciphertext blocks are copied (v4-v6) */
232 mov v4.16b, v0.16b
233 mov v5.16b, v1.16b
234 mov v6.16b, v2.16b
235 do_decrypt_block4x
/* step x1 back so the fourth ciphertext block can be re-read as the next iv */
236 sub x1, x1, #16
237 eor v0.16b, v0.16b, v7.16b
238 eor v1.16b, v1.16b, v4.16b
239 ld1 {v7.16b}, [x1], #16 /* reload 1 ct block */
240 eor v2.16b, v2.16b, v5.16b
241 eor v3.16b, v3.16b, v6.16b
242 st1 {v0.16b-v3.16b}, [x0], #64
243#endif
244 b .LcbcdecloopNx
245.Lcbcdec1x:
/* undo the subtraction; a zero result means nothing remains */
246 adds w4, w4, #INTERLEAVE
247 beq .Lcbcdecout
248#endif
249.Lcbcdecloop:
250 ld1 {v1.16b}, [x1], #16 /* get next ct block */
251 mov v0.16b, v1.16b /* ...and copy to v0 */
11e3b725 252 decrypt_block v0, w3, x2, x6, w7
49788fe2
AB
253 eor v0.16b, v0.16b, v7.16b /* xor with iv => pt */
254 mov v7.16b, v1.16b /* ct is next iv */
255 st1 {v0.16b}, [x0], #16
256 subs w4, w4, #1
257 bne .Lcbcdecloop
258.Lcbcdecout:
/* the frame is popped first; the store below addresses memory via x5 only */
259 FRAME_POP
11e3b725 260 st1 {v7.16b}, [x5] /* return iv */
49788fe2
AB
261 ret
262AES_ENDPROC(aes_cbc_decrypt)
263
264
265 /*
266 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
267 * int blocks, u8 ctr[], int first)
268 */
269
/*
 * CTR mode encryption.
 *
 * Registers, following the prototype comment and AAPCS64 argument order:
 *   x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = blocks, x5 = ctr, w6 = first
 *
 * v4 holds the current counter block. x8 holds bytes 8-15 of that block,
 * byte-reversed by 'rev' so it can be incremented with ordinary integer adds.
 * Each keystream block is the encryption of a counter block and is XORed
 * with the input.
 */
270AES_ENTRY(aes_ctr_encrypt)
271 FRAME_PUSH
/*
 * first == 0: skip the key setup and the counter load. v4 is then used as
 * it stands - presumably retained from an earlier call; verify against the
 * caller.
 */
11e3b725 272 cbz w6, .Lctrnotfirst /* 1st time around? */
49788fe2
AB
273 enc_prepare w3, x2, x6
274 ld1 {v4.16b}, [x5]
11e3b725
AB
275
276.Lctrnotfirst:
277 umov x8, v4.d[1] /* keep swabbed ctr in reg */
278 rev x8, x8
49788fe2 279#if INTERLEAVE >= 2
/*
 * Carry set: adding the block count to the low 32 bits of x8 would wrap.
 * In that case everything is done in the single-block loop, which carries
 * explicitly.
 */
11e3b725 280 cmn w8, w4 /* 32 bit overflow? */
49788fe2
AB
281 bcs .Lctrloop
282.LctrloopNx:
/* negative result: fewer than INTERLEAVE blocks are left */
283 subs w4, w4, #INTERLEAVE
284 bmi .Lctr1x
285#if INTERLEAVE == 2
/* v0/v1: low 8 bytes copied from v4, high 8 bytes from x8 and x8 + 1 */
286 mov v0.8b, v4.8b
287 mov v1.8b, v4.8b
11e3b725
AB
288 rev x7, x8
289 add x8, x8, #1
49788fe2 290 ins v0.d[1], x7
11e3b725
AB
291 rev x7, x8
292 add x8, x8, #1
49788fe2
AB
293 ins v1.d[1], x7
294 ld1 {v2.16b-v3.16b}, [x1], #32 /* get 2 input blocks */
295 do_encrypt_block2x
296 eor v0.16b, v0.16b, v2.16b
297 eor v1.16b, v1.16b, v3.16b
298 st1 {v0.16b-v1.16b}, [x0], #32
299#else
/*
 * v0 is the counter block unchanged. v1-v3 are copies whose last 32-bit
 * lane is replaced by w8 + 1, + 2, + 3: broadcast w8, add the literal
 * addends, then byte-reverse each 32-bit lane with rev32.
 */
300 ldr q8, =0x30000000200000001 /* addends 1,2,3[,0] */
11e3b725 301 dup v7.4s, w8
49788fe2
AB
302 mov v0.16b, v4.16b
303 add v7.4s, v7.4s, v8.4s
304 mov v1.16b, v4.16b
305 rev32 v8.16b, v7.16b
306 mov v2.16b, v4.16b
307 mov v3.16b, v4.16b
308 mov v1.s[3], v8.s[0]
309 mov v2.s[3], v8.s[1]
310 mov v3.s[3], v8.s[2]
/* v5-v7 receive three input blocks; v5 is reused for the fourth one below */
311 ld1 {v5.16b-v7.16b}, [x1], #48 /* get 3 input blocks */
312 do_encrypt_block4x
313 eor v0.16b, v5.16b, v0.16b
314 ld1 {v5.16b}, [x1], #16 /* get 1 input block */
315 eor v1.16b, v6.16b, v1.16b
316 eor v2.16b, v7.16b, v2.16b
317 eor v3.16b, v5.16b, v3.16b
318 st1 {v0.16b-v3.16b}, [x0], #64
11e3b725 319 add x8, x8, #INTERLEAVE
49788fe2 320#endif
/* write the advanced counter value back into the upper half of v4 */
11e3b725 321 rev x7, x8
49788fe2 322 ins v4.d[1], x7
11e3b725 323 cbz w4, .Lctrout
49788fe2 324 b .LctrloopNx
49788fe2
AB
325.Lctr1x:
/* undo the subtraction; a zero result means nothing remains */
326 adds w4, w4, #INTERLEAVE
327 beq .Lctrout
328#endif
329.Lctrloop:
330 mov v0.16b, v4.16b
331 encrypt_block v0, w3, x2, x6, w7
11e3b725
AB
332
/* 'rev' and 'ins' leave the flags alone, so 'bcs' still sees the carry from 'adds' */
333 adds x8, x8, #1 /* increment BE ctr */
334 rev x7, x8
335 ins v4.d[1], x7
336 bcs .Lctrcarry /* overflow? */
337
338.Lctrcarrydone:
49788fe2
AB
/* ld1/eor/st1 leave the flags alone, so 'bne' below tests the result of this 'subs' */
339 subs w4, w4, #1
340 bmi .Lctrhalfblock /* blocks < 0 means 1/2 block */
341 ld1 {v3.16b}, [x1], #16
342 eor v3.16b, v0.16b, v3.16b
343 st1 {v3.16b}, [x0], #16
11e3b725
AB
344 bne .Lctrloop
345
346.Lctrout:
347 st1 {v4.16b}, [x5] /* return next CTR value */
348 FRAME_POP
349 ret
350
49788fe2
AB
/*
 * Partial final block: exactly 8 bytes are read, XORed with the keystream
 * and written. This path returns without storing the counter through x5.
 */
351.Lctrhalfblock:
352 ld1 {v3.8b}, [x1]
353 eor v3.8b, v0.8b, v3.8b
354 st1 {v3.8b}, [x0]
49788fe2
AB
355 FRAME_POP
356 ret
11e3b725
AB
357
/* x8 wrapped to zero: propagate the carry into bytes 0-7 of the counter block */
358.Lctrcarry:
359 umov x7, v4.d[0] /* load upper word of ctr */
360 rev x7, x7 /* ... to handle the carry */
361 add x7, x7, #1
362 rev x7, x7
363 ins v4.d[0], x7
364 b .Lctrcarrydone
49788fe2
AB
365AES_ENDPROC(aes_ctr_encrypt)
/* emit the literal pool here; it holds the constant loaded with 'ldr q8, =' above */
366 .ltorg
367
368
369 /*
370 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
371 * int blocks, u8 const rk2[], u8 iv[], int first)
372 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
373 * int blocks, u8 const rk2[], u8 iv[], int first)
374 */
375
/*
 * next_tweak - derive the following tweak value from \in.
 *
 *   \out   - vector register receiving the result
 *   \in    - vector register holding the current tweak
 *   \const - vector register holding the constant stored at .Lxts_mul_x
 *   \tmp   - vector register used as scratch (clobbered)
 *
 * Each 64-bit lane of \in is doubled (added to itself). Independently, each
 * lane's top bit is spread into a full-lane mask, ANDed with \const, and the
 * two halves are swapped with 'ext', so that a set top bit in one lane XORs
 * its constant into the other lane of the result. Going by the label name
 * this is the XTS "multiply by x" step - confirm against the XTS
 * specification.
 *
 * \tmp is fully computed before \out is written, so \out may be the same
 * register as \in or as \const.
 */
376 .macro next_tweak, out, in, const, tmp
377 sshr \tmp\().2d, \in\().2d, #63
378 and \tmp\().16b, \tmp\().16b, \const\().16b
379 add \out\().2d, \in\().2d, \in\().2d
380 ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8
381 eor \out\().16b, \out\().16b, \tmp\().16b
382 .endm
383
/*
 * 128-bit constant loaded into the register passed as \const to next_tweak
 * (via 'ldr q7, .Lxts_mul_x'). It consists of the two 64-bit values 1 and
 * 0x87; the order in which they are emitted is chosen by the CPU_LE/CPU_BE
 * wrappers.
 */
384.Lxts_mul_x:
caf4b9e2
AB
385CPU_LE( .quad 1, 0x87 )
386CPU_BE( .quad 0x87, 1 )
49788fe2
AB
387
/*
 * XTS encryption.
 *
 * Registers, following the prototype comment and AAPCS64 argument order:
 *   x0 = out, x1 = in, x2 = rk1, w3 = rounds, w4 = blocks, x5 = rk2,
 *   x6 = iv, w7 = first
 *
 * v4 holds the tweak for the next block to process and v7 the next_tweak
 * constant. Each block is XORed with its tweak, encrypted, and XORed with
 * the same tweak again. The tweak is never stored back through x6; on exit
 * v4 holds the tweak used for the last block.
 */
388AES_ENTRY(aes_xts_encrypt)
389 FRAME_PUSH
/* first == 0: keep the tweak in v4 and just advance it (.LxtsencloopNx) */
390 cbz w7, .LxtsencloopNx
391
/* first call: encrypt the iv with the key schedule at x5 (rk2) to get the
 * initial tweak, then switch to the key schedule at x2 (rk1) for the data */
392 ld1 {v4.16b}, [x6]
393 enc_prepare w3, x5, x6
394 encrypt_block v4, w3, x5, x6, w7 /* first tweak */
395 enc_switch_key w3, x2, x6
396 ldr q7, .Lxts_mul_x
397 b .LxtsencNx
398
399.LxtsencloopNx:
/* v7 must be (re)loaded here: the 4-way path below overwrites it with a tweak */
400 ldr q7, .Lxts_mul_x
401 next_tweak v4, v4, v7, v8
402.LxtsencNx:
403#if INTERLEAVE >= 2
/* negative result: fewer than INTERLEAVE blocks are left */
404 subs w4, w4, #INTERLEAVE
405 bmi .Lxtsenc1x
406#if INTERLEAVE == 2
407 ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 pt blocks */
408 next_tweak v5, v4, v7, v8
409 eor v0.16b, v0.16b, v4.16b
410 eor v1.16b, v1.16b, v5.16b
411 do_encrypt_block2x
412 eor v0.16b, v0.16b, v4.16b
413 eor v1.16b, v1.16b, v5.16b
414 st1 {v0.16b-v1.16b}, [x0], #32
415 cbz w4, .LxtsencoutNx
416 next_tweak v4, v5, v7, v8
417 b .LxtsencNx
418.LxtsencoutNx:
/* leave the last tweak used in v4 */
419 mov v4.16b, v5.16b
420 b .Lxtsencout
421#else
422 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
423 next_tweak v5, v4, v7, v8
424 eor v0.16b, v0.16b, v4.16b
425 next_tweak v6, v5, v7, v8
426 eor v1.16b, v1.16b, v5.16b
427 eor v2.16b, v2.16b, v6.16b
/* the fourth tweak overwrites the constant in v7 */
428 next_tweak v7, v6, v7, v8
429 eor v3.16b, v3.16b, v7.16b
430 do_encrypt_block4x
431 eor v3.16b, v3.16b, v7.16b
432 eor v0.16b, v0.16b, v4.16b
433 eor v1.16b, v1.16b, v5.16b
434 eor v2.16b, v2.16b, v6.16b
435 st1 {v0.16b-v3.16b}, [x0], #64
/* leave the last tweak used in v4 */
436 mov v4.16b, v7.16b
437 cbz w4, .Lxtsencout
438 b .LxtsencloopNx
439#endif
440.Lxtsenc1x:
/* undo the subtraction; a zero result means nothing remains */
441 adds w4, w4, #INTERLEAVE
442 beq .Lxtsencout
443#endif
444.Lxtsencloop:
445 ld1 {v1.16b}, [x1], #16
446 eor v0.16b, v1.16b, v4.16b
447 encrypt_block v0, w3, x2, x6, w7
448 eor v0.16b, v0.16b, v4.16b
449 st1 {v0.16b}, [x0], #16
450 subs w4, w4, #1
/* the tweak is advanced only if another block follows */
451 beq .Lxtsencout
452 next_tweak v4, v4, v7, v8
453 b .Lxtsencloop
454.Lxtsencout:
455 FRAME_POP
456 ret
457AES_ENDPROC(aes_xts_encrypt)
458
459
/*
 * XTS decryption.
 *
 * Registers, following the prototype comment and AAPCS64 argument order:
 *   x0 = out, x1 = in, x2 = rk1, w3 = rounds, w4 = blocks, x5 = rk2,
 *   x6 = iv, w7 = first
 *
 * v4 holds the tweak for the next block to process and v7 the next_tweak
 * constant. Each block is XORed with its tweak, decrypted, and XORed with
 * the same tweak again. The tweak is never stored back through x6; on exit
 * v4 holds the tweak used for the last block.
 */
460AES_ENTRY(aes_xts_decrypt)
461 FRAME_PUSH
/* first == 0: keep the tweak in v4 and just advance it (.LxtsdecloopNx) */
462 cbz w7, .LxtsdecloopNx
463
/* the initial tweak is produced with the *encrypt* macros on the key schedule
 * at x5 (rk2); only afterwards is the decryption key at x2 (rk1) set up */
464 ld1 {v4.16b}, [x6]
465 enc_prepare w3, x5, x6
466 encrypt_block v4, w3, x5, x6, w7 /* first tweak */
467 dec_prepare w3, x2, x6
468 ldr q7, .Lxts_mul_x
469 b .LxtsdecNx
470
471.LxtsdecloopNx:
/* v7 must be (re)loaded here: the 4-way path below overwrites it with a tweak */
472 ldr q7, .Lxts_mul_x
473 next_tweak v4, v4, v7, v8
474.LxtsdecNx:
475#if INTERLEAVE >= 2
/* negative result: fewer than INTERLEAVE blocks are left */
476 subs w4, w4, #INTERLEAVE
477 bmi .Lxtsdec1x
478#if INTERLEAVE == 2
479 ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 ct blocks */
480 next_tweak v5, v4, v7, v8
481 eor v0.16b, v0.16b, v4.16b
482 eor v1.16b, v1.16b, v5.16b
483 do_decrypt_block2x
484 eor v0.16b, v0.16b, v4.16b
485 eor v1.16b, v1.16b, v5.16b
486 st1 {v0.16b-v1.16b}, [x0], #32
487 cbz w4, .LxtsdecoutNx
488 next_tweak v4, v5, v7, v8
489 b .LxtsdecNx
490.LxtsdecoutNx:
/* leave the last tweak used in v4 */
491 mov v4.16b, v5.16b
492 b .Lxtsdecout
493#else
494 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
495 next_tweak v5, v4, v7, v8
496 eor v0.16b, v0.16b, v4.16b
497 next_tweak v6, v5, v7, v8
498 eor v1.16b, v1.16b, v5.16b
499 eor v2.16b, v2.16b, v6.16b
/* the fourth tweak overwrites the constant in v7 */
500 next_tweak v7, v6, v7, v8
501 eor v3.16b, v3.16b, v7.16b
502 do_decrypt_block4x
503 eor v3.16b, v3.16b, v7.16b
504 eor v0.16b, v0.16b, v4.16b
505 eor v1.16b, v1.16b, v5.16b
506 eor v2.16b, v2.16b, v6.16b
507 st1 {v0.16b-v3.16b}, [x0], #64
/* leave the last tweak used in v4 */
508 mov v4.16b, v7.16b
509 cbz w4, .Lxtsdecout
510 b .LxtsdecloopNx
511#endif
512.Lxtsdec1x:
/* undo the subtraction; a zero result means nothing remains */
513 adds w4, w4, #INTERLEAVE
514 beq .Lxtsdecout
515#endif
516.Lxtsdecloop:
517 ld1 {v1.16b}, [x1], #16
518 eor v0.16b, v1.16b, v4.16b
519 decrypt_block v0, w3, x2, x6, w7
520 eor v0.16b, v0.16b, v4.16b
521 st1 {v0.16b}, [x0], #16
522 subs w4, w4, #1
/* the tweak is advanced only if another block follows */
523 beq .Lxtsdecout
524 next_tweak v4, v4, v7, v8
525 b .Lxtsdecloop
526.Lxtsdecout:
527 FRAME_POP
528 ret
529AES_ENDPROC(aes_xts_decrypt)