/*
 * aesce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
 *
 * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>

	.text
	.arch	armv8-a+crypto

	/*
	 * void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
	 *			     u32 *macp, u8 const rk[], u32 rounds);
	 */
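	/*
	 * Feed abytes of associated data into the CBC-MAC kept at mac[].
	 * *macp holds the number of bytes already accumulated in a partial
	 * block by a previous call; it is updated on exit so the MAC can be
	 * continued across invocations.
	 */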
ENTRY(ce_aes_ccm_auth_data)
	ldr	w8, [x3]			/* leftover from prev round? */
	ld1	{v0.2d}, [x0]			/* load mac */
	cbz	w8, 1f
	sub	w8, w8, #16
	eor	v1.16b, v1.16b, v1.16b
0:	ldrb	w7, [x1], #1			/* get 1 byte of input */
	subs	w2, w2, #1
	add	w8, w8, #1
	ins	v1.b[0], w7
	ext	v1.16b, v1.16b, v1.16b, #1	/* rotate in the input bytes */
	beq	8f				/* out of input? */
	cbnz	w8, 0b
	eor	v0.16b, v0.16b, v1.16b
1:	ld1	{v3.2d}, [x4]			/* load first round key */
	prfm	pldl1strm, [x1]
	cmp	w5, #12				/* which key size? */
	add	x6, x4, #16
	sub	w7, w5, #2			/* modified # of rounds */
	bmi	2f
	bne	5f
	mov	v5.16b, v3.16b
	b	4f
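	/*
	 * The loop below handles three AES rounds per iteration, so the
	 * entry point depends on the round count: AES-128 (10 rounds)
	 * branches to 2:, AES-192 (12 rounds) jumps to 4: and AES-256
	 * (14 rounds) branches to 5:.
	 */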
2:	mov	v4.16b, v3.16b
	ld1	{v5.2d}, [x6], #16		/* load 2nd round key */
3:	aese	v0.16b, v4.16b
	aesmc	v0.16b, v0.16b
4:	ld1	{v3.2d}, [x6], #16		/* load next round key */
	aese	v0.16b, v5.16b
	aesmc	v0.16b, v0.16b
5:	ld1	{v4.2d}, [x6], #16		/* load next round key */
	subs	w7, w7, #3
	aese	v0.16b, v3.16b
	aesmc	v0.16b, v0.16b
	ld1	{v5.2d}, [x6], #16		/* load next round key */
	bpl	3b
	aese	v0.16b, v4.16b
	subs	w2, w2, #16			/* last data? */
	eor	v0.16b, v0.16b, v5.16b		/* final round */
	bmi	6f
	ld1	{v1.16b}, [x1], #16		/* load next input block */
	eor	v0.16b, v0.16b, v1.16b		/* xor with mac */
	bne	1b
6:	st1	{v0.2d}, [x0]			/* store mac */
	beq	10f
	adds	w2, w2, #16
	beq	10f
	mov	w8, w2
7:	ldrb	w7, [x1], #1
	umov	w6, v0.b[0]
	eor	w6, w6, w7
	strb	w6, [x0], #1
	subs	w2, w2, #1
	beq	10f
	ext	v0.16b, v0.16b, v0.16b, #1	/* rotate out the mac bytes */
	b	7b
8:	mov	w7, w8
	add	w8, w8, #16
9:	ext	v1.16b, v1.16b, v1.16b, #1
	adds	w7, w7, #1
	bne	9b
	eor	v0.16b, v0.16b, v1.16b
	st1	{v0.2d}, [x0]
10:	str	w8, [x3]
	ret
ENDPROC(ce_aes_ccm_auth_data)

	/*
	 * void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u8 const rk[],
	 *			 u32 rounds);
	 */
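	/*
	 * Encrypt the last CBC-MAC block and the first counter block in a
	 * single interleaved pass (v0 = mac, v1 = ctr); the CCM tag is
	 * their XOR, so the final AddRoundKey cancels out and is skipped.
	 */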
ENTRY(ce_aes_ccm_final)
	ld1	{v3.2d}, [x2], #16		/* load first round key */
	ld1	{v0.2d}, [x0]			/* load mac */
	cmp	w3, #12				/* which key size? */
	sub	w3, w3, #2			/* modified # of rounds */
	ld1	{v1.2d}, [x1]			/* load 1st ctriv */
	bmi	0f
	bne	3f
	mov	v5.16b, v3.16b
	b	2f
0:	mov	v4.16b, v3.16b
1:	ld1	{v5.2d}, [x2], #16		/* load next round key */
	aese	v0.16b, v4.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v4.16b
	aesmc	v1.16b, v1.16b
2:	ld1	{v3.2d}, [x2], #16		/* load next round key */
	aese	v0.16b, v5.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v5.16b
	aesmc	v1.16b, v1.16b
3:	ld1	{v4.2d}, [x2], #16		/* load next round key */
	subs	w3, w3, #3
	aese	v0.16b, v3.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v3.16b
	aesmc	v1.16b, v1.16b
	bpl	1b
	aese	v0.16b, v4.16b
	aese	v1.16b, v4.16b
	/* final round key cancels out */
	eor	v0.16b, v0.16b, v1.16b		/* en-/decrypt the mac */
	st1	{v0.2d}, [x0]			/* store result */
	ret
ENDPROC(ce_aes_ccm_final)

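	/*
	 * Combined CTR encryption and CBC-MAC update, one 16-byte block per
	 * outer-loop iteration, with the two AES transforms (v0 = mac,
	 * v1 = ctr) interleaved. \enc selects the order of operations: on
	 * encrypt the input plaintext is MACed and then XORed with the
	 * encrypted counter; on decrypt the XOR happens first and the
	 * recovered plaintext is MACed. The lower half of the counter is
	 * kept byte-swapped in x8 so it can be incremented with a plain add.
	 */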
	.macro	aes_ccm_do_crypt,enc
	ldr	x8, [x6, #8]			/* load lower ctr */
	ld1	{v0.2d}, [x5]			/* load mac */
	rev	x8, x8				/* keep swabbed ctr in reg */
0:	/* outer loop */
	ld1	{v1.1d}, [x6]			/* load upper ctr */
	prfm	pldl1strm, [x1]
	add	x8, x8, #1
	rev	x9, x8
	cmp	w4, #12				/* which key size? */
	sub	w7, w4, #2			/* get modified # of rounds */
	ins	v1.d[1], x9			/* no carry in lower ctr */
	ld1	{v3.2d}, [x3]			/* load first round key */
	add	x10, x3, #16
	bmi	1f
	bne	4f
	mov	v5.16b, v3.16b
	b	3f
1:	mov	v4.16b, v3.16b
	ld1	{v5.2d}, [x10], #16		/* load 2nd round key */
2:	/* inner loop: 3 rounds, 2x interleaved */
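	/*
	 * Each aese is immediately followed by its matching aesmc so that
	 * cores which fuse the aese/aesmc pair (e.g. Cortex-A57) can issue
	 * them as a single operation.
	 */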
	aese	v0.16b, v4.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v4.16b
	aesmc	v1.16b, v1.16b
3:	ld1	{v3.2d}, [x10], #16		/* load next round key */
	aese	v0.16b, v5.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v5.16b
	aesmc	v1.16b, v1.16b
4:	ld1	{v4.2d}, [x10], #16		/* load next round key */
	subs	w7, w7, #3
	aese	v0.16b, v3.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v3.16b
	aesmc	v1.16b, v1.16b
	ld1	{v5.2d}, [x10], #16		/* load next round key */
	bpl	2b
	aese	v0.16b, v4.16b
	aese	v1.16b, v4.16b
	subs	w2, w2, #16
	bmi	6f				/* partial block? */
	ld1	{v2.16b}, [x1], #16		/* load next input block */
	.if	\enc == 1
	eor	v2.16b, v2.16b, v5.16b		/* final round enc+mac */
	eor	v1.16b, v1.16b, v2.16b		/* xor with crypted ctr */
	.else
	eor	v2.16b, v2.16b, v1.16b		/* xor with crypted ctr */
	eor	v1.16b, v2.16b, v5.16b		/* final round enc */
	.endif
	eor	v0.16b, v0.16b, v2.16b		/* xor mac with pt ^ rk[last] */
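	/*
	 * In both paths v2 now holds pt ^ rk[last], so the eor above both
	 * completes the final AddRoundKey of the MAC rounds and folds the
	 * plaintext into the CBC-MAC in a single instruction.
	 */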
	st1	{v1.16b}, [x0], #16		/* write output block */
	bne	0b
	rev	x8, x8
	st1	{v0.2d}, [x5]			/* store mac */
	str	x8, [x6, #8]			/* store lsb end of ctr (BE) */
5:	ret

6:	eor	v0.16b, v0.16b, v5.16b		/* final round mac */
	eor	v1.16b, v1.16b, v5.16b		/* final round enc */
	st1	{v0.2d}, [x5]			/* store mac */
	add	w2, w2, #16			/* process partial tail block */
7:	ldrb	w9, [x1], #1			/* get 1 byte of input */
	umov	w6, v1.b[0]			/* get top crypted ctr byte */
	umov	w7, v0.b[0]			/* get top mac byte */
	.if	\enc == 1
	eor	w7, w7, w9
	eor	w9, w9, w6
	.else
	eor	w9, w9, w6
	eor	w7, w7, w9
	.endif
	strb	w9, [x0], #1			/* store out byte */
	strb	w7, [x5], #1			/* store mac byte */
	subs	w2, w2, #1
	beq	5b
	ext	v0.16b, v0.16b, v0.16b, #1	/* shift out mac byte */
	ext	v1.16b, v1.16b, v1.16b, #1	/* shift out ctr byte */
	b	7b
	.endm

	/*
	 * void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
	 *			   u8 const rk[], u32 rounds, u8 mac[],
	 *			   u8 ctr[]);
	 * void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
	 *			   u8 const rk[], u32 rounds, u8 mac[],
	 *			   u8 ctr[]);
	 */
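	/*
	 * cbytes need not be a multiple of 16: the tail is handled one byte
	 * at a time by the 6:/7: sequence in the macro above. The C glue
	 * code (aes-ce-ccm-glue.c) is expected to call these routines with
	 * the NEON unit enabled, i.e. between kernel_neon_begin() and
	 * kernel_neon_end().
	 */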
ENTRY(ce_aes_ccm_encrypt)
	aes_ccm_do_crypt	1
ENDPROC(ce_aes_ccm_encrypt)

ENTRY(ce_aes_ccm_decrypt)
	aes_ccm_do_crypt	0
ENDPROC(ce_aes_ccm_decrypt)