/*
 * aesce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
 *
 * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>

	.text
	.arch	armv8-a+crypto

	/*
	 * void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
	 *			     u32 *macp, u8 const rk[], u32 rounds);
	 */
20 | ENTRY(ce_aes_ccm_auth_data) | |
21 | ldr w8, [x3] /* leftover from prev round? */ | |
22 | ld1 {v0.2d}, [x0] /* load mac */ | |
23 | cbz w8, 1f | |
24 | sub w8, w8, #16 | |
25 | eor v1.16b, v1.16b, v1.16b | |
26 | 0: ldrb w7, [x1], #1 /* get 1 byte of input */ | |
27 | subs w2, w2, #1 | |
28 | add w8, w8, #1 | |
29 | ins v1.b[0], w7 | |
30 | ext v1.16b, v1.16b, v1.16b, #1 /* rotate in the input bytes */ | |
31 | beq 8f /* out of input? */ | |
32 | cbnz w8, 0b | |
33 | eor v0.16b, v0.16b, v1.16b | |
34 | 1: ld1 {v3.2d}, [x4] /* load first round key */ | |
35 | prfm pldl1strm, [x1] | |
36 | cmp w5, #12 /* which key size? */ | |
37 | add x6, x4, #16 | |
38 | sub w7, w5, #2 /* modified # of rounds */ | |
39 | bmi 2f | |
40 | bne 5f | |
41 | mov v5.16b, v3.16b | |
42 | b 4f | |
43 | 2: mov v4.16b, v3.16b | |
44 | ld1 {v5.2d}, [x6], #16 /* load 2nd round key */ | |
45 | 3: aese v0.16b, v4.16b | |
46 | aesmc v0.16b, v0.16b | |
47 | 4: ld1 {v3.2d}, [x6], #16 /* load next round key */ | |
48 | aese v0.16b, v5.16b | |
49 | aesmc v0.16b, v0.16b | |
50 | 5: ld1 {v4.2d}, [x6], #16 /* load next round key */ | |
51 | subs w7, w7, #3 | |
52 | aese v0.16b, v3.16b | |
53 | aesmc v0.16b, v0.16b | |
54 | ld1 {v5.2d}, [x6], #16 /* load next round key */ | |
55 | bpl 3b | |
56 | aese v0.16b, v4.16b | |
57 | subs w2, w2, #16 /* last data? */ | |
58 | eor v0.16b, v0.16b, v5.16b /* final round */ | |
59 | bmi 6f | |
60 | ld1 {v1.16b}, [x1], #16 /* load next input block */ | |
61 | eor v0.16b, v0.16b, v1.16b /* xor with mac */ | |
62 | bne 1b | |
63 | 6: st1 {v0.2d}, [x0] /* store mac */ | |
64 | beq 10f | |
65 | adds w2, w2, #16 | |
66 | beq 10f | |
67 | mov w8, w2 | |
68 | 7: ldrb w7, [x1], #1 | |
69 | umov w6, v0.b[0] | |
70 | eor w6, w6, w7 | |
71 | strb w6, [x0], #1 | |
72 | subs w2, w2, #1 | |
73 | beq 10f | |
74 | ext v0.16b, v0.16b, v0.16b, #1 /* rotate out the mac bytes */ | |
75 | b 7b | |
76 | 8: mov w7, w8 | |
77 | add w8, w8, #16 | |
78 | 9: ext v1.16b, v1.16b, v1.16b, #1 | |
79 | adds w7, w7, #1 | |
80 | bne 9b | |
81 | eor v0.16b, v0.16b, v1.16b | |
82 | st1 {v0.2d}, [x0] | |
83 | 10: str w8, [x3] | |
84 | ret | |
85 | ENDPROC(ce_aes_ccm_auth_data) | |

	/*
	 * void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u8 const rk[],
	 *			 u32 rounds);
	 */
91 | ENTRY(ce_aes_ccm_final) | |
92 | ld1 {v3.2d}, [x2], #16 /* load first round key */ | |
93 | ld1 {v0.2d}, [x0] /* load mac */ | |
94 | cmp w3, #12 /* which key size? */ | |
95 | sub w3, w3, #2 /* modified # of rounds */ | |
96 | ld1 {v1.2d}, [x1] /* load 1st ctriv */ | |
97 | bmi 0f | |
98 | bne 3f | |
99 | mov v5.16b, v3.16b | |
100 | b 2f | |
101 | 0: mov v4.16b, v3.16b | |
102 | 1: ld1 {v5.2d}, [x2], #16 /* load next round key */ | |
103 | aese v0.16b, v4.16b | |
a3fd8210 | 104 | aesmc v0.16b, v0.16b |
4a97abd4 | 105 | aese v1.16b, v4.16b |
a3fd8210 AB |
106 | aesmc v1.16b, v1.16b |
107 | 2: ld1 {v3.2d}, [x2], #16 /* load next round key */ | |
108 | aese v0.16b, v5.16b | |
a3fd8210 | 109 | aesmc v0.16b, v0.16b |
4a97abd4 | 110 | aese v1.16b, v5.16b |
a3fd8210 AB |
111 | aesmc v1.16b, v1.16b |
112 | 3: ld1 {v4.2d}, [x2], #16 /* load next round key */ | |
113 | subs w3, w3, #3 | |
114 | aese v0.16b, v3.16b | |
a3fd8210 | 115 | aesmc v0.16b, v0.16b |
4a97abd4 | 116 | aese v1.16b, v3.16b |
a3fd8210 AB |
117 | aesmc v1.16b, v1.16b |
118 | bpl 1b | |
119 | aese v0.16b, v4.16b | |
120 | aese v1.16b, v4.16b | |
121 | /* final round key cancels out */ | |
122 | eor v0.16b, v0.16b, v1.16b /* en-/decrypt the mac */ | |
123 | st1 {v0.2d}, [x0] /* store result */ | |
124 | ret | |
125 | ENDPROC(ce_aes_ccm_final) | |
127 | .macro aes_ccm_do_crypt,enc | |
128 | ldr x8, [x6, #8] /* load lower ctr */ | |
129 | ld1 {v0.2d}, [x5] /* load mac */ | |
130 | rev x8, x8 /* keep swabbed ctr in reg */ | |
131 | 0: /* outer loop */ | |
132 | ld1 {v1.1d}, [x6] /* load upper ctr */ | |
133 | prfm pldl1strm, [x1] | |
134 | add x8, x8, #1 | |
135 | rev x9, x8 | |
136 | cmp w4, #12 /* which key size? */ | |
137 | sub w7, w4, #2 /* get modified # of rounds */ | |
138 | ins v1.d[1], x9 /* no carry in lower ctr */ | |
139 | ld1 {v3.2d}, [x3] /* load first round key */ | |
140 | add x10, x3, #16 | |
141 | bmi 1f | |
142 | bne 4f | |
143 | mov v5.16b, v3.16b | |
144 | b 3f | |
145 | 1: mov v4.16b, v3.16b | |
146 | ld1 {v5.2d}, [x10], #16 /* load 2nd round key */ | |
147 | 2: /* inner loop: 3 rounds, 2x interleaved */ | |
148 | aese v0.16b, v4.16b | |
a3fd8210 | 149 | aesmc v0.16b, v0.16b |
4a97abd4 | 150 | aese v1.16b, v4.16b |
a3fd8210 AB |
151 | aesmc v1.16b, v1.16b |
152 | 3: ld1 {v3.2d}, [x10], #16 /* load next round key */ | |
153 | aese v0.16b, v5.16b | |
a3fd8210 | 154 | aesmc v0.16b, v0.16b |
4a97abd4 | 155 | aese v1.16b, v5.16b |
a3fd8210 AB |
156 | aesmc v1.16b, v1.16b |
157 | 4: ld1 {v4.2d}, [x10], #16 /* load next round key */ | |
158 | subs w7, w7, #3 | |
159 | aese v0.16b, v3.16b | |
a3fd8210 | 160 | aesmc v0.16b, v0.16b |
4a97abd4 | 161 | aese v1.16b, v3.16b |
a3fd8210 AB |
162 | aesmc v1.16b, v1.16b |
163 | ld1 {v5.2d}, [x10], #16 /* load next round key */ | |
164 | bpl 2b | |
165 | aese v0.16b, v4.16b | |
166 | aese v1.16b, v4.16b | |
167 | subs w2, w2, #16 | |
168 | bmi 6f /* partial block? */ | |
169 | ld1 {v2.16b}, [x1], #16 /* load next input block */ | |
170 | .if \enc == 1 | |
171 | eor v2.16b, v2.16b, v5.16b /* final round enc+mac */ | |
172 | eor v1.16b, v1.16b, v2.16b /* xor with crypted ctr */ | |
173 | .else | |
174 | eor v2.16b, v2.16b, v1.16b /* xor with crypted ctr */ | |
175 | eor v1.16b, v2.16b, v5.16b /* final round enc */ | |
176 | .endif | |
177 | eor v0.16b, v0.16b, v2.16b /* xor mac with pt ^ rk[last] */ | |
178 | st1 {v1.16b}, [x0], #16 /* write output block */ | |
179 | bne 0b | |
180 | rev x8, x8 | |
181 | st1 {v0.2d}, [x5] /* store mac */ | |
182 | str x8, [x6, #8] /* store lsb end of ctr (BE) */ | |
183 | 5: ret | |
184 | ||
185 | 6: eor v0.16b, v0.16b, v5.16b /* final round mac */ | |
186 | eor v1.16b, v1.16b, v5.16b /* final round enc */ | |
187 | st1 {v0.2d}, [x5] /* store mac */ | |
188 | add w2, w2, #16 /* process partial tail block */ | |
189 | 7: ldrb w9, [x1], #1 /* get 1 byte of input */ | |
190 | umov w6, v1.b[0] /* get top crypted ctr byte */ | |
191 | umov w7, v0.b[0] /* get top mac byte */ | |
192 | .if \enc == 1 | |
193 | eor w7, w7, w9 | |
194 | eor w9, w9, w6 | |
195 | .else | |
196 | eor w9, w9, w6 | |
197 | eor w7, w7, w9 | |
198 | .endif | |
199 | strb w9, [x0], #1 /* store out byte */ | |
200 | strb w7, [x5], #1 /* store mac byte */ | |
201 | subs w2, w2, #1 | |
202 | beq 5b | |
203 | ext v0.16b, v0.16b, v0.16b, #1 /* shift out mac byte */ | |
204 | ext v1.16b, v1.16b, v1.16b, #1 /* shift out ctr byte */ | |
205 | b 7b | |
206 | .endm | |

	/*
	 * void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
	 *			   u8 const rk[], u32 rounds, u8 mac[],
	 *			   u8 ctr[]);
	 * void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
	 *			   u8 const rk[], u32 rounds, u8 mac[],
	 *			   u8 ctr[]);
	 */
216 | ENTRY(ce_aes_ccm_encrypt) | |
217 | aes_ccm_do_crypt 1 | |
218 | ENDPROC(ce_aes_ccm_encrypt) | |
220 | ENTRY(ce_aes_ccm_decrypt) | |
221 | aes_ccm_do_crypt 0 | |
222 | ENDPROC(ce_aes_ccm_decrypt) |