]>
Commit | Line | Data |
---|---|---|
1e59de90 TL |
1 | /********************************************************************** |
2 | Copyright(c) 2021 Arm Corporation All rights reserved. | |
3 | ||
4 | Redistribution and use in source and binary forms, with or without | |
5 | modification, are permitted provided that the following conditions | |
6 | are met: | |
7 | * Redistributions of source code must retain the above copyright | |
8 | notice, this list of conditions and the following disclaimer. | |
9 | * Redistributions in binary form must reproduce the above copyright | |
10 | notice, this list of conditions and the following disclaimer in | |
11 | the documentation and/or other materials provided with the | |
12 | distribution. | |
13 | * Neither the name of Arm Corporation nor the names of its | |
14 | contributors may be used to endorse or promote products derived | |
15 | from this software without specific prior written permission. | |
16 | ||
17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
28 | **********************************************************************/ | |
29 | .arch armv8-a+crypto | |
30 | .text | |
31 | #include "cbc_common.S" | |
32 | .altmacro | |
/*
 * _aes_decrypt_round block,key
 *
 * Emits one AES decryption round on vector register v<block> using the
 * round key register vKey<key>:
 *   - key index  < 13 : AESD followed by AESIMC
 *   - key index == 13 : AESD only
 *   - key index  > 13 : rejected at assembly time
 */
.macro _aes_decrypt_round block:req,key:req
	.if \key > 13
	.error "AES decrypt round index out of range (max 13)"
	.endif
	aesd v\block\().16b,vKey\key\().16b
	.if \key < 13
	aesimc v\block\().16b,v\block\().16b
	.endif
.endm
42 | ||
/*
 * aes_decrypt_round block,reg,key
 *
 * Convenience wrapper: applies _aes_decrypt_round to the data register
 * named In<reg>_<block> with round key index <key>.
 */
.macro aes_decrypt_round block,reg,key
	_aes_decrypt_round In\reg\()_\block,\key
.endm
46 | ||
/*
 * load_keys first_key
 *
 * Loads round keys vKey<first_key> .. vKey14 from [keys], advancing the
 * keys pointer with post-indexed addressing.  Only first_key values of
 * 0, 2 and 4 load the leading keys; keys 7..14 are always loaded.
 */
.macro load_keys first_key
	/* leading keys: exactly one of the three groups below matches */
	.ifc 0 , \first_key
	ld1 {vKey0.16b -vKey2.16b},[keys],3*16
	ld1 {vKey3.16b -vKey6.16b},[keys],4*16
	.endif
	.ifc 2 , \first_key
	ldr qKey2,[keys],1*16
	ld1 {vKey3.16b -vKey6.16b},[keys],4*16
	.endif
	.if \first_key == 4
	ld1 {vKey4.4s -vKey6.4s},[keys],3*16
	.endif
	/* trailing keys 7..14, common to every first_key */
	ld1 {vKey7.16b -vKey10.16b},[keys],4*16
	ld1 {vKey11.16b-vKey14.16b},[keys],4*16
.endm
62 | ||
/*
 * aes_decrypt_blocks_round blocks,key_idx,key_reg,next_keyreg,first_idx
 *
 * Loads q<next_keyreg> from [keys] with a post-indexed update, then
 * emits _aes_decrypt_round for block indices 0 .. blocks-1 with key
 * argument <key_reg>.  The post-index step is 16 bytes, except when
 * key_idx is 12, where it is (first_idx-13)*16.
 */
.macro aes_decrypt_blocks_round blocks,key_idx,key_reg,next_keyreg,first_idx
	.if \key_idx != 12
	ldr q\next_keyreg,[keys],16
	.else
	ldr q\next_keyreg,[keys],(\first_idx-13)*16
	.endif
	n=0
	.rept \blocks
	_aes_decrypt_round %n,\key_reg
	n=n+1
	.endr
.endm
75 | ||
/*
 * aes_decrypt_rounds blocks,key_st,key_end,first_idx
 *
 * Emits aes_decrypt_blocks_round once per key index from key_st to
 * key_end inclusive.  The current and next key register arguments
 * alternate between 0 and 1 (index modulo 2).
 */
.macro aes_decrypt_rounds blocks,key_st,key_end,first_idx
	j=\key_st
	.rept \key_end - \key_st + 1
	aes_decrypt_blocks_round \blocks,%j,%(j%2),%((j+1)%2),\first_idx
	j=j+1
	.endr
.endm
83 | ||
/*
 * aes_cbc_decrypt_rounds blocks,first_idx,reg,next_reg
 *
 * Runs aes_decrypt_rounds from key index first_idx up to 12.
 * The reg and next_reg arguments are accepted but not used here.
 */
.macro aes_cbc_decrypt_rounds blocks,first_idx,reg,next_reg
	aes_decrypt_rounds \blocks,\first_idx,12,\first_idx
.endm
87 | ||
/*
 * declare_prefix idx,reg,prefix
 *
 * Forwards to declare_var_vector_reg with the name <prefix><idx> and
 * register number <reg>.  declare_var_vector_reg is defined outside
 * this file section (presumably in cbc_common.S).
 */
.macro declare_prefix idx,reg,prefix
	declare_var_vector_reg \prefix\()\idx,\reg
.endm
91 | ||
/*
 * mldr reg,block,addr
 *
 * Loads one 16-byte block from [addr] into qIn<reg>_<block> and
 * post-increments addr by 16.
 */
.macro mldr reg,block,addr
	ldr qIn\reg\()_\block,[\addr],16
.endm
95 | ||
/*
 * mldrin reg,blocks,addr
 *
 * Loads <blocks> consecutive 16-byte blocks from [addr] into
 * In<reg>_0 .. In<reg>_<blocks-1>, post-incrementing addr by 16 bytes
 * per block.  Block counts 1..9 are handled; any other count emits
 * nothing.
 */
.macro mldrin reg,blocks,addr
	.if \blocks == 1
	ldr qIn\reg\()_0,[\addr],16
	.elseif \blocks == 2
	ldp qIn\reg\()_0,qIn\reg\()_1,[\addr],2*16
	.elseif \blocks == 3
	ldr qIn\reg\()_0,[\addr],16
	ldp qIn\reg\()_1,qIn\reg\()_2,[\addr],2*16
	.elseif \blocks == 4
	ld1 {vIn\reg\()_0.16b-vIn\reg\()_3.16b},[\addr],4*16
	.elseif \blocks == 5
	ldr qIn\reg\()_0,[\addr],16
	ld1 {vIn\reg\()_1.16b-vIn\reg\()_4.16b},[\addr],4*16
	.elseif \blocks == 6
	ldp qIn\reg\()_0,qIn\reg\()_1,[\addr],2*16
	ld1 {vIn\reg\()_2.16b-vIn\reg\()_5.16b},[\addr],4*16
	.elseif \blocks == 7
	ld1 {vIn\reg\()_0.16b-vIn\reg\()_2.16b},[\addr],3*16
	ld1 {vIn\reg\()_3.16b-vIn\reg\()_6.16b},[\addr],4*16
	.elseif \blocks == 8
	ld1 {vIn\reg\()_0.16b-vIn\reg\()_3.16b},[\addr],4*16
	ld1 {vIn\reg\()_4.16b-vIn\reg\()_7.16b},[\addr],4*16
	.elseif \blocks == 9
	ld1 {vIn\reg\()_0.16b-vIn\reg\()_3.16b},[\addr],4*16
	ld1 {vIn\reg\()_4.16b-vIn\reg\()_7.16b},[\addr],4*16
	ldr qIn\reg\()_8,[\addr],16
	.endif
.endm
142 | ||
/*
 * mstrout reg,blocks,addr
 *
 * Stores In<reg>_0 .. In<reg>_<blocks-1> as consecutive 16-byte blocks
 * to [addr], post-incrementing addr by 16 bytes per block.  Block
 * counts 1..9 are handled; any other count emits nothing.
 */
.macro mstrout reg,blocks,addr
	.if \blocks == 1
	str qIn\reg\()_0,[\addr],16
	.elseif \blocks == 2
	stp qIn\reg\()_0,qIn\reg\()_1,[\addr],2*16
	.elseif \blocks == 3
	str qIn\reg\()_0,[\addr],16
	stp qIn\reg\()_1,qIn\reg\()_2,[\addr],2*16
	.elseif \blocks == 4
	st1 {vIn\reg\()_0.16b-vIn\reg\()_3.16b},[\addr],4*16
	.elseif \blocks == 5
	str qIn\reg\()_0,[\addr],16
	st1 {vIn\reg\()_1.16b-vIn\reg\()_4.16b},[\addr],4*16
	.elseif \blocks == 6
	stp qIn\reg\()_0,qIn\reg\()_1,[\addr],2*16
	st1 {vIn\reg\()_2.16b-vIn\reg\()_5.16b},[\addr],4*16
	.elseif \blocks == 7
	st1 {vIn\reg\()_0.16b-vIn\reg\()_2.16b},[\addr],3*16
	st1 {vIn\reg\()_3.16b-vIn\reg\()_6.16b},[\addr],4*16
	.elseif \blocks == 8
	st1 {vIn\reg\()_0.16b-vIn\reg\()_3.16b},[\addr],4*16
	st1 {vIn\reg\()_4.16b-vIn\reg\()_7.16b},[\addr],4*16
	.elseif \blocks == 9
	st1 {vIn\reg\()_0.16b-vIn\reg\()_3.16b},[\addr],4*16
	st1 {vIn\reg\()_4.16b-vIn\reg\()_7.16b},[\addr],4*16
	str qIn\reg\()_8,[\addr],16
	.endif
.endm
189 | ||
/*
 * eorkey14 block,reg
 *
 * vBlock<block> = vKey14 XOR vState<reg>_<block>
 */
.macro eorkey14 block,reg
	eor vBlock\block\().16b,vKey14.16b,vState\reg\()_\block\().16b
.endm
193 | ||
/*
 * eorblock block,reg
 *
 * vIn<reg>_<block> = vBlock<block> XOR vIn<reg>_<block>  (in place)
 */
.macro eorblock block,reg
	eor vIn\reg\()_\block\().16b,vBlock\block\().16b,vIn\reg\()_\block\().16b
.endm
197 | ||
/*
 * movstate0 block,reg
 *
 * Copies vIn<reg>_<block> into vState<reg>_0.
 */
.macro movstate0 block,reg
	mov vState\reg\()_0.16b,vIn\reg\()_\block\().16b
.endm
201 | ||
/*
 * cbc_decrypt_rounds blocks,reg,first_key,cur_blocks
 *
 * Emits the full decryption of one batch held in In<reg>_*.
 *   blocks     - batch size the register layout was declared for
 *   reg        - register bank selector used in the In/State names
 *   first_key  - index of the first round key
 *   cur_blocks - optional; number of blocks actually processed
 *                (defaults to blocks)
 *
 * When 3*blocks+1 >= 17+first_key, each upcoming round key is fetched
 * with ldr_key just before the round that precedes its use, and the
 * first key is reloaded ahead of the last round.
 *
 * For every block, vBlock<i> is set to vKey14 XOR vState<reg>_<i>
 * before the data register it may alias is transformed; the final
 * eorblock folds it into the result after round 13.
 */
.macro cbc_decrypt_rounds blocks,reg,first_key,cur_blocks
	.ifnb \cur_blocks
	_blocks=\cur_blocks
	.else
	_blocks=\blocks
	.endif
	key=\first_key + 1
	.if 3*\blocks+1 >= 17+\first_key
	ldr_key %key,\first_key
	.endif

	/* first round on blocks 0 .. _blocks-2; n+1 never reaches _blocks */
	n=0
	.rept _blocks - 1
	eorkey14 %(n+1),\reg
	aes_decrypt_round %n,\reg,\first_key
	n=n+1
	.endr
	/* last block: save it into State<reg>_0 before it is transformed */
	eorkey14 0,\reg
	movstate0 %(_blocks-1),\reg
	aes_decrypt_round %n,\reg,\first_key

	/* rounds with keys first_key+1 .. 12 */
	k=0
	.rept 12-\first_key
	n=0
	.if 3*\blocks+1 >= 17+\first_key
	ldr_key %(key+k+1),\first_key
	.endif
	.rept _blocks
	aes_decrypt_round %n,\reg,%(key+k)
	n=n+1
	.endr
	k=k+1
	.endr

	/* round 13, then XOR with the prepared vBlock values */
	n=0
	.if 3*\blocks+1 >= 17+\first_key
	ldr_key \first_key,\first_key
	.endif
	.rept _blocks
	aes_decrypt_round %n,\reg,13
	eorblock %n,\reg
	n=n+1
	.endr
.endm
245 | ||
/*
 * print_macro a,b,c,d,e
 *
 * Assembly-time helper: prints its five arguments via .print.
 */
.macro print_macro a,b,c,d,e
	.print "print_macro,\a \b \c \d \e"
.endm
249 | ||
/*
 * remainder_process blocks,first_key,curblk
 *
 * Handles one power-of-two slice of the leftover blocks.  Code is only
 * emitted when 2^curblk is smaller than the batch size.  At run time
 * the slice is skipped if bit <curblk> of xlen_remainder is clear;
 * otherwise 2^curblk blocks are loaded from in, decrypted through
 * register bank 0 and stored to out.
 */
.macro remainder_process blocks,first_key,curblk
	.if (1<<\curblk) < \blocks
	tbz xlen_remainder,\curblk,1f
	mldrin 0,%(1<<\curblk),in
	cbc_decrypt_rounds \blocks,0,\first_key,%(1<<\curblk)
	mstrout 0,%(1<<\curblk),out
1:
	.endif
.endm
259 | ||
/*
 * aes_cbc_decrypt_blocks first_key,blocks
 *
 * Body of a CBC decrypt routine that works on batches of <blocks>
 * 16-byte blocks, using round keys first_key .. 14.
 *
 * Vector register layout set up here:
 *   v0                       State0_0 / State1_0 (loaded from [IV])
 *   v1       .. v<blocks>    In0_*
 *   v<b+1>   .. v<2*blocks>  In1_*
 *   v<2b+1>  .. v<3*blocks>  Block*
 *   State0_n / State1_n (n >= 1) share registers with In0_<n-1> /
 *   In1_<n-1>.
 *
 * Round keys:
 *   input regs(2*blocks) + tmp regs(blocks) + State reg(1)
 *   + key regs(15-first_key) must stay below 32 for every key to get
 *   its own register (v<n+17>).  Otherwise keys first_key..13 alternate
 *   between v29 and v30, key 14 sits in v31, and keys are fetched with
 *   ldr_key as needed.
 *
 * The main loop alternates between the In0 and In1 banks; the
 * remainder blocks are handled after label 9 in power-of-two slices.
 */
.macro aes_cbc_decrypt_blocks first_key,blocks
	division \blocks, len_bytes,len_remainder,tmp0,tmp1
	mov xlen_quotient_in,xlen_quotient
	.if 3*\blocks+1 >= 17+\first_key
	/* not enough registers: two rotating key registers plus key 14 */
	n=\first_key
	.rept 14-\first_key
	declare_prefix %n,%((n%2)+29),Key
	n=n+1
	.endr
	declare_prefix 14,31,Key
	/* load first key */
	ldr_key \first_key,\first_key
	/* load last key */
	ldr_key 14,\first_key
	.else
	/* every round key gets a dedicated register */
	n=\first_key
	.rept 15-\first_key
	declare_prefix %n,%(n+17),Key
	n=n+1
	.endr
	load_keys \first_key
	.endif
	m=\blocks
	l=\blocks-1
	declare_prefix 0,0,State0_
	declare_prefix 0,0,State1_
	n=0
	.rept \blocks
	declare_prefix %n,%(n+1),In0_
	declare_prefix %n,%(n+m+1),In1_
	declare_prefix %n,%(n+2*m+1),Block
	n=n+1
	.endr
	n=1
	.rept \blocks -1
	declare_prefix %n,%(n),State0_
	declare_prefix %n,%(n+m),State1_
	n=n+1
	.endr
	ldr qState0_0,[IV]
	cbz xlen_quotient,9f
	/* prime bank 0 with the first batch */
	mldrin 0,\blocks,in
	sub xlen_quotient_in,xlen_quotient_in,1
	b 5f

3:
	/* bank 1 has just been decrypted: write it out */
	sub xlen_quotient,xlen_quotient,1
	mstrout 1,\blocks,out
	cbz xlen_quotient,9f
5:
	/* fetch the next batch into bank 1 if any input is left */
	cbz xlen_quotient_in,1f
	mldrin 1,\blocks,in
	sub xlen_quotient_in,xlen_quotient_in,1
1:
	cbc_decrypt_rounds \blocks,0,\first_key
	sub xlen_quotient,xlen_quotient,1
	mstrout 0,\blocks,out
	cbz xlen_quotient,9f

	/* fetch the next batch into bank 0 if any input is left */
	cbz xlen_quotient_in,1f
	mldrin 0,\blocks,in
	sub xlen_quotient_in,xlen_quotient_in,1
1:
	cbc_decrypt_rounds \blocks,1,\first_key
	b 3b
9:
	/* leftover blocks, largest power-of-two slice first */
	remainder_process \blocks,\first_key,3
	remainder_process \blocks,\first_key,2
	remainder_process \blocks,\first_key,1
	remainder_process \blocks,\first_key,0
.endm
335 | ||
336 | ||
/*
 * division blocks,quotient,remainder,tmp0,tmp1
 *
 * Unsigned division of x<quotient> by the constant <blocks> (1..9):
 *   x<quotient>  <- x<quotient> / blocks   (left untouched for blocks == 1)
 *   x<remainder> <- x<quotient> % blocks
 * x<tmp0> and x<tmp1> are scratch.  Powers of two use AND/LSR; the
 * other divisors multiply by a 64-bit reciprocal constant with UMULH.
 * Divisors outside 1..9 emit nothing.
 */
.macro division blocks,quotient,remainder,tmp0,tmp1
	.if \blocks == 1
	mov x\remainder, 0
	.elseif \blocks == 2
	and x\remainder, x\quotient, 1
	lsr x\quotient, x\quotient, 1
	.elseif \blocks == 3
	/* tmp0 = 0xaaaaaaaaaaaaaaab */
	mov x\tmp0, -6148914691236517206
	mov x\remainder, x\quotient
	movk x\tmp0, 0xaaab, lsl 0
	umulh x\tmp0, x\quotient, x\tmp0
	and x\tmp1, x\tmp0, -2
	lsr x\quotient, x\tmp0, 1
	add x\tmp1, x\tmp1, x\quotient
	sub x\remainder, x\remainder, x\tmp1
	.elseif \blocks == 4
	and x\remainder, x\quotient, 3
	lsr x\quotient, x\quotient, 2
	.elseif \blocks == 5
	/* tmp0 = 0xcccccccccccccccd */
	mov x\tmp0, -3689348814741910324
	mov x\remainder, x\quotient
	movk x\tmp0, 0xcccd, lsl 0
	umulh x\tmp0, x\quotient, x\tmp0
	and x\tmp1, x\tmp0, -4
	lsr x\quotient, x\tmp0, 2
	add x\tmp1, x\tmp1, x\quotient
	sub x\remainder, x\remainder, x\tmp1
	.elseif \blocks == 6
	/* tmp0 = 0xaaaaaaaaaaaaaaab */
	mov x\tmp0, -6148914691236517206
	mov x\tmp1, x\quotient
	movk x\tmp0, 0xaaab, lsl 0
	umulh x\tmp0, x\quotient, x\tmp0
	lsr x\quotient, x\tmp0, 2
	add x\remainder, x\quotient, x\quotient, lsl 1
	sub x\remainder, x\tmp1, x\remainder, lsl 1
	.elseif \blocks == 7
	/* tmp0 = 0x2492492492492493 */
	mov x\tmp0, 9363
	mov x\tmp1, x\quotient
	movk x\tmp0, 0x9249, lsl 16
	movk x\tmp0, 0x4924, lsl 32
	movk x\tmp0, 0x2492, lsl 48
	umulh x\quotient, x\quotient, x\tmp0
	sub x\tmp0, x\tmp1, x\quotient
	add x\tmp0, x\quotient, x\tmp0, lsr 1
	lsr x\quotient, x\tmp0, 2
	lsl x\remainder, x\quotient, 3
	sub x\remainder, x\remainder, x\quotient
	sub x\remainder, x\tmp1, x\remainder
	.elseif \blocks == 8
	and x\remainder, x\quotient, 7
	lsr x\quotient, x\quotient, 3
	.elseif \blocks == 9
	/* tmp0 = 0xe38e38e38e38e38f */
	mov x\tmp0, 58255
	mov x\remainder, x\quotient
	movk x\tmp0, 0x8e38, lsl 16
	movk x\tmp0, 0x38e3, lsl 32
	movk x\tmp0, 0xe38e, lsl 48
	umulh x\tmp0, x\quotient, x\tmp0
	and x\tmp1, x\tmp0, -8
	lsr x\quotient, x\tmp0, 3
	add x\tmp1, x\tmp1, x\quotient
	sub x\remainder, x\remainder, x\tmp1
	.endif
.endm
418 | ||
/*
 * ldr_key num,first_key
 *
 * Loads round key <num> into qKey<num> from [keys], at an offset of
 * 16 bytes per key counted from <first_key>.  The keys pointer is not
 * modified.
 */
.macro ldr_key num,first_key
	ldr qKey\num,[keys,16*(\num - \first_key)]
.endm
/* Default value, used unless the build already defines the symbol. */
#if !defined(CBC_DECRYPT_BLOCKS_NUM)
#define CBC_DECRYPT_BLOCKS_NUM 8
#endif
425 | ||
/*
 * cbc_decrypt first_key,blocks
 *
 * Function body wrapper: converts xlen_bytes from a byte count to a
 * count of 16-byte blocks (shift right by 4) and, if that is non-zero,
 * runs aes_cbc_decrypt_blocks between push_stack and pop_stack.
 * A zero block count falls straight through to label 10.
 */
.macro cbc_decrypt first_key:req,blocks
	lsr xlen_bytes,xlen_bytes,4
	cbz xlen_bytes,10f
	push_stack
	aes_cbc_decrypt_blocks \first_key,\blocks
	pop_stack
10:
.endm
434 | ||
/* Bytes reserved on the stack for d8-d15 (8 registers * 8 bytes). */
.set stack_size,64

/*
 * push_stack
 *
 * Allocates stack_size bytes (pre-indexed update of sp) and saves
 * d8-d15 there.
 */
.macro push_stack
	stp d8, d9,[sp,-stack_size]!
	stp d10,d11,[sp,16]
	stp d12,d13,[sp,32]
	stp d14,d15,[sp,48]
.endm
442 | ||
/*
 * pop_stack
 *
 * Counterpart of push_stack: reloads d8-d15 and releases stack_size
 * bytes.  The d8/d9 pair is loaded last because that load also
 * post-increments sp.
 */
.macro pop_stack
	ldp d10,d11,[sp,16]
	ldp d12,d13,[sp,32]
	ldp d14,d15,[sp,48]
	ldp d8, d9, [sp], stack_size
.endm
449 | ||
/*
 * C-level prototype of the decrypt routines. The entry points in this
 * file are emitted under the names aes_cbc_dec_<mode>_aes_<blocks>.
 *
 * void aes_cbc_dec_128(
 *     void     *in,        //!< Input cipher text
 *     uint8_t  *IV,        //!< Must be 16 bytes long and aligned to a 16 byte boundary
 *     uint8_t  *keys,      //!< Must be on a 16 byte boundary and length of key size * key rounds or dec_keys of cbc_key_data
 *     void     *out,       //!< Output plain text
 *     uint64_t len_bytes   //!< Must be a multiple of 16 bytes
 * );
 */
/*
 * General-purpose register aliases (declare_var_generic_reg is defined
 * outside this section, presumably in cbc_common.S).  Numbers 0-4
 * follow the argument order of the prototype.  Two numbers are shared
 * on purpose:
 *   4: len_bytes and len_quotient
 *   6: tmp0 and len_quotient_in
 */
	/* function arguments */
	declare_var_generic_reg in ,0
	declare_var_generic_reg IV ,1
	declare_var_generic_reg keys ,2
	declare_var_generic_reg out ,3
	declare_var_generic_reg len_bytes ,4
	/* results of the division macro */
	declare_var_generic_reg len_quotient,4
	declare_var_generic_reg len_remainder,5
	/* scratch */
	declare_var_generic_reg tmp0 ,6
	declare_var_generic_reg tmp1 ,7
	/* count of batches still to be loaded from the input */
	declare_var_generic_reg len_quotient_in,6
469 | ||
/*
 * define_aes_cbc_dec_func mode,blocks
 *
 * Emits the global function aes_cbc_dec_<mode>_aes_<blocks>.
 *   mode   - AES key size in bits; the first round key index passed to
 *            cbc_decrypt is (256-mode)/32, i.e. 4, 2, 0 for 128, 192, 256
 *   blocks - number of 16-byte blocks handled per batch
 *
 * The symbol is given ELF type "function" in addition to its size, so
 * that it is not emitted as an untyped symbol.
 */
.macro define_aes_cbc_dec_func mode:req,blocks:req
	.global aes_cbc_dec_\mode\()_aes_\blocks
	.type aes_cbc_dec_\mode\()_aes_\blocks, %function
aes_cbc_dec_\mode\()_aes_\blocks:
	cbc_decrypt %((256-\mode)/32),\blocks
	ret
	.size aes_cbc_dec_\mode\()_aes_\blocks, . - aes_cbc_dec_\mode\()_aes_\blocks
.endm
477 | ||
/*
 * Instantiate every variant: batch sizes 1..9, each for 128-, 192-
 * and 256-bit keys (27 functions in total).
 */
.irp blocks,1,2,3,4,5,6,7,8,9
	define_aes_cbc_dec_func 128,\blocks
	define_aes_cbc_dec_func 192,\blocks
	define_aes_cbc_dec_func 256,\blocks
.endr