]> git.proxmox.com Git - mirror_ubuntu-hirsute-kernel.git/blob - arch/x86/crypto/morus640-sse2-asm.S
treewide: Replace GPLv2 boilerplate/reference with SPDX - rule 500
[mirror_ubuntu-hirsute-kernel.git] / arch / x86 / crypto / morus640-sse2-asm.S
1 /* SPDX-License-Identifier: GPL-2.0-only */
2 /*
3 * SSE2 implementation of MORUS-640
4 *
5 * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
6 * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
7 */
8
9 #include <linux/linkage.h>
10 #include <asm/frame.h>
11
/*
 * Build a pshufd immediate from four 2-bit source-dword selectors:
 * destination dword k is taken from source dword i<k>.
 */
#define SHUFFLE_MASK(i0, i1, i2, i3) \
	(i0 | (i1 << 2) | (i2 << 4) | (i3 << 6))

/* The three dword permutations used by the round function and keystream. */
#define MASK1	SHUFFLE_MASK(3, 0, 1, 2)
#define MASK2	SHUFFLE_MASK(2, 3, 0, 1)
#define MASK3	SHUFFLE_MASK(1, 2, 3, 0)

/* Register allocation shared by every routine in this file. */
#define STATE0	%xmm0
#define STATE1	%xmm1
#define STATE2	%xmm2
#define STATE3	%xmm3
#define STATE4	%xmm4
/* KEY and MSG deliberately name the same register. */
#define KEY	%xmm5
#define MSG	%xmm5
/* Scratch registers. */
#define T0	%xmm6
#define T1	%xmm7

/*
 * Initial values for STATE3 / STATE4: 32 consecutive Fibonacci numbers
 * (starting 0, 1, 1, 2, ...) each reduced modulo 256.
 */
.section	.rodata.cst16.morus640_const, "aM", @progbits, 32
.p2align 4
.Lmorus640_const_0:
	.byte 0x00, 0x01, 0x01, 0x02
	.byte 0x03, 0x05, 0x08, 0x0d
	.byte 0x15, 0x22, 0x37, 0x59
	.byte 0x90, 0xe9, 0x79, 0x62
.Lmorus640_const_1:
	.byte 0xdb, 0x3d, 0x18, 0x55
	.byte 0x6d, 0xc2, 0x2f, 0xf1
	.byte 0x20, 0x11, 0x31, 0x42
	.byte 0x73, 0xb5, 0x28, 0xdd

/* Byte indices 0..15, compared against a byte count to build a mask. */
.section	.rodata.cst16.morus640_counter, "aM", @progbits, 16
.p2align 4
.Lmorus640_counter:
	.byte 0x00, 0x01, 0x02, 0x03
	.byte 0x04, 0x05, 0x06, 0x07
	.byte 0x08, 0x09, 0x0a, 0x0b
	.byte 0x0c, 0x0d, 0x0e, 0x0f

.text
45
/*
 * morus640_round: one step of the MORUS-640 state update
 *
 *   \s0 ^= (\s1 & \s2) ^ \s3
 *   every 32-bit lane of \s0 is rotated left by \b bits
 *   the dwords of \s3 are permuted with pshufd immediate \w
 *
 * \s4 is accepted for call-site symmetry but is not referenced.
 * Clobbers T0.
 */
.macro morus640_round s0, s1, s2, s3, s4, b, w
	/* T0 = (s1 & s2) ^ s3, then fold it into s0 */
	movdqa		\s1, T0
	pand		\s2, T0
	pxor		\s3, T0
	pxor		T0, \s0

	/* rotate each dword of s0 left by b: (x << b) ^ (x >> (32 - b)) */
	movdqa		\s0, T0
	psrld		$(32 - \b), T0
	pslld		$\b, \s0
	pxor		T0, \s0

	/* permute the dwords of s3 in place */
	pshufd		$\w, \s3, \s3
.endm
57
/*
 * __morus640_update: internal ABI
 *
 * Runs the five MORUS-640 rounds over the state, XORing the message block
 * into STATE1..STATE4 between consecutive rounds. The order of the
 * instructions below is significant: each pxor must follow the round that
 * still reads the un-XORed register.
 *
 * input:
 *   STATE[0-4] - input state
 *   MSG        - message block
 * output:
 *   STATE[0-4] - output state
 * changed:
 *   T0
 */
__morus640_update:
	/* round 1: updates STATE0, permutes STATE3 */
	morus640_round	STATE0, STATE1, STATE2, STATE3, STATE4,  5, MASK1

	/* round 2: absorb MSG into STATE1, permute STATE4 */
	pxor		MSG, STATE1
	morus640_round	STATE1, STATE2, STATE3, STATE4, STATE0, 31, MASK2

	/* round 3: absorb MSG into STATE2, permute STATE0 */
	pxor		MSG, STATE2
	morus640_round	STATE2, STATE3, STATE4, STATE0, STATE1,  7, MASK3

	/* round 4: absorb MSG into STATE3, permute STATE1 */
	pxor		MSG, STATE3
	morus640_round	STATE3, STATE4, STATE0, STATE1, STATE2, 22, MASK2

	/* round 5: absorb MSG into STATE4, permute STATE2 */
	pxor		MSG, STATE4
	morus640_round	STATE4, STATE0, STATE1, STATE2, STATE3, 13, MASK1

	ret
ENDPROC(__morus640_update)
80
81
/*
 * __morus640_update_zero: internal ABI
 *
 * Same five rounds as __morus640_update, but for an all-zero message block:
 * the XORs of MSG into the state are omitted, so the MSG/KEY register is
 * neither read nor written here.
 *
 * input:
 *   STATE[0-4] - input state
 * output:
 *   STATE[0-4] - output state
 * changed:
 *   T0
 */
__morus640_update_zero:
	morus640_round	STATE0, STATE1, STATE2, STATE3, STATE4,  5, MASK1
	morus640_round	STATE1, STATE2, STATE3, STATE4, STATE0, 31, MASK2
	morus640_round	STATE2, STATE3, STATE4, STATE0, STATE1,  7, MASK3
	morus640_round	STATE3, STATE4, STATE0, STATE1, STATE2, 22, MASK2
	morus640_round	STATE4, STATE0, STATE1, STATE2, STATE3, 13, MASK1

	ret
ENDPROC(__morus640_update_zero)
99
/*
 * __load_partial: internal ABI
 *
 * Assembles a zero-padded message block from a partial input. Only bits
 * 0-4 of the byte count are examined: bits 0-3 select a 1-, 2-, 4- and
 * 8-byte piece respectively, and each piece is read from the offset given
 * by the count with that bit and all lower bits cleared. For a count
 * below 16 this yields src[0 .. count-1] followed by zero bytes, and no
 * byte at or beyond src + count is read.
 *
 * input:
 *   %rsi - src
 *   %rcx - bytes
 * output:
 *   MSG  - message block
 * changed:
 *   T0
 *   %r8
 *   %r9
 */
__load_partial:
	pxor		MSG, MSG
	xor		%r9d, %r9d		/* accumulator for up to 7 bytes */

	/* odd count: fetch the final single byte */
	test		$0x1, %cl
	jz		.Lld_partial_no_byte
	mov		%ecx, %r8d
	and		$0x1E, %r8d
	mov		(%rsi, %r8), %r9b

.Lld_partial_no_byte:
	/* 2-byte piece goes below what has been gathered so far */
	test		$0x2, %cl
	jz		.Lld_partial_no_word
	mov		%ecx, %r8d
	and		$0x1C, %r8d
	shl		$16, %r9
	mov		(%rsi, %r8), %r9w

.Lld_partial_no_word:
	/* 4-byte piece: shift the accumulator up and merge */
	test		$0x4, %cl
	jz		.Lld_partial_no_dword
	mov		%ecx, %r8d
	and		$0x18, %r8d
	shl		$32, %r9
	mov		(%rsi, %r8), %r8d	/* 32-bit load zero-extends */
	xor		%r8, %r9

.Lld_partial_no_dword:
	movq		%r9, MSG		/* upper 64 bits become zero */

	/* 8-byte piece: move gathered bytes to the high half, load the low */
	test		$0x8, %cl
	jz		.Lld_partial_done
	mov		%ecx, %r8d
	and		$0x10, %r8d
	pslldq		$8, MSG
	movq		(%rsi, %r8), T0
	pxor		T0, MSG

.Lld_partial_done:
	ret
ENDPROC(__load_partial)
165
/*
 * __store_partial: internal ABI
 *
 * Writes the leading bytes of T0 to dst in 8/4/2/1-byte pieces. For a
 * count below 16 exactly that many bytes are written and nothing beyond
 * dst + count is touched.
 *
 * The count originates from a 32-bit C argument, so only the low 32 bits
 * of %rcx are trusted and all comparisons are unsigned.
 *
 * input:
 *   %rdx - dst
 *   %rcx - bytes (low 32 bits)
 *   T0   - message block
 * changed:
 *   T0 (shifted right by 8 bytes when at least 8 bytes are stored)
 *   %r8
 *   %r9
 *   %r10
 */
__store_partial:
	mov		%ecx, %r8d		/* remaining bytes, zero-extended */
	mov		%rdx, %r9		/* write cursor */

	movq		T0, %r10		/* low 8 bytes of the block */

	cmp		$8, %r8d
	jb		.Lst_partial_8

	mov		%r10, (%r9)
	psrldq		$8, T0			/* expose the high 8 bytes */
	movq		T0, %r10

	sub		$8, %r8d
	add		$8, %r9

.Lst_partial_8:
	cmp		$4, %r8d
	jb		.Lst_partial_4

	mov		%r10d, (%r9)
	shr		$32, %r10

	sub		$4, %r8d
	add		$4, %r9

.Lst_partial_4:
	cmp		$2, %r8d
	jb		.Lst_partial_2

	mov		%r10w, (%r9)
	shr		$16, %r10

	sub		$2, %r8d
	add		$2, %r9

.Lst_partial_2:
	cmp		$1, %r8d
	jb		.Lst_partial_1

	mov		%r10b, (%r9)

.Lst_partial_1:
	ret
ENDPROC(__store_partial)
223
/*
 * void crypto_morus640_sse2_init(void *state, const void *key, const void *iv);
 *
 * Builds the initial state { iv, key, all-ones, const_0, const_1 }, runs 16
 * message-less updates over it, XORs the key into STATE1 once more and
 * writes the five 16-byte state words to *state.
 *
 * input:
 *   %rdi - state (80 bytes, written; no alignment required)
 *   %rsi - key   (16 bytes, read; no alignment required)
 *   %rdx - iv    (16 bytes, read; no alignment required)
 * changed:
 *   STATE[0-4], KEY, T0
 */
ENTRY(crypto_morus640_sse2_init)
	FRAME_BEGIN

	/* load IV: */
	movdqu		(%rdx), STATE0
	/* load key: */
	movdqu		(%rsi), KEY
	movdqa		KEY, STATE1
	/* load all ones: */
	pcmpeqd		STATE2, STATE2
	/* load the constants (RIP-relative, position independent): */
	movdqa		.Lmorus640_const_0(%rip), STATE3
	movdqa		.Lmorus640_const_1(%rip), STATE4

	/*
	 * update 16 times with zero; __morus640_update_zero leaves KEY
	 * untouched, so it is still valid afterwards:
	 */
	.rept 16
	call		__morus640_update_zero
	.endr

	/* xor-in the key again after updates: */
	pxor		KEY, STATE1

	/* store the state: */
	movdqu		STATE0, (0 * 16)(%rdi)
	movdqu		STATE1, (1 * 16)(%rdi)
	movdqu		STATE2, (2 * 16)(%rdi)
	movdqu		STATE3, (3 * 16)(%rdi)
	movdqu		STATE4, (4 * 16)(%rdi)

	FRAME_END
	ret
ENDPROC(crypto_morus640_sse2_init)
272
/*
 * void crypto_morus640_sse2_ad(void *state, const void *data,
 *                              unsigned int length);
 *
 * Absorbs whole 16-byte blocks of associated data into the state. If
 * length is below 16 the state is left untouched; otherwise blocks are
 * consumed while at least 16 bytes remain and any trailing (length % 16)
 * bytes are ignored.
 *
 * length is a 32-bit argument, so it is handled in %edx only (the upper
 * half of %rdx is not guaranteed by the calling convention) and compared
 * unsigned throughout.
 *
 * input:
 *   %rdi - state (80 bytes, read and written)
 *   %rsi - data
 *   %edx - length in bytes
 * changed:
 *   STATE[0-4], MSG, T0, %rsi, %rdx
 */
ENTRY(crypto_morus640_sse2_ad)
	FRAME_BEGIN

	cmp		$16, %edx
	jb		.Lad_out

	/* load the state: */
	movdqu		(0 * 16)(%rdi), STATE0
	movdqu		(1 * 16)(%rdi), STATE1
	movdqu		(2 * 16)(%rdi), STATE2
	movdqu		(3 * 16)(%rdi), STATE3
	movdqu		(4 * 16)(%rdi), STATE4

	/* pick the aligned-load loop only for 16-byte aligned data: */
	test		$0xF, %esi
	jnz		.Lad_u_loop

.align 4
.Lad_a_loop:
	movdqa		(%rsi), MSG
	call		__morus640_update
	sub		$16, %edx
	add		$16, %rsi
	cmp		$16, %edx
	jae		.Lad_a_loop

	jmp		.Lad_cont
.align 4
.Lad_u_loop:
	movdqu		(%rsi), MSG
	call		__morus640_update
	sub		$16, %edx
	add		$16, %rsi
	cmp		$16, %edx
	jae		.Lad_u_loop

.Lad_cont:
	/* store the state: */
	movdqu		STATE0, (0 * 16)(%rdi)
	movdqu		STATE1, (1 * 16)(%rdi)
	movdqu		STATE2, (2 * 16)(%rdi)
	movdqu		STATE3, (3 * 16)(%rdi)
	movdqu		STATE4, (4 * 16)(%rdi)

.Lad_out:
	FRAME_END
	ret
ENDPROC(crypto_morus640_sse2_ad)
325
/*
 * void crypto_morus640_sse2_enc(void *state, const void *src, void *dst,
 *                               unsigned int length);
 *
 * Encrypts whole 16-byte blocks. For each block the ciphertext is
 *   src ^ STATE0 ^ pshufd(STATE1, MASK3) ^ (STATE2 & STATE3)
 * and the state is then updated with the plaintext block. If length is
 * below 16 nothing is read or written; trailing (length % 16) bytes are
 * left unprocessed.
 *
 * length is a 32-bit argument, so it is handled in %ecx only (the upper
 * half of %rcx is not guaranteed by the calling convention) and compared
 * unsigned throughout.
 *
 * input:
 *   %rdi - state (80 bytes, read and written)
 *   %rsi - src
 *   %rdx - dst
 *   %ecx - length in bytes
 * changed:
 *   STATE[0-4], MSG, T0, T1, %rsi, %rdx, %rcx, %r8
 */
ENTRY(crypto_morus640_sse2_enc)
	FRAME_BEGIN

	cmp		$16, %ecx
	jb		.Lenc_out

	/* load the state: */
	movdqu		(0 * 16)(%rdi), STATE0
	movdqu		(1 * 16)(%rdi), STATE1
	movdqu		(2 * 16)(%rdi), STATE2
	movdqu		(3 * 16)(%rdi), STATE3
	movdqu		(4 * 16)(%rdi), STATE4

	/* aligned loop requires both src and dst to be 16-byte aligned: */
	mov		%esi, %r8d
	or		%edx, %r8d
	test		$0xF, %r8d
	jnz		.Lenc_u_loop

.align 4
.Lenc_a_loop:
	movdqa		(%rsi), MSG
	movdqa		MSG, T0
	pxor		STATE0, T0
	pshufd		$MASK3, STATE1, T1
	pxor		T1, T0
	movdqa		STATE2, T1
	pand		STATE3, T1
	pxor		T1, T0
	movdqa		T0, (%rdx)

	/* MSG still holds the plaintext; T0 may be clobbered now */
	call		__morus640_update
	sub		$16, %ecx
	add		$16, %rsi
	add		$16, %rdx
	cmp		$16, %ecx
	jae		.Lenc_a_loop

	jmp		.Lenc_cont
.align 4
.Lenc_u_loop:
	movdqu		(%rsi), MSG
	movdqa		MSG, T0
	pxor		STATE0, T0
	pshufd		$MASK3, STATE1, T1
	pxor		T1, T0
	movdqa		STATE2, T1
	pand		STATE3, T1
	pxor		T1, T0
	movdqu		T0, (%rdx)

	call		__morus640_update
	sub		$16, %ecx
	add		$16, %rsi
	add		$16, %rdx
	cmp		$16, %ecx
	jae		.Lenc_u_loop

.Lenc_cont:
	/* store the state: */
	movdqu		STATE0, (0 * 16)(%rdi)
	movdqu		STATE1, (1 * 16)(%rdi)
	movdqu		STATE2, (2 * 16)(%rdi)
	movdqu		STATE3, (3 * 16)(%rdi)
	movdqu		STATE4, (4 * 16)(%rdi)

.Lenc_out:
	FRAME_END
	ret
ENDPROC(crypto_morus640_sse2_enc)
399
/*
 * void crypto_morus640_sse2_enc_tail(void *state, const void *src, void *dst,
 *                                    unsigned int length);
 *
 * Encrypts a final partial block: the input is gathered into a zero-padded
 * block by __load_partial, XORed with the keystream
 *   STATE0 ^ pshufd(STATE1, MASK3) ^ (STATE2 & STATE3)
 * and the leading bytes are written by __store_partial. The state is then
 * updated with the zero-padded plaintext block and stored back.
 *
 * length is a 32-bit argument; it is zero-extended on entry because the
 * helpers receive the count in the full %rcx.
 *
 * input:
 *   %rdi - state (80 bytes, read and written)
 *   %rsi - src
 *   %rdx - dst
 *   %ecx - length in bytes
 * changed:
 *   STATE[0-4], MSG, T0, T1, %rcx, %r8, %r9, %r10
 */
ENTRY(crypto_morus640_sse2_enc_tail)
	FRAME_BEGIN

	/* discard whatever the caller left in the upper half of %rcx: */
	mov		%ecx, %ecx

	/* load the state: */
	movdqu		(0 * 16)(%rdi), STATE0
	movdqu		(1 * 16)(%rdi), STATE1
	movdqu		(2 * 16)(%rdi), STATE2
	movdqu		(3 * 16)(%rdi), STATE3
	movdqu		(4 * 16)(%rdi), STATE4

	/* encrypt message: */
	call		__load_partial

	movdqa		MSG, T0
	pxor		STATE0, T0
	pshufd		$MASK3, STATE1, T1
	pxor		T1, T0
	movdqa		STATE2, T1
	pand		STATE3, T1
	pxor		T1, T0

	call		__store_partial

	/* MSG is the zero-padded plaintext produced by __load_partial */
	call		__morus640_update

	/* store the state: */
	movdqu		STATE0, (0 * 16)(%rdi)
	movdqu		STATE1, (1 * 16)(%rdi)
	movdqu		STATE2, (2 * 16)(%rdi)
	movdqu		STATE3, (3 * 16)(%rdi)
	movdqu		STATE4, (4 * 16)(%rdi)

	FRAME_END
	ret
ENDPROC(crypto_morus640_sse2_enc_tail)
439
/*
 * void crypto_morus640_sse2_dec(void *state, const void *src, void *dst,
 *                               unsigned int length);
 *
 * Decrypts whole 16-byte blocks. For each block the plaintext is
 *   src ^ STATE0 ^ pshufd(STATE1, MASK3) ^ (STATE2 & STATE3)
 * and the state is then updated with that plaintext block. If length is
 * below 16 nothing is read or written; trailing (length % 16) bytes are
 * left unprocessed.
 *
 * length is a 32-bit argument, so it is handled in %ecx only (the upper
 * half of %rcx is not guaranteed by the calling convention) and compared
 * unsigned throughout.
 *
 * input:
 *   %rdi - state (80 bytes, read and written)
 *   %rsi - src
 *   %rdx - dst
 *   %ecx - length in bytes
 * changed:
 *   STATE[0-4], MSG, T0, %rsi, %rdx, %rcx, %r8
 */
ENTRY(crypto_morus640_sse2_dec)
	FRAME_BEGIN

	cmp		$16, %ecx
	jb		.Ldec_out

	/* load the state: */
	movdqu		(0 * 16)(%rdi), STATE0
	movdqu		(1 * 16)(%rdi), STATE1
	movdqu		(2 * 16)(%rdi), STATE2
	movdqu		(3 * 16)(%rdi), STATE3
	movdqu		(4 * 16)(%rdi), STATE4

	/* aligned loop requires both src and dst to be 16-byte aligned: */
	mov		%esi, %r8d
	or		%edx, %r8d
	test		$0xF, %r8d
	jnz		.Ldec_u_loop

.align 4
.Ldec_a_loop:
	movdqa		(%rsi), MSG
	pxor		STATE0, MSG
	pshufd		$MASK3, STATE1, T0
	pxor		T0, MSG
	movdqa		STATE2, T0
	pand		STATE3, T0
	pxor		T0, MSG
	movdqa		MSG, (%rdx)

	/* MSG now holds the recovered plaintext */
	call		__morus640_update
	sub		$16, %ecx
	add		$16, %rsi
	add		$16, %rdx
	cmp		$16, %ecx
	jae		.Ldec_a_loop

	jmp		.Ldec_cont
.align 4
.Ldec_u_loop:
	movdqu		(%rsi), MSG
	pxor		STATE0, MSG
	pshufd		$MASK3, STATE1, T0
	pxor		T0, MSG
	movdqa		STATE2, T0
	pand		STATE3, T0
	pxor		T0, MSG
	movdqu		MSG, (%rdx)

	call		__morus640_update
	sub		$16, %ecx
	add		$16, %rsi
	add		$16, %rdx
	cmp		$16, %ecx
	jae		.Ldec_u_loop

.Ldec_cont:
	/* store the state: */
	movdqu		STATE0, (0 * 16)(%rdi)
	movdqu		STATE1, (1 * 16)(%rdi)
	movdqu		STATE2, (2 * 16)(%rdi)
	movdqu		STATE3, (3 * 16)(%rdi)
	movdqu		STATE4, (4 * 16)(%rdi)

.Ldec_out:
	FRAME_END
	ret
ENDPROC(crypto_morus640_sse2_dec)
511
/*
 * void crypto_morus640_sse2_dec_tail(void *state, const void *src, void *dst,
 *                                    unsigned int length);
 *
 * Decrypts a final partial block: the input is gathered into a zero-padded
 * block by __load_partial, XORed with the keystream
 *   STATE0 ^ pshufd(STATE1, MASK3) ^ (STATE2 & STATE3)
 * and the leading bytes are written by __store_partial. Before the state
 * update, every byte of the block whose index is not below the byte count
 * is cleared, so the padding positions (which now contain raw keystream)
 * do not enter the state.
 *
 * length is a 32-bit argument; it is zero-extended on entry because the
 * helpers receive the count in the full %rcx.
 *
 * input:
 *   %rdi - state (80 bytes, read and written)
 *   %rsi - src
 *   %rdx - dst
 *   %ecx - length in bytes
 * changed:
 *   STATE[0-4], MSG, T0, T1, %rcx, %r8, %r9, %r10
 */
ENTRY(crypto_morus640_sse2_dec_tail)
	FRAME_BEGIN

	/* discard whatever the caller left in the upper half of %rcx: */
	mov		%ecx, %ecx

	/* load the state: */
	movdqu		(0 * 16)(%rdi), STATE0
	movdqu		(1 * 16)(%rdi), STATE1
	movdqu		(2 * 16)(%rdi), STATE2
	movdqu		(3 * 16)(%rdi), STATE3
	movdqu		(4 * 16)(%rdi), STATE4

	/* decrypt message: */
	call		__load_partial

	pxor		STATE0, MSG
	pshufd		$MASK3, STATE1, T0
	pxor		T0, MSG
	movdqa		STATE2, T0
	pand		STATE3, T0
	pxor		T0, MSG
	movdqa		MSG, T0		/* __store_partial consumes T0 */

	call		__store_partial

	/*
	 * mask with byte count: broadcast the low byte of the count to all
	 * 16 lanes, then keep lane i only where count > i (signed compare).
	 */
	movq		%rcx, T0
	punpcklbw	T0, T0
	punpcklbw	T0, T0
	punpcklbw	T0, T0
	punpcklbw	T0, T0
	movdqa		.Lmorus640_counter(%rip), T1
	pcmpgtb		T1, T0
	pand		T0, MSG

	call		__morus640_update

	/* store the state: */
	movdqu		STATE0, (0 * 16)(%rdi)
	movdqu		STATE1, (1 * 16)(%rdi)
	movdqu		STATE2, (2 * 16)(%rdi)
	movdqu		STATE3, (3 * 16)(%rdi)
	movdqu		STATE4, (4 * 16)(%rdi)

	FRAME_END
	ret
ENDPROC(crypto_morus640_sse2_dec_tail)
561
/*
 * void crypto_morus640_sse2_final(void *state, void *tag_xor,
 *                                 u64 assoclen, u64 cryptlen);
 *
 * Finalization: XORs STATE0 into STATE4, runs ten updates with the block
 * { assoclen * 8, cryptlen * 8 } (two 64-bit lanes, low lane first), and
 * XORs the resulting keystream word
 *   STATE0 ^ pshufd(STATE1, MASK3) ^ (STATE2 & STATE3)
 * into the 16 bytes at tag_xor. The state in memory is read only; it is
 * not written back.
 *
 * input:
 *   %rdi - state   (80 bytes, read)
 *   %rsi - tag_xor (16 bytes, read and written)
 *   %rdx - assoclen in bytes
 *   %rcx - cryptlen in bytes
 * changed:
 *   STATE[0-4], MSG, T0
 */
ENTRY(crypto_morus640_sse2_final)
	FRAME_BEGIN

	/* load the state: */
	movdqu		(0 * 16)(%rdi), STATE0
	movdqu		(1 * 16)(%rdi), STATE1
	movdqu		(2 * 16)(%rdi), STATE2
	movdqu		(3 * 16)(%rdi), STATE3
	movdqu		(4 * 16)(%rdi), STATE4

	/* xor state[0] into state[4]: */
	pxor		STATE0, STATE4

	/* prepare length block: low qword = assoclen, high qword = cryptlen */
	movq		%rdx, MSG
	movq		%rcx, T0
	punpcklqdq	T0, MSG
	psllq		$3, MSG		/* multiply by 8 (to get bit count) */

	/* update state ten times with the length block: */
	.rept 10
	call		__morus640_update
	.endr

	/* xor tag: */
	movdqu		(%rsi), MSG

	pshufd		$MASK3, STATE1, T0
	pxor		STATE0, T0
	pxor		T0, MSG
	movdqa		STATE2, T0
	pand		STATE3, T0
	pxor		T0, MSG

	movdqu		MSG, (%rsi)

	FRAME_END
	ret
ENDPROC(crypto_morus640_sse2_final)