]>
git.proxmox.com Git - mirror_ubuntu-focal-kernel.git/blob - crypto/aegis128-neon-inner.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2019 Linaro, Ltd. <ard.biesheuvel@linaro.org>
 */
7 #include <asm/neon-intrinsics.h>
9 #define AES_ROUND "aese %0.16b, %1.16b \n\t aesmc %0.16b, %0.16b"
13 #define AES_ROUND "aese.8 %q0, %q1 \n\t aesmc.8 %q0, %q0"
16 #define AEGIS_BLOCK_SIZE 16
/*
 * Local prototypes for the mem* helpers; this file deliberately declares
 * them itself instead of including <string.h>.
 */
void *memcpy(void *dest, const void *src, size_t n);
void *memset(void *s, int c, size_t n);
23 struct aegis128_state
{
27 static struct aegis128_state
aegis128_load_state_neon(const void *state
)
29 return (struct aegis128_state
){ {
38 static void aegis128_save_state_neon(struct aegis128_state st
, void *state
)
40 vst1q_u8(state
, st
.v
[0]);
41 vst1q_u8(state
+ 16, st
.v
[1]);
42 vst1q_u8(state
+ 32, st
.v
[2]);
43 vst1q_u8(state
+ 48, st
.v
[3]);
44 vst1q_u8(state
+ 64, st
.v
[4]);
47 static inline __attribute__((always_inline
))
48 uint8x16_t
aegis_aes_round(uint8x16_t w
)
53 * We use inline asm here instead of the vaeseq_u8/vaesmcq_u8 intrinsics
54 * to force the compiler to issue the aese/aesmc instructions in pairs.
55 * This is much faster on many cores, where the instruction pair can
56 * execute in a single cycle.
58 asm(AES_ROUND
: "+w"(w
) : "w"(z
));
62 static inline __attribute__((always_inline
))
63 struct aegis128_state
aegis128_update_neon(struct aegis128_state st
,
66 m
^= aegis_aes_round(st
.v
[4]);
67 st
.v
[4] ^= aegis_aes_round(st
.v
[3]);
68 st
.v
[3] ^= aegis_aes_round(st
.v
[2]);
69 st
.v
[2] ^= aegis_aes_round(st
.v
[1]);
70 st
.v
[1] ^= aegis_aes_round(st
.v
[0]);
76 void crypto_aegis128_update_neon(void *state
, const void *msg
)
78 struct aegis128_state st
= aegis128_load_state_neon(state
);
80 st
= aegis128_update_neon(st
, vld1q_u8(msg
));
82 aegis128_save_state_neon(st
, state
);
85 void crypto_aegis128_encrypt_chunk_neon(void *state
, void *dst
, const void *src
,
88 struct aegis128_state st
= aegis128_load_state_neon(state
);
91 while (size
>= AEGIS_BLOCK_SIZE
) {
92 uint8x16_t s
= st
.v
[1] ^ (st
.v
[2] & st
.v
[3]) ^ st
.v
[4];
95 st
= aegis128_update_neon(st
, msg
);
96 vst1q_u8(dst
, msg
^ s
);
98 size
-= AEGIS_BLOCK_SIZE
;
99 src
+= AEGIS_BLOCK_SIZE
;
100 dst
+= AEGIS_BLOCK_SIZE
;
104 uint8x16_t s
= st
.v
[1] ^ (st
.v
[2] & st
.v
[3]) ^ st
.v
[4];
105 uint8_t buf
[AEGIS_BLOCK_SIZE
] = {};
107 memcpy(buf
, src
, size
);
109 st
= aegis128_update_neon(st
, msg
);
110 vst1q_u8(buf
, msg
^ s
);
111 memcpy(dst
, buf
, size
);
114 aegis128_save_state_neon(st
, state
);
117 void crypto_aegis128_decrypt_chunk_neon(void *state
, void *dst
, const void *src
,
120 struct aegis128_state st
= aegis128_load_state_neon(state
);
123 while (size
>= AEGIS_BLOCK_SIZE
) {
124 msg
= vld1q_u8(src
) ^ st
.v
[1] ^ (st
.v
[2] & st
.v
[3]) ^ st
.v
[4];
125 st
= aegis128_update_neon(st
, msg
);
128 size
-= AEGIS_BLOCK_SIZE
;
129 src
+= AEGIS_BLOCK_SIZE
;
130 dst
+= AEGIS_BLOCK_SIZE
;
134 uint8x16_t s
= st
.v
[1] ^ (st
.v
[2] & st
.v
[3]) ^ st
.v
[4];
135 uint8_t buf
[AEGIS_BLOCK_SIZE
];
138 memcpy(buf
, src
, size
);
139 msg
= vld1q_u8(buf
) ^ s
;
141 memcpy(dst
, buf
, size
);
143 st
= aegis128_update_neon(st
, msg
);
146 aegis128_save_state_neon(st
, state
);