/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * sha1-ce-core.S - SHA-1 secure hash using ARMv8 Crypto Extensions
 *
 * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.arch		armv8-a+crypto

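	/*
	 * Register allocation: k0-k3 hold the four round constants,
	 * t0/t1 stage the schedule+constant sums, dgav/dgb carry the
	 * hash state (words a-d and e), and dg0/dg1/dg2 are the working
	 * copies rotated through by the round macros below.
	 */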
	k0		.req	v0
	k1		.req	v1
	k2		.req	v2
	k3		.req	v3

	t0		.req	v4
	t1		.req	v5

	dga		.req	q6
	dgav		.req	v6
	dgb		.req	s7
	dgbv		.req	v7

	dg0q		.req	q12
	dg0s		.req	s12
	dg0v		.req	v12
	dg1s		.req	s13
	dg1v		.req	v13
	dg2s		.req	s14

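	/*
	 * Issue one sha1c/sha1p/sha1m instruction (selected by \op);
	 * each performs four SHA-1 rounds.  Invocations alternate
	 * between the "ev" and "od" paths so that the sha1h result and
	 * the schedule+constant sums in t0/t1 are double buffered: each
	 * path consumes what the other produced and precomputes what
	 * the other will need next.
	 */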
	.macro		add_only, op, ev, rc, s0, dg1
	.ifc		\ev, ev
	add		t1.4s, v\s0\().4s, \rc\().4s
	sha1h		dg2s, dg0s
	.ifnb		\dg1
	sha1\op		dg0q, \dg1, t0.4s
	.else
	sha1\op		dg0q, dg1s, t0.4s
	.endif
	.else
	.ifnb		\s0
	add		t0.4s, v\s0\().4s, \rc\().4s
	.endif
	sha1h		dg1s, dg0s
	sha1\op		dg0q, dg2s, t1.4s
	.endif
	.endm

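	/*
	 * As add_only, but also advance the message schedule: sha1su0
	 * and sha1su1 expand the next four schedule words into v\s0
	 * from the sixteen preceding ones held across v\s0-v\s3.
	 */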
	.macro		add_update, op, ev, rc, s0, s1, s2, s3, dg1
	sha1su0		v\s0\().4s, v\s1\().4s, v\s2\().4s
	add_only	\op, \ev, \rc, \s1, \dg1
	sha1su1		v\s0\().4s, v\s3\().4s
	.endm

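	/*
	 * Materialize the 32-bit constant \val in \tmp using a
	 * movz/movk pair, then broadcast it to all four lanes of \k.
	 */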
	.macro		loadrc, k, val, tmp
	movz		\tmp, :abs_g0_nc:\val
	movk		\tmp, :abs_g1:\val
	dup		\k, \tmp
	.endm

/*
 * void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
 *			  int blocks)
 */
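/*
 * A minimal sketch of how the C glue code is assumed to call this
 * routine ('sctx', 'data' and 'blocks' are illustrative names; the
 * kernel_neon_begin()/kernel_neon_end() bracketing is required before
 * NEON registers may be used in task context):
 *
 *	kernel_neon_begin();
 *	sha1_ce_transform(&sctx->sst, data, blocks);
 *	kernel_neon_end();
 */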
ENTRY(sha1_ce_transform)
	frame_push	3

	mov		x19, x0
	mov		x20, x1
	mov		x21, x2

	/* load round constants */
0:	loadrc		k0.4s, 0x5a827999, w6
	loadrc		k1.4s, 0x6ed9eba1, w6
	loadrc		k2.4s, 0x8f1bbcdc, w6
	loadrc		k3.4s, 0xca62c1d6, w6

	/* load state */
	ld1		{dgav.4s}, [x19]
	ldr		dgb, [x19, #16]

	/* load sha1_ce_state::finalize */
	ldr_l		w4, sha1_ce_offsetof_finalize, x4
	ldr		w4, [x19, x4]
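	/*
	 * w4 now holds the finalize flag: nonzero means the final
	 * padding block should be appended here rather than by the C
	 * code, tested at 3: below once all input blocks are consumed.
	 */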

	/* load input */
1:	ld1		{v8.4s-v11.4s}, [x20], #64
	sub		w21, w21, #1

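	/*
	 * SHA-1 consumes big-endian 32-bit words, so byte swap the
	 * input on little-endian CPUs.
	 */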
CPU_LE(	rev32		v8.16b, v8.16b		)
CPU_LE(	rev32		v9.16b, v9.16b		)
CPU_LE(	rev32		v10.16b, v10.16b	)
CPU_LE(	rev32		v11.16b, v11.16b	)

2:	add		t0.4s, v8.4s, k0.4s
	mov		dg0v.16b, dgav.16b

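	/*
	 * 80 rounds in four groups of 20, using the Ch (sha1c),
	 * Parity (sha1p), Maj (sha1m) and Parity (sha1p) round
	 * functions with constants k0-k3.  Each invocation precomputes
	 * the schedule+constant sum for the round that follows, which
	 * is why the constant arguments appear shifted by one step.
	 */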
	add_update	c, ev, k0,  8,  9, 10, 11, dgb
	add_update	c, od, k0,  9, 10, 11,  8
	add_update	c, ev, k0, 10, 11,  8,  9
	add_update	c, od, k0, 11,  8,  9, 10
	add_update	c, ev, k1,  8,  9, 10, 11

	add_update	p, od, k1,  9, 10, 11,  8
	add_update	p, ev, k1, 10, 11,  8,  9
	add_update	p, od, k1, 11,  8,  9, 10
	add_update	p, ev, k1,  8,  9, 10, 11
	add_update	p, od, k2,  9, 10, 11,  8

	add_update	m, ev, k2, 10, 11,  8,  9
	add_update	m, od, k2, 11,  8,  9, 10
	add_update	m, ev, k2,  8,  9, 10, 11
	add_update	m, od, k2,  9, 10, 11,  8
	add_update	m, ev, k3, 10, 11,  8,  9

	add_update	p, od, k3, 11,  8,  9, 10
	add_only	p, ev, k3,  9
	add_only	p, od, k3, 10
	add_only	p, ev, k3, 11
	add_only	p, od

	/* update state */
	add		dgbv.2s, dgbv.2s, dg1v.2s
	add		dgav.4s, dgav.4s, dg0v.4s

	cbz		w21, 3f

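	/*
	 * If a reschedule is pending, store the partial state, yield
	 * the NEON unit and restart at 0b (reloading the clobbered
	 * round constants) once we get the CPU back.
	 */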
	if_will_cond_yield_neon
	st1		{dgav.4s}, [x19]
	str		dgb, [x19, #16]
	do_cond_yield_neon
	b		0b
	endif_yield_neon

	b		1b

	/*
	 * Final block: add padding and total bit count.
	 * Skip if the input size was not a round multiple of the block
	 * size; the padding is handled by the C code in that case.
	 */
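	/*
	 * The padding block is constructed in v8-v11 directly in the
	 * post-byte-swap format, which is why this path re-enters the
	 * loop at 2b, after the rev32 instructions: word 0 carries the
	 * 0x80 terminator byte, everything else is zero except for the
	 * 64-bit bit count in the last two words.  The ror #29 both
	 * multiplies the byte count in x4 by 8 and swaps the 32-bit
	 * halves so the count takes the big-endian word order SHA-1
	 * expects.  Clearing x4 lets the next pass through 3: fall
	 * through to 4: and finish.
	 */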
3:	cbz		x4, 4f
	ldr_l		w4, sha1_ce_offsetof_count, x4
	ldr		x4, [x19, x4]
	movi		v9.2d, #0
	mov		x8, #0x80000000
	movi		v10.2d, #0
	ror		x7, x4, #29		// ror(lsl(x4, 3), 32)
	fmov		d8, x8
	mov		x4, #0
	mov		v11.d[0], xzr
	mov		v11.d[1], x7
	b		2b

	/* store new state */
4:	st1		{dgav.4s}, [x19]
	str		dgb, [x19, #16]
	frame_pop
	ret
ENDPROC(sha1_ce_transform)