]> git.proxmox.com Git - ceph.git/blob - ceph/src/crypto/isa-l/isa-l_crypto/sha256_mb/aarch64/sha256_mb_x4_ce.S
update ceph source to reef 18.1.2
[ceph.git] / ceph / src / crypto / isa-l / isa-l_crypto / sha256_mb / aarch64 / sha256_mb_x4_ce.S
/**********************************************************************
  Copyright(c) 2019 Arm Corporation All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions
  are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in
      the documentation and/or other materials provided with the
      distribution.
    * Neither the name of Arm Corporation nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
// Assembler setup (GNU as, AArch64).
// The sha256h/sha256h2/sha256su0/sha256su1 instructions used in this file
// are only accepted when the crypto extension is enabled.
	.arch	armv8-a+crypto
	.text
	.align	2		// power-of-two form on AArch64: 4-byte alignment
	.p2align 3,,7		// then 8-byte alignment, skipping at most 7 bytes

/*
 * Macros
 */

/*
 * declare_var_vector_reg name, reg
 *
 * Gives SIMD register number <reg> three symbolic names:
 *   <name>_v  ->  v<reg>   vector view (used with .4s / .16b arrangements)
 *   <name>_q  ->  q<reg>   128-bit view (ldr/str and the sha256h operands)
 *   <name>_s  ->  s<reg>   32-bit view
 */
.macro declare_var_vector_reg name:req,reg:req
	\name\()_v	.req	v\reg
	\name\()_q	.req	q\reg
	\name\()_s	.req	s\reg
.endm
/*
 * sha256_4_rounds_lane_pair la, lb, msg, wk
 *
 * Helper for sha256_4_rounds_high; handles two of the four lanes.
 *   la, lb : lane prefixes (l0..l3)
 *   msg    : message register group (msg0..msg3) that feeds the NEXT four rounds
 *   wk     : suffix of the per-lane register holding the round input
 *            (message words + round constants) for the CURRENT four rounds
 *
 * Steps:
 *   1. load the next four round constants from [tmp] into key
 *   2. copy the current round input of both lanes into tmp0/tmp1
 *   3. overwrite <lane>_<wk> with <lane>_<msg> + key for the next call
 *   4. sha256h / sha256h2 update abcd/efgh of both lanes with tmp0/tmp1
 *
 * key and tmp3 are both declared on SIMD register 15 in this file, so step 4's
 * scratch copy destroys the constants; that is why every pair reloads key.
 * tmp itself is not advanced here.
 */
.macro sha256_4_rounds_lane_pair la:req,lb:req,msg:req,wk:req
	ldr		key_q, [tmp]
	mov		tmp0_v.16b, \la\()_\wk\()_v.16b
	mov		tmp1_v.16b, \lb\()_\wk\()_v.16b
	add		\la\()_\wk\()_v.4s, \la\()_\msg\()_v.4s, key_v.4s
	add		\lb\()_\wk\()_v.4s, \lb\()_\msg\()_v.4s, key_v.4s
	mov		tmp2_v.16b, \la\()_abcd_v.16b	// sha256h2 needs the pre-update abcd
	mov		tmp3_v.16b, \lb\()_abcd_v.16b	// (clobbers key, same register)
	sha256h		\la\()_abcd_q, \la\()_efgh_q, tmp0_v.4s
	sha256h		\lb\()_abcd_q, \lb\()_efgh_q, tmp1_v.4s
	sha256h2	\la\()_efgh_q, tmp2_q, tmp0_v.4s
	sha256h2	\lb\()_efgh_q, tmp3_q, tmp1_v.4s
.endm

/*
 * sha256_4_rounds_high msg, tmp0
 *
 * Four SHA-256 rounds on all four lanes without any message-schedule work;
 * used directly for rounds 48-59 and as the round core of sha256_4_rounds_low.
 *   msg  : message register group whose words, added to the constants at
 *          [tmp], become the round input consumed by the next invocation
 *   tmp0 : suffix of the per-lane round-input register (callers pass tmp0)
 *
 * Reads 16 bytes of round constants at [tmp] and advances tmp by 16.
 * Scratch: key, tmp0, tmp1, tmp2, tmp3.
 */
.macro sha256_4_rounds_high msg:req,tmp0:req
	sha256_4_rounds_lane_pair l0,l1,\msg,\tmp0
	sha256_4_rounds_lane_pair l2,l3,\msg,\tmp0
	add		tmp, tmp, 16
.endm
/*
 * sha256_x4_su0 w0, w1
 * First half of the message-schedule update (sha256su0) of register group
 * <w0> on all four lanes, using group <w1>.
 */
.macro sha256_x4_su0 w0:req,w1:req
	sha256su0	l0_\w0\()_v.4s, l0_\w1\()_v.4s
	sha256su0	l1_\w0\()_v.4s, l1_\w1\()_v.4s
	sha256su0	l2_\w0\()_v.4s, l2_\w1\()_v.4s
	sha256su0	l3_\w0\()_v.4s, l3_\w1\()_v.4s
.endm

/*
 * sha256_x4_su1 w0, w2, w3
 * Second half of the message-schedule update (sha256su1) of register group
 * <w0> on all four lanes, using groups <w2> and <w3>.
 */
.macro sha256_x4_su1 w0:req,w2:req,w3:req
	sha256su1	l0_\w0\()_v.4s, l0_\w2\()_v.4s, l0_\w3\()_v.4s
	sha256su1	l1_\w0\()_v.4s, l1_\w2\()_v.4s, l1_\w3\()_v.4s
	sha256su1	l2_\w0\()_v.4s, l2_\w2\()_v.4s, l2_\w3\()_v.4s
	sha256su1	l3_\w0\()_v.4s, l3_\w2\()_v.4s, l3_\w3\()_v.4s
.endm

/*
 * sha256_4_rounds_low msg0, msg1, msg2, msg3, tmp0
 *
 * Macro for rounds 0-47: four rounds on all four lanes, interleaved with the
 * message-schedule update that turns group <msg0> into the words needed
 * sixteen rounds later.
 *   msg0       : group being rescheduled (su0 before, su1 after the rounds)
 *   msg1       : group whose words become the next round input
 *   msg2, msg3 : the remaining two groups, consumed by su1
 *   tmp0       : suffix of the per-lane round-input register
 *
 * Advances tmp by 16 through sha256_4_rounds_high.
 */
.macro sha256_4_rounds_low msg0:req,msg1:req,msg2:req,msg3:req,tmp0:req
	sha256_x4_su0 \msg0,\msg1
	sha256_4_rounds_high \msg1,\tmp0
	sha256_x4_su1 \msg0,\msg2,\msg3
.endm


/*
 * Variable list: symbolic names for the SIMD registers.
 *
 * Several names intentionally share a register; the pairs are called out
 * below because the code relies on their live ranges not overlapping.
 */

/* v0-v7: working digest state, one abcd/efgh pair per lane */
declare_var_vector_reg l0_abcd,0
declare_var_vector_reg l0_efgh,1
declare_var_vector_reg l1_abcd,2
declare_var_vector_reg l1_efgh,3
declare_var_vector_reg l2_abcd,4
declare_var_vector_reg l2_efgh,5
declare_var_vector_reg l3_abcd,6
declare_var_vector_reg l3_efgh,7

/* v8-v11: per-lane round input (message words + round constants) */
declare_var_vector_reg l0_tmp0,8
declare_var_vector_reg l1_tmp0,9
declare_var_vector_reg l2_tmp0,10
declare_var_vector_reg l3_tmp0,11

/* v12-v15: temporary variables shared by all lanes */
declare_var_vector_reg tmp0,12
declare_var_vector_reg tmp1,13
declare_var_vector_reg tmp2,14
declare_var_vector_reg tmp3,15

/* round constants; shares v15 with tmp3 */
declare_var_vector_reg key,15

/* v16-v31: message variables, four registers (16 words) per lane */
declare_var_vector_reg l0_msg0,16
declare_var_vector_reg l0_msg1,17
declare_var_vector_reg l0_msg2,18
declare_var_vector_reg l0_msg3,19
declare_var_vector_reg l1_msg0,20
declare_var_vector_reg l1_msg1,21
declare_var_vector_reg l1_msg2,22
declare_var_vector_reg l1_msg3,23
declare_var_vector_reg l2_msg0,24
declare_var_vector_reg l2_msg1,25
declare_var_vector_reg l2_msg2,26
declare_var_vector_reg l2_msg3,27
declare_var_vector_reg l3_msg0,28
declare_var_vector_reg l3_msg1,29
declare_var_vector_reg l3_msg2,30
declare_var_vector_reg l3_msg3,31

/*
 * Digest values from before the current block.  These reuse msg0/msg1
 * registers (v16/v17, v20/v21, v24/v25, v28/v29), so they may only be
 * loaded once msg0 and msg1 of every lane are no longer needed.
 */
declare_var_vector_reg l1_abcd_saved,16
declare_var_vector_reg l1_efgh_saved,17
declare_var_vector_reg l0_abcd_saved,20
declare_var_vector_reg l0_efgh_saved,21
declare_var_vector_reg l2_abcd_saved,24
declare_var_vector_reg l2_efgh_saved,25
declare_var_vector_reg l3_abcd_saved,28
declare_var_vector_reg l3_efgh_saved,29


/*
 * void sha256_mb_ce_x4(SHA256_JOB *, SHA256_JOB *, SHA256_JOB *, SHA256_JOB *, int);
 *
 * Runs the SHA-256 block function over <len> consecutive 64-byte blocks of
 * four independent jobs ("lanes" l0..l3) in parallel, using the Armv8
 * SHA-256 instructions.
 *
 * In:    x0-x3  job pointers for lanes 0-3
 *        w4     number of 64-byte blocks to hash in every lane;
 *               a value <= 0 makes the call a no-op
 *
 * Job fields accessed (byte offsets, as used by this code):
 *        +0     pointer to the message data (read only; the advanced
 *               pointer is NOT written back)
 *        +64    first four digest words  (read, updated after every block)
 *        +80    last four digest words   (read, updated after every block)
 *
 * Out:   no return value; the digests inside the four jobs are updated.
 *
 * Clobbers: x5-x10, v0-v7, v16-v31, condition flags.
 *           d8-d15 (the callee-saved low halves of v8-v15) are saved on
 *           entry and restored before returning.
 * Stack:    64 bytes.
 */

	.equ	X4_JOB_BUFFER, 0
	.equ	X4_JOB_DIGEST_ABCD, 64
	.equ	X4_JOB_DIGEST_EFGH, 80

/*
 * Arguments list and scratch general-purpose registers
 */
	l0_job		.req	x0
	l1_job		.req	x1
	l2_job		.req	x2
	l3_job		.req	x3
	len		.req	w4
	l0_data		.req	x5
	l1_data		.req	x6
	l2_data		.req	x7
	l3_data		.req	x8
	tmp		.req	x9	// cursor into the round-constant table
	key_base	.req	x10	// address of KEY, constant for the whole call

	.global	sha256_mb_ce_x4
	.type	sha256_mb_ce_x4, %function
sha256_mb_ce_x4:
	// Nothing to hash: leave before touching the stack or the jobs.
	cmp	len, #0
	b.le	.Lx4_return

	// push d8~d15
	stp	d8, d9, [sp, #-64]!
	stp	d10, d11, [sp, #16]
	stp	d12, d13, [sp, #32]
	stp	d14, d15, [sp, #48]

	// adrp/add reaches +-4 GiB; KEY lives in another section, so a plain
	// adr (+-1 MiB) could fail to link in a large image.
	adrp	key_base, KEY
	add	key_base, key_base, :lo12:KEY

	// data pointer and current digest of every lane
	ldr	l0_data, [l0_job, #X4_JOB_BUFFER]
	ldr	l0_abcd_q, [l0_job, #X4_JOB_DIGEST_ABCD]
	ldr	l0_efgh_q, [l0_job, #X4_JOB_DIGEST_EFGH]
	ldr	l1_data, [l1_job, #X4_JOB_BUFFER]
	ldr	l1_abcd_q, [l1_job, #X4_JOB_DIGEST_ABCD]
	ldr	l1_efgh_q, [l1_job, #X4_JOB_DIGEST_EFGH]
	ldr	l2_data, [l2_job, #X4_JOB_BUFFER]
	ldr	l2_abcd_q, [l2_job, #X4_JOB_DIGEST_ABCD]
	ldr	l2_efgh_q, [l2_job, #X4_JOB_DIGEST_EFGH]
	ldr	l3_data, [l3_job, #X4_JOB_BUFFER]
	ldr	l3_abcd_q, [l3_job, #X4_JOB_DIGEST_ABCD]
	ldr	l3_efgh_q, [l3_job, #X4_JOB_DIGEST_EFGH]

.Lx4_next_block:
	// restart at the first four round constants for every block
	mov	tmp, key_base

	// load one 64-byte block per lane and step each data pointer past it
	ld1	{l0_msg0_v.4s-l0_msg3_v.4s}, [l0_data], #64
	ld1	{l1_msg0_v.4s-l1_msg3_v.4s}, [l1_data], #64
	ld1	{l2_msg0_v.4s-l2_msg3_v.4s}, [l2_data], #64
	ld1	{l3_msg0_v.4s-l3_msg3_v.4s}, [l3_data], #64
	ldr	key_q, [tmp], #16

	// byte-swap every 32-bit message word and form the input of
	// rounds 0-3 (msg0 + first four constants) for each lane
	rev32	l0_msg0_v.16b, l0_msg0_v.16b
	rev32	l0_msg1_v.16b, l0_msg1_v.16b
	add	l0_tmp0_v.4s, l0_msg0_v.4s, key_v.4s
	rev32	l0_msg2_v.16b, l0_msg2_v.16b
	rev32	l0_msg3_v.16b, l0_msg3_v.16b

	rev32	l1_msg0_v.16b, l1_msg0_v.16b
	rev32	l1_msg1_v.16b, l1_msg1_v.16b
	add	l1_tmp0_v.4s, l1_msg0_v.4s, key_v.4s
	rev32	l1_msg2_v.16b, l1_msg2_v.16b
	rev32	l1_msg3_v.16b, l1_msg3_v.16b

	rev32	l2_msg0_v.16b, l2_msg0_v.16b
	rev32	l2_msg1_v.16b, l2_msg1_v.16b
	add	l2_tmp0_v.4s, l2_msg0_v.4s, key_v.4s
	rev32	l2_msg2_v.16b, l2_msg2_v.16b
	rev32	l2_msg3_v.16b, l2_msg3_v.16b

	rev32	l3_msg0_v.16b, l3_msg0_v.16b
	rev32	l3_msg1_v.16b, l3_msg1_v.16b
	add	l3_tmp0_v.4s, l3_msg0_v.4s, key_v.4s
	rev32	l3_msg2_v.16b, l3_msg2_v.16b
	rev32	l3_msg3_v.16b, l3_msg3_v.16b

	sha256_4_rounds_low msg0,msg1,msg2,msg3,tmp0	/* rounds 0-3 */
	sha256_4_rounds_low msg1,msg2,msg3,msg0,tmp0	/* rounds 4-7 */
	sha256_4_rounds_low msg2,msg3,msg0,msg1,tmp0	/* rounds 8-11 */
	sha256_4_rounds_low msg3,msg0,msg1,msg2,tmp0	/* rounds 12-15 */
	sha256_4_rounds_low msg0,msg1,msg2,msg3,tmp0	/* rounds 16-19 */
	sha256_4_rounds_low msg1,msg2,msg3,msg0,tmp0	/* rounds 20-23 */
	sha256_4_rounds_low msg2,msg3,msg0,msg1,tmp0	/* rounds 24-27 */
	sha256_4_rounds_low msg3,msg0,msg1,msg2,tmp0	/* rounds 28-31 */
	sha256_4_rounds_low msg0,msg1,msg2,msg3,tmp0	/* rounds 32-35 */
	sha256_4_rounds_low msg1,msg2,msg3,msg0,tmp0	/* rounds 36-39 */
	sha256_4_rounds_low msg2,msg3,msg0,msg1,tmp0	/* rounds 40-43 */
	sha256_4_rounds_low msg3,msg0,msg1,msg2,tmp0	/* rounds 44-47 */

	sha256_4_rounds_high msg1,tmp0			/* rounds 48-51 */

	/*
	 * msg0 and msg1 are free from here on; their registers are reused
	 * for the digest values from before this block.  The values are
	 * re-read from the jobs, where the previous iteration stored them.
	 */
	ldr	l0_abcd_saved_q, [l0_job, #X4_JOB_DIGEST_ABCD]
	ldr	l1_abcd_saved_q, [l1_job, #X4_JOB_DIGEST_ABCD]
	ldr	l2_abcd_saved_q, [l2_job, #X4_JOB_DIGEST_ABCD]
	ldr	l3_abcd_saved_q, [l3_job, #X4_JOB_DIGEST_ABCD]
	ldr	l0_efgh_saved_q, [l0_job, #X4_JOB_DIGEST_EFGH]
	ldr	l1_efgh_saved_q, [l1_job, #X4_JOB_DIGEST_EFGH]
	ldr	l2_efgh_saved_q, [l2_job, #X4_JOB_DIGEST_EFGH]
	ldr	l3_efgh_saved_q, [l3_job, #X4_JOB_DIGEST_EFGH]

	sha256_4_rounds_high msg2,tmp0			/* rounds 52-55 */
	sha256_4_rounds_high msg3,tmp0			/* rounds 56-59 */

	/* rounds 60-63: the round input is already in l*_tmp0 */
	mov	tmp2_v.16b, l0_abcd_v.16b
	sha256h	l0_abcd_q, l0_efgh_q, l0_tmp0_v.4s
	sha256h2 l0_efgh_q, tmp2_q, l0_tmp0_v.4s

	mov	tmp2_v.16b, l1_abcd_v.16b
	sha256h	l1_abcd_q, l1_efgh_q, l1_tmp0_v.4s
	sha256h2 l1_efgh_q, tmp2_q, l1_tmp0_v.4s

	mov	tmp2_v.16b, l2_abcd_v.16b
	sha256h	l2_abcd_q, l2_efgh_q, l2_tmp0_v.4s
	sha256h2 l2_efgh_q, tmp2_q, l2_tmp0_v.4s

	mov	tmp2_v.16b, l3_abcd_v.16b
	sha256h	l3_abcd_q, l3_efgh_q, l3_tmp0_v.4s
	sha256h2 l3_efgh_q, tmp2_q, l3_tmp0_v.4s

	/* combine state: add the digest from before this block */
	add	l0_abcd_v.4s, l0_abcd_v.4s, l0_abcd_saved_v.4s
	add	l0_efgh_v.4s, l0_efgh_v.4s, l0_efgh_saved_v.4s
	add	l1_abcd_v.4s, l1_abcd_v.4s, l1_abcd_saved_v.4s
	add	l1_efgh_v.4s, l1_efgh_v.4s, l1_efgh_saved_v.4s
	add	l2_abcd_v.4s, l2_abcd_v.4s, l2_abcd_saved_v.4s
	add	l2_efgh_v.4s, l2_efgh_v.4s, l2_efgh_saved_v.4s
	add	l3_abcd_v.4s, l3_abcd_v.4s, l3_abcd_saved_v.4s
	add	l3_efgh_v.4s, l3_efgh_v.4s, l3_efgh_saved_v.4s

	/* publish the updated digests; the next block reloads them as "saved" */
	str	l0_abcd_q, [l0_job, #X4_JOB_DIGEST_ABCD]
	str	l0_efgh_q, [l0_job, #X4_JOB_DIGEST_EFGH]
	str	l1_abcd_q, [l1_job, #X4_JOB_DIGEST_ABCD]
	str	l1_efgh_q, [l1_job, #X4_JOB_DIGEST_EFGH]
	str	l2_abcd_q, [l2_job, #X4_JOB_DIGEST_ABCD]
	str	l2_efgh_q, [l2_job, #X4_JOB_DIGEST_EFGH]
	str	l3_abcd_q, [l3_job, #X4_JOB_DIGEST_ABCD]
	str	l3_efgh_q, [l3_job, #X4_JOB_DIGEST_EFGH]

	// len >= 1 here, so the decrement cannot overflow
	subs	len, len, #1
	b.gt	.Lx4_next_block

	// pop d8~d15
	ldp	d10, d11, [sp, #16]
	ldp	d12, d13, [sp, #32]
	ldp	d14, d15, [sp, #48]
	ldp	d8, d9, [sp], #64
.Lx4_return:
	ret

	.size	sha256_mb_ce_x4, .-sha256_mb_ce_x4
/*
 * SHA-256 round constants K[0..63], four per line in the order the code
 * consumes them (16 bytes per group of four rounds).
 *
 * The table is read through one base address plus pointer arithmetic, so it
 * is kept in plain .rodata.  A mergeable constant section ("aM" with a
 * 16-byte entry size) would allow the linker to de-duplicate individual
 * 16-byte entries against other objects and break the table's contiguity.
 */
	.section	.rodata
	.align	4
	.type	KEY, %object
KEY:
	.word	0x428A2F98, 0x71374491, 0xB5C0FBCF, 0xE9B5DBA5
	.word	0x3956C25B, 0x59F111F1, 0x923F82A4, 0xAB1C5ED5
	.word	0xD807AA98, 0x12835B01, 0x243185BE, 0x550C7DC3
	.word	0x72BE5D74, 0x80DEB1FE, 0x9BDC06A7, 0xC19BF174
	.word	0xE49B69C1, 0xEFBE4786, 0x0FC19DC6, 0x240CA1CC
	.word	0x2DE92C6F, 0x4A7484AA, 0x5CB0A9DC, 0x76F988DA
	.word	0x983E5152, 0xA831C66D, 0xB00327C8, 0xBF597FC7
	.word	0xC6E00BF3, 0xD5A79147, 0x06CA6351, 0x14292967
	.word	0x27B70A85, 0x2E1B2138, 0x4D2C6DFC, 0x53380D13
	.word	0x650A7354, 0x766A0ABB, 0x81C2C92E, 0x92722C85
	.word	0xA2BFE8A1, 0xA81A664B, 0xC24B8B70, 0xC76C51A3
	.word	0xD192E819, 0xD6990624, 0xF40E3585, 0x106AA070
	.word	0x19A4C116, 0x1E376C08, 0x2748774C, 0x34B0BCB5
	.word	0x391C0CB3, 0x4ED8AA4A, 0x5B9CCA4F, 0x682E6FF3
	.word	0x748F82EE, 0x78A5636F, 0x84C87814, 0x8CC70208
	.word	0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2
	.size	KEY, .-KEY