1 /**********************************************************************
2 Copyright(c) 2019 Arm Corporation All rights reserved.
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions are met:
7 * Redistributions of source code must retain the above copyright
8 notice, this list of conditions and the following disclaimer.
9 * Redistributions in binary form must reproduce the above copyright
10 notice, this list of conditions and the following disclaimer in
11 the documentation and/or other materials provided with the distribution.
13 * Neither the name of Arm Corporation nor the names of its
14 contributors may be used to endorse or promote products derived
15 from this software without specific prior written permission.
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 **********************************************************************/
/*
 * declare_var_vector_reg name, reg
 *
 * Takes a symbolic name and a vector register number (both required).
 * NOTE(review): the macro body is not visible in this view. The rest of
 * the file refers to <name>_q and <name>_v after invoking this macro, so
 * it presumably creates those register aliases for register <reg> --
 * confirm against the full macro body.
 */
38 .macro declare_var_vector_reg name:req,reg:req
/*
 * sha256_4_rounds_high msg, tmp0
 *
 * Runs one sha256h/sha256h2 pair (four SHA-256 rounds) on each of the four
 * lanes l0..l3, handling lanes 0/1 first and lanes 2/3 second.
 *
 *   msg  - name suffix selecting l<N>_<msg>; these message words are added
 *          to key_v and left in l<N>_<tmp0> for the following invocation.
 *   tmp0 - name suffix selecting l<N>_<tmp0>; on entry it holds the
 *          message+key value consumed by this invocation.
 *
 * Writes tmp0..tmp3, l<N>_<tmp0>, l<N>_abcd and l<N>_efgh.
 *
 * NOTE(review): tmp3 and key are both declared with register number 15 in
 * this file. tmp3 is written in each half of this macro while key_v is read
 * by the adds of the lane 2/3 half and of the next invocation. No reload of
 * key_v is visible in this view; confirm that the full file reloads the
 * round constants at the required points.
 */
48 .macro sha256_4_rounds_high msg:req,tmp0:req
// ---- lanes 0 and 1 ----
// take the previously prepared message+key values for this group of rounds
50 mov tmp0_v.16b,l0_\tmp0\()_v.16b
51 mov tmp1_v.16b,l1_\tmp0\()_v.16b
// prepare message+key for the following invocation
52 add l0_\tmp0\()_v.4s,l0_\msg\()_v.4s,key_v.4s
53 add l1_\tmp0\()_v.4s,l1_\msg\()_v.4s,key_v.4s
// sha256h overwrites abcd, but sha256h2 needs the old abcd: keep a copy
54 mov tmp2_v.16b,l0_abcd_v.16b
55 mov tmp3_v.16b,l1_abcd_v.16b
56 sha256h l0_abcd_q,l0_efgh_q,tmp0_v.4s
57 sha256h l1_abcd_q,l1_efgh_q,tmp1_v.4s
58 sha256h2 l0_efgh_q,tmp2_q,tmp0_v.4s
59 sha256h2 l1_efgh_q,tmp3_q,tmp1_v.4s
// ---- lanes 2 and 3: same sequence, reusing tmp0..tmp3 ----
62 mov tmp0_v.16b,l2_\tmp0\()_v.16b
63 mov tmp1_v.16b,l3_\tmp0\()_v.16b
65 add l2_\tmp0\()_v.4s,l2_\msg\()_v.4s,key_v.4s
66 add l3_\tmp0\()_v.4s,l3_\msg\()_v.4s,key_v.4s
67 mov tmp2_v.16b,l2_abcd_v.16b
68 mov tmp3_v.16b,l3_abcd_v.16b
69 sha256h l2_abcd_q,l2_efgh_q,tmp0_v.4s
70 sha256h l3_abcd_q,l3_efgh_q,tmp1_v.4s
71 sha256h2 l2_efgh_q,tmp2_q,tmp0_v.4s
72 sha256h2 l3_efgh_q,tmp3_q,tmp1_v.4s
/*
 * sha256_4_rounds_low msg0, msg1, msg2, msg3, tmp0
 *
 * Same four rounds per lane as sha256_4_rounds_high (invoked with msg1 and
 * tmp0), wrapped in a message-schedule update: l<N>_<msg0> is rewritten in
 * place by sha256su0 (with msg1) and sha256su1 (with msg2 and msg3) for
 * every lane l0..l3.
 *
 * All five arguments are name suffixes; msg0..msg3 select the l<N>_<msgX>
 * message registers and tmp0 is passed through to sha256_4_rounds_high.
 */
79 .macro sha256_4_rounds_low msg0:req,msg1:req,msg2:req,msg3:req,tmp0:req
// first half of the schedule update for msg0, all four lanes
80 sha256su0 l0_\msg0\()_v.4s,l0_\msg1\()_v.4s
81 sha256su0 l1_\msg0\()_v.4s,l1_\msg1\()_v.4s
82 sha256su0 l2_\msg0\()_v.4s,l2_\msg1\()_v.4s
83 sha256su0 l3_\msg0\()_v.4s,l3_\msg1\()_v.4s
// hash rounds are placed between the two schedule halves
84 sha256_4_rounds_high \msg1,\tmp0
// second half of the schedule update for msg0, all four lanes
85 sha256su1 l0_\msg0\()_v.4s,l0_\msg2\()_v.4s,l0_\msg3\()_v.4s
86 sha256su1 l1_\msg0\()_v.4s,l1_\msg2\()_v.4s,l1_\msg3\()_v.4s
87 sha256su1 l2_\msg0\()_v.4s,l2_\msg2\()_v.4s,l2_\msg3\()_v.4s
88 sha256su1 l3_\msg0\()_v.4s,l3_\msg2\()_v.4s,l3_\msg3\()_v.4s
/*
 * Vector register assignments, made through declare_var_vector_reg
 * (name, register number).
 *
 * Register numbers are reused on purpose in two places:
 *   - key and tmp3 are both declared on 15;
 *   - the l<N>_*_saved names use 16,17,20,21,24,25,28,29, the same numbers
 *     as the l<N>_msg0 / l<N>_msg1 names.
 * Code using one name of such a pair must not expect the other to survive.
 */
// round-constant vector added to the message words
96 declare_var_vector_reg key,15
// working digest halves (abcd / efgh) for lanes 0..3
102 declare_var_vector_reg l0_abcd,0
103 declare_var_vector_reg l0_efgh,1
104 declare_var_vector_reg l1_abcd,2
105 declare_var_vector_reg l1_efgh,3
106 declare_var_vector_reg l2_abcd,4
107 declare_var_vector_reg l2_efgh,5
108 declare_var_vector_reg l3_abcd,6
109 declare_var_vector_reg l3_efgh,7
// digest values re-read from the job and added after the last round;
// these overlap the msg0/msg1 register numbers declared further down
110 declare_var_vector_reg l1_abcd_saved,16
111 declare_var_vector_reg l1_efgh_saved,17
112 declare_var_vector_reg l0_abcd_saved,20
113 declare_var_vector_reg l0_efgh_saved,21
114 declare_var_vector_reg l2_abcd_saved,24
115 declare_var_vector_reg l2_efgh_saved,25
116 declare_var_vector_reg l3_abcd_saved,28
117 declare_var_vector_reg l3_efgh_saved,29
// per-lane message+key value carried from one round group to the next
121 declare_var_vector_reg l0_tmp0,8
122 declare_var_vector_reg l1_tmp0,9
123 declare_var_vector_reg l2_tmp0,10
124 declare_var_vector_reg l3_tmp0,11
// scratch registers shared by all lanes inside the round macros
126 declare_var_vector_reg tmp0,12
127 declare_var_vector_reg tmp1,13
128 declare_var_vector_reg tmp2,14
129 declare_var_vector_reg tmp3,15
// four message vectors (msg0..msg3) per lane
134 declare_var_vector_reg l0_msg0,16
135 declare_var_vector_reg l0_msg1,17
136 declare_var_vector_reg l0_msg2,18
137 declare_var_vector_reg l0_msg3,19
138 declare_var_vector_reg l1_msg0,20
139 declare_var_vector_reg l1_msg1,21
140 declare_var_vector_reg l1_msg2,22
141 declare_var_vector_reg l1_msg3,23
142 declare_var_vector_reg l2_msg0,24
143 declare_var_vector_reg l2_msg1,25
144 declare_var_vector_reg l2_msg2,26
145 declare_var_vector_reg l2_msg3,27
146 declare_var_vector_reg l3_msg0,28
147 declare_var_vector_reg l3_msg1,29
148 declare_var_vector_reg l3_msg2,30
149 declare_var_vector_reg l3_msg3,31
154 void sha256_mb_ce_x4(SHA256_JOB *, SHA256_JOB *, SHA256_JOB *, SHA256_JOB *, int);
/*
 * sha256_mb_ce_x4
 *
 * Visible portion of the four-lane SHA-256 routine built on the Armv8
 * SHA-2 instructions. For each lane N (0..3) the code below:
 *   - reads a data pointer from [lN_job] and the two digest halves from
 *     [lN_job, 64] and [lN_job, 80];
 *   - loads 64 bytes of message and byte-swaps every 32-bit word (rev32);
 *   - runs the round macros for rounds 0-63;
 *   - adds the digest re-read from the job and stores the sums back to
 *     offsets 64 and 80 of the job.
 *
 * NOTE(review): the entry label, the binding of lN_job / lN_data to
 * registers, any loads of key_v, any per-block loop and the final return
 * are not visible in this view; confirm them against the full file.
 */
169 .global sha256_mb_ce_x4
170 .type sha256_mb_ce_x4, %function
// per lane: message pointer from offset 0 of the job, digest from 64 and 80
177 ldr l0_data, [l0_job]
178 ldr l0_abcd_q, [l0_job, 64]
179 ldr l0_efgh_q, [l0_job, 80]
180 ldr l1_data, [l1_job]
181 ldr l1_abcd_q, [l1_job, 64]
182 ldr l1_efgh_q, [l1_job, 80]
183 ldr l2_data, [l2_job]
184 ldr l2_abcd_q, [l2_job, 64]
185 ldr l2_efgh_q, [l2_job, 80]
186 ldr l3_data, [l3_job]
187 ldr l3_abcd_q, [l3_job, 64]
188 ldr l3_efgh_q, [l3_job, 80]
// load one 64-byte message block per lane into msg0..msg3
197 ld1 {l0_msg0_v.4s-l0_msg3_v.4s},[l0_data]
198 ld1 {l1_msg0_v.4s-l1_msg3_v.4s},[l1_data]
199 ld1 {l2_msg0_v.4s-l2_msg3_v.4s},[l2_data]
200 ld1 {l3_msg0_v.4s-l3_msg3_v.4s},[l3_data]
203 // advance each data pointer past the 64 bytes just loaded
204 add l0_data,l0_data,64
205 add l1_data,l1_data,64
206 add l2_data,l2_data,64
207 add l3_data,l3_data,64
// byte-swap every 32-bit message word; once msg0 is swapped, prepare
// msg0 + key in lN_tmp0 as input for the first group of rounds
212 rev32 l0_msg0_v.16b,l0_msg0_v.16b
213 rev32 l0_msg1_v.16b,l0_msg1_v.16b
214 add l0_tmp0_v.4s, l0_msg0_v.4s,key_v.4s
215 rev32 l0_msg2_v.16b,l0_msg2_v.16b
216 rev32 l0_msg3_v.16b,l0_msg3_v.16b
218 rev32 l1_msg0_v.16b,l1_msg0_v.16b
219 rev32 l1_msg1_v.16b,l1_msg1_v.16b
220 add l1_tmp0_v.4s, l1_msg0_v.4s,key_v.4s
221 rev32 l1_msg2_v.16b,l1_msg2_v.16b
222 rev32 l1_msg3_v.16b,l1_msg3_v.16b
224 rev32 l2_msg0_v.16b,l2_msg0_v.16b
225 rev32 l2_msg1_v.16b,l2_msg1_v.16b
226 add l2_tmp0_v.4s, l2_msg0_v.4s,key_v.4s
227 rev32 l2_msg2_v.16b,l2_msg2_v.16b
228 rev32 l2_msg3_v.16b,l2_msg3_v.16b
230 rev32 l3_msg0_v.16b,l3_msg0_v.16b
231 rev32 l3_msg1_v.16b,l3_msg1_v.16b
232 add l3_tmp0_v.4s, l3_msg0_v.4s,key_v.4s
233 rev32 l3_msg2_v.16b,l3_msg2_v.16b
234 rev32 l3_msg3_v.16b,l3_msg3_v.16b
// twelve invocations with schedule update, rotating the msg roles each time
238 sha256_4_rounds_low msg0,msg1,msg2,msg3,tmp0 /* rounds 0-3 */
239 sha256_4_rounds_low msg1,msg2,msg3,msg0,tmp0
240 sha256_4_rounds_low msg2,msg3,msg0,msg1,tmp0
241 sha256_4_rounds_low msg3,msg0,msg1,msg2,tmp0
242 sha256_4_rounds_low msg0,msg1,msg2,msg3,tmp0 /* rounds 16-19 */
243 sha256_4_rounds_low msg1,msg2,msg3,msg0,tmp0
244 sha256_4_rounds_low msg2,msg3,msg0,msg1,tmp0
245 sha256_4_rounds_low msg3,msg0,msg1,msg2,tmp0
246 sha256_4_rounds_low msg0,msg1,msg2,msg3,tmp0 /* rounds 32-35 */
247 sha256_4_rounds_low msg1,msg2,msg3,msg0,tmp0
248 sha256_4_rounds_low msg2,msg3,msg0,msg1,tmp0
249 sha256_4_rounds_low msg3,msg0,msg1,msg2,tmp0
// from here on no further schedule update is issued
253 sha256_4_rounds_high msg1,tmp0 /* rounds 48-51 */
255 /* msg0 and msg1 are no longer read below; the *_saved names are declared on the same register numbers */
256 ldr l0_abcd_saved_q, [l0_job, 64]
257 ldr l1_abcd_saved_q, [l1_job, 64]
258 ldr l2_abcd_saved_q, [l2_job, 64]
259 ldr l3_abcd_saved_q, [l3_job, 64]
260 ldr l0_efgh_saved_q, [l0_job, 80]
261 ldr l1_efgh_saved_q, [l1_job, 80]
262 ldr l2_efgh_saved_q, [l2_job, 80]
263 ldr l3_efgh_saved_q, [l3_job, 80]
265 sha256_4_rounds_high msg2,tmp0
266 sha256_4_rounds_high msg3,tmp0
// last group of rounds: consume the message+key value left in lN_tmp0;
// tmp2 keeps the old abcd because sha256h overwrites it before sha256h2
269 mov tmp2_v.16b,l0_abcd_v.16b
270 sha256h l0_abcd_q,l0_efgh_q,l0_tmp0_v.4s
271 sha256h2 l0_efgh_q,tmp2_q,l0_tmp0_v.4s
273 mov tmp2_v.16b,l1_abcd_v.16b
274 sha256h l1_abcd_q,l1_efgh_q,l1_tmp0_v.4s
275 sha256h2 l1_efgh_q,tmp2_q,l1_tmp0_v.4s
277 mov tmp2_v.16b,l2_abcd_v.16b
278 sha256h l2_abcd_q,l2_efgh_q,l2_tmp0_v.4s
279 sha256h2 l2_efgh_q,tmp2_q,l2_tmp0_v.4s
281 mov tmp2_v.16b,l3_abcd_v.16b
282 sha256h l3_abcd_q,l3_efgh_q,l3_tmp0_v.4s
283 sha256h2 l3_efgh_q,tmp2_q,l3_tmp0_v.4s
// add the digest values re-read from the job to the round output
286 add l0_abcd_v.4s,l0_abcd_v.4s,l0_abcd_saved_v.4s
287 add l0_efgh_v.4s,l0_efgh_v.4s,l0_efgh_saved_v.4s
288 add l1_abcd_v.4s,l1_abcd_v.4s,l1_abcd_saved_v.4s
289 add l1_efgh_v.4s,l1_efgh_v.4s,l1_efgh_saved_v.4s
290 add l2_abcd_v.4s,l2_abcd_v.4s,l2_abcd_saved_v.4s
291 add l2_efgh_v.4s,l2_efgh_v.4s,l2_efgh_saved_v.4s
292 add l3_abcd_v.4s,l3_abcd_v.4s,l3_abcd_saved_v.4s
293 add l3_efgh_v.4s,l3_efgh_v.4s,l3_efgh_saved_v.4s
// write the updated digest back to offsets 64 and 80 of each job
295 str l0_abcd_q, [l0_job, 64]
296 str l0_efgh_q, [l0_job, 80]
297 str l1_abcd_q, [l1_job, 64]
298 str l1_efgh_q, [l1_job, 80]
299 str l2_abcd_q, [l2_job, 64]
300 str l2_efgh_q, [l2_job, 80]
301 str l3_abcd_q, [l3_job, 64]
302 str l3_efgh_q, [l3_job, 80]
// reload d8/d9 from the stack and release 192 bytes (post-indexed).
// NOTE(review): the matching save, and the handling of d10-d15 (registers
// 8-15 are all used above), are not visible in this view -- confirm.
310 ldp d8, d9, [sp], 192
313 .size sha256_mb_ce_x4, .-sha256_mb_ce_x4
/*
 * Switch to a read-only constant section: "a" = allocatable, "M" =
 * mergeable, @progbits = holds data, entity size 16 bytes.
 *
 * The name was ".rol0_data.cst16", which looks like a global rename of
 * "data" to "l0_data" accidentally applied to ".rodata". It is restored to
 * the conventional ".rodata.cst16" so the section is matched by the usual
 * .rodata input-section patterns instead of being placed as an orphan.
 * NOTE(review): the data emitted into this section is not visible in this
 * view.
 */
314 .section .rodata.cst16,"aM",@progbits,16