]>
Commit | Line | Data |
---|---|---|
60468255 JK |
1 | /* sha1-armv7-neon.S - ARM/NEON accelerated SHA-1 transform function |
2 | * | |
3 | * Copyright © 2013-2014 Jussi Kivilinna <jussi.kivilinna@iki.fi> | |
4 | * | |
5 | * This program is free software; you can redistribute it and/or modify it | |
6 | * under the terms of the GNU General Public License as published by the Free | |
7 | * Software Foundation; either version 2 of the License, or (at your option) | |
8 | * any later version. | |
9 | */ | |
10 | ||
11 | #include <linux/linkage.h> | |
0777e3e1 | 12 | #include <asm/assembler.h> |
60468255 JK |
13 | |
14 | .syntax unified | |
15 | .code 32 | |
16 | .fpu neon | |
17 | ||
18 | .text | |
19 | ||
20 | ||
21 | /* Context structure */ | |
22 | ||
/*
 * Byte offsets of the five 32-bit chaining words inside the state that
 * RSTATE points to. In the code visible here only state_h4 is used by
 * name (to load the fifth word); the whole state is otherwise read and
 * written with ldm/stm on RSTATE.
 */
23 | #define state_h0 0 | |
24 | #define state_h1 4 | |
25 | #define state_h2 8 | |
26 | #define state_h3 12 | |
27 | #define state_h4 16 | |
28 | ||
29 | ||
30 | /* Constants */ | |
31 | ||
/*
 * K1..K4 are the four SHA-1 round constants. Each one is emitted four
 * times in the .LK_VEC table so that a single 128-bit load fills every
 * 32-bit lane of one q register (qK1..qK4) with the same constant; the
 * function body loads the table with two vld1.32 instructions.
 * On ARM gas ".align 4" is a power-of-two exponent, i.e. the table is
 * requested to start on a 16-byte boundary.
 */
32 | #define K1 0x5A827999 | |
33 | #define K2 0x6ED9EBA1 | |
34 | #define K3 0x8F1BBCDC | |
35 | #define K4 0xCA62C1D6 | |
36 | .align 4 | |
37 | .LK_VEC: | |
38 | .LK1: .long K1, K1, K1, K1 | |
39 | .LK2: .long K2, K2, K2, K2 | |
40 | .LK3: .long K3, K3, K3, K3 | |
41 | .LK4: .long K4, K4, K4, K4 | |
42 | ||
43 | ||
44 | /* Register macros */ | |
45 | ||
/*
 * RSTATE, RDATA, RNBLKS: the three incoming arguments (ctx, data, nblks).
 * ROLDSTACK:             caller's sp, kept while sp points at the aligned
 *                        W+K scratch area.
 * RWK:                   store pointer into the W+K scratch area; it lives
 *                        in lr, which is pushed on entry and popped to pc.
 */
46 | #define RSTATE r0 | |
47 | #define RDATA r1 | |
48 | #define RNBLKS r2 | |
49 | #define ROLDSTACK r3 | |
50 | #define RWK lr | |
51 | ||
/* SHA-1 working variables a..e, held in core registers. */
52 | #define _a r4 | |
53 | #define _b r5 | |
54 | #define _c r6 | |
55 | #define _d r7 | |
56 | #define _e r8 | |
57 | ||
/* Scalar temporaries (also used to reload the old state at the end). */
58 | #define RT0 r9 | |
59 | #define RT1 r10 | |
60 | #define RT2 r11 | |
61 | #define RT3 r12 | |
62 | ||
/*
 * Eight q registers holding the 32 most recent message-schedule words,
 * four words per register. The mapping is deliberately not the identity:
 * W0/W7 are q0/q1 and W6/W5 are q5/q6, presumably so that the
 * "vld1.32 {W0, W7}" and "vld1.32 {W6, W5}" loads in the 0-15 precalc
 * name consecutive q registers — confirm before renumbering.
 */
63 | #define W0 q0 | |
0777e3e1 | 64 | #define W1 q7 |
60468255 JK |
65 | #define W2 q2 |
66 | #define W3 q3 | |
67 | #define W4 q4 | |
0777e3e1 AB |
68 | #define W5 q6 |
69 | #define W6 q5 | |
70 | #define W7 q1 | |
60468255 JK |
71 | |
/* NEON temporaries used by the precalc macros. */
72 | #define tmp0 q8 | |
73 | #define tmp1 q9 | |
74 | #define tmp2 q10 | |
75 | #define tmp3 q11 | |
76 | ||
/* Round constants, one per q register, replicated across all four lanes. */
77 | #define qK1 q12 | |
78 | #define qK2 q13 | |
79 | #define qK3 q14 | |
80 | #define qK4 q15 | |
81 | ||
0777e3e1 AB |
/*
 * ARM_LE(code): emit "code" only when CONFIG_CPU_BIG_ENDIAN is not
 * defined; on big-endian builds it expands to nothing. In this file it
 * wraps the vrev32.8 byte swaps that convert the big-endian message words
 * to little-endian lane order.
 */
82 | #ifdef CONFIG_CPU_BIG_ENDIAN |
83 | #define ARM_LE(code...) | |
84 | #else | |
85 | #define ARM_LE(code...) code | |
86 | #endif | |
60468255 JK |
87 | |
88 | /* Round function macros. */ | |
/*
 * Each _R_Fn macro performs one SHA-1 round on the scalar registers:
 *   e += rol(a, 5) + Fn(b, c, d) + WK[i];  b = rol(b, 30);
 * where WK[i] is the precomputed W[i]+K word read from the stack and a
 * left rotate by n is written as "ror #(32 - n)". The callers rotate the
 * a..e arguments from one round to the next instead of moving registers.
 * pre1/pre2/pre3 are message-schedule macro slots expanded between the
 * scalar instructions; each receives (i16, W, W_m04, ..., W_m28).
 * RT0 and RT3 are clobbered by every variant, RT1 additionally by F1/F3.
 */
89 | ||
/* Byte offset of 32-bit slot (i mod 16) in the 64-byte W+K area at sp. */
90 | #define WK_offs(i) (((i) & 15) * 4) | |
91 | ||
/*
 * F1(b, c, d) = (b & c) | (~b & d). The two terms never have a bit set in
 * common, so they are accumulated into e with add instead of being or-ed.
 */
92 | #define _R_F1(a,b,c,d,e,i,pre1,pre2,pre3,i16,\ | |
93 | W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
94 | ldr RT3, [sp, WK_offs(i)]; \ | |
95 | pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ | |
96 | bic RT0, d, b; \ | |
97 | add e, e, a, ror #(32 - 5); \ | |
98 | and RT1, c, b; \ | |
99 | pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ | |
100 | add RT0, RT0, RT3; \ | |
101 | add e, e, RT1; \ | |
102 | ror b, #(32 - 30); \ | |
103 | pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ | |
104 | add e, e, RT0; | |
105 | ||
/* F2(b, c, d) = b ^ c ^ d. */
106 | #define _R_F2(a,b,c,d,e,i,pre1,pre2,pre3,i16,\ | |
107 | W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
108 | ldr RT3, [sp, WK_offs(i)]; \ | |
109 | pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ | |
110 | eor RT0, d, b; \ | |
111 | add e, e, a, ror #(32 - 5); \ | |
112 | eor RT0, RT0, c; \ | |
113 | pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ | |
114 | add e, e, RT3; \ | |
115 | ror b, #(32 - 30); \ | |
116 | pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ | |
117 | add e, e, RT0; \ | |
118 | ||
/*
 * F3(b, c, d) = (b & c) | ((b ^ c) & d), i.e. the bitwise majority. As in
 * F1 the two terms are disjoint and are added into e separately.
 */
119 | #define _R_F3(a,b,c,d,e,i,pre1,pre2,pre3,i16,\ | |
120 | W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
121 | ldr RT3, [sp, WK_offs(i)]; \ | |
122 | pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ | |
123 | eor RT0, b, c; \ | |
124 | and RT1, b, c; \ | |
125 | add e, e, a, ror #(32 - 5); \ | |
126 | pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ | |
127 | and RT0, RT0, d; \ | |
128 | add RT1, RT1, RT3; \ | |
129 | add e, e, RT0; \ | |
130 | ror b, #(32 - 30); \ | |
131 | pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ | |
132 | add e, e, RT1; | |
133 | ||
/* F4 uses the same b ^ c ^ d function as F2, so it simply forwards. */
134 | #define _R_F4(a,b,c,d,e,i,pre1,pre2,pre3,i16,\ | |
135 | W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
136 | _R_F2(a,b,c,d,e,i,pre1,pre2,pre3,i16,\ | |
137 | W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) | |
138 | ||
/* Round with interleaved precalc: dispatches on f (F1..F4) by token pasting. */
139 | #define _R(a,b,c,d,e,f,i,pre1,pre2,pre3,i16,\ | |
140 | W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
141 | _R_##f(a,b,c,d,e,i,pre1,pre2,pre3,i16,\ | |
142 | W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) | |
143 | ||
/*
 * Round without precalc: all three slots are dummy. The i16/W/W_m..
 * names in the expansion are not parameters of R; they are forwarded as
 * plain tokens and discarded by dummy().
 */
144 | #define R(a,b,c,d,e,f,i) \ | |
145 | _R_##f(a,b,c,d,e,i,dummy,dummy,dummy,i16,\ | |
146 | W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) | |
147 | ||
/* Expands to nothing; fills unused pre1/pre2/pre3 slots. */
148 | #define dummy(...) | |
149 | ||
150 | ||
151 | /* Input expansion macros. */ | |
/*
 * All precalc macros use "curK", which the function body #defines to the
 * q register holding the round constant for the rounds being prepared.
 */
152 | ||
153 | /********* Precalc macros for rounds 0-15 *************************************/ | |
154 | ||
/*
 * Stand-alone precalc for rounds 0-15: load 64 bytes from RDATA
 * (post-incrementing it) into W0, W7, W6, W5, byte-swap every 32-bit word
 * on little-endian builds, add curK to each vector and store the 16
 * resulting W+K words to the scratch area starting at sp.
 * Clobbers RWK and tmp0..tmp3.
 */
155 | #define W_PRECALC_00_15() \ | |
156 | add RWK, sp, #(WK_offs(0)); \ | |
157 | \ | |
0777e3e1 AB |
158 | vld1.32 {W0, W7}, [RDATA]!; \ |
159 | ARM_LE(vrev32.8 W0, W0; ) /* big => little */ \ | |
160 | vld1.32 {W6, W5}, [RDATA]!; \ | |
60468255 | 161 | vadd.u32 tmp0, W0, curK; \ |
0777e3e1 AB |
162 | ARM_LE(vrev32.8 W7, W7; ) /* big => little */ \ |
163 | ARM_LE(vrev32.8 W6, W6; ) /* big => little */ \ | |
60468255 | 164 | vadd.u32 tmp1, W7, curK; \ |
0777e3e1 | 165 | ARM_LE(vrev32.8 W5, W5; ) /* big => little */ \ |
60468255 JK |
166 | vadd.u32 tmp2, W6, curK; \ |
167 | vst1.32 {tmp0, tmp1}, [RWK]!; \ | |
168 | vadd.u32 tmp3, W5, curK; \ | |
169 | vst1.32 {tmp2, tmp3}, [RWK]; \ | |
170 | ||
/*
 * The same work as W_PRECALC_00_15, split into steps _0 .. _12 of one
 * instruction each so that they can be dropped into the pre1/pre3 slots
 * of rounds 64-79 while the next block is being prepared. The macro
 * arguments are ignored; the steps must be expanded in numeric order.
 */
171 | #define WPRECALC_00_15_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
0777e3e1 | 172 | vld1.32 {W0, W7}, [RDATA]!; \ |
60468255 JK |
173 | |
174 | #define WPRECALC_00_15_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
175 | add RWK, sp, #(WK_offs(0)); \ | |
176 | ||
177 | #define WPRECALC_00_15_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
0777e3e1 | 178 | ARM_LE(vrev32.8 W0, W0; ) /* big => little */ \ |
60468255 JK |
179 | |
180 | #define WPRECALC_00_15_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
0777e3e1 | 181 | vld1.32 {W6, W5}, [RDATA]!; \ |
60468255 JK |
182 | |
183 | #define WPRECALC_00_15_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
184 | vadd.u32 tmp0, W0, curK; \ | |
185 | ||
186 | #define WPRECALC_00_15_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
0777e3e1 | 187 | ARM_LE(vrev32.8 W7, W7; ) /* big => little */ \ |
60468255 JK |
188 | |
189 | #define WPRECALC_00_15_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
0777e3e1 | 190 | ARM_LE(vrev32.8 W6, W6; ) /* big => little */ \ |
60468255 JK |
191 | |
192 | #define WPRECALC_00_15_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
193 | vadd.u32 tmp1, W7, curK; \ | |
194 | ||
195 | #define WPRECALC_00_15_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
0777e3e1 | 196 | ARM_LE(vrev32.8 W5, W5; ) /* big => little */ \ |
60468255 JK |
197 | |
198 | #define WPRECALC_00_15_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
199 | vadd.u32 tmp2, W6, curK; \ | |
200 | ||
201 | #define WPRECALC_00_15_10(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
202 | vst1.32 {tmp0, tmp1}, [RWK]!; \ | |
203 | ||
204 | #define WPRECALC_00_15_11(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
205 | vadd.u32 tmp3, W5, curK; \ | |
206 | ||
207 | #define WPRECALC_00_15_12(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
208 | vst1.32 {tmp2, tmp3}, [RWK]; \ | |
209 | ||
210 | ||
211 | /********* Precalc macros for rounds 16-31 ************************************/ | |
212 | ||
/*
 * Steps _0 .. _11 compute four schedule words at once,
 *   W[j] = rol(W[j-3] ^ W[j-8] ^ W[j-14] ^ W[j-16], 1)   for j = i .. i+3,
 * into register W, then store W + curK to the scratch slot for round i.
 * W_mNN names the register holding W[i-NN .. i-NN+3].
 *
 * The W[j-3] term of the last lane is W[i], which is produced by this very
 * computation. That lane is therefore first computed with a zero in place
 * of W[i] (steps _0 .. _6), and the missing contribution is xor-ed in
 * afterwards: tmp1 carries the un-rotated lane-0 value moved to the last
 * lane, rotated left by 2 (steps _5 .. _9).
 *
 * Clobbers tmp0, tmp1 and RWK. The steps must be expanded in numeric order.
 */
213 | #define WPRECALC_16_31_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
214 | veor tmp0, tmp0; \ | |
215 | vext.8 W, W_m16, W_m12, #8; \ | |
216 | ||
217 | #define WPRECALC_16_31_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
218 | add RWK, sp, #(WK_offs(i)); \ | |
219 | vext.8 tmp0, W_m04, tmp0, #4; \ | |
220 | ||
221 | #define WPRECALC_16_31_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
222 | veor tmp0, tmp0, W_m16; \ | |
223 | veor.32 W, W, W_m08; \ | |
224 | ||
225 | #define WPRECALC_16_31_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
226 | veor tmp1, tmp1; \ | |
227 | veor W, W, tmp0; \ | |
228 | ||
229 | #define WPRECALC_16_31_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
230 | vshl.u32 tmp0, W, #1; \ | |
231 | ||
232 | #define WPRECALC_16_31_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
233 | vext.8 tmp1, tmp1, W, #(16-12); \ | |
234 | vshr.u32 W, W, #31; \ | |
235 | ||
236 | #define WPRECALC_16_31_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
237 | vorr tmp0, tmp0, W; \ | |
238 | vshr.u32 W, tmp1, #30; \ | |
239 | ||
240 | #define WPRECALC_16_31_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
241 | vshl.u32 tmp1, tmp1, #2; \ | |
242 | ||
243 | #define WPRECALC_16_31_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
244 | veor tmp0, tmp0, W; \ | |
245 | ||
246 | #define WPRECALC_16_31_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
247 | veor W, tmp0, tmp1; \ | |
248 | ||
249 | #define WPRECALC_16_31_10(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
250 | vadd.u32 tmp0, W, curK; \ | |
251 | ||
252 | #define WPRECALC_16_31_11(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
253 | vst1.32 {tmp0}, [RWK]; | |
254 | ||
255 | ||
256 | /********* Precalc macros for rounds 32-79 ************************************/ | |
257 | ||
/*
 * Steps _0 .. _9 compute four schedule words at once using
 *   W[j] = rol(W[j-6] ^ W[j-16] ^ W[j-28] ^ W[j-32], 2)   for j = i .. i+3.
 * All inputs lie at least six words back, so no lane depends on another
 * lane of the same result vector and no fix-up is needed.
 *
 * On entry register W still holds W[i-32 .. i-29] (the eight W registers
 * are reused cyclically by the callers) and is updated in place; tmp0 is
 * built from W_m08/W_m04 to give W[i-6 .. i-3]. The result plus curK is
 * stored to the scratch slot for round (i & ~3).
 *
 * Clobbers tmp0, tmp1 and RWK. The steps must be expanded in numeric order.
 */
258 | #define WPRECALC_32_79_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
259 | veor W, W_m28; \ | |
260 | ||
261 | #define WPRECALC_32_79_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
262 | vext.8 tmp0, W_m08, W_m04, #8; \ | |
263 | ||
264 | #define WPRECALC_32_79_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
265 | veor W, W_m16; \ | |
266 | ||
267 | #define WPRECALC_32_79_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
268 | veor W, tmp0; \ | |
269 | ||
270 | #define WPRECALC_32_79_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
271 | add RWK, sp, #(WK_offs(i&~3)); \ | |
272 | ||
273 | #define WPRECALC_32_79_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
274 | vshl.u32 tmp1, W, #2; \ | |
275 | ||
276 | #define WPRECALC_32_79_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
277 | vshr.u32 tmp0, W, #30; \ | |
278 | ||
279 | #define WPRECALC_32_79_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
280 | vorr W, tmp0, tmp1; \ | |
281 | ||
282 | #define WPRECALC_32_79_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
283 | vadd.u32 tmp0, W, curK; \ | |
284 | ||
285 | #define WPRECALC_32_79_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ | |
286 | vst1.32 {tmp0}, [RWK]; | |
287 | ||
288 | ||
289 | /* | |
290 | * Transform nblks*64 bytes (nblks*16 32-bit words) at DATA. | |
291 | * | |
292 | * unsigned int | |
293 | * sha1_transform_neon (void *ctx, const unsigned char *data, | |
294 | * unsigned int nblks) | |
295 | */ | |
/*
 * NOTE(review): the prototype above says "unsigned int", but no visible
 * instruction writes r0 before returning, so r0 still holds ctx on exit.
 * Confirm the intended return type against the C declaration.
 *
 * Outline of the code below:
 *   - nblks == 0 returns immediately without touching ctx.
 *   - A 64-byte, 16-byte-aligned scratch area is carved out below sp for
 *     the 16 most recent W[i]+K words; the caller's sp is kept in
 *     ROLDSTACK and restored at .Lend.
 *   - Each loop iteration runs rounds 0-63 of the current block with the
 *     NEON instructions that prepare W+K for rounds 16-79 interleaved into
 *     the scalar rounds. Rounds 64-79 are then either interleaved with the
 *     load/precalc of the next block (loop path) or run alone (.Lend, last
 *     block).
 */
296 | .align 3 | |
297 | ENTRY(sha1_transform_neon) | |
298 | /* input: | |
299 | * r0: ctx, CTX | |
300 | * r1: data (64*nblks bytes) | |
301 | * r2: nblks | |
302 | */ | |
303 | ||
304 | cmp RNBLKS, #0; | |
305 | beq .Ldo_nothing; | |
306 | ||
307 | push {r4-r12, lr}; | |
308 | /*vpush {q4-q7};*/ | |
/*
 * NOTE(review): W4, W6, W5 and W1 are mapped to q4, q5, q6 and q7, which
 * are clobbered below without being saved (the vpush/vpop pair is
 * commented out). Presumably the caller is responsible for preserving
 * NEON state around this call — confirm.
 */
309 | ||
310 | adr RT3, .LK_VEC; | |
311 | ||
312 | mov ROLDSTACK, sp; | |
313 | ||
314 | /* Align stack. */ | |
/* Reserve 16 words for W+K and round sp down to a 16-byte boundary. */
315 | sub RT0, sp, #(16*4); | |
316 | and RT0, #(~(16-1)); | |
317 | mov sp, RT0; | |
318 | ||
319 | vld1.32 {qK1-qK2}, [RT3]!; /* Load K1,K2 */ | |
320 | ||
321 | /* Get the values of the chaining variables. */ | |
322 | ldm RSTATE, {_a-_e}; | |
323 | ||
324 | vld1.32 {qK3-qK4}, [RT3]; /* Load K3,K4 */ | |
325 | ||
326 | #undef curK | |
327 | #define curK qK1 | |
328 | /* Precalc 0-15. */ | |
329 | W_PRECALC_00_15(); | |
330 | ||
331 | .Loop: | |
332 | /* Transform 0-15 + Precalc 16-31. */ | |
333 | _R( _a, _b, _c, _d, _e, F1, 0, | |
334 | WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 16, | |
335 | W4, W5, W6, W7, W0, _, _, _ ); | |
336 | _R( _e, _a, _b, _c, _d, F1, 1, | |
337 | WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 16, | |
338 | W4, W5, W6, W7, W0, _, _, _ ); | |
339 | _R( _d, _e, _a, _b, _c, F1, 2, | |
340 | WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 16, | |
341 | W4, W5, W6, W7, W0, _, _, _ ); | |
342 | _R( _c, _d, _e, _a, _b, F1, 3, | |
343 | WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,16, | |
344 | W4, W5, W6, W7, W0, _, _, _ ); | |
345 | ||
/* curK is the constant added to the words being precalculated (20 onwards). */
346 | #undef curK | |
347 | #define curK qK2 | |
348 | _R( _b, _c, _d, _e, _a, F1, 4, | |
349 | WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 20, | |
350 | W3, W4, W5, W6, W7, _, _, _ ); | |
351 | _R( _a, _b, _c, _d, _e, F1, 5, | |
352 | WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 20, | |
353 | W3, W4, W5, W6, W7, _, _, _ ); | |
354 | _R( _e, _a, _b, _c, _d, F1, 6, | |
355 | WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 20, | |
356 | W3, W4, W5, W6, W7, _, _, _ ); | |
357 | _R( _d, _e, _a, _b, _c, F1, 7, | |
358 | WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,20, | |
359 | W3, W4, W5, W6, W7, _, _, _ ); | |
360 | ||
361 | _R( _c, _d, _e, _a, _b, F1, 8, | |
362 | WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 24, | |
363 | W2, W3, W4, W5, W6, _, _, _ ); | |
364 | _R( _b, _c, _d, _e, _a, F1, 9, | |
365 | WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 24, | |
366 | W2, W3, W4, W5, W6, _, _, _ ); | |
367 | _R( _a, _b, _c, _d, _e, F1, 10, | |
368 | WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 24, | |
369 | W2, W3, W4, W5, W6, _, _, _ ); | |
370 | _R( _e, _a, _b, _c, _d, F1, 11, | |
371 | WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,24, | |
372 | W2, W3, W4, W5, W6, _, _, _ ); | |
373 | ||
374 | _R( _d, _e, _a, _b, _c, F1, 12, | |
375 | WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 28, | |
376 | W1, W2, W3, W4, W5, _, _, _ ); | |
377 | _R( _c, _d, _e, _a, _b, F1, 13, | |
378 | WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 28, | |
379 | W1, W2, W3, W4, W5, _, _, _ ); | |
380 | _R( _b, _c, _d, _e, _a, F1, 14, | |
381 | WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 28, | |
382 | W1, W2, W3, W4, W5, _, _, _ ); | |
383 | _R( _a, _b, _c, _d, _e, F1, 15, | |
384 | WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,28, | |
385 | W1, W2, W3, W4, W5, _, _, _ ); | |
386 | ||
387 | /* Transform 16-63 + Precalc 32-79. */ | |
388 | _R( _e, _a, _b, _c, _d, F1, 16, | |
389 | WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 32, | |
390 | W0, W1, W2, W3, W4, W5, W6, W7); | |
391 | _R( _d, _e, _a, _b, _c, F1, 17, | |
392 | WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 32, | |
393 | W0, W1, W2, W3, W4, W5, W6, W7); | |
394 | _R( _c, _d, _e, _a, _b, F1, 18, | |
395 | WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 32, | |
396 | W0, W1, W2, W3, W4, W5, W6, W7); | |
397 | _R( _b, _c, _d, _e, _a, F1, 19, | |
398 | WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 32, | |
399 | W0, W1, W2, W3, W4, W5, W6, W7); | |
400 | ||
401 | _R( _a, _b, _c, _d, _e, F2, 20, | |
402 | WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 36, | |
403 | W7, W0, W1, W2, W3, W4, W5, W6); | |
404 | _R( _e, _a, _b, _c, _d, F2, 21, | |
405 | WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 36, | |
406 | W7, W0, W1, W2, W3, W4, W5, W6); | |
407 | _R( _d, _e, _a, _b, _c, F2, 22, | |
408 | WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 36, | |
409 | W7, W0, W1, W2, W3, W4, W5, W6); | |
410 | _R( _c, _d, _e, _a, _b, F2, 23, | |
411 | WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 36, | |
412 | W7, W0, W1, W2, W3, W4, W5, W6); | |
413 | ||
414 | #undef curK | |
415 | #define curK qK3 | |
416 | _R( _b, _c, _d, _e, _a, F2, 24, | |
417 | WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 40, | |
418 | W6, W7, W0, W1, W2, W3, W4, W5); | |
419 | _R( _a, _b, _c, _d, _e, F2, 25, | |
420 | WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 40, | |
421 | W6, W7, W0, W1, W2, W3, W4, W5); | |
422 | _R( _e, _a, _b, _c, _d, F2, 26, | |
423 | WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 40, | |
424 | W6, W7, W0, W1, W2, W3, W4, W5); | |
425 | _R( _d, _e, _a, _b, _c, F2, 27, | |
426 | WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 40, | |
427 | W6, W7, W0, W1, W2, W3, W4, W5); | |
428 | ||
429 | _R( _c, _d, _e, _a, _b, F2, 28, | |
430 | WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 44, | |
431 | W5, W6, W7, W0, W1, W2, W3, W4); | |
432 | _R( _b, _c, _d, _e, _a, F2, 29, | |
433 | WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 44, | |
434 | W5, W6, W7, W0, W1, W2, W3, W4); | |
435 | _R( _a, _b, _c, _d, _e, F2, 30, | |
436 | WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 44, | |
437 | W5, W6, W7, W0, W1, W2, W3, W4); | |
438 | _R( _e, _a, _b, _c, _d, F2, 31, | |
439 | WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 44, | |
440 | W5, W6, W7, W0, W1, W2, W3, W4); | |
441 | ||
442 | _R( _d, _e, _a, _b, _c, F2, 32, | |
443 | WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 48, | |
444 | W4, W5, W6, W7, W0, W1, W2, W3); | |
445 | _R( _c, _d, _e, _a, _b, F2, 33, | |
446 | WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 48, | |
447 | W4, W5, W6, W7, W0, W1, W2, W3); | |
448 | _R( _b, _c, _d, _e, _a, F2, 34, | |
449 | WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 48, | |
450 | W4, W5, W6, W7, W0, W1, W2, W3); | |
451 | _R( _a, _b, _c, _d, _e, F2, 35, | |
452 | WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 48, | |
453 | W4, W5, W6, W7, W0, W1, W2, W3); | |
454 | ||
455 | _R( _e, _a, _b, _c, _d, F2, 36, | |
456 | WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 52, | |
457 | W3, W4, W5, W6, W7, W0, W1, W2); | |
458 | _R( _d, _e, _a, _b, _c, F2, 37, | |
459 | WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 52, | |
460 | W3, W4, W5, W6, W7, W0, W1, W2); | |
461 | _R( _c, _d, _e, _a, _b, F2, 38, | |
462 | WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 52, | |
463 | W3, W4, W5, W6, W7, W0, W1, W2); | |
464 | _R( _b, _c, _d, _e, _a, F2, 39, | |
465 | WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 52, | |
466 | W3, W4, W5, W6, W7, W0, W1, W2); | |
467 | ||
468 | _R( _a, _b, _c, _d, _e, F3, 40, | |
469 | WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 56, | |
470 | W2, W3, W4, W5, W6, W7, W0, W1); | |
471 | _R( _e, _a, _b, _c, _d, F3, 41, | |
472 | WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 56, | |
473 | W2, W3, W4, W5, W6, W7, W0, W1); | |
474 | _R( _d, _e, _a, _b, _c, F3, 42, | |
475 | WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 56, | |
476 | W2, W3, W4, W5, W6, W7, W0, W1); | |
477 | _R( _c, _d, _e, _a, _b, F3, 43, | |
478 | WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 56, | |
479 | W2, W3, W4, W5, W6, W7, W0, W1); | |
480 | ||
481 | #undef curK | |
482 | #define curK qK4 | |
483 | _R( _b, _c, _d, _e, _a, F3, 44, | |
484 | WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 60, | |
485 | W1, W2, W3, W4, W5, W6, W7, W0); | |
486 | _R( _a, _b, _c, _d, _e, F3, 45, | |
487 | WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 60, | |
488 | W1, W2, W3, W4, W5, W6, W7, W0); | |
489 | _R( _e, _a, _b, _c, _d, F3, 46, | |
490 | WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 60, | |
491 | W1, W2, W3, W4, W5, W6, W7, W0); | |
492 | _R( _d, _e, _a, _b, _c, F3, 47, | |
493 | WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 60, | |
494 | W1, W2, W3, W4, W5, W6, W7, W0); | |
495 | ||
496 | _R( _c, _d, _e, _a, _b, F3, 48, | |
497 | WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 64, | |
498 | W0, W1, W2, W3, W4, W5, W6, W7); | |
499 | _R( _b, _c, _d, _e, _a, F3, 49, | |
500 | WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 64, | |
501 | W0, W1, W2, W3, W4, W5, W6, W7); | |
502 | _R( _a, _b, _c, _d, _e, F3, 50, | |
503 | WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 64, | |
504 | W0, W1, W2, W3, W4, W5, W6, W7); | |
505 | _R( _e, _a, _b, _c, _d, F3, 51, | |
506 | WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 64, | |
507 | W0, W1, W2, W3, W4, W5, W6, W7); | |
508 | ||
509 | _R( _d, _e, _a, _b, _c, F3, 52, | |
510 | WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 68, | |
511 | W7, W0, W1, W2, W3, W4, W5, W6); | |
512 | _R( _c, _d, _e, _a, _b, F3, 53, | |
513 | WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 68, | |
514 | W7, W0, W1, W2, W3, W4, W5, W6); | |
515 | _R( _b, _c, _d, _e, _a, F3, 54, | |
516 | WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 68, | |
517 | W7, W0, W1, W2, W3, W4, W5, W6); | |
518 | _R( _a, _b, _c, _d, _e, F3, 55, | |
519 | WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 68, | |
520 | W7, W0, W1, W2, W3, W4, W5, W6); | |
521 | ||
522 | _R( _e, _a, _b, _c, _d, F3, 56, | |
523 | WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 72, | |
524 | W6, W7, W0, W1, W2, W3, W4, W5); | |
525 | _R( _d, _e, _a, _b, _c, F3, 57, | |
526 | WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 72, | |
527 | W6, W7, W0, W1, W2, W3, W4, W5); | |
528 | _R( _c, _d, _e, _a, _b, F3, 58, | |
529 | WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 72, | |
530 | W6, W7, W0, W1, W2, W3, W4, W5); | |
531 | _R( _b, _c, _d, _e, _a, F3, 59, | |
532 | WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 72, | |
533 | W6, W7, W0, W1, W2, W3, W4, W5); | |
534 | ||
/*
 * Decrement the block counter early. The flags set here are consumed by
 * the "beq .Lend" after round 63; the round and precalc macros expanded in
 * between contain no flag-setting instructions.
 */
535 | subs RNBLKS, #1; | |
536 | ||
537 | _R( _a, _b, _c, _d, _e, F4, 60, | |
538 | WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 76, | |
539 | W5, W6, W7, W0, W1, W2, W3, W4); | |
540 | _R( _e, _a, _b, _c, _d, F4, 61, | |
541 | WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 76, | |
542 | W5, W6, W7, W0, W1, W2, W3, W4); | |
543 | _R( _d, _e, _a, _b, _c, F4, 62, | |
544 | WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 76, | |
545 | W5, W6, W7, W0, W1, W2, W3, W4); | |
546 | _R( _c, _d, _e, _a, _b, F4, 63, | |
547 | WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 76, | |
548 | W5, W6, W7, W0, W1, W2, W3, W4); | |
549 | ||
550 | beq .Lend; | |
551 | ||
552 | /* Transform 64-79 + Precalc 0-15 of next block. */ | |
553 | #undef curK | |
554 | #define curK qK1 | |
555 | _R( _b, _c, _d, _e, _a, F4, 64, | |
556 | WPRECALC_00_15_0, dummy, dummy, _, _, _, _, _, _, _, _, _ ); | |
557 | _R( _a, _b, _c, _d, _e, F4, 65, | |
558 | WPRECALC_00_15_1, dummy, dummy, _, _, _, _, _, _, _, _, _ ); | |
559 | _R( _e, _a, _b, _c, _d, F4, 66, | |
560 | WPRECALC_00_15_2, dummy, dummy, _, _, _, _, _, _, _, _, _ ); | |
561 | _R( _d, _e, _a, _b, _c, F4, 67, | |
562 | WPRECALC_00_15_3, dummy, dummy, _, _, _, _, _, _, _, _, _ ); | |
563 | ||
564 | _R( _c, _d, _e, _a, _b, F4, 68, | |
565 | dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ ); | |
566 | _R( _b, _c, _d, _e, _a, F4, 69, | |
567 | dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ ); | |
568 | _R( _a, _b, _c, _d, _e, F4, 70, | |
569 | WPRECALC_00_15_4, dummy, dummy, _, _, _, _, _, _, _, _, _ ); | |
570 | _R( _e, _a, _b, _c, _d, F4, 71, | |
571 | WPRECALC_00_15_5, dummy, dummy, _, _, _, _, _, _, _, _, _ ); | |
572 | ||
573 | _R( _d, _e, _a, _b, _c, F4, 72, | |
574 | dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ ); | |
575 | _R( _c, _d, _e, _a, _b, F4, 73, | |
576 | dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ ); | |
577 | _R( _b, _c, _d, _e, _a, F4, 74, | |
578 | WPRECALC_00_15_6, dummy, dummy, _, _, _, _, _, _, _, _, _ ); | |
579 | _R( _a, _b, _c, _d, _e, F4, 75, | |
580 | WPRECALC_00_15_7, dummy, dummy, _, _, _, _, _, _, _, _, _ ); | |
581 | ||
582 | _R( _e, _a, _b, _c, _d, F4, 76, | |
583 | WPRECALC_00_15_8, dummy, dummy, _, _, _, _, _, _, _, _, _ ); | |
584 | _R( _d, _e, _a, _b, _c, F4, 77, | |
585 | WPRECALC_00_15_9, dummy, dummy, _, _, _, _, _, _, _, _, _ ); | |
586 | _R( _c, _d, _e, _a, _b, F4, 78, | |
587 | WPRECALC_00_15_10, dummy, dummy, _, _, _, _, _, _, _, _, _ ); | |
588 | _R( _b, _c, _d, _e, _a, F4, 79, | |
589 | WPRECALC_00_15_11, dummy, WPRECALC_00_15_12, _, _, _, _, _, _, _, _, _ ); | |
590 | ||
591 | /* Update the chaining variables. */ | |
/*
 * The old state is added to a..e and the sums are written back to ctx;
 * _a.._e keep the new state and so start the next block directly.
 */
592 | ldm RSTATE, {RT0-RT3}; | |
593 | add _a, RT0; | |
594 | ldr RT0, [RSTATE, #state_h4]; | |
595 | add _b, RT1; | |
596 | add _c, RT2; | |
597 | add _d, RT3; | |
598 | add _e, RT0; | |
599 | stm RSTATE, {_a-_e}; | |
600 | ||
601 | b .Loop; | |
602 | ||
603 | .Lend: | |
604 | /* Transform 64-79 */ | |
/* Last block: no further input to prepare, so the rounds run without precalc. */
605 | R( _b, _c, _d, _e, _a, F4, 64 ); | |
606 | R( _a, _b, _c, _d, _e, F4, 65 ); | |
607 | R( _e, _a, _b, _c, _d, F4, 66 ); | |
608 | R( _d, _e, _a, _b, _c, F4, 67 ); | |
609 | R( _c, _d, _e, _a, _b, F4, 68 ); | |
610 | R( _b, _c, _d, _e, _a, F4, 69 ); | |
611 | R( _a, _b, _c, _d, _e, F4, 70 ); | |
612 | R( _e, _a, _b, _c, _d, F4, 71 ); | |
613 | R( _d, _e, _a, _b, _c, F4, 72 ); | |
614 | R( _c, _d, _e, _a, _b, F4, 73 ); | |
615 | R( _b, _c, _d, _e, _a, F4, 74 ); | |
616 | R( _a, _b, _c, _d, _e, F4, 75 ); | |
617 | R( _e, _a, _b, _c, _d, F4, 76 ); | |
618 | R( _d, _e, _a, _b, _c, F4, 77 ); | |
619 | R( _c, _d, _e, _a, _b, F4, 78 ); | |
620 | R( _b, _c, _d, _e, _a, F4, 79 ); | |
621 | ||
/* The W+K scratch area is no longer needed: restore the caller's sp. */
622 | mov sp, ROLDSTACK; | |
623 | ||
624 | /* Update the chaining variables. */ | |
625 | ldm RSTATE, {RT0-RT3}; | |
626 | add _a, RT0; | |
627 | ldr RT0, [RSTATE, #state_h4]; | |
628 | add _b, RT1; | |
629 | add _c, RT2; | |
630 | add _d, RT3; | |
631 | /*vpop {q4-q7};*/ | |
632 | add _e, RT0; | |
633 | stm RSTATE, {_a-_e}; | |
634 | ||
635 | pop {r4-r12, pc}; | |
636 | ||
637 | .Ldo_nothing: | |
638 | bx lr | |
639 | ENDPROC(sha1_transform_neon) |