;;
;; Copyright (c) 2012-2018, Intel Corporation
;;
;; Redistribution and use in source and binary forms, with or without
;; modification, are permitted provided that the following conditions are met:
;;
;;     * Redistributions of source code must retain the above copyright notice,
;;       this list of conditions and the following disclaimer.
;;     * Redistributions in binary form must reproduce the above copyright
;;       notice, this list of conditions and the following disclaimer in the
;;       documentation and/or other materials provided with the distribution.
;;     * Neither the name of Intel Corporation nor the names of its contributors
;;       may be used to endorse or promote products derived from this software
;;       without specific prior written permission.
;;
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
;; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
;; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
;; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
;; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
;; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
;; CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
;; OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
;; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;

;; Stack must be aligned to 32 bytes before call
;; Windows clobbers:  rax rdx     r8 r9 r10 r11 r12 r13 r14 r15
;; Windows preserves: rbx rcx rsi rdi rbp
;;
;; Linux clobbers:    rax rdx rsi    r9 r10 r11 r12 r13 r14 r15
;; Linux preserves:   rbx rcx rdi rbp r8
;;
;; clobbers ymm0-15

%include "os.asm"
;%define DO_DBGPRINT
%include "dbgprint.asm"
%include "mb_mgr_datastruct.asm"

section .data
default rel
align 32
PSHUFFLE_BYTE_FLIP_MASK: ;ddq 0x0c0d0e0f08090a0b0405060700010203
                         ;ddq 0x0c0d0e0f08090a0b0405060700010203
        dq 0x0405060700010203, 0x0c0d0e0f08090a0b
        dq 0x0405060700010203, 0x0c0d0e0f08090a0b
K00_19: ;ddq 0x5A8279995A8279995A8279995A827999
        ;ddq 0x5A8279995A8279995A8279995A827999
        dq 0x5A8279995A827999, 0x5A8279995A827999
        dq 0x5A8279995A827999, 0x5A8279995A827999
K20_39: ;ddq 0x6ED9EBA16ED9EBA16ED9EBA16ED9EBA1
        ;ddq 0x6ED9EBA16ED9EBA16ED9EBA16ED9EBA1
        dq 0x6ED9EBA16ED9EBA1, 0x6ED9EBA16ED9EBA1
        dq 0x6ED9EBA16ED9EBA1, 0x6ED9EBA16ED9EBA1
K40_59: ;ddq 0x8F1BBCDC8F1BBCDC8F1BBCDC8F1BBCDC
        ;ddq 0x8F1BBCDC8F1BBCDC8F1BBCDC8F1BBCDC
        dq 0x8F1BBCDC8F1BBCDC, 0x8F1BBCDC8F1BBCDC
        dq 0x8F1BBCDC8F1BBCDC, 0x8F1BBCDC8F1BBCDC
K60_79: ;ddq 0xCA62C1D6CA62C1D6CA62C1D6CA62C1D6
        ;ddq 0xCA62C1D6CA62C1D6CA62C1D6CA62C1D6
        dq 0xCA62C1D6CA62C1D6, 0xCA62C1D6CA62C1D6
        dq 0xCA62C1D6CA62C1D6, 0xCA62C1D6CA62C1D6

section .text

%ifdef LINUX
%define arg1    rdi
%define arg2    rsi
%define reg3    rdx
%else
%define arg1    rcx
%define arg2    rdx
%define reg3    r8
%endif

%define state    arg1
%define num_blks arg2

%define inp0 r9
%define inp1 r10
%define inp2 r11
%define inp3 r12
%define inp4 r13
%define inp5 r14
%define inp6 r15
%define inp7 reg3

%define IDX rax

; ymm0    A
; ymm1    B
; ymm2    C
; ymm3    D
; ymm4    E
; ymm5    F       AA
; ymm6    T0      BB
; ymm7    T1      CC
; ymm8    T2      DD
; ymm9    T3      EE
; ymm10   T4      TMP
; ymm11   T5      FUN
; ymm12   T6      K
; ymm13   T7      W14
; ymm14   T8      W15
; ymm15   T9      W16

%define A ymm0
%define B ymm1
%define C ymm2
%define D ymm3
%define E ymm4

%define F  ymm5
%define T0 ymm6
%define T1 ymm7
%define T2 ymm8
%define T3 ymm9
%define T4 ymm10
%define T5 ymm11
%define T6 ymm12
%define T7 ymm13
%define T8 ymm14
%define T9 ymm15

%define AA  ymm5
%define BB  ymm6
%define CC  ymm7
%define DD  ymm8
%define EE  ymm9
%define TMP ymm10
%define FUN ymm11
%define K   ymm12
%define W14 ymm13
%define W15 ymm14
%define W16 ymm15

;; Assume stack is aligned to 32 bytes before the call; the call itself
;; pushes an 8-byte return address, so FRAMESZ mod 32 must be 32-8 = 24
;; for "sub rsp, FRAMESZ" to restore 32-byte alignment.
%define FRAMESZ 32*16 + 24
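
;; The frame itself holds the message schedule: a 16-entry circular buffer
;; of W[] values, one 32-byte ymm row (8 lanes x 4 bytes each) per entry,
;; hence the 32*16 above.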

%define VMOVPS  vmovups

; TRANSPOSE8 r0, r1, r2, r3, r4, r5, r6, r7, t0, t1
; "transpose" data in {r0...r7} using temps {t0...t1}
; Input looks like: {r0 r1 r2 r3 r4 r5 r6 r7}
; r0 = {a7 a6 a5 a4 a3 a2 a1 a0}
; r1 = {b7 b6 b5 b4 b3 b2 b1 b0}
; r2 = {c7 c6 c5 c4 c3 c2 c1 c0}
; r3 = {d7 d6 d5 d4 d3 d2 d1 d0}
; r4 = {e7 e6 e5 e4 e3 e2 e1 e0}
; r5 = {f7 f6 f5 f4 f3 f2 f1 f0}
; r6 = {g7 g6 g5 g4 g3 g2 g1 g0}
; r7 = {h7 h6 h5 h4 h3 h2 h1 h0}
;
; Output looks like: {r0 r1 r2 r3 r4 r5 r6 r7}
; r0 = {h0 g0 f0 e0 d0 c0 b0 a0}
; r1 = {h1 g1 f1 e1 d1 c1 b1 a1}
; r2 = {h2 g2 f2 e2 d2 c2 b2 a2}
; r3 = {h3 g3 f3 e3 d3 c3 b3 a3}
; r4 = {h4 g4 f4 e4 d4 c4 b4 a4}
; r5 = {h5 g5 f5 e5 d5 c5 b5 a5}
; r6 = {h6 g6 f6 e6 d6 c6 b6 a6}
; r7 = {h7 g7 f7 e7 d7 c7 b7 a7}
;
%macro TRANSPOSE8 10
%define %%r0 %1
%define %%r1 %2
%define %%r2 %3
%define %%r3 %4
%define %%r4 %5
%define %%r5 %6
%define %%r6 %7
%define %%r7 %8
%define %%t0 %9
%define %%t1 %10
        ; process top half (r0..r3) {a...d}
        vshufps %%t0, %%r0, %%r1, 0x44  ; t0 = {b5 b4 a5 a4   b1 b0 a1 a0}
        vshufps %%r0, %%r0, %%r1, 0xEE  ; r0 = {b7 b6 a7 a6   b3 b2 a3 a2}
        vshufps %%t1, %%r2, %%r3, 0x44  ; t1 = {d5 d4 c5 c4   d1 d0 c1 c0}
        vshufps %%r2, %%r2, %%r3, 0xEE  ; r2 = {d7 d6 c7 c6   d3 d2 c3 c2}
        vshufps %%r3, %%t0, %%t1, 0xDD  ; r3 = {d5 c5 b5 a5   d1 c1 b1 a1}
        vshufps %%r1, %%r0, %%r2, 0x88  ; r1 = {d6 c6 b6 a6   d2 c2 b2 a2}
        vshufps %%r0, %%r0, %%r2, 0xDD  ; r0 = {d7 c7 b7 a7   d3 c3 b3 a3}
        vshufps %%t0, %%t0, %%t1, 0x88  ; t0 = {d4 c4 b4 a4   d0 c0 b0 a0}

        ; use r2 in place of t0
        ; process bottom half (r4..r7) {e...h}
        vshufps %%r2, %%r4, %%r5, 0x44  ; r2 = {f5 f4 e5 e4   f1 f0 e1 e0}
        vshufps %%r4, %%r4, %%r5, 0xEE  ; r4 = {f7 f6 e7 e6   f3 f2 e3 e2}
        vshufps %%t1, %%r6, %%r7, 0x44  ; t1 = {h5 h4 g5 g4   h1 h0 g1 g0}
        vshufps %%r6, %%r6, %%r7, 0xEE  ; r6 = {h7 h6 g7 g6   h3 h2 g3 g2}
        vshufps %%r7, %%r2, %%t1, 0xDD  ; r7 = {h5 g5 f5 e5   h1 g1 f1 e1}
        vshufps %%r5, %%r4, %%r6, 0x88  ; r5 = {h6 g6 f6 e6   h2 g2 f2 e2}
        vshufps %%r4, %%r4, %%r6, 0xDD  ; r4 = {h7 g7 f7 e7   h3 g3 f3 e3}
        vshufps %%t1, %%r2, %%t1, 0x88  ; t1 = {h4 g4 f4 e4   h0 g0 f0 e0}

        vperm2f128 %%r6, %%r5, %%r1, 0x13  ; h6...a6
        vperm2f128 %%r2, %%r5, %%r1, 0x02  ; h2...a2
        vperm2f128 %%r5, %%r7, %%r3, 0x13  ; h5...a5
        vperm2f128 %%r1, %%r7, %%r3, 0x02  ; h1...a1
        vperm2f128 %%r7, %%r4, %%r0, 0x13  ; h7...a7
        vperm2f128 %%r3, %%r4, %%r0, 0x02  ; h3...a3
        vperm2f128 %%r4, %%t1, %%t0, 0x13  ; h4...a4
        vperm2f128 %%r0, %%t1, %%t0, 0x02  ; h0...a0
%endmacro
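
;; In scalar terms (illustrative pseudo-C only), TRANSPOSE8 computes
;;
;;      for (i = 0; i < 8; i++)
;;              for (j = 0; j < 8; j++)
;;                      out[j][i] = in[i][j];
;;
;; on an 8x8 matrix of dwords, one row per ymm register: the vshufps
;; passes interleave dwords within each 128-bit half, and the final
;; vperm2f128 pass moves the halves into their transposed positions.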

;;
;; Magic functions defined in FIPS 180-1
;;
;MAGIC_F0 MACRO regF:REQ,regB:REQ,regC:REQ,regD:REQ,regT:REQ ;; (D ^ (B & (C ^ D)))
%macro MAGIC_F0 5
%define %%regF %1
%define %%regB %2
%define %%regC %3
%define %%regD %4
%define %%regT %5
        ;vmovdqa %%regF,%%regC
        vpxor   %%regF, %%regC,%%regD
        vpand   %%regF, %%regF,%%regB
        vpxor   %%regF, %%regF,%%regD
%endmacro
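
;; MAGIC_F0 is the FIPS 180-1 choice function Ch(B,C,D) = (B & C) | (~B & D);
;; the form D ^ (B & (C ^ D)) computed above is an equivalent rewrite that
;; needs no separate NOT instruction.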

;MAGIC_F1 MACRO regF:REQ,regB:REQ,regC:REQ,regD:REQ,regT:REQ ;; (B ^ C ^ D)
%macro MAGIC_F1 5
%define %%regF %1
%define %%regB %2
%define %%regC %3
%define %%regD %4
%define %%regT %5
        ;vmovdqa %%regF,%%regD
        vpxor   %%regF,%%regD,%%regC
        vpxor   %%regF,%%regF,%%regB
%endmacro
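
;; MAGIC_F1 is the parity function B ^ C ^ D, used for rounds 20..39 and
;; (via MAGIC_F3 below) for rounds 60..79.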

;MAGIC_F2 MACRO regF:REQ,regB:REQ,regC:REQ,regD:REQ,regT:REQ ;; ((B & C) | (B & D) | (C & D))
%macro MAGIC_F2 5
%define %%regF %1
%define %%regB %2
%define %%regC %3
%define %%regD %4
%define %%regT %5
        ;vmovdqa %%regF,%%regB
        ;vmovdqa %%regT,%%regB
        vpor    %%regF,%%regB,%%regC
        vpand   %%regT,%%regB,%%regC
        vpand   %%regF,%%regF,%%regD
        vpor    %%regF,%%regF,%%regT
%endmacro
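
;; MAGIC_F2 is the majority function Maj(B,C,D) = (B & C) | (B & D) | (C & D);
;; the four instructions above compute the equivalent ((B | C) & D) | (B & C).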

;MAGIC_F3 MACRO regF:REQ,regB:REQ,regC:REQ,regD:REQ,regT:REQ
%macro MAGIC_F3 5
%define %%regF %1
%define %%regB %2
%define %%regC %3
%define %%regD %4
%define %%regT %5
        MAGIC_F1 %%regF,%%regB,%%regC,%%regD,%%regT
%endmacro

; PROLD reg, imm, tmp
%macro PROLD 3
%define %%reg %1
%define %%imm %2
%define %%tmp %3
        ;vmovdqa %%tmp, %%reg
        vpsrld  %%tmp, %%reg, (32-%%imm)
        vpslld  %%reg, %%reg, %%imm
        vpor    %%reg, %%reg, %%tmp
%endmacro
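
;; PROLD is a per-dword rotate-left: reg = (reg << imm) | (reg >> (32 - imm)).
;; AVX2 has no vector rotate instruction (vprold arrives with AVX-512), so it
;; is synthesized from two shifts and an OR.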

; PROLD_nd reg, imm, tmp, src (non-destructive: rotates src, result in reg)
%macro PROLD_nd 4
%define %%reg %1
%define %%imm %2
%define %%tmp %3
%define %%src %4
        ;vmovdqa %%tmp, %%reg
        vpsrld  %%tmp, %%src, (32-%%imm)
        vpslld  %%reg, %%src, %%imm
        vpor    %%reg, %%reg, %%tmp
%endmacro

%macro SHA1_STEP_00_15 10
%define %%regA   %1
%define %%regB   %2
%define %%regC   %3
%define %%regD   %4
%define %%regE   %5
%define %%regT   %6
%define %%regF   %7
%define %%memW   %8
%define %%immCNT %9
%define %%MAGIC  %10
        vpaddd  %%regE, %%regE,%%immCNT
        vpaddd  %%regE, %%regE,[rsp + (%%memW * 32)]
        ;vmovdqa %%regT,%%regA
        PROLD_nd %%regT,5, %%regF,%%regA
        vpaddd  %%regE, %%regE,%%regT
        %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT      ;; FUN = MAGIC_Fi(B,C,D)
        PROLD   %%regB,30, %%regT
        vpaddd  %%regE, %%regE,%%regF
%endmacro
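
;; One round for i = 0..15, applied to all 8 lanes at once; per lane this is
;; roughly (illustrative pseudo-C, rol = 32-bit rotate-left):
;;
;;      E += K + W[i] + rol(A, 5) + MAGIC_F(B, C, D);
;;      B  = rol(B, 30);
;;
;; ROTATE_ARGS then renames (A,B,C,D,E) <- (E,A,B,C,D) for the next round.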

%macro SHA1_STEP_16_79 10
%define %%regA   %1
%define %%regB   %2
%define %%regC   %3
%define %%regD   %4
%define %%regE   %5
%define %%regT   %6
%define %%regF   %7
%define %%memW   %8
%define %%immCNT %9
%define %%MAGIC  %10
        vpaddd  %%regE, %%regE,%%immCNT

        vmovdqa W14, [rsp + ((%%memW - 14) & 15) * 32]
        vpxor   W16, W16, W14
        vpxor   W16, W16, [rsp + ((%%memW - 8) & 15) * 32]
        vpxor   W16, W16, [rsp + ((%%memW - 3) & 15) * 32]

        ;vmovdqa %%regF, W16
        vpsrld  %%regF, W16, (32-1)
        vpslld  W16, W16, 1
        vpor    %%regF, %%regF, W16
        ROTATE_W

        vmovdqa [rsp + ((%%memW - 0) & 15) * 32],%%regF
        vpaddd  %%regE, %%regE,%%regF

        ;vmovdqa %%regT,%%regA
        PROLD_nd %%regT,5, %%regF, %%regA
        vpaddd  %%regE, %%regE,%%regT
        %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT      ;; FUN = MAGIC_Fi(B,C,D)
        PROLD   %%regB,30, %%regT
        vpaddd  %%regE,%%regE,%%regF
%endmacro
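
;; Rounds 16..79 also extend the message schedule in place, treating the
;; stack frame as a 16-entry circular buffer; per lane this is roughly:
;;
;;      W[i & 15] = rol(W[(i-3) & 15] ^ W[(i-8) & 15] ^
;;                      W[(i-14) & 15] ^ W[(i-16) & 15], 1);
;;
;; The ROTATE_W renaming keeps the next round's W[i-16] live in a register
;; (W16), saving one stack load per round.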

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

%macro ROTATE_ARGS 0
%xdefine TMP_ E
%xdefine E D
%xdefine D C
%xdefine C B
%xdefine B A
%xdefine A TMP_
%endm

%macro ROTATE_W 0
%xdefine TMP_ W16
%xdefine W16 W15
%xdefine W15 W14
%xdefine W14 TMP_
%endm
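
;; Both macros rotate assembler-time names rather than data: the %xdefine
;; chains cost zero instructions at run time, where shuffling five ymm
;; registers with moves every round would not.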

align 32

; void sha1_x8_avx2(void *state, int num_blks)
; arg 1 : state    : pointer to args structure (transposed digest rows plus
;                    array[8] of input data pointers at _data_ptr_sha1)
; arg 2 : num_blks : size (in blocks) ;; assumed to be >= 1
MKGLOBAL(sha1_x8_avx2,function,internal)
sha1_x8_avx2:
        sub     rsp, FRAMESZ

        ;; Initialize digests
        vmovdqu A, [state + 0*SHA1_DIGEST_ROW_SIZE]
        vmovdqu B, [state + 1*SHA1_DIGEST_ROW_SIZE]
        vmovdqu C, [state + 2*SHA1_DIGEST_ROW_SIZE]
        vmovdqu D, [state + 3*SHA1_DIGEST_ROW_SIZE]
        vmovdqu E, [state + 4*SHA1_DIGEST_ROW_SIZE]
        DBGPRINTL_YMM "Sha1-AVX2 incoming transposed digest", A, B, C, D, E

        ;; transpose input onto stack
        mov     inp0,[state+_data_ptr_sha1+0*PTR_SZ]
        mov     inp1,[state+_data_ptr_sha1+1*PTR_SZ]
        mov     inp2,[state+_data_ptr_sha1+2*PTR_SZ]
        mov     inp3,[state+_data_ptr_sha1+3*PTR_SZ]
        mov     inp4,[state+_data_ptr_sha1+4*PTR_SZ]
        mov     inp5,[state+_data_ptr_sha1+5*PTR_SZ]
        mov     inp6,[state+_data_ptr_sha1+6*PTR_SZ]
        mov     inp7,[state+_data_ptr_sha1+7*PTR_SZ]

        xor     IDX, IDX
lloop:
        vmovdqa F, [rel PSHUFFLE_BYTE_FLIP_MASK]
%assign I 0
%rep 2
        VMOVPS  T0,[inp0+IDX]
        VMOVPS  T1,[inp1+IDX]
        VMOVPS  T2,[inp2+IDX]
        VMOVPS  T3,[inp3+IDX]
        VMOVPS  T4,[inp4+IDX]
        VMOVPS  T5,[inp5+IDX]
        VMOVPS  T6,[inp6+IDX]
        VMOVPS  T7,[inp7+IDX]
        TRANSPOSE8 T0, T1, T2, T3, T4, T5, T6, T7, T8, T9
        DBGPRINTL_YMM "Sha1-AVX2 incoming transposed input", T0, T1, T2, T3, T4, T5, T6, T7, T8, T9
        vpshufb T0, T0, F
        vmovdqa [rsp+(I*8+0)*32],T0
        vpshufb T1, T1, F
        vmovdqa [rsp+(I*8+1)*32],T1
        vpshufb T2, T2, F
        vmovdqa [rsp+(I*8+2)*32],T2
        vpshufb T3, T3, F
        vmovdqa [rsp+(I*8+3)*32],T3
        vpshufb T4, T4, F
        vmovdqa [rsp+(I*8+4)*32],T4
        vpshufb T5, T5, F
        vmovdqa [rsp+(I*8+5)*32],T5
        vpshufb T6, T6, F
        vmovdqa [rsp+(I*8+6)*32],T6
        vpshufb T7, T7, F
        vmovdqa [rsp+(I*8+7)*32],T7
        add     IDX, 32
%assign I (I+1)
%endrep
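
        ;; The frame now holds the full 16-dword block for all 8 lanes,
        ;; transposed so that [rsp + i*32] is W[i] for lanes 0..7, and
        ;; byte-swapped to big-endian (vpshufb with PSHUFFLE_BYTE_FLIP_MASK)
        ;; as SHA-1 requires.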

        ; save old digests
        vmovdqa AA, A
        vmovdqa BB, B
        vmovdqa CC, C
        vmovdqa DD, D
        vmovdqa EE, E

;;
;; perform 0-79 steps
;;
        vmovdqa K, [rel K00_19]
;; do rounds 0...15
%assign I 0
%rep 16
        SHA1_STEP_00_15 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0
        ROTATE_ARGS
%assign I (I+1)
%endrep

;; do rounds 16...19
        vmovdqa W16, [rsp + ((16 - 16) & 15) * 32]
        vmovdqa W15, [rsp + ((16 - 15) & 15) * 32]
%rep 4
        SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0
        ROTATE_ARGS
%assign I (I+1)
%endrep

;; do rounds 20...39
        vmovdqa K, [rel K20_39]
%rep 20
        SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F1
        ROTATE_ARGS
%assign I (I+1)
%endrep

;; do rounds 40...59
        vmovdqa K, [rel K40_59]
%rep 20
        SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F2
        ROTATE_ARGS
%assign I (I+1)
%endrep

;; do rounds 60...79
        vmovdqa K, [rel K60_79]
%rep 20
        SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F3
        ROTATE_ARGS
%assign I (I+1)
%endrep

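        ;; Feed-forward: fold the digest saved at the top of the loop back
        ;; into the working variables, completing this block's compression.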
        vpaddd  A,A,AA
        vpaddd  B,B,BB
        vpaddd  C,C,CC
        vpaddd  D,D,DD
        vpaddd  E,E,EE

        sub     num_blks, 1
        jne     lloop

        ; write out digests
        vmovdqu [state + 0*SHA1_DIGEST_ROW_SIZE], A
        vmovdqu [state + 1*SHA1_DIGEST_ROW_SIZE], B
        vmovdqu [state + 2*SHA1_DIGEST_ROW_SIZE], C
        vmovdqu [state + 3*SHA1_DIGEST_ROW_SIZE], D
        vmovdqu [state + 4*SHA1_DIGEST_ROW_SIZE], E
        DBGPRINTL_YMM "Sha1-AVX2 outgoing transposed digest", A, B, C, D, E

        ;; update input pointers
        add     inp0, IDX
        add     inp1, IDX
        add     inp2, IDX
        add     inp3, IDX
        add     inp4, IDX
        add     inp5, IDX
        add     inp6, IDX
        add     inp7, IDX
        mov     [state+_data_ptr_sha1+0*PTR_SZ], inp0
        mov     [state+_data_ptr_sha1+1*PTR_SZ], inp1
        mov     [state+_data_ptr_sha1+2*PTR_SZ], inp2
        mov     [state+_data_ptr_sha1+3*PTR_SZ], inp3
        mov     [state+_data_ptr_sha1+4*PTR_SZ], inp4
        mov     [state+_data_ptr_sha1+5*PTR_SZ], inp5
        mov     [state+_data_ptr_sha1+6*PTR_SZ], inp6
        mov     [state+_data_ptr_sha1+7*PTR_SZ], inp7

        ;;;;;;;;;;;;;;;;
        ;; Postamble

        add     rsp, FRAMESZ

        ret

%ifdef LINUX
section .note.GNU-stack noalloc noexec nowrite progbits
%endif