;; mh_sha1_murmur3_x64_128_block_avx512.asm
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;  Copyright(c) 2011-2016 Intel Corporation All rights reserved.
;
;  Redistribution and use in source and binary forms, with or without
;  modification, are permitted provided that the following conditions
;  are met:
;    * Redistributions of source code must retain the above copyright
;      notice, this list of conditions and the following disclaimer.
;    * Redistributions in binary form must reproduce the above copyright
;      notice, this list of conditions and the following disclaimer in
;      the documentation and/or other materials provided with the
;      distribution.
;    * Neither the name of Intel Corporation nor the names of its
;      contributors may be used to endorse or promote products derived
;      from this software without specific prior written permission.
;
;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
;  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
;  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
;  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
;  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
;  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
;  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
;  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; code to compute 16 SHA1 using AVX-512
;;
%include "reg_sizes.asm"

%ifdef HAVE_AS_KNOWS_AVX512	; whole implementation needs an AVX-512-capable assembler
default rel			; RIP-relative addressing for all static data (PIC-safe)

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%define VMOVPS  vmovdqu64	; unaligned 64-byte load/store used for all data movement
;SIMD variables definition
; SHA-1 working state: one 32-bit lane per segment, 16 segments per zmm
%define A	zmm0
%define B	zmm1
%define C	zmm2
%define D	zmm3
%define E	zmm4
; running digest H0..H4 for all 16 segments, kept live across blocks
%define HH0	zmm5
%define HH1	zmm6
%define HH2	zmm7
%define HH3	zmm8
%define HH4	zmm9
%define KT	zmm10		; current round constant, broadcast to all lanes
%define XTMP0	zmm11		; scratch: ROTL5(A)
%define XTMP1	zmm12		; scratch: Ft(B,C,D)
%define SHUF_MASK	zmm13	; byte-swap mask (little- -> big-endian per dword)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;using extra 16 ZMM registers to place the inverse input data
; W0..W15 hold the rolling 16-entry SHA-1 message schedule (avoids stack traffic)
%define W0	zmm16
%define W1	zmm17
%define W2	zmm18
%define W3	zmm19
%define W4	zmm20
%define W5	zmm21
%define W6	zmm22
%define W7	zmm23
%define W8	zmm24
%define W9	zmm25
%define W10	zmm26
%define W11	zmm27
%define W12	zmm28
%define W13	zmm29
%define W14	zmm30
%define W15	zmm31
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;macros definition
;; Rotate the SHA-1 working-variable names one position:
;; (A,B,C,D,E) <- (T,A,B,C,D). Pure preprocessor renaming - emits no code.
;; The %xdefine order matters: TMP_ captures E before it is overwritten.
%macro ROTATE_ARGS 0
%xdefine TMP_ E
%xdefine E D
%xdefine D C
%xdefine C B
%xdefine B A
%xdefine A TMP_
%endm
83 | ||
;; One SHA-1 round on all 16 segments simultaneously.
;; %1 = WT        zmm holding the 16 message words for this round
;; %2 = F_IMMED   vpternlogd truth-table immediate selecting Ft:
;;                0xCA = Ch, 0x96 = Parity (3-way XOR), 0xE8 = Maj
;; Clobbers XTMP0/XTMP1; vector ops are interleaved to overlap latencies.
%macro PROCESS_LOOP 2
%define %%WT		%1
%define %%F_IMMED	%2

	; T = ROTL_5(A) + Ft(B,C,D) + E + Kt + Wt
	; E=D, D=C, C=ROTL_30(B), B=A, A=T

	; Ft
	;  0-19          Ch(B,C,D) = (B&C) ^ (~B&D)
	;  20-39, 60-79  Parity(B,C,D) = B ^ C ^ D
	;  40-59         Maj(B,C,D) = (B&C) ^ (B&D) ^ (C&D)

	vmovdqa32	XTMP1, B		; Copy B
	vpaddd		E, E, %%WT		; E = E + Wt
	vpternlogd	XTMP1, C, D, %%F_IMMED	; TMP1 = Ft(B,C,D)
	vpaddd		E, E, KT		; E = E + Wt + Kt
	vprold		XTMP0, A, 5		; TMP0 = ROTL_5(A)
	vpaddd		E, E, XTMP1		; E = Ft(B,C,D) + E + Kt + Wt
	vprold		B, B, 30		; B = ROTL_30(B)
	vpaddd		E, E, XTMP0		; E = T

	ROTATE_ARGS				; rename registers so E now holds T
%endmacro
107 | ||
;; Insert murmur's instructions into this macro.
;; Every section_loop of mh_sha1 calls PROCESS_LOOP 80 and
;; MSG_SCHED_ROUND_16_79 64 times and processes 1024 Bytes.
;; So insert 1 murmur block per section_loop.
;;
;; Same SHA-1 round as PROCESS_LOOP, with one murmur3_x64_128 16-byte
;; block's scalar ops stitched between the vector instructions (the tail
;; of the murmur block - the h2 update - lives in MSG_SCHED_ROUND_16_79_MUR).
;; Scalar and vector streams are independent, so they execute in parallel.
%macro PROCESS_LOOP_MUR 2
%define %%WT		%1
%define %%F_IMMED	%2

	; T = ROTL_5(A) + Ft(B,C,D) + E + Kt + Wt
	; E=D, D=C, C=ROTL_30(B), B=A, A=T

	; Ft
	;  0-19          Ch(B,C,D) = (B&C) ^ (~B&D)
	;  20-39, 60-79  Parity(B,C,D) = B ^ C ^ D
	;  40-59         Maj(B,C,D) = (B&C) ^ (B&D) ^ (C&D)

	mov		mur_data1, [mur_in_p]		; k1 = next 8 input bytes
	mov		mur_data2, [mur_in_p + 8]	; k2 = following 8 bytes
	vmovdqa32	XTMP1, B		; Copy B
	imul		mur_data1, mur_c1_r	; k1 *= C1
	imul		mur_data2, mur_c2_r	; k2 *= C2
	vpaddd		E, E, %%WT		; E = E + Wt
	rol		mur_data1, R1		; k1 = ROTL64(k1, 31)
	rol		mur_data2, R2		; k2 = ROTL64(k2, 33)
	vpternlogd	XTMP1, C, D, %%F_IMMED	; TMP1 = Ft(B,C,D)
	imul		mur_data1, mur_c2_r	; k1 *= C2
	imul		mur_data2, mur_c1_r	; k2 *= C1
	vpaddd		E, E, KT		; E = E + Wt + Kt
	xor		mur_hash1, mur_data1	; h1 ^= k1
	add		mur_in_p, 16		; advance murmur input by one block
	vprold		XTMP0, A, 5		; TMP0 = ROTL_5(A)
	rol		mur_hash1, R3		; h1 = ROTL64(h1, 27)
	vpaddd		E, E, XTMP1		; E = Ft(B,C,D) + E + Kt + Wt
	add		mur_hash1, mur_hash2	; h1 += h2
	vprold		B, B, 30		; B = ROTL_30(B)
	lea		mur_hash1, [mur_hash1 + mur_hash1*4 + N1] ; h1 = h1*5 + N1
	vpaddd		E, E, XTMP0		; E = T
	xor		mur_hash2, mur_data2	; h2 ^= k2 (h2 update continues in sched macro)

	ROTATE_ARGS
%endmacro
149 | ||
;; SHA-1 message schedule for rounds 16..79, computed in place:
;; %%WT currently holds W[t-16] and is overwritten with W[t].
;; Also finishes the stitched murmur3 block (h2 rotate/add/mul tail).
%macro MSG_SCHED_ROUND_16_79_MUR 4
%define %%WT	%1	; W[t-16] on entry, W[t] on exit
%define %%WTp2	%2	; W[t-14]
%define %%WTp8	%3	; W[t-8]
%define %%WTp13	%4	; W[t-3]
	; Wt = ROTL_1(Wt-3 ^ Wt-8 ^ Wt-14 ^ Wt-16)
	; Wt+16 = ROTL_1(Wt+13 ^ Wt+8 ^ Wt+2 ^ Wt)
	vpternlogd	%%WT, %%WTp2, %%WTp8, 0x96	; 0x96 = three-way XOR
	rol		mur_hash2, R4			; h2 = ROTL64(h2, 31)
	vpxord		%%WT, %%WT, %%WTp13
	add		mur_hash2, mur_hash1		; h2 += h1
	lea		mur_hash2, [mur_hash2 + mur_hash2*4 + N2] ; h2 = h2*5 + N2
	vprold		%%WT, %%WT, 1
%endmacro
164 | ||
; APPEND(W,n) -> Wn : token-pasting helper used to index the W registers by number
%define APPEND(a,b) a %+ b
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%ifidn __OUTPUT_FORMAT__, elf64
; Linux (System V AMD64 ABI): args in rdi,rsi,rdx,rcx,r8,r9
%define arg0	rdi
%define arg1	rsi
%define arg2	rdx
%define arg3	rcx

%define arg4	r8d		; num_blocks is 32-bit, hence dword register
%define arg5	r9

%define tmp1	r10
%define tmp2	r11
%define tmp3	r12		; must be saved and restored
%define tmp4	r13		; must be saved and restored
%define tmp5	r14		; must be saved and restored
%define tmp6	r15		; must be saved and restored
%define tmp7	rbx		; must be saved and restored
%define tmp8	rbp		; must be saved and restored
%define return	rax

%define func(x) x:
; Save every callee-saved GPR this routine uses (no xmm saves needed on SysV).
%macro FUNC_SAVE 0
	push	r12
	push	r13
	push	r14
	push	r15
	push	rbx
	push	rbp
%endmacro
%macro FUNC_RESTORE 0
	pop	rbp
	pop	rbx
	pop	r15
	pop	r14
	pop	r13
	pop	r12
%endmacro
%else
; Windows (Microsoft x64 ABI): args in rcx,rdx,r8,r9; 5th arg on the stack
%define arg0	rcx
%define arg1	rdx
%define arg2	r8
%define arg3	r9

%define arg4	r10d		; loaded from the stack in FUNC_SAVE
%define arg5	r11
%define tmp1	r12		; must be saved and restored
%define tmp2	r13		; must be saved and restored
%define tmp3	r14		; must be saved and restored
%define tmp4	r15		; must be saved and restored
%define tmp5	rdi		; must be saved and restored
%define tmp6	rsi		; must be saved and restored
%define tmp7	rbx		; must be saved and restored
%define tmp8	rbp		; must be saved and restored
%define return	rax

%define stack_size	10*16 + 9*8	; must be an odd multiple of 8 (keeps rsp 16-aligned after call)
%define PS 8
%define arg(x)	[rsp + stack_size + PS + PS*x]	; caller stack args, past return address
; remove unwind info macros
%define func(x) x:
; Win64: xmm6-xmm15 are callee-saved, so spill them alongside the GPRs.
%macro FUNC_SAVE 0
	sub	rsp, stack_size
	movdqa	[rsp + 0*16], xmm6
	movdqa	[rsp + 1*16], xmm7
	movdqa	[rsp + 2*16], xmm8
	movdqa	[rsp + 3*16], xmm9
	movdqa	[rsp + 4*16], xmm10
	movdqa	[rsp + 5*16], xmm11
	movdqa	[rsp + 6*16], xmm12
	movdqa	[rsp + 7*16], xmm13
	movdqa	[rsp + 8*16], xmm14
	movdqa	[rsp + 9*16], xmm15
	mov	[rsp + 10*16 + 0*8], r12
	mov	[rsp + 10*16 + 1*8], r13
	mov	[rsp + 10*16 + 2*8], r14
	mov	[rsp + 10*16 + 3*8], r15
	mov	[rsp + 10*16 + 4*8], rdi
	mov	[rsp + 10*16 + 5*8], rsi
	mov	[rsp + 10*16 + 6*8], rbx
	mov	[rsp + 10*16 + 7*8], rbp
	mov	arg4, arg(4)		; fetch 5th argument (num_blocks) from stack
%endmacro

%macro FUNC_RESTORE 0
	movdqa	xmm6, [rsp + 0*16]
	movdqa	xmm7, [rsp + 1*16]
	movdqa	xmm8, [rsp + 2*16]
	movdqa	xmm9, [rsp + 3*16]
	movdqa	xmm10, [rsp + 4*16]
	movdqa	xmm11, [rsp + 5*16]
	movdqa	xmm12, [rsp + 6*16]
	movdqa	xmm13, [rsp + 7*16]
	movdqa	xmm14, [rsp + 8*16]
	movdqa	xmm15, [rsp + 9*16]
	mov	r12, [rsp + 10*16 + 0*8]
	mov	r13, [rsp + 10*16 + 1*8]
	mov	r14, [rsp + 10*16 + 2*8]
	mov	r15, [rsp + 10*16 + 3*8]
	mov	rdi, [rsp + 10*16 + 4*8]
	mov	rsi, [rsp + 10*16 + 5*8]
	mov	rbx, [rsp + 10*16 + 6*8]
	mov	rbp, [rsp + 10*16 + 7*8]
	add	rsp, stack_size
%endmacro
%endif
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%define loops		arg4	; number of 1 KB blocks remaining
;variables of mh_sha1
%define mh_in_p		arg0	; SHA-1 input pointer (advanced 1024 B per loop)
%define mh_digests_p	arg1	; uint32_t digests[5][16]
%define mh_data_p	arg2	; aligned frame buffer (unused in the AVX-512 path here)
%define mh_segs		tmp1
;variables of murmur3
%define mur_in_p	tmp2	; murmur input pointer (advanced 16 B per stitched round)
%define mur_digest_p	arg3	; 16-byte murmur3_x64_128 state: h1, h2
%define mur_hash1	tmp3
%define mur_hash2	tmp4
%define mur_data1	tmp5	; k1 scratch
%define mur_data2	return	; k2 scratch (rax; return value unused until exit)
%define mur_c1_r	tmp6	; C1 kept in a register - imul needs reg/mem source
%define mur_c2_r	arg5
; constants of murmur3_x64_128
%define R1	31		; k1 rotate
%define R2	33		; k2 rotate
%define R3	27		; h1 rotate
%define R4	31		; h2 rotate
%define M	5		; h multiplier (folded into the lea as *4+1; also shadowed by %assign M below)
%define N1	0x52dce729 ;DWORD
%define N2	0x38495ab5 ;DWORD
%define C1	QWORD(0x87c37b91114253d5)
%define C2	QWORD(0x4cf5ad432745937f)
;variables used by storing segs_digests on stack
%define RSP_SAVE	tmp7	; original rsp, restored before return

%define pref		tmp8
; Non-temporal prefetch of upcoming input (data used once, don't pollute caches)
%macro PREFETCH_X 1
%define %%mem %1
	prefetchnta %%mem
%endmacro

;init hash digests
; segs_digests:low addr-> high_addr
; a  | b  |  c | ...| p | (16)
; h0 | h0 | h0 | ...| h0 |    | Aa| Ab | Ac |...| Ap |
; h1 | h1 | h1 | ...| h1 |    | Ba| Bb | Bc |...| Bp |
; ....
; h5 | h5 | h5 | ...| h5 |    | Ea| Eb | Ec |...| Ep |

[bits 64]
section .text
align 32

;void mh_sha1_murmur3_x64_128_block_avx512 (const uint8_t * input_data,
;		uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS],
;		uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE],
;		uint32_t murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS],
;		uint32_t num_blocks);
; arg 0 pointer to input data
; arg 1 pointer to digests, include segments digests(uint32_t digests[16][5])
; arg 2 pointer to aligned_frame_buffer which is used to save the big_endian data.
; arg 3 pointer to murmur3 digest
; arg 4 number of 1KB blocks
;
; Processes num_blocks * 1024 bytes: 16 interleaved SHA-1 segments (one per
; 32-bit zmm lane) stitched with murmur3_x64_128 over the same input.
global mh_sha1_murmur3_x64_128_block_avx512
func(mh_sha1_murmur3_x64_128_block_avx512)
	FUNC_SAVE

	; save rsp
	mov	RSP_SAVE, rsp

	cmp	loops, 0
	jle	.return			; NOTE(review): signed compare on a uint32 count - assumes num_blocks < 2^31

	; align rsp to 64 Bytes needed by avx512
	and	rsp, ~0x3f

	; copy segs_digests into registers.
	VMOVPS	HH0, [mh_digests_p + 64*0]
	VMOVPS	HH1, [mh_digests_p + 64*1]
	VMOVPS	HH2, [mh_digests_p + 64*2]
	VMOVPS	HH3, [mh_digests_p + 64*3]
	VMOVPS	HH4, [mh_digests_p + 64*4]
	;a mask used to transform to big-endian data
	vmovdqa64 SHUF_MASK, [PSHUFFLE_BYTE_FLIP_MASK]

	;init murmur variables
	mov	mur_in_p, mh_in_p	;different steps between murmur and mh_sha1
	;load murmur hash digests and multiplier
	mov	mur_hash1, [mur_digest_p]
	mov	mur_hash2, [mur_digest_p + 8]
	mov	mur_c1_r, C1
	mov	mur_c2_r, C2

.block_loop:
	;transform to big-endian data and store on aligned_frame
	;using extra 16 ZMM registers instead of stack
	; load all 1024 input bytes into W0..W15 and byte-swap each dword
%assign I 0
%rep 8
%assign J (I+1)
	VMOVPS	APPEND(W,I),[mh_in_p + I*64+0*64]
	VMOVPS	APPEND(W,J),[mh_in_p + I*64+1*64]

	vpshufb	APPEND(W,I), APPEND(W,I), SHUF_MASK
	vpshufb	APPEND(W,J), APPEND(W,J), SHUF_MASK
%assign I (I+2)
%endrep

	; working state <- running digest
	vmovdqa64	A, HH0
	vmovdqa64	B, HH1
	vmovdqa64	C, HH2
	vmovdqa64	D, HH3
	vmovdqa64	E, HH4

	vmovdqa32	KT, [K00_19]
	; I = vpternlogd immediate for Ft: 0xCA=Ch, 0x96=Parity, 0xE8=Maj
	; J = W[t-16] slot, K = W[t-14], L = W[t-8], M = W[t-3] (mod-16 ring)
	; N = round counter 0..79
%assign I 0xCA
%assign J 0
%assign K 2
%assign L 8
%assign M 13
%assign N 0
%rep 80
%if N < 64 ; stitching 64 times
	PROCESS_LOOP_MUR	APPEND(W,J), I
	MSG_SCHED_ROUND_16_79_MUR	APPEND(W,J), APPEND(W,K), APPEND(W,L), APPEND(W,M)
%else ; 64 <= N < 80, without stitching
	; rounds 64-79 need no scheduling: W for them was produced in rounds 48-63
	PROCESS_LOOP	APPEND(W,J), I
%endif
%if N = 19
	vmovdqa32	KT, [K20_39]
%assign I 0x96
%elif N = 39
	vmovdqa32	KT, [K40_59]
%assign I 0xE8
%elif N = 59
	vmovdqa32	KT, [K60_79]
%assign I 0x96
%endif
%if N % 20 = 19
	; every 20 rounds, prefetch 256 B of the next 1 KB block
	PREFETCH_X [mh_in_p + 1024+128*(N / 20)]
	PREFETCH_X [mh_in_p + 1024+128*(N / 20 +1)]
%endif
%assign J ((J+1)% 16)
%assign K ((K+1)% 16)
%assign L ((L+1)% 16)
%assign M ((M+1)% 16)
%assign N (N+1)
%endrep

	; Add old digest
	vpaddd	HH0,A, HH0
	vpaddd	HH1,B, HH1
	vpaddd	HH2,C, HH2
	vpaddd	HH3,D, HH3
	vpaddd	HH4,E, HH4

	add	mh_in_p, 1024
	sub	loops, 1
	jne	.block_loop

	;store murmur-hash digest
	mov	[mur_digest_p], mur_hash1
	mov	[mur_digest_p + 8], mur_hash2

	; copy segs_digests to mh_digests_p
	VMOVPS	[mh_digests_p + 64*0], HH0
	VMOVPS	[mh_digests_p + 64*1], HH1
	VMOVPS	[mh_digests_p + 64*2], HH2
	VMOVPS	[mh_digests_p + 64*3], HH3
	VMOVPS	[mh_digests_p + 64*4], HH4

	mov	rsp, RSP_SAVE		; restore rsp

.return:
	FUNC_RESTORE
	ret

445 | ||
446 | ||
section .data align=64

align 64
; Per-dword byte-swap mask for vpshufb: converts each 32-bit word from
; little-endian memory order to SHA-1's big-endian word order.
; The 16-byte pattern repeats across the full 64-byte zmm width.
PSHUFFLE_BYTE_FLIP_MASK:
	times 4 dq 0x0405060700010203, 0x0c0d0e0f08090a0b

; SHA-1 round constants, each 32-bit value replicated across all 16 lanes.
K00_19:	times 8 dq 0x5A8279995A827999
K20_39:	times 8 dq 0x6ED9EBA16ED9EBA1
K40_59:	times 8 dq 0x8F1BBCDC8F1BBCDC
K60_79:	times 8 dq 0xCA62C1D6CA62C1D6

494 | ||
%else
; Assembler lacks AVX-512 support: emit no code. On win64 export a dummy
; symbol so the object file is never empty (some tools reject empty objects).
%ifidn __OUTPUT_FORMAT__, win64
global no_sha1_murmur3_x64_128_block_avx512
no_sha1_murmur3_x64_128_block_avx512:
%endif
%endif ; HAVE_AS_KNOWS_AVX512