;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;  Copyright(c) 2011-2016 Intel Corporation All rights reserved.
;
;  Redistribution and use in source and binary forms, with or without
;  modification, are permitted provided that the following conditions
;  are met:
;    * Redistributions of source code must retain the above copyright
;      notice, this list of conditions and the following disclaimer.
;    * Redistributions in binary form must reproduce the above copyright
;      notice, this list of conditions and the following disclaimer in
;      the documentation and/or other materials provided with the
;      distribution.
;    * Neither the name of Intel Corporation nor the names of its
;      contributors may be used to endorse or promote products derived
;      from this software without specific prior written permission.
;
;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
;  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
;  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
;  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
;  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
;  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
;  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
;  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

%include "sha256_mb_mgr_datastruct.asm"
%include "reg_sizes.asm"

[bits 64]
default rel
section .text

;; code to compute quad SHA256 using AVX
;; Logic designed/laid out by JDG

; transpose r0, r1, r2, r3, t0, t1
; "transpose" data in {r0..r3} using temps {t0..t1}
; Input looks like: {r0 r1 r2 r3}
; r0 = {a3 a2 a1 a0}
; r1 = {b3 b2 b1 b0}
; r2 = {c3 c2 c1 c0}
; r3 = {d3 d2 d1 d0}
;
; output looks like: {t0 r1 r0 r3}
; t0 = {d0 c0 b0 a0}
; r1 = {d1 c1 b1 a1}
; r0 = {d2 c2 b2 a2}
; r3 = {d3 c3 b3 a3}
;
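; How the shuffles achieve this: vshufps dst, a, b, imm takes its two low
; dwords from a and its two high dwords from b, each picked by a 2-bit field
; of imm. So 0x44 = {b1 b0 a1 a0} and 0xEE = {b3 b2 a3 a2} split each register
; pair into low/high word halves, then 0x88 = {b2 b0 a2 a0} and
; 0xDD = {b3 b1 a3 a1} gather the even/odd words, completing the 4x4
; transpose in eight shuffles.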
%macro TRANSPOSE 6
%define %%r0 %1
%define %%r1 %2
%define %%r2 %3
%define %%r3 %4
%define %%t0 %5
%define %%t1 %6
	vshufps	%%t0, %%r0, %%r1, 0x44	; t0 = {b1 b0 a1 a0}
	vshufps	%%r0, %%r0, %%r1, 0xEE	; r0 = {b3 b2 a3 a2}

	vshufps	%%t1, %%r2, %%r3, 0x44	; t1 = {d1 d0 c1 c0}
	vshufps	%%r2, %%r2, %%r3, 0xEE	; r2 = {d3 d2 c3 c2}

	vshufps	%%r1, %%t0, %%t1, 0xDD	; r1 = {d1 c1 b1 a1}

	vshufps	%%r3, %%r0, %%r2, 0xDD	; r3 = {d3 c3 b3 a3}

	vshufps	%%r0, %%r0, %%r2, 0x88	; r0 = {d2 c2 b2 a2}
	vshufps	%%t0, %%t0, %%t1, 0x88	; t0 = {d0 c0 b0 a0}
%endmacro


%define TABLE K256_4_MB
%define SZ 4
%define SZ4 4*SZ
%define ROUNDS 64*SZ4

%define a xmm0
%define b xmm1
%define c xmm2
%define d xmm3
%define e xmm4
%define f xmm5
%define g xmm6
%define h xmm7

%define a0 xmm8
%define a1 xmm9
%define a2 xmm10

%define TT0 xmm14
%define TT1 xmm13
%define TT2 xmm12
%define TT3 xmm11
%define TT4 xmm10
%define TT5 xmm9

%define T1 xmm14
%define TMP xmm15

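; Note on register reuse: TT4/TT5 alias a2/a1 (xmm10/xmm9) and T1 aliases
; TT0 (xmm14). This is safe because the TT registers are only live while a
; block is being loaded, transposed, and byte-swapped; by the time a round
; needs a0..a2 as scratch, the corresponding TT values are dead.
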
; rotate the state-variable names, not the data: each round "shifts" a..h
%macro ROTATE_ARGS 0
%xdefine TMP_ h
%xdefine h g
%xdefine g f
%xdefine f e
%xdefine e d
%xdefine d c
%xdefine c b
%xdefine b a
%xdefine a TMP_
%endm

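; AVX has no packed dword rotate (vprold only arrived with AVX-512), so a
; rotate right by imm is synthesized from two shifts and an OR:
;   ror(x, n) = (x >> n) | (x << (32-n))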
; PRORD reg, imm, tmp
%macro PRORD 3
%define %%reg %1
%define %%imm %2
%define %%tmp %3
	vpslld	%%tmp, %%reg, (32-(%%imm))
	vpsrld	%%reg, %%reg, %%imm
	vpor	%%reg, %%reg, %%tmp
%endmacro

; non-destructive
; PRORD_nd reg, imm, tmp, src
%macro PRORD_nd 4
%define %%reg %1
%define %%imm %2
%define %%tmp %3
%define %%src %4
	vpslld	%%tmp, %%src, (32-(%%imm))
	vpsrld	%%reg, %%src, %%imm
	vpor	%%reg, %%reg, %%tmp
%endmacro

; PRORD dst/src, amt
%macro PRORD 2
	PRORD	%1, %2, TMP
%endmacro

; PRORD_nd dst, src, amt
%macro PRORD_nd 3
	PRORD_nd	%1, %3, TMP, %2
%endmacro

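; NASM selects between same-named multi-line macros by parameter count, so
; the two- and three-operand forms above coexist with the three- and
; four-operand definitions and simply default the temporary register to TMP.
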
;; arguments passed implicitly in preprocessor symbols i, a...h
%macro ROUND_00_15 2
%define %%T1 %1
%define %%i %2


	PRORD_nd	a0, e, (11-6)	; sig1: a0 = (e >> 5)

	vpxor	a2, f, g	; ch: a2 = f^g
	vpand	a2, e		; ch: a2 = (f^g)&e
	vpxor	a2, g		; a2 = ch

	PRORD_nd	a1, e, 25	; sig1: a1 = (e >> 25)
	vmovdqa	[SZ4*(%%i&0xf) + rsp], %%T1
	vpaddd	%%T1, %%T1, [TBL + ROUND]	; T1 = W + K
	vpxor	a0, a0, e	; sig1: a0 = e ^ (e >> 5)
	PRORD	a0, 6		; sig1: a0 = (e >> 6) ^ (e >> 11)
	vpaddd	h, h, a2	; h = h + ch
	PRORD_nd	a2, a, (13-2)	; sig0: a2 = (a >> 11)
	vpaddd	h, h, %%T1	; h = h + ch + W + K
	vpxor	a0, a0, a1	; a0 = sigma1
	PRORD_nd	a1, a, 22	; sig0: a1 = (a >> 22)
	vpxor	%%T1, a, c	; maj: T1 = a^c
	add	ROUND, SZ4	; ROUND++ (one SZ4-byte K entry)
	vpand	%%T1, %%T1, b	; maj: T1 = (a^c)&b
	vpaddd	h, h, a0	; h = h + ch + W + K + sigma1

	vpaddd	d, d, h		; d += T1 (= h + ch + W + K + sigma1)

	vpxor	a2, a2, a	; sig0: a2 = a ^ (a >> 11)
	PRORD	a2, 2		; sig0: a2 = (a >> 2) ^ (a >> 13)
	vpxor	a2, a2, a1	; a2 = sig0
	vpand	a1, a, c	; maj: a1 = a&c
	vpor	a1, a1, %%T1	; a1 = maj
	vpaddd	h, h, a1	; h = h + ch + W + K + sigma1 + maj
	vpaddd	h, h, a2	; h = h + ch + W + K + sigma1 + maj + sigma0

	ROTATE_ARGS
%endm
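
; One ROUND_00_15 invocation performs a full FIPS 180-4 round across the
; four lanes:
;   T1 = h + Sigma1(e) + Ch(e,f,g) + K[t] + W[t]
;   d += T1;  new a = T1 + Sigma0(a) + Maj(a,b,c)
; Sigma1 is built as ror6(e ^ ror5(e)) ^ ror25(e) = ror6 ^ ror11 ^ ror25,
; getting two of the three rotations from one intermediate; Sigma0 uses the
; same trick via ror2(a ^ ror11(a)) ^ ror22(a). Ch is computed as ((f^g)&e)^g
; and Maj as ((a^c)&b)|(a&c). ROTATE_ARGS then renames a..h instead of
; moving any data.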


;; arguments passed implicitly in preprocessor symbols i, a...h
%macro ROUND_16_XX 2
%define %%T1 %1
%define %%i %2

	vmovdqa	%%T1, [SZ4*((%%i-15)&0xf) + rsp]	; T1 = W[i-15]
	vmovdqa	a1, [SZ4*((%%i-2)&0xf) + rsp]		; a1 = W[i-2]
	vmovdqa	a0, %%T1
	PRORD	%%T1, 18-7
	vmovdqa	a2, a1
	PRORD	a1, 19-17
	vpxor	%%T1, %%T1, a0
	PRORD	%%T1, 7
	vpxor	a1, a1, a2
	PRORD	a1, 17
	vpsrld	a0, a0, 3
	vpxor	%%T1, %%T1, a0		; T1 = sigma0(W[i-15])
	vpsrld	a2, a2, 10
	vpxor	a1, a1, a2		; a1 = sigma1(W[i-2])
	vpaddd	%%T1, %%T1, [SZ4*((%%i-16)&0xf) + rsp]
	vpaddd	a1, a1, [SZ4*((%%i-7)&0xf) + rsp]
	vpaddd	%%T1, %%T1, a1		; T1 = W[i]

	ROUND_00_15 %%T1, %%i
%endm

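; Message schedule: W[i] = sigma0(W[i-15]) + W[i-7] + sigma1(W[i-2]) + W[i-16]
; with sigma0(x) = ror7(x) ^ ror18(x) ^ (x >> 3) and
;      sigma1(x) = ror17(x) ^ ror19(x) ^ (x >> 10).
; Only the last 16 W values are live, so the schedule is kept in a 16-entry
; circular buffer on the stack, indexed by (i & 0xf).
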
%define DIGEST_SIZE 8*SZ4
%define DATA 16*SZ4
%define ALIGNMENT 1*8
; ALIGNMENT makes FRAMESZ + pushes an odd multiple of 8
%define FRAMESZ (DATA + DIGEST_SIZE + ALIGNMENT)
%define _DIGEST (DATA)

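; Frame layout: 16 W slots (DATA = 256 bytes) at rsp, then the 8 saved digest
; rows (DIGEST_SIZE = 128 bytes) at rsp + _DIGEST. The 8-byte ALIGNMENT pad
; makes FRAMESZ = 392, which is 8 mod 16; combined with the return address
; this leaves rsp 16-byte aligned, as the aligned vmovdqa accesses require.
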
%define VMOVPS vmovups

%define inp0 r8
%define inp1 r9
%define inp2 r10
%define inp3 r11

%ifidn __OUTPUT_FORMAT__, elf64
; Linux definitions
%define arg1 rdi
%define arg2 rsi
%else
; Windows definitions
%define arg1 rcx
%define arg2 rdx
%endif

; Common definitions
%define IDX rax
%define ROUND rbx
%define TBL r12

;; void sha256_mb_x4_avx(SHA256_MB_ARGS_X8 *args, uint64_t len);
;; arg 1 : arg1 : pointer to args (only 4 of the 8 lanes used)
;; arg 2 : arg2 : size of data in blocks (assumed >= 1)
;;
;; Clobbers registers: arg2, rax, rbx, r8-r12, xmm0-xmm15
;;
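;; All four lanes advance by the same block count, so the caller must ensure
;; every lane's data_ptr has at least arg2 blocks of input available.
;;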
mk_global sha256_mb_x4_avx, function, internal
align 32
sha256_mb_x4_avx:
	endbranch
	sub	rsp, FRAMESZ

	;; Initialize digests
	vmovdqa	a, [arg1 + 0*SZ4]
	vmovdqa	b, [arg1 + 1*SZ4]
	vmovdqa	c, [arg1 + 2*SZ4]
	vmovdqa	d, [arg1 + 3*SZ4]
	vmovdqa	e, [arg1 + 4*SZ4]
	vmovdqa	f, [arg1 + 5*SZ4]
	vmovdqa	g, [arg1 + 6*SZ4]
	vmovdqa	h, [arg1 + 7*SZ4]

	lea	TBL, [TABLE]

	;; load pointers to the four input buffers
	mov	inp0, [arg1 + _data_ptr + 0*8]
	mov	inp1, [arg1 + _data_ptr + 1*8]
	mov	inp2, [arg1 + _data_ptr + 2*8]
	mov	inp3, [arg1 + _data_ptr + 3*8]

	xor	IDX, IDX
lloop:
	xor	ROUND, ROUND

	;; save old digest
	vmovdqa	[rsp + _DIGEST + 0*SZ4], a
	vmovdqa	[rsp + _DIGEST + 1*SZ4], b
	vmovdqa	[rsp + _DIGEST + 2*SZ4], c
	vmovdqa	[rsp + _DIGEST + 3*SZ4], d
	vmovdqa	[rsp + _DIGEST + 4*SZ4], e
	vmovdqa	[rsp + _DIGEST + 5*SZ4], f
	vmovdqa	[rsp + _DIGEST + 6*SZ4], g
	vmovdqa	[rsp + _DIGEST + 7*SZ4], h

%assign i 0
%rep 4
	vmovdqa	TMP, [PSHUFFLE_BYTE_FLIP_MASK]
	VMOVPS	TT2, [inp0+IDX+i*16]
	VMOVPS	TT1, [inp1+IDX+i*16]
	VMOVPS	TT4, [inp2+IDX+i*16]
	VMOVPS	TT3, [inp3+IDX+i*16]
	TRANSPOSE	TT2, TT1, TT4, TT3, TT0, TT5
	vpshufb	TT0, TT0, TMP		; swap to big-endian word order
	vpshufb	TT1, TT1, TMP
	vpshufb	TT2, TT2, TMP
	vpshufb	TT3, TT3, TMP
	ROUND_00_15	TT0, (i*4+0)
	ROUND_00_15	TT1, (i*4+1)
	ROUND_00_15	TT2, (i*4+2)
	ROUND_00_15	TT3, (i*4+3)
%assign i (i+1)
%endrep
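; Each %rep iteration pulls 16 bytes from every lane, and TRANSPOSE regroups
; the four lane vectors into four word vectors (word j of all lanes in one
; register: TT0..TT3 hold words i*4+0 .. i*4+3). After the endianness swap
; those registers feed rounds 0-15 directly and are spilled into the stack
; W buffer by ROUND_00_15.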
	add	IDX, 4*4*4	; advance one 64-byte block in each lane


%assign i (i*4)

	jmp	Lrounds_16_xx	; hop over the alignment padding
align 16
Lrounds_16_xx:
%rep 16
	ROUND_16_XX T1, i
%assign i (i+1)
%endrep

	cmp	ROUND, ROUNDS
	jb	Lrounds_16_xx

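; all 64 rounds are done (ROUND has reached ROUNDS = 64*SZ4); apply the
; Merkle-Damgard feed-forward by adding the digest saved at the top of lloop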
	;; add old digest
	vpaddd	a, a, [rsp + _DIGEST + 0*SZ4]
	vpaddd	b, b, [rsp + _DIGEST + 1*SZ4]
	vpaddd	c, c, [rsp + _DIGEST + 2*SZ4]
	vpaddd	d, d, [rsp + _DIGEST + 3*SZ4]
	vpaddd	e, e, [rsp + _DIGEST + 4*SZ4]
	vpaddd	f, f, [rsp + _DIGEST + 5*SZ4]
	vpaddd	g, g, [rsp + _DIGEST + 6*SZ4]
	vpaddd	h, h, [rsp + _DIGEST + 7*SZ4]


	sub	arg2, 1
	jne	lloop

	; write digests out
	vmovdqa	[arg1 + 0*SZ4], a
	vmovdqa	[arg1 + 1*SZ4], b
	vmovdqa	[arg1 + 2*SZ4], c
	vmovdqa	[arg1 + 3*SZ4], d
	vmovdqa	[arg1 + 4*SZ4], e
	vmovdqa	[arg1 + 5*SZ4], f
	vmovdqa	[arg1 + 6*SZ4], g
	vmovdqa	[arg1 + 7*SZ4], h

	; update input pointers
	add	inp0, IDX
	mov	[arg1 + _data_ptr + 0*8], inp0
	add	inp1, IDX
	mov	[arg1 + _data_ptr + 1*8], inp1
	add	inp2, IDX
	mov	[arg1 + _data_ptr + 2*8], inp2
	add	inp3, IDX
	mov	[arg1 + _data_ptr + 3*8], inp3

	;;;;;;;;;;;;;;;;
	;; Postamble

	add	rsp, FRAMESZ
	ret

section .data align=64

align 64
; K256 round constants, each dword replicated across the 4 lanes
TABLE:
	dq 0x428a2f98428a2f98, 0x428a2f98428a2f98
	dq 0x7137449171374491, 0x7137449171374491
	dq 0xb5c0fbcfb5c0fbcf, 0xb5c0fbcfb5c0fbcf
	dq 0xe9b5dba5e9b5dba5, 0xe9b5dba5e9b5dba5
	dq 0x3956c25b3956c25b, 0x3956c25b3956c25b
	dq 0x59f111f159f111f1, 0x59f111f159f111f1
	dq 0x923f82a4923f82a4, 0x923f82a4923f82a4
	dq 0xab1c5ed5ab1c5ed5, 0xab1c5ed5ab1c5ed5
	dq 0xd807aa98d807aa98, 0xd807aa98d807aa98
	dq 0x12835b0112835b01, 0x12835b0112835b01
	dq 0x243185be243185be, 0x243185be243185be
	dq 0x550c7dc3550c7dc3, 0x550c7dc3550c7dc3
	dq 0x72be5d7472be5d74, 0x72be5d7472be5d74
	dq 0x80deb1fe80deb1fe, 0x80deb1fe80deb1fe
	dq 0x9bdc06a79bdc06a7, 0x9bdc06a79bdc06a7
	dq 0xc19bf174c19bf174, 0xc19bf174c19bf174
	dq 0xe49b69c1e49b69c1, 0xe49b69c1e49b69c1
	dq 0xefbe4786efbe4786, 0xefbe4786efbe4786
	dq 0x0fc19dc60fc19dc6, 0x0fc19dc60fc19dc6
	dq 0x240ca1cc240ca1cc, 0x240ca1cc240ca1cc
	dq 0x2de92c6f2de92c6f, 0x2de92c6f2de92c6f
	dq 0x4a7484aa4a7484aa, 0x4a7484aa4a7484aa
	dq 0x5cb0a9dc5cb0a9dc, 0x5cb0a9dc5cb0a9dc
	dq 0x76f988da76f988da, 0x76f988da76f988da
	dq 0x983e5152983e5152, 0x983e5152983e5152
	dq 0xa831c66da831c66d, 0xa831c66da831c66d
	dq 0xb00327c8b00327c8, 0xb00327c8b00327c8
	dq 0xbf597fc7bf597fc7, 0xbf597fc7bf597fc7
	dq 0xc6e00bf3c6e00bf3, 0xc6e00bf3c6e00bf3
	dq 0xd5a79147d5a79147, 0xd5a79147d5a79147
	dq 0x06ca635106ca6351, 0x06ca635106ca6351
	dq 0x1429296714292967, 0x1429296714292967
	dq 0x27b70a8527b70a85, 0x27b70a8527b70a85
	dq 0x2e1b21382e1b2138, 0x2e1b21382e1b2138
	dq 0x4d2c6dfc4d2c6dfc, 0x4d2c6dfc4d2c6dfc
	dq 0x53380d1353380d13, 0x53380d1353380d13
	dq 0x650a7354650a7354, 0x650a7354650a7354
	dq 0x766a0abb766a0abb, 0x766a0abb766a0abb
	dq 0x81c2c92e81c2c92e, 0x81c2c92e81c2c92e
	dq 0x92722c8592722c85, 0x92722c8592722c85
	dq 0xa2bfe8a1a2bfe8a1, 0xa2bfe8a1a2bfe8a1
	dq 0xa81a664ba81a664b, 0xa81a664ba81a664b
	dq 0xc24b8b70c24b8b70, 0xc24b8b70c24b8b70
	dq 0xc76c51a3c76c51a3, 0xc76c51a3c76c51a3
	dq 0xd192e819d192e819, 0xd192e819d192e819
	dq 0xd6990624d6990624, 0xd6990624d6990624
	dq 0xf40e3585f40e3585, 0xf40e3585f40e3585
	dq 0x106aa070106aa070, 0x106aa070106aa070
	dq 0x19a4c11619a4c116, 0x19a4c11619a4c116
	dq 0x1e376c081e376c08, 0x1e376c081e376c08
	dq 0x2748774c2748774c, 0x2748774c2748774c
	dq 0x34b0bcb534b0bcb5, 0x34b0bcb534b0bcb5
	dq 0x391c0cb3391c0cb3, 0x391c0cb3391c0cb3
	dq 0x4ed8aa4a4ed8aa4a, 0x4ed8aa4a4ed8aa4a
	dq 0x5b9cca4f5b9cca4f, 0x5b9cca4f5b9cca4f
	dq 0x682e6ff3682e6ff3, 0x682e6ff3682e6ff3
	dq 0x748f82ee748f82ee, 0x748f82ee748f82ee
	dq 0x78a5636f78a5636f, 0x78a5636f78a5636f
	dq 0x84c8781484c87814, 0x84c8781484c87814
	dq 0x8cc702088cc70208, 0x8cc702088cc70208
	dq 0x90befffa90befffa, 0x90befffa90befffa
	dq 0xa4506ceba4506ceb, 0xa4506ceba4506ceb
	dq 0xbef9a3f7bef9a3f7, 0xbef9a3f7bef9a3f7
	dq 0xc67178f2c67178f2, 0xc67178f2c67178f2
; shuffle mask: reverses the bytes within each dword (little- to big-endian)
PSHUFFLE_BYTE_FLIP_MASK: dq 0x0405060700010203, 0x0c0d0e0f08090a0b