]>
Commit | Line | Data |
---|---|---|
11fdf7f2 TL |
1 | ;; |
2 | ;; Copyright (c) 2012-2018, Intel Corporation | |
3 | ;; | |
4 | ;; Redistribution and use in source and binary forms, with or without | |
5 | ;; modification, are permitted provided that the following conditions are met: | |
6 | ;; | |
7 | ;; * Redistributions of source code must retain the above copyright notice, | |
8 | ;; this list of conditions and the following disclaimer. | |
9 | ;; * Redistributions in binary form must reproduce the above copyright | |
10 | ;; notice, this list of conditions and the following disclaimer in the | |
11 | ;; documentation and/or other materials provided with the distribution. | |
12 | ;; * Neither the name of Intel Corporation nor the names of its contributors | |
13 | ;; may be used to endorse or promote products derived from this software | |
14 | ;; without specific prior written permission. | |
15 | ;; | |
16 | ;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |
17 | ;; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
18 | ;; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
19 | ;; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE | |
20 | ;; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
21 | ;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |
22 | ;; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |
23 | ;; CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |
24 | ;; OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
25 | ;; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
26 | ;; | |
27 | ||
28 | ;; code to compute 8 lanes of MD5 in parallel (two interleaved sets of 4 lanes) using SSE | |
29 | ||
30 | ;; Stack must be aligned to 16 bytes before call | |
31 | ;; Windows clobbers: rax rbx rdx rsi rdi r8 r9 r10 r11 r12 r13 r14 r15 | |
32 | ;; Windows preserves: rcx rbp | |
33 | ;; | |
34 | ;; Linux clobbers: rax rbx rcx rdx rsi r8 r9 r10 r11 r12 r13 r14 r15 | |
35 | ;; Linux preserves: rdi rbp | |
36 | ;; | |
37 | ;; clobbers xmm0-15 | |
38 | ||
f67539c2 | 39 | %include "include/os.asm" |
11fdf7f2 TL |
40 | %include "mb_mgr_datastruct.asm" |
41 | ||
42 | section .data align=64 | |
43 | default rel | |
44 | ||
align 64
;; MD5 additive constants T[1..64] (RFC 1321), one 16-byte row per step.
;; Every constant is replicated across the four dword lanes of its row so
;; that a single 128-bit paddd applies it to four buffers at once.
;; Row i lives at MD5_TABLE + i*16.
MKGLOBAL(MD5_TABLE,data,internal)
MD5_TABLE:
        ;; round 1 (rows 0-15)
        times 4 dd 0xd76aa478
        times 4 dd 0xe8c7b756
        times 4 dd 0x242070db
        times 4 dd 0xc1bdceee
        times 4 dd 0xf57c0faf
        times 4 dd 0x4787c62a
        times 4 dd 0xa8304613
        times 4 dd 0xfd469501
        times 4 dd 0x698098d8
        times 4 dd 0x8b44f7af
        times 4 dd 0xffff5bb1
        times 4 dd 0x895cd7be
        times 4 dd 0x6b901122
        times 4 dd 0xfd987193
        times 4 dd 0xa679438e
        times 4 dd 0x49b40821
        ;; round 2 (rows 16-31)
        times 4 dd 0xf61e2562
        times 4 dd 0xc040b340
        times 4 dd 0x265e5a51
        times 4 dd 0xe9b6c7aa
        times 4 dd 0xd62f105d
        times 4 dd 0x02441453
        times 4 dd 0xd8a1e681
        times 4 dd 0xe7d3fbc8
        times 4 dd 0x21e1cde6
        times 4 dd 0xc33707d6
        times 4 dd 0xf4d50d87
        times 4 dd 0x455a14ed
        times 4 dd 0xa9e3e905
        times 4 dd 0xfcefa3f8
        times 4 dd 0x676f02d9
        times 4 dd 0x8d2a4c8a
        ;; round 3 (rows 32-47)
        times 4 dd 0xfffa3942
        times 4 dd 0x8771f681
        times 4 dd 0x6d9d6122
        times 4 dd 0xfde5380c
        times 4 dd 0xa4beea44
        times 4 dd 0x4bdecfa9
        times 4 dd 0xf6bb4b60
        times 4 dd 0xbebfbc70
        times 4 dd 0x289b7ec6
        times 4 dd 0xeaa127fa
        times 4 dd 0xd4ef3085
        times 4 dd 0x04881d05
        times 4 dd 0xd9d4d039
        times 4 dd 0xe6db99e5
        times 4 dd 0x1fa27cf8
        times 4 dd 0xc4ac5665
        ;; round 4 (rows 48-63)
        times 4 dd 0xf4292244
        times 4 dd 0x432aff97
        times 4 dd 0xab9423a7
        times 4 dd 0xfc93a039
        times 4 dd 0x655b59c3
        times 4 dd 0x8f0ccc92
        times 4 dd 0xffeff47d
        times 4 dd 0x85845dd1
        times 4 dd 0x6fa87e4f
        times 4 dd 0xfe2ce6e0
        times 4 dd 0xa3014314
        times 4 dd 0x4e0811a1
        times 4 dd 0xf7537e82
        times 4 dd 0xbd3af235
        times 4 dd 0x2ad7d2bb
        times 4 dd 0xeb86d391

;; All-ones mask; MAGIC_I XORs a register with it to form a bitwise NOT.
;; It is read as a 128-bit memory operand of pxor, which faults on an address
;; that is not 16-byte aligned, so the alignment is requested explicitly
;; rather than relying on the size of the table above. With the current
;; table (64 rows * 16 bytes after a 64-byte boundary) this emits no padding.
align 16
ONES:
        times 4 dd 0xffffffff
115 | ||
116 | section .text | |
117 | ||
;; Register aliases used throughout this file.
;; arg1/arg2 are the first two integer argument registers of the platform
;; calling convention; mem1/mem2 are the two "ping-pong" pointers to the
;; transposed message blocks kept on the stack.
118 | %ifdef LINUX | |
119 | ;; Linux Registers | |
120 | %define arg1 rdi | |
121 | %define arg2 rsi | |
122 | %define mem1 rcx | |
123 | %define mem2 rdx | |
124 | %else | |
;; Windows Registers
125 | %define arg1 rcx | |
126 | %define arg2 rdx | |
127 | %define mem1 rdi | |
128 | %define mem2 rsi | |
129 | %endif | |
130 | ||
131 | ;; rbp is not clobbered | |
132 | ||
;; Per-lane input data pointers (one per buffer, 8 lanes in total)
133 | %define inp0 r8 | |
134 | %define inp1 r9 | |
135 | %define inp2 r10 | |
136 | %define inp3 r11 | |
137 | %define inp4 r12 | |
138 | %define inp5 r13 | |
139 | %define inp6 r14 | |
140 | %define inp7 r15 | |
141 | ||
;; TBL = base address of MD5_TABLE, IDX = byte offset added to every inpN
142 | %define TBL rax | |
143 | %define IDX rbx | |
144 | ||
;; Working digest words for the first set of four lanes
145 | %define A xmm0 | |
146 | %define B xmm1 | |
147 | %define C xmm2 | |
148 | %define D xmm3 | |
149 | %define E xmm4 ; tmp | |
150 | %define F xmm5 ; tmp | |
151 | ||
;; Working digest words for the second set of four lanes
152 | %define A2 xmm6 | |
153 | %define B2 xmm7 | |
154 | %define C2 xmm8 | |
155 | %define D2 xmm9 | |
156 | ||
157 | ||
;; Scratch registers handed to the MD5 step macros.
;; NOTE: FUN2/TMP2 are the same registers as T0/T1 below (xmm10/xmm11), so a
;; step macro that uses FUN2/TMP2 overwrites T0/T1.
158 | %define FUN E | |
159 | %define TMP F | |
160 | %define FUN2 xmm10 | |
161 | %define TMP2 xmm11 | |
162 | ||
;; Scratch registers used while loading and transposing input data
163 | %define T0 xmm10 | |
164 | %define T1 xmm11 | |
165 | %define T2 xmm12 | |
166 | %define T3 xmm13 | |
167 | %define T4 xmm14 | |
168 | %define T5 xmm15 | |
169 | ||
170 | ; Stack Layout (hexadecimal byte offsets from rsp, highest address first) | |
171 | ; | |
172 | ; 470 DD2 | |
173 | ; 460 CC2 | |
174 | ; 450 BB2 | |
175 | ; 440 AA2 | |
176 | ; 430 DD | |
177 | ; 420 CC | |
178 | ; 410 BB | |
179 | ; 400 AA | |
180 | ; | |
181 | ; 3F0 data2[15] for lanes 7...4 \ | |
182 | ; ... \ | |
183 | ; 300 data2[0] for lanes 7...4 \ | |
184 | ; 2F0 data2[15] for lanes 3...0 > mem block 2 | |
185 | ; ... / | |
186 | ; 210 data2[1] for lanes 3...0 / | |
187 | ; 200 data2[0] for lanes 3...0 / | |
188 | ; | |
189 | ; 1F0 data1[15] for lanes 7...4 \ | |
190 | ; ... \ | |
191 | ; 100 data1[0] for lanes 7...4 \ | |
192 | ; F0 data1[15] for lanes 3...0 > mem block 1 | |
193 | ; ... / | |
194 | ; 10 data1[1] for lanes 3...0 / | |
195 | ; 0 data1[0] for lanes 3...0 / | |
196 | ||
197 | ; stack size must be an odd multiple of 8 bytes in size | |
; (the frame is addressed with movdqa relative to rsp, so rsp has to be
;  16-byte aligned after "sub rsp, STACK_SIZE"; the file header requires the
;  stack to be 16-byte aligned before the call, and the call pushes 8 bytes)
; "reso" reserves 16-byte units: 64*16 + 8*16 + 8 = 1160 bytes in total.
198 | struc STACK | |
199 | _DATA: reso 2*2*16 ; 2 blocks * 2 sets of lanes * 16 regs | |
200 | _DIGEST: reso 8 ; stores AA-DD, AA2-DD2 | |
201 | resb 8 ; for alignment | |
202 | endstruc | |
203 | %define STACK_SIZE STACK_size | |
204 | ||
; Stack slots holding the digest values saved at the start of each block
; (AA..DD for the first set of four lanes, AA2..DD2 for the second set).
; These expand to address expressions and are used as [AA], [BB], ...
205 | %define AA rsp + _DIGEST + 16*0 | |
206 | %define BB rsp + _DIGEST + 16*1 | |
207 | %define CC rsp + _DIGEST + 16*2 | |
208 | %define DD rsp + _DIGEST + 16*3 | |
209 | %define AA2 rsp + _DIGEST + 16*4 | |
210 | %define BB2 rsp + _DIGEST + 16*5 | |
211 | %define CC2 rsp + _DIGEST + 16*6 | |
212 | %define DD2 rsp + _DIGEST + 16*7 | |
213 | ||
214 | ;; | |
215 | ;; MD5 left rotations (number of bits) | |
;; Naming: rot<R><S>, where R is the round (1..4) and S is the position of
;; the step inside each consecutive group of four steps of that round.
216 | ;; | |
217 | rot11 equ 7 | |
218 | rot12 equ 12 | |
219 | rot13 equ 17 | |
220 | rot14 equ 22 | |
221 | rot21 equ 5 | |
222 | rot22 equ 9 | |
223 | rot23 equ 14 | |
224 | rot24 equ 20 | |
225 | rot31 equ 4 | |
226 | rot32 equ 11 | |
227 | rot33 equ 16 | |
228 | rot34 equ 23 | |
229 | rot41 equ 6 | |
230 | rot42 equ 10 | |
231 | rot43 equ 15 | |
232 | rot44 equ 21 | |
233 | ||
; TRANSPOSE r0, r1, r2, r3, t0, t1
; Transposes the 4x4 matrix of dwords held in {r0..r3}, using the two
; temporaries {t0, t1}.
;
; On entry:
;   r0 = {a3 a2 a1 a0}
;   r1 = {b3 b2 b1 b0}
;   r2 = {c3 c2 c1 c0}
;   r3 = {d3 d2 d1 d0}
;
; On exit the transposed rows are found in {t0 r1 r0 r3}:
;   t0 = {d0 c0 b0 a0}
;   r1 = {d1 c1 b1 a1}
;   r0 = {d2 c2 b2 a2}
;   r3 = {d3 c3 b3 a3}
; r2 and t1 are left holding intermediate values.
;
%macro TRANSPOSE 6
%define %%in0   %1
%define %%in1   %2
%define %%in2   %3
%define %%in3   %4
%define %%tmp0  %5
%define %%tmp1  %6
        ;; pair up rows a/b: low halves into tmp0, high halves into in0
        movdqa  %%tmp0, %%in0
        shufps  %%tmp0, %%in1, 0x44     ; tmp0 = {b1 b0 a1 a0}
        shufps  %%in0, %%in1, 0xEE      ; in0  = {b3 b2 a3 a2}

        ;; pair up rows c/d: low halves into tmp1, high halves into in2
        movdqa  %%tmp1, %%in2
        shufps  %%tmp1, %%in3, 0x44     ; tmp1 = {d1 d0 c1 c0}
        shufps  %%in2, %%in3, 0xEE      ; in2  = {d3 d2 c3 c2}

        ;; pick the odd elements of each pair -> columns 1 and 3
        movdqa  %%in1, %%tmp0
        shufps  %%in1, %%tmp1, 0xDD     ; in1  = {d1 c1 b1 a1}

        movdqa  %%in3, %%in0
        shufps  %%in3, %%in2, 0xDD      ; in3  = {d3 c3 b3 a3}

        ;; pick the even elements of each pair -> columns 2 and 0
        shufps  %%in0, %%in2, 0x88      ; in0  = {d2 c2 b2 a2}
        shufps  %%tmp0, %%tmp1, 0x88    ; tmp0 = {d0 c0 b0 a0}
%endmacro
272 | ||
273 | ;; | |
274 | ;; Magic functions defined in RFC 1321 | |
275 | ;; | |
; macro MAGIC_F F,X,Y,Z ;; F = ((Z) ^ ((X) & ((Y) ^ (Z))))
; Positional parameters: %1 = F (destination, overwritten),
;                        %2 = X, %3 = Y, %4 = Z (inputs, only read).
; F has to be a different register from X, Y and Z.
%macro MAGIC_F 4
        movdqa  %1, %4                  ; F = Z
        pxor    %1, %3                  ; F = Y ^ Z
        pand    %1, %2                  ; F = X & (Y ^ Z)
        pxor    %1, %4                  ; F = Z ^ (X & (Y ^ Z))
%endmacro
287 | ||
; macro MAGIC_G F,X,Y,Z ;; F = F((Z),(X),(Y))
; Implemented by calling MAGIC_F with the inputs permuted to (Z, X, Y).
; Positional parameters: %1 = F (destination), %2 = X, %3 = Y, %4 = Z.
%macro MAGIC_G 4
        MAGIC_F %1,%4,%2,%3
%endmacro
296 | ||
; macro MAGIC_H F,X,Y,Z ;; F = ((X) ^ (Y) ^ (Z))
; Positional parameters: %1 = F (destination, overwritten),
;                        %2 = X, %3 = Y, %4 = Z (inputs, only read).
%macro MAGIC_H 4
        movdqa  %1, %4                  ; F = Z
        pxor    %1, %3                  ; F = Y ^ Z
        pxor    %1, %2                  ; F = X ^ Y ^ Z
%endmacro
307 | ||
; macro MAGIC_I F,X,Y,Z ;; F = ((Y) ^ ((X) | ~(Z)))
; Positional parameters: %1 = F (destination, overwritten),
;                        %2 = X, %3 = Y, %4 = Z (inputs, only read).
; SSE has no vector NOT, so ~Z is formed by XORing with the ONES constant.
%macro MAGIC_I 4
        movdqa  %1, %4                  ; F = Z
        pxor    %1, [rel ONES]          ; F = ~Z
        por     %1, %2                  ; F = X | ~Z
        pxor    %1, %3                  ; F = Y ^ (X | ~Z)
%endmacro
319 | ||
; PROLD reg, imm, tmp
; Rotates every dword of reg left by imm bits; tmp is overwritten.
; Positional parameters: %1 = reg, %2 = imm, %3 = tmp.
%macro PROLD 3
        movdqa  %3, %1                  ; tmp = copy of reg
        psrld   %3, (32-%2)             ; tmp = bits that wrap around
        pslld   %1, %2                  ; reg = bits shifted up
        por     %1, %3                  ; reg = rotated value
%endmacro
330 | ||
;;
;; single MD5 step, applied to both sets of four lanes while sharing ONE
;; FUN/TMP scratch pair between the two sets
;;
;; A  = B  +ROL32((A  +MAGIC(B,C,D)    +data        +const), nrot)
;; A2 = B2 +ROL32((A2 +MAGIC(B2,C2,D2) +data[+16*16] +const), nrot)
;;
;; "data" is an address expression (used as [data]); the second set of lanes
;; reads its message word 16*16 bytes above it.
;;
; macro MD5_STEP1 MAGIC_FUN, A,B,C,D, A2,B2,C2,D2, FUN, TMP, data, MD5const, nrot
%macro MD5_STEP1 14
%define %%magic %1
%define %%a     %2
%define %%b     %3
%define %%c     %4
%define %%d     %5
%define %%a2    %6
%define %%b2    %7
%define %%c2    %8
%define %%d2    %9
%define %%fun   %10
%define %%tmp   %11
%define %%msg   %12
%define %%kval  %13
%define %%rot   %14

        ;; accumulate constant and message word
        paddd   %%a, %%kval
        paddd   %%a2, %%kval
        paddd   %%a, [%%msg]
        paddd   %%a2, [%%msg + 16*16]
        ;; accumulate the round function; fun is reused for the second set
        %%magic %%fun, %%b,%%c,%%d
        paddd   %%a, %%fun
        %%magic %%fun, %%b2,%%c2,%%d2
        paddd   %%a2, %%fun
        ;; rotate, then add B; tmp is reused for the second set
        PROLD   %%a,%%rot, %%tmp
        PROLD   %%a2,%%rot, %%tmp
        paddd   %%a, %%b
        paddd   %%a2, %%b2
%endmacro
366 | ||
;;
;; single MD5 step, applied to both sets of four lanes with a SEPARATE
;; scratch pair per set (FUN/TMP for the first, FUN2/TMP2 for the second)
;;
;; A  = B  +ROL32((A  +MAGIC(B,C,D)    +data        +const), nrot)
;; A2 = B2 +ROL32((A2 +MAGIC(B2,C2,D2) +data[+16*16] +const), nrot)
;;
;; "data" is an address expression (used as [data]); the second set of lanes
;; reads its message word 16*16 bytes above it.
;;
; macro MD5_STEP MAGIC_FUN, A,B,C,D, A2,B2,C2,D2, FUN, TMP, FUN2, TMP2, data,
;                MD5const, nrot
%macro MD5_STEP 16
%define %%magic %1
%define %%a     %2
%define %%b     %3
%define %%c     %4
%define %%d     %5
%define %%a2    %6
%define %%b2    %7
%define %%c2    %8
%define %%d2    %9
%define %%fun   %10
%define %%tmp   %11
%define %%fun2  %12
%define %%tmp2  %13
%define %%msg   %14
%define %%kval  %15
%define %%rot   %16

        ;; accumulate constant and message word
        paddd   %%a, %%kval
        paddd   %%a2, %%kval
        paddd   %%a, [%%msg]
        paddd   %%a2, [%%msg + 16*16]
        ;; evaluate the round function for both sets, then accumulate
        %%magic %%fun, %%b,%%c,%%d
        %%magic %%fun2, %%b2,%%c2,%%d2
        paddd   %%a, %%fun
        paddd   %%a2, %%fun2
        ;; rotate, then add B
        PROLD   %%a,%%rot, %%tmp
        PROLD   %%a2,%%rot, %%tmp2
        paddd   %%a, %%b
        paddd   %%a2, %%b2
%endmacro
405 | ||
406 | ; void md5_x4x2_sse(MD5_ARGS *args, UINT64 num_blks) | |
407 | ; arg 1 : pointer to MD5_ARGS structure | |
408 | ; arg 2 : number of blocks (>=1) | |
409 | ; | |
; Hashes num_blks blocks (IDX advances by 4*16 bytes per block) from each of
; eight input buffers, updating the digests and data pointers stored in args.
; The general-purpose and xmm registers listed as clobbered in the file
; header are used without being saved or restored here.
; num_blks is decremented and tested for zero only after the decrement, so
; it must not be 0 on entry.
410 | align 32 | |
411 | MKGLOBAL(md5_x4x2_sse,function,internal) | |
412 | md5_x4x2_sse: | |
413 | ||
;; reserve the frame described by "struc STACK"
414 | sub rsp, STACK_SIZE | |
415 | ||
416 | ;; each row of transposed digests is split into 2 parts, the right half stored in A, and left half in A2 | |
417 | ;; Initialize digests | |
;; (movdqa: the digest rows in args must be 16-byte aligned)
418 | movdqa A,[arg1 + 0*16 + 0*MD5_DIGEST_ROW_SIZE] | |
419 | movdqa B,[arg1 + 0*16 + 1*MD5_DIGEST_ROW_SIZE] | |
420 | movdqa C,[arg1 + 0*16 + 2*MD5_DIGEST_ROW_SIZE] | |
421 | movdqa D,[arg1 + 0*16 + 3*MD5_DIGEST_ROW_SIZE] | |
422 | ||
423 | ;; Initialize digests (second half of each row) | |
424 | movdqa A2,[arg1 + 1*16 + 0*MD5_DIGEST_ROW_SIZE] | |
425 | movdqa B2,[arg1 + 1*16 + 1*MD5_DIGEST_ROW_SIZE] | |
426 | movdqa C2,[arg1 + 1*16 + 2*MD5_DIGEST_ROW_SIZE] | |
427 | movdqa D2,[arg1 + 1*16 + 3*MD5_DIGEST_ROW_SIZE] | |
428 | ||
;; TBL stays pointing at the constant table for the whole function
429 | lea TBL, [rel MD5_TABLE] | |
430 | ||
431 | ;; load input pointers | |
432 | mov inp0,[arg1+_data_ptr_md5 +0*PTR_SZ] | |
433 | mov inp1,[arg1+_data_ptr_md5 +1*PTR_SZ] | |
434 | mov inp2,[arg1+_data_ptr_md5 +2*PTR_SZ] | |
435 | mov inp3,[arg1+_data_ptr_md5 +3*PTR_SZ] | |
436 | mov inp4,[arg1+_data_ptr_md5 +4*PTR_SZ] | |
437 | mov inp5,[arg1+_data_ptr_md5 +5*PTR_SZ] | |
438 | mov inp6,[arg1+_data_ptr_md5 +6*PTR_SZ] | |
439 | mov inp7,[arg1+_data_ptr_md5 +7*PTR_SZ] | |
;; IDX = byte offset of the block being loaded, common to all eight inputs
440 | xor IDX, IDX | |
441 | ||
442 | ; Make ping-pong pointers to the two memory blocks | |
;; mem1 -> mem block 1 (offset 0), mem2 -> mem block 2 (offset 0x200)
443 | mov mem1, rsp | |
444 | lea mem2, [rsp + 16*16*2] | |
445 | ||
446 | ||
447 | ;; Load first block of data and save back to stack | |
;; Each iteration reads 16 bytes from every input (movdqu: inputs may be
;; unaligned), transposes them and stores four rows per set of lanes.
448 | %assign I 0 | |
449 | %rep 4 | |
;; inputs 0-3 -> first half of mem1
450 | movdqu T2,[inp0+IDX+I*16] | |
451 | movdqu T1,[inp1+IDX+I*16] | |
452 | movdqu T4,[inp2+IDX+I*16] | |
453 | movdqu T3,[inp3+IDX+I*16] | |
454 | TRANSPOSE T2, T1, T4, T3, T0, T5 | |
455 | movdqa [mem1+(I*4+0)*16],T0 | |
456 | movdqa [mem1+(I*4+1)*16],T1 | |
457 | movdqa [mem1+(I*4+2)*16],T2 | |
458 | movdqa [mem1+(I*4+3)*16],T3 | |
459 | ||
;; inputs 4-7 -> second half of mem1 (16*16 bytes further)
460 | movdqu T2,[inp4+IDX+I*16] | |
461 | movdqu T1,[inp5+IDX+I*16] | |
462 | movdqu T4,[inp6+IDX+I*16] | |
463 | movdqu T3,[inp7+IDX+I*16] | |
464 | TRANSPOSE T2, T1, T4, T3, T0, T5 | |
465 | movdqa [mem1+(I*4+0)*16 + 16*16],T0 | |
466 | movdqa [mem1+(I*4+1)*16 + 16*16],T1 | |
467 | movdqa [mem1+(I*4+2)*16 + 16*16],T2 | |
468 | movdqa [mem1+(I*4+3)*16 + 16*16],T3 | |
469 | %assign I (I+1) | |
470 | %endrep | |
471 | ||
;; Main loop. On entry to each iteration mem1 points at the transposed block
;; to be hashed now. Unless this is the final block, the next block is loaded
;; and transposed into mem2 in eight chunks placed between groups of eight
;; MD5 steps. The steps here use MD5_STEP1 (FUN/TMP only), which leaves
;; T0-T5 free for the loads.
472 | lloop: | |
473 | ; save old digests | |
474 | movdqa [AA], A | |
475 | movdqa [BB], B | |
476 | movdqa [CC], C | |
477 | movdqa [DD], D | |
478 | ; save old digests | |
479 | movdqa [AA2], A2 | |
480 | movdqa [BB2], B2 | |
481 | movdqa [CC2], C2 | |
482 | movdqa [DD2], D2 | |
483 | ||
;; IDX now addresses the block after the one in mem1
484 | add IDX, 4*16 | |
;; when the block count reaches zero there is nothing further to load
485 | sub arg2, 1 | |
486 | je lastblock | |
487 | ||
;; round 1, steps 0-7
488 | MD5_STEP1 MAGIC_F, A,B,C,D, A2,B2,C2,D2, FUN,TMP, mem1 + 0*16, [TBL+ 0*16], rot11 | |
489 | MD5_STEP1 MAGIC_F, D,A,B,C, D2,A2,B2,C2, FUN,TMP, mem1 + 1*16, [TBL+ 1*16], rot12 | |
490 | MD5_STEP1 MAGIC_F, C,D,A,B, C2,D2,A2,B2, FUN,TMP, mem1 + 2*16, [TBL+ 2*16], rot13 | |
491 | MD5_STEP1 MAGIC_F, B,C,D,A, B2,C2,D2,A2, FUN,TMP, mem1 + 3*16, [TBL+ 3*16], rot14 | |
492 | MD5_STEP1 MAGIC_F, A,B,C,D, A2,B2,C2,D2, FUN,TMP, mem1 + 4*16, [TBL+ 4*16], rot11 | |
493 | MD5_STEP1 MAGIC_F, D,A,B,C, D2,A2,B2,C2, FUN,TMP, mem1 + 5*16, [TBL+ 5*16], rot12 | |
494 | MD5_STEP1 MAGIC_F, C,D,A,B, C2,D2,A2,B2, FUN,TMP, mem1 + 6*16, [TBL+ 6*16], rot13 | |
495 | MD5_STEP1 MAGIC_F, B,C,D,A, B2,C2,D2,A2, FUN,TMP, mem1 + 7*16, [TBL+ 7*16], rot14 | |
496 | ||
;; next block, chunk I=0, inputs 0-3
497 | %assign I 0 | |
498 | movdqu T2,[inp0+IDX+I*16] | |
499 | movdqu T1,[inp1+IDX+I*16] | |
500 | movdqu T4,[inp2+IDX+I*16] | |
501 | movdqu T3,[inp3+IDX+I*16] | |
502 | TRANSPOSE T2, T1, T4, T3, T0, T5 | |
503 | movdqa [mem2+(I*4+0)*16],T0 | |
504 | movdqa [mem2+(I*4+1)*16],T1 | |
505 | movdqa [mem2+(I*4+2)*16],T2 | |
506 | movdqa [mem2+(I*4+3)*16],T3 | |
507 | ||
;; round 1, steps 8-15
508 | MD5_STEP1 MAGIC_F, A,B,C,D, A2,B2,C2,D2, FUN,TMP, mem1 + 8*16, [TBL+ 8*16], rot11 | |
509 | MD5_STEP1 MAGIC_F, D,A,B,C, D2,A2,B2,C2, FUN,TMP, mem1 + 9*16, [TBL+ 9*16], rot12 | |
510 | MD5_STEP1 MAGIC_F, C,D,A,B, C2,D2,A2,B2, FUN,TMP, mem1 +10*16, [TBL+10*16], rot13 | |
511 | MD5_STEP1 MAGIC_F, B,C,D,A, B2,C2,D2,A2, FUN,TMP, mem1 +11*16, [TBL+11*16], rot14 | |
512 | MD5_STEP1 MAGIC_F, A,B,C,D, A2,B2,C2,D2, FUN,TMP, mem1 +12*16, [TBL+12*16], rot11 | |
513 | MD5_STEP1 MAGIC_F, D,A,B,C, D2,A2,B2,C2, FUN,TMP, mem1 +13*16, [TBL+13*16], rot12 | |
514 | MD5_STEP1 MAGIC_F, C,D,A,B, C2,D2,A2,B2, FUN,TMP, mem1 +14*16, [TBL+14*16], rot13 | |
515 | MD5_STEP1 MAGIC_F, B,C,D,A, B2,C2,D2,A2, FUN,TMP, mem1 +15*16, [TBL+15*16], rot14 | |
516 | ||
517 | ||
;; next block, chunk I=0, inputs 4-7
518 | movdqu T2,[inp4+IDX+I*16] | |
519 | movdqu T1,[inp5+IDX+I*16] | |
520 | movdqu T4,[inp6+IDX+I*16] | |
521 | movdqu T3,[inp7+IDX+I*16] | |
522 | TRANSPOSE T2, T1, T4, T3, T0, T5 | |
523 | movdqa [mem2+(I*4+0)*16 + 16*16],T0 | |
524 | movdqa [mem2+(I*4+1)*16 + 16*16],T1 | |
525 | movdqa [mem2+(I*4+2)*16 + 16*16],T2 | |
526 | movdqa [mem2+(I*4+3)*16 + 16*16],T3 | |
527 | %assign I (I+1) | |
528 | ||
;; round 2, steps 16-23
529 | MD5_STEP1 MAGIC_G, A,B,C,D, A2,B2,C2,D2, FUN,TMP, mem1 + 1*16, [TBL+16*16], rot21 | |
530 | MD5_STEP1 MAGIC_G, D,A,B,C, D2,A2,B2,C2, FUN,TMP, mem1 + 6*16, [TBL+17*16], rot22 | |
531 | MD5_STEP1 MAGIC_G, C,D,A,B, C2,D2,A2,B2, FUN,TMP, mem1 +11*16, [TBL+18*16], rot23 | |
532 | MD5_STEP1 MAGIC_G, B,C,D,A, B2,C2,D2,A2, FUN,TMP, mem1 + 0*16, [TBL+19*16], rot24 | |
533 | MD5_STEP1 MAGIC_G, A,B,C,D, A2,B2,C2,D2, FUN,TMP, mem1 + 5*16, [TBL+20*16], rot21 | |
534 | MD5_STEP1 MAGIC_G, D,A,B,C, D2,A2,B2,C2, FUN,TMP, mem1 +10*16, [TBL+21*16], rot22 | |
535 | MD5_STEP1 MAGIC_G, C,D,A,B, C2,D2,A2,B2, FUN,TMP, mem1 +15*16, [TBL+22*16], rot23 | |
536 | MD5_STEP1 MAGIC_G, B,C,D,A, B2,C2,D2,A2, FUN,TMP, mem1 + 4*16, [TBL+23*16], rot24 | |
537 | ||
;; next block, chunk I=1, inputs 0-3
538 | movdqu T2,[inp0+IDX+I*16] | |
539 | movdqu T1,[inp1+IDX+I*16] | |
540 | movdqu T4,[inp2+IDX+I*16] | |
541 | movdqu T3,[inp3+IDX+I*16] | |
542 | TRANSPOSE T2, T1, T4, T3, T0, T5 | |
543 | movdqa [mem2+(I*4+0)*16],T0 | |
544 | movdqa [mem2+(I*4+1)*16],T1 | |
545 | movdqa [mem2+(I*4+2)*16],T2 | |
546 | movdqa [mem2+(I*4+3)*16],T3 | |
547 | ||
;; round 2, steps 24-31
548 | MD5_STEP1 MAGIC_G, A,B,C,D, A2,B2,C2,D2, FUN,TMP, mem1 + 9*16, [TBL+24*16], rot21 | |
549 | MD5_STEP1 MAGIC_G, D,A,B,C, D2,A2,B2,C2, FUN,TMP, mem1 +14*16, [TBL+25*16], rot22 | |
550 | MD5_STEP1 MAGIC_G, C,D,A,B, C2,D2,A2,B2, FUN,TMP, mem1 + 3*16, [TBL+26*16], rot23 | |
551 | MD5_STEP1 MAGIC_G, B,C,D,A, B2,C2,D2,A2, FUN,TMP, mem1 + 8*16, [TBL+27*16], rot24 | |
552 | MD5_STEP1 MAGIC_G, A,B,C,D, A2,B2,C2,D2, FUN,TMP, mem1 +13*16, [TBL+28*16], rot21 | |
553 | MD5_STEP1 MAGIC_G, D,A,B,C, D2,A2,B2,C2, FUN,TMP, mem1 + 2*16, [TBL+29*16], rot22 | |
554 | MD5_STEP1 MAGIC_G, C,D,A,B, C2,D2,A2,B2, FUN,TMP, mem1 + 7*16, [TBL+30*16], rot23 | |
555 | MD5_STEP1 MAGIC_G, B,C,D,A, B2,C2,D2,A2, FUN,TMP, mem1 +12*16, [TBL+31*16], rot24 | |
556 | ||
;; next block, chunk I=1, inputs 4-7
557 | movdqu T2,[inp4+IDX+I*16] | |
558 | movdqu T1,[inp5+IDX+I*16] | |
559 | movdqu T4,[inp6+IDX+I*16] | |
560 | movdqu T3,[inp7+IDX+I*16] | |
561 | TRANSPOSE T2, T1, T4, T3, T0, T5 | |
562 | movdqa [mem2+(I*4+0)*16 + 16*16],T0 | |
563 | movdqa [mem2+(I*4+1)*16 + 16*16],T1 | |
564 | movdqa [mem2+(I*4+2)*16 + 16*16],T2 | |
565 | movdqa [mem2+(I*4+3)*16 + 16*16],T3 | |
566 | %assign I (I+1) | |
567 | ||
;; round 3, steps 32-39
568 | MD5_STEP1 MAGIC_H, A,B,C,D, A2,B2,C2,D2, FUN,TMP, mem1 + 5*16, [TBL+32*16], rot31 | |
569 | MD5_STEP1 MAGIC_H, D,A,B,C, D2,A2,B2,C2, FUN,TMP, mem1 + 8*16, [TBL+33*16], rot32 | |
570 | MD5_STEP1 MAGIC_H, C,D,A,B, C2,D2,A2,B2, FUN,TMP, mem1 +11*16, [TBL+34*16], rot33 | |
571 | MD5_STEP1 MAGIC_H, B,C,D,A, B2,C2,D2,A2, FUN,TMP, mem1 +14*16, [TBL+35*16], rot34 | |
572 | MD5_STEP1 MAGIC_H, A,B,C,D, A2,B2,C2,D2, FUN,TMP, mem1 + 1*16, [TBL+36*16], rot31 | |
573 | MD5_STEP1 MAGIC_H, D,A,B,C, D2,A2,B2,C2, FUN,TMP, mem1 + 4*16, [TBL+37*16], rot32 | |
574 | MD5_STEP1 MAGIC_H, C,D,A,B, C2,D2,A2,B2, FUN,TMP, mem1 + 7*16, [TBL+38*16], rot33 | |
575 | MD5_STEP1 MAGIC_H, B,C,D,A, B2,C2,D2,A2, FUN,TMP, mem1 +10*16, [TBL+39*16], rot34 | |
576 | ||
;; next block, chunk I=2, inputs 0-3
577 | movdqu T2,[inp0+IDX+I*16] | |
578 | movdqu T1,[inp1+IDX+I*16] | |
579 | movdqu T4,[inp2+IDX+I*16] | |
580 | movdqu T3,[inp3+IDX+I*16] | |
581 | TRANSPOSE T2, T1, T4, T3, T0, T5 | |
582 | movdqa [mem2+(I*4+0)*16],T0 | |
583 | movdqa [mem2+(I*4+1)*16],T1 | |
584 | movdqa [mem2+(I*4+2)*16],T2 | |
585 | movdqa [mem2+(I*4+3)*16],T3 | |
586 | ||
;; round 3, steps 40-47
587 | MD5_STEP1 MAGIC_H, A,B,C,D, A2,B2,C2,D2, FUN,TMP, mem1 +13*16, [TBL+40*16], rot31 | |
588 | MD5_STEP1 MAGIC_H, D,A,B,C, D2,A2,B2,C2, FUN,TMP, mem1 + 0*16, [TBL+41*16], rot32 | |
589 | MD5_STEP1 MAGIC_H, C,D,A,B, C2,D2,A2,B2, FUN,TMP, mem1 + 3*16, [TBL+42*16], rot33 | |
590 | MD5_STEP1 MAGIC_H, B,C,D,A, B2,C2,D2,A2, FUN,TMP, mem1 + 6*16, [TBL+43*16], rot34 | |
591 | MD5_STEP1 MAGIC_H, A,B,C,D, A2,B2,C2,D2, FUN,TMP, mem1 + 9*16, [TBL+44*16], rot31 | |
592 | MD5_STEP1 MAGIC_H, D,A,B,C, D2,A2,B2,C2, FUN,TMP, mem1 +12*16, [TBL+45*16], rot32 | |
593 | MD5_STEP1 MAGIC_H, C,D,A,B, C2,D2,A2,B2, FUN,TMP, mem1 +15*16, [TBL+46*16], rot33 | |
594 | MD5_STEP1 MAGIC_H, B,C,D,A, B2,C2,D2,A2, FUN,TMP, mem1 + 2*16, [TBL+47*16], rot34 | |
595 | ||
;; next block, chunk I=2, inputs 4-7
596 | movdqu T2,[inp4+IDX+I*16] | |
597 | movdqu T1,[inp5+IDX+I*16] | |
598 | movdqu T4,[inp6+IDX+I*16] | |
599 | movdqu T3,[inp7+IDX+I*16] | |
600 | TRANSPOSE T2, T1, T4, T3, T0, T5 | |
601 | movdqa [mem2+(I*4+0)*16 + 16*16],T0 | |
602 | movdqa [mem2+(I*4+1)*16 + 16*16],T1 | |
603 | movdqa [mem2+(I*4+2)*16 + 16*16],T2 | |
604 | movdqa [mem2+(I*4+3)*16 + 16*16],T3 | |
605 | %assign I (I+1) | |
606 | ||
;; round 4, steps 48-55
607 | MD5_STEP1 MAGIC_I, A,B,C,D, A2,B2,C2,D2, FUN,TMP, mem1 + 0*16, [TBL+48*16], rot41 | |
608 | MD5_STEP1 MAGIC_I, D,A,B,C, D2,A2,B2,C2, FUN,TMP, mem1 + 7*16, [TBL+49*16], rot42 | |
609 | MD5_STEP1 MAGIC_I, C,D,A,B, C2,D2,A2,B2, FUN,TMP, mem1 +14*16, [TBL+50*16], rot43 | |
610 | MD5_STEP1 MAGIC_I, B,C,D,A, B2,C2,D2,A2, FUN,TMP, mem1 + 5*16, [TBL+51*16], rot44 | |
611 | MD5_STEP1 MAGIC_I, A,B,C,D, A2,B2,C2,D2, FUN,TMP, mem1 +12*16, [TBL+52*16], rot41 | |
612 | MD5_STEP1 MAGIC_I, D,A,B,C, D2,A2,B2,C2, FUN,TMP, mem1 + 3*16, [TBL+53*16], rot42 | |
613 | MD5_STEP1 MAGIC_I, C,D,A,B, C2,D2,A2,B2, FUN,TMP, mem1 +10*16, [TBL+54*16], rot43 | |
614 | MD5_STEP1 MAGIC_I, B,C,D,A, B2,C2,D2,A2, FUN,TMP, mem1 + 1*16, [TBL+55*16], rot44 | |
615 | ||
;; next block, chunk I=3, inputs 0-3
616 | movdqu T2,[inp0+IDX+I*16] | |
617 | movdqu T1,[inp1+IDX+I*16] | |
618 | movdqu T4,[inp2+IDX+I*16] | |
619 | movdqu T3,[inp3+IDX+I*16] | |
620 | TRANSPOSE T2, T1, T4, T3, T0, T5 | |
621 | movdqa [mem2+(I*4+0)*16],T0 | |
622 | movdqa [mem2+(I*4+1)*16],T1 | |
623 | movdqa [mem2+(I*4+2)*16],T2 | |
624 | movdqa [mem2+(I*4+3)*16],T3 | |
625 | ||
;; round 4, steps 56-63
626 | MD5_STEP1 MAGIC_I, A,B,C,D, A2,B2,C2,D2, FUN,TMP, mem1 + 8*16, [TBL+56*16], rot41 | |
627 | MD5_STEP1 MAGIC_I, D,A,B,C, D2,A2,B2,C2, FUN,TMP, mem1 +15*16, [TBL+57*16], rot42 | |
628 | MD5_STEP1 MAGIC_I, C,D,A,B, C2,D2,A2,B2, FUN,TMP, mem1 + 6*16, [TBL+58*16], rot43 | |
629 | MD5_STEP1 MAGIC_I, B,C,D,A, B2,C2,D2,A2, FUN,TMP, mem1 +13*16, [TBL+59*16], rot44 | |
630 | MD5_STEP1 MAGIC_I, A,B,C,D, A2,B2,C2,D2, FUN,TMP, mem1 + 4*16, [TBL+60*16], rot41 | |
631 | MD5_STEP1 MAGIC_I, D,A,B,C, D2,A2,B2,C2, FUN,TMP, mem1 +11*16, [TBL+61*16], rot42 | |
632 | MD5_STEP1 MAGIC_I, C,D,A,B, C2,D2,A2,B2, FUN,TMP, mem1 + 2*16, [TBL+62*16], rot43 | |
633 | MD5_STEP1 MAGIC_I, B,C,D,A, B2,C2,D2,A2, FUN,TMP, mem1 + 9*16, [TBL+63*16], rot44 | |
634 | ||
;; next block, chunk I=3, inputs 4-7 (mem2 is complete after this)
635 | movdqu T2,[inp4+IDX+I*16] | |
636 | movdqu T1,[inp5+IDX+I*16] | |
637 | movdqu T4,[inp6+IDX+I*16] | |
638 | movdqu T3,[inp7+IDX+I*16] | |
639 | TRANSPOSE T2, T1, T4, T3, T0, T5 | |
640 | movdqa [mem2+(I*4+0)*16 + 16*16],T0 | |
641 | movdqa [mem2+(I*4+1)*16 + 16*16],T1 | |
642 | movdqa [mem2+(I*4+2)*16 + 16*16],T2 | |
643 | movdqa [mem2+(I*4+3)*16 + 16*16],T3 | |
644 | %assign I (I+1) | |
645 | ||
646 | ||
;; add the digest values saved at the top of the loop
647 | paddd A,[AA] | |
648 | paddd B,[BB] | |
649 | paddd C,[CC] | |
650 | paddd D,[DD] | |
651 | ||
652 | paddd A2,[AA2] | |
653 | paddd B2,[BB2] | |
654 | paddd C2,[CC2] | |
655 | paddd D2,[DD2] | |
656 | ||
657 | ; swap mem1 and mem2 | |
;; the block just loaded into mem2 becomes the one hashed next
658 | xchg mem1, mem2 | |
659 | ||
660 | jmp lloop | |
661 | ||
;; Final block: hash the block at mem1 without loading another one.
;; There are no loads in this path, so the steps use MD5_STEP with a second
;; scratch pair FUN2/TMP2 (the same registers as T0/T1).
;; The old digests were already saved at the top of lloop before the jump.
662 | lastblock: | |
663 | ||
;; round 1
664 | MD5_STEP MAGIC_F, A,B,C,D, A2,B2,C2,D2, FUN,TMP, FUN2,TMP2, mem1 + 0*16, [TBL+ 0*16], rot11 | |
665 | MD5_STEP MAGIC_F, D,A,B,C, D2,A2,B2,C2, FUN,TMP, FUN2,TMP2, mem1 + 1*16, [TBL+ 1*16], rot12 | |
666 | MD5_STEP MAGIC_F, C,D,A,B, C2,D2,A2,B2, FUN,TMP, FUN2,TMP2, mem1 + 2*16, [TBL+ 2*16], rot13 | |
667 | MD5_STEP MAGIC_F, B,C,D,A, B2,C2,D2,A2, FUN,TMP, FUN2,TMP2, mem1 + 3*16, [TBL+ 3*16], rot14 | |
668 | MD5_STEP MAGIC_F, A,B,C,D, A2,B2,C2,D2, FUN,TMP, FUN2,TMP2, mem1 + 4*16, [TBL+ 4*16], rot11 | |
669 | MD5_STEP MAGIC_F, D,A,B,C, D2,A2,B2,C2, FUN,TMP, FUN2,TMP2, mem1 + 5*16, [TBL+ 5*16], rot12 | |
670 | MD5_STEP MAGIC_F, C,D,A,B, C2,D2,A2,B2, FUN,TMP, FUN2,TMP2, mem1 + 6*16, [TBL+ 6*16], rot13 | |
671 | MD5_STEP MAGIC_F, B,C,D,A, B2,C2,D2,A2, FUN,TMP, FUN2,TMP2, mem1 + 7*16, [TBL+ 7*16], rot14 | |
672 | MD5_STEP MAGIC_F, A,B,C,D, A2,B2,C2,D2, FUN,TMP, FUN2,TMP2, mem1 + 8*16, [TBL+ 8*16], rot11 | |
673 | MD5_STEP MAGIC_F, D,A,B,C, D2,A2,B2,C2, FUN,TMP, FUN2,TMP2, mem1 + 9*16, [TBL+ 9*16], rot12 | |
674 | MD5_STEP MAGIC_F, C,D,A,B, C2,D2,A2,B2, FUN,TMP, FUN2,TMP2, mem1 +10*16, [TBL+10*16], rot13 | |
675 | MD5_STEP MAGIC_F, B,C,D,A, B2,C2,D2,A2, FUN,TMP, FUN2,TMP2, mem1 +11*16, [TBL+11*16], rot14 | |
676 | MD5_STEP MAGIC_F, A,B,C,D, A2,B2,C2,D2, FUN,TMP, FUN2,TMP2, mem1 +12*16, [TBL+12*16], rot11 | |
677 | MD5_STEP MAGIC_F, D,A,B,C, D2,A2,B2,C2, FUN,TMP, FUN2,TMP2, mem1 +13*16, [TBL+13*16], rot12 | |
678 | MD5_STEP MAGIC_F, C,D,A,B, C2,D2,A2,B2, FUN,TMP, FUN2,TMP2, mem1 +14*16, [TBL+14*16], rot13 | |
679 | MD5_STEP MAGIC_F, B,C,D,A, B2,C2,D2,A2, FUN,TMP, FUN2,TMP2, mem1 +15*16, [TBL+15*16], rot14 | |
680 | ||
;; round 2
681 | MD5_STEP MAGIC_G, A,B,C,D, A2,B2,C2,D2, FUN,TMP, FUN2,TMP2, mem1 + 1*16, [TBL+16*16], rot21 | |
682 | MD5_STEP MAGIC_G, D,A,B,C, D2,A2,B2,C2, FUN,TMP, FUN2,TMP2, mem1 + 6*16, [TBL+17*16], rot22 | |
683 | MD5_STEP MAGIC_G, C,D,A,B, C2,D2,A2,B2, FUN,TMP, FUN2,TMP2, mem1 +11*16, [TBL+18*16], rot23 | |
684 | MD5_STEP MAGIC_G, B,C,D,A, B2,C2,D2,A2, FUN,TMP, FUN2,TMP2, mem1 + 0*16, [TBL+19*16], rot24 | |
685 | MD5_STEP MAGIC_G, A,B,C,D, A2,B2,C2,D2, FUN,TMP, FUN2,TMP2, mem1 + 5*16, [TBL+20*16], rot21 | |
686 | MD5_STEP MAGIC_G, D,A,B,C, D2,A2,B2,C2, FUN,TMP, FUN2,TMP2, mem1 +10*16, [TBL+21*16], rot22 | |
687 | MD5_STEP MAGIC_G, C,D,A,B, C2,D2,A2,B2, FUN,TMP, FUN2,TMP2, mem1 +15*16, [TBL+22*16], rot23 | |
688 | MD5_STEP MAGIC_G, B,C,D,A, B2,C2,D2,A2, FUN,TMP, FUN2,TMP2, mem1 + 4*16, [TBL+23*16], rot24 | |
689 | MD5_STEP MAGIC_G, A,B,C,D, A2,B2,C2,D2, FUN,TMP, FUN2,TMP2, mem1 + 9*16, [TBL+24*16], rot21 | |
690 | MD5_STEP MAGIC_G, D,A,B,C, D2,A2,B2,C2, FUN,TMP, FUN2,TMP2, mem1 +14*16, [TBL+25*16], rot22 | |
691 | MD5_STEP MAGIC_G, C,D,A,B, C2,D2,A2,B2, FUN,TMP, FUN2,TMP2, mem1 + 3*16, [TBL+26*16], rot23 | |
692 | MD5_STEP MAGIC_G, B,C,D,A, B2,C2,D2,A2, FUN,TMP, FUN2,TMP2, mem1 + 8*16, [TBL+27*16], rot24 | |
693 | MD5_STEP MAGIC_G, A,B,C,D, A2,B2,C2,D2, FUN,TMP, FUN2,TMP2, mem1 +13*16, [TBL+28*16], rot21 | |
694 | MD5_STEP MAGIC_G, D,A,B,C, D2,A2,B2,C2, FUN,TMP, FUN2,TMP2, mem1 + 2*16, [TBL+29*16], rot22 | |
695 | MD5_STEP MAGIC_G, C,D,A,B, C2,D2,A2,B2, FUN,TMP, FUN2,TMP2, mem1 + 7*16, [TBL+30*16], rot23 | |
696 | MD5_STEP MAGIC_G, B,C,D,A, B2,C2,D2,A2, FUN,TMP, FUN2,TMP2, mem1 +12*16, [TBL+31*16], rot24 | |
697 | ||
;; round 3
698 | MD5_STEP MAGIC_H, A,B,C,D, A2,B2,C2,D2, FUN,TMP, FUN2,TMP2, mem1 + 5*16, [TBL+32*16], rot31 | |
699 | MD5_STEP MAGIC_H, D,A,B,C, D2,A2,B2,C2, FUN,TMP, FUN2,TMP2, mem1 + 8*16, [TBL+33*16], rot32 | |
700 | MD5_STEP MAGIC_H, C,D,A,B, C2,D2,A2,B2, FUN,TMP, FUN2,TMP2, mem1 +11*16, [TBL+34*16], rot33 | |
701 | MD5_STEP MAGIC_H, B,C,D,A, B2,C2,D2,A2, FUN,TMP, FUN2,TMP2, mem1 +14*16, [TBL+35*16], rot34 | |
702 | MD5_STEP MAGIC_H, A,B,C,D, A2,B2,C2,D2, FUN,TMP, FUN2,TMP2, mem1 + 1*16, [TBL+36*16], rot31 | |
703 | MD5_STEP MAGIC_H, D,A,B,C, D2,A2,B2,C2, FUN,TMP, FUN2,TMP2, mem1 + 4*16, [TBL+37*16], rot32 | |
704 | MD5_STEP MAGIC_H, C,D,A,B, C2,D2,A2,B2, FUN,TMP, FUN2,TMP2, mem1 + 7*16, [TBL+38*16], rot33 | |
705 | MD5_STEP MAGIC_H, B,C,D,A, B2,C2,D2,A2, FUN,TMP, FUN2,TMP2, mem1 +10*16, [TBL+39*16], rot34 | |
706 | MD5_STEP MAGIC_H, A,B,C,D, A2,B2,C2,D2, FUN,TMP, FUN2,TMP2, mem1 +13*16, [TBL+40*16], rot31 | |
707 | MD5_STEP MAGIC_H, D,A,B,C, D2,A2,B2,C2, FUN,TMP, FUN2,TMP2, mem1 + 0*16, [TBL+41*16], rot32 | |
708 | MD5_STEP MAGIC_H, C,D,A,B, C2,D2,A2,B2, FUN,TMP, FUN2,TMP2, mem1 + 3*16, [TBL+42*16], rot33 | |
709 | MD5_STEP MAGIC_H, B,C,D,A, B2,C2,D2,A2, FUN,TMP, FUN2,TMP2, mem1 + 6*16, [TBL+43*16], rot34 | |
710 | MD5_STEP MAGIC_H, A,B,C,D, A2,B2,C2,D2, FUN,TMP, FUN2,TMP2, mem1 + 9*16, [TBL+44*16], rot31 | |
711 | MD5_STEP MAGIC_H, D,A,B,C, D2,A2,B2,C2, FUN,TMP, FUN2,TMP2, mem1 +12*16, [TBL+45*16], rot32 | |
712 | MD5_STEP MAGIC_H, C,D,A,B, C2,D2,A2,B2, FUN,TMP, FUN2,TMP2, mem1 +15*16, [TBL+46*16], rot33 | |
713 | MD5_STEP MAGIC_H, B,C,D,A, B2,C2,D2,A2, FUN,TMP, FUN2,TMP2, mem1 + 2*16, [TBL+47*16], rot34 | |
714 | ||
;; round 4
715 | MD5_STEP MAGIC_I, A,B,C,D, A2,B2,C2,D2, FUN,TMP, FUN2,TMP2, mem1 + 0*16, [TBL+48*16], rot41 | |
716 | MD5_STEP MAGIC_I, D,A,B,C, D2,A2,B2,C2, FUN,TMP, FUN2,TMP2, mem1 + 7*16, [TBL+49*16], rot42 | |
717 | MD5_STEP MAGIC_I, C,D,A,B, C2,D2,A2,B2, FUN,TMP, FUN2,TMP2, mem1 +14*16, [TBL+50*16], rot43 | |
718 | MD5_STEP MAGIC_I, B,C,D,A, B2,C2,D2,A2, FUN,TMP, FUN2,TMP2, mem1 + 5*16, [TBL+51*16], rot44 | |
719 | MD5_STEP MAGIC_I, A,B,C,D, A2,B2,C2,D2, FUN,TMP, FUN2,TMP2, mem1 +12*16, [TBL+52*16], rot41 | |
720 | MD5_STEP MAGIC_I, D,A,B,C, D2,A2,B2,C2, FUN,TMP, FUN2,TMP2, mem1 + 3*16, [TBL+53*16], rot42 | |
721 | MD5_STEP MAGIC_I, C,D,A,B, C2,D2,A2,B2, FUN,TMP, FUN2,TMP2, mem1 +10*16, [TBL+54*16], rot43 | |
722 | MD5_STEP MAGIC_I, B,C,D,A, B2,C2,D2,A2, FUN,TMP, FUN2,TMP2, mem1 + 1*16, [TBL+55*16], rot44 | |
723 | MD5_STEP MAGIC_I, A,B,C,D, A2,B2,C2,D2, FUN,TMP, FUN2,TMP2, mem1 + 8*16, [TBL+56*16], rot41 | |
724 | MD5_STEP MAGIC_I, D,A,B,C, D2,A2,B2,C2, FUN,TMP, FUN2,TMP2, mem1 +15*16, [TBL+57*16], rot42 | |
725 | MD5_STEP MAGIC_I, C,D,A,B, C2,D2,A2,B2, FUN,TMP, FUN2,TMP2, mem1 + 6*16, [TBL+58*16], rot43 | |
726 | MD5_STEP MAGIC_I, B,C,D,A, B2,C2,D2,A2, FUN,TMP, FUN2,TMP2, mem1 +13*16, [TBL+59*16], rot44 | |
727 | MD5_STEP MAGIC_I, A,B,C,D, A2,B2,C2,D2, FUN,TMP, FUN2,TMP2, mem1 + 4*16, [TBL+60*16], rot41 | |
728 | MD5_STEP MAGIC_I, D,A,B,C, D2,A2,B2,C2, FUN,TMP, FUN2,TMP2, mem1 +11*16, [TBL+61*16], rot42 | |
729 | MD5_STEP MAGIC_I, C,D,A,B, C2,D2,A2,B2, FUN,TMP, FUN2,TMP2, mem1 + 2*16, [TBL+62*16], rot43 | |
730 | MD5_STEP MAGIC_I, B,C,D,A, B2,C2,D2,A2, FUN,TMP, FUN2,TMP2, mem1 + 9*16, [TBL+63*16], rot44 | |
731 | ||
;; add the digest values saved before the jump to lastblock
732 | paddd A,[AA] | |
733 | paddd B,[BB] | |
734 | paddd C,[CC] | |
735 | paddd D,[DD] | |
736 | ||
737 | paddd A2,[AA2] | |
738 | paddd B2,[BB2] | |
739 | paddd C2,[CC2] | |
740 | paddd D2,[DD2] | |
741 | ||
742 | ; write out digests | |
743 | movdqu [arg1 + 0*16 + 0*MD5_DIGEST_ROW_SIZE], A | |
744 | movdqu [arg1 + 0*16 + 1*MD5_DIGEST_ROW_SIZE], B | |
745 | movdqu [arg1 + 0*16 + 2*MD5_DIGEST_ROW_SIZE], C | |
746 | movdqu [arg1 + 0*16 + 3*MD5_DIGEST_ROW_SIZE], D | |
747 | movdqu [arg1 + 1*16 + 0*MD5_DIGEST_ROW_SIZE], A2 | |
748 | movdqu [arg1 + 1*16 + 1*MD5_DIGEST_ROW_SIZE], B2 | |
749 | movdqu [arg1 + 1*16 + 2*MD5_DIGEST_ROW_SIZE], C2 | |
750 | movdqu [arg1 + 1*16 + 3*MD5_DIGEST_ROW_SIZE], D2 | |
751 | ||
752 | ;; update input pointers | |
;; IDX was advanced by 4*16 once per processed block, so each pointer moves
;; past all the data consumed by this call
753 | add inp0, IDX | |
754 | add inp1, IDX | |
755 | add inp2, IDX | |
756 | add inp3, IDX | |
757 | add inp4, IDX | |
758 | add inp5, IDX | |
759 | add inp6, IDX | |
760 | add inp7, IDX | |
761 | mov [arg1 +_data_ptr_md5 + 0*PTR_SZ], inp0 | |
762 | mov [arg1 +_data_ptr_md5 + 1*PTR_SZ], inp1 | |
763 | mov [arg1 +_data_ptr_md5 + 2*PTR_SZ], inp2 | |
764 | mov [arg1 +_data_ptr_md5 + 3*PTR_SZ], inp3 | |
765 | mov [arg1 +_data_ptr_md5 + 4*PTR_SZ], inp4 | |
766 | mov [arg1 +_data_ptr_md5 + 5*PTR_SZ], inp5 | |
767 | mov [arg1 +_data_ptr_md5 + 6*PTR_SZ], inp6 | |
768 | mov [arg1 +_data_ptr_md5 + 7*PTR_SZ], inp7 | |
769 | ||
f67539c2 TL |
770 | ;; Clear stack frame (72*16 bytes) |
;; 72 = 64 data rows + 8 digest rows; the trailing 8-byte pad is not cleared.
;; xmm0 (A) can be zeroed here because the digests were written out above.
771 | %ifdef SAFE_DATA | |
772 | pxor xmm0, xmm0 | |
773 | %assign i 0 | |
774 | %rep (2*2*16+8) | |
775 | movdqa [rsp + i*16], xmm0 | |
776 | %assign i (i+1) | |
777 | %endrep | |
778 | %endif | |
779 | ||
11fdf7f2 TL |
780 | ;;;;;;;;;;;;;;;; |
781 | ;; Postamble | |
;; release the frame reserved at function entry
782 | add rsp, STACK_SIZE | |
783 | ret | |
784 | ||
785 | %ifdef LINUX | |
786 | section .note.GNU-stack noalloc noexec nowrite progbits | |
787 | %endif |