]>
Commit | Line | Data |
---|---|---|
4102950a CZ |
1 | ; WARNING: do not edit!\r |
2 | ; Generated from openssl/crypto/modes/asm/ghash-x86.pl\r | |
3 | ;\r | |
4 | ; Copyright 2010-2020 The OpenSSL Project Authors. All Rights Reserved.\r | |
5 | ;\r | |
6 | ; Licensed under the OpenSSL license (the "License"). You may not use\r | |
7 | ; this file except in compliance with the License. You can obtain a copy\r | |
8 | ; in the file LICENSE in the source distribution or at\r | |
9 | ; https://www.openssl.org/source/license.html\r | |
10 | \r | |
; Pick the code section according to the NASM output format:
;   obj   -> 32-bit segment named "code", class=code, aligned to 64
;   win32 -> .text aligned to 64 (plus the $@feat.00 symbol below)
;   other -> plain .text
11 | %ifidn __OUTPUT_FORMAT__,obj\r | |
12 | section code use32 class=code align=64\r | |
13 | %elifidn __OUTPUT_FORMAT__,win32\r | |
; NOTE(review): $@feat.00 = 1 is presumably the COFF "safe exception handlers" feature marker -- confirm against the NASM win32 documentation.
14 | $@feat.00 equ 1\r | |
15 | section .text code align=64\r | |
16 | %else\r | |
17 | section .text code\r | |
18 | %endif\r | |
; _gcm_gmult_4bit_x86 -- integer-only, 4-bit table-driven transform of one 16-byte block, in place.
; Stack arguments (32-bit code, plain `ret`, so the arguments stay on the caller's stack):
;   arg0 = pointer to the 16-byte block (read on entry, overwritten with the result)
;   arg1 = pointer to a table of 16-byte entries, addressed as nibble*16
; NOTE(review): presumably Xi and Htable of OpenSSL's gcm_gmult_4bit -- confirm against the C prototype.
; Preserves ebp, ebx, esi, edi (pushed/popped); eax, ecx, edx are used as scratch.
; Frame (84 bytes): [esp..esp+15] = copy of the block, [esp+16..esp+79] = 16 dword reduction constants.
19 | global _gcm_gmult_4bit_x86\r | |
20 | align 16\r | |
21 | _gcm_gmult_4bit_x86:\r | |
22 | L$_gcm_gmult_4bit_x86_begin:\r | |
23 | push ebp\r | |
24 | push ebx\r | |
25 | push esi\r | |
26 | push edi\r | |
27 | sub esp,84\r | |
; 4 pushes (16) + return address (4) + 84 = 104, so [104+esp] is arg0 and [108+esp] is arg1.
28 | mov edi,DWORD [104+esp]\r | |
29 | mov esi,DWORD [108+esp]\r | |
; Load the four dwords of the block.
30 | mov ebp,DWORD [edi]\r | |
31 | mov edx,DWORD [4+edi]\r | |
32 | mov ecx,DWORD [8+edi]\r | |
33 | mov ebx,DWORD [12+edi]\r | |
; Build the 16-entry dword constant table on the stack; the loop indexes it as [16+eax*4+esp]
; with eax = the 4 bits shifted out of the low end of the accumulator.
34 | mov DWORD [16+esp],0\r | |
35 | mov DWORD [20+esp],471859200\r | |
36 | mov DWORD [24+esp],943718400\r | |
37 | mov DWORD [28+esp],610271232\r | |
38 | mov DWORD [32+esp],1887436800\r | |
39 | mov DWORD [36+esp],1822425088\r | |
40 | mov DWORD [40+esp],1220542464\r | |
41 | mov DWORD [44+esp],1423966208\r | |
42 | mov DWORD [48+esp],3774873600\r | |
43 | mov DWORD [52+esp],4246732800\r | |
44 | mov DWORD [56+esp],3644850176\r | |
45 | mov DWORD [60+esp],3311403008\r | |
46 | mov DWORD [64+esp],2441084928\r | |
47 | mov DWORD [68+esp],2376073216\r | |
48 | mov DWORD [72+esp],2847932416\r | |
49 | mov DWORD [76+esp],3051356160\r | |
; Keep an unmodified copy of the block at [esp..esp+15]; the loop reads it one byte at a time.
50 | mov DWORD [esp],ebp\r | |
51 | mov DWORD [4+esp],edx\r | |
52 | mov DWORD [8+esp],ecx\r | |
53 | mov DWORD [12+esp],ebx\r | |
; (dword at offset 12 >> 20) & 0xF0 = low nibble of byte 15, already multiplied by 16 (the table entry size).
54 | shr ebx,20\r | |
55 | and ebx,240\r | |
; Seed the 128-bit accumulator ebp:edx:ecx:ebx from that table entry
; (entry dwords 0,4 -> edx,ebp; entry dwords 8,12 -> ebx,ecx). ebx is loaded last because it is the index.
56 | mov ebp,DWORD [4+ebx*1+esi]\r | |
57 | mov edx,DWORD [ebx*1+esi]\r | |
58 | mov ecx,DWORD [12+ebx*1+esi]\r | |
59 | mov ebx,DWORD [8+ebx*1+esi]\r | |
; eax is cleared once; only al is written afterwards, so eax is always a zero-extended index.
60 | xor eax,eax\r | |
; edi = index of the block byte whose high nibble is consumed next (15 down to 0).
61 | mov edi,15\r | |
62 | jmp NEAR L$000x86_loop\r | |
63 | align 16\r | |
64 | L$000x86_loop:\r | |
; Shift ebp:edx:ecx:ebx right by 4 bits; al keeps the 4 bits that fall off the low end
; and selects the stack constant XORed into the top dword.
65 | mov al,bl\r | |
66 | shrd ebx,ecx,4\r | |
67 | and al,15\r | |
68 | shrd ecx,edx,4\r | |
69 | shrd edx,ebp,4\r | |
70 | shr ebp,4\r | |
71 | xor ebp,DWORD [16+eax*4+esp]\r | |
; High nibble of byte [edi] (the 0xF0 mask leaves it as nibble*16) selects the table entry to XOR in.
72 | mov al,BYTE [edi*1+esp]\r | |
73 | and al,240\r | |
74 | xor ebx,DWORD [8+eax*1+esi]\r | |
75 | xor ecx,DWORD [12+eax*1+esi]\r | |
76 | xor edx,DWORD [eax*1+esi]\r | |
77 | xor ebp,DWORD [4+eax*1+esi]\r | |
; Step to the previous byte; exit once the high nibble of byte 0 has been consumed.
78 | dec edi\r | |
79 | js NEAR L$001x86_break\r | |
; Same shift/reduce step again, this time followed by the LOW nibble of the new byte (shl al,4 = nibble*16).
80 | mov al,bl\r | |
81 | shrd ebx,ecx,4\r | |
82 | and al,15\r | |
83 | shrd ecx,edx,4\r | |
84 | shrd edx,ebp,4\r | |
85 | shr ebp,4\r | |
86 | xor ebp,DWORD [16+eax*4+esp]\r | |
87 | mov al,BYTE [edi*1+esp]\r | |
88 | shl al,4\r | |
89 | xor ebx,DWORD [8+eax*1+esi]\r | |
90 | xor ecx,DWORD [12+eax*1+esi]\r | |
91 | xor edx,DWORD [eax*1+esi]\r | |
92 | xor ebp,DWORD [4+eax*1+esi]\r | |
93 | jmp NEAR L$000x86_loop\r | |
94 | align 16\r | |
95 | L$001x86_break:\r | |
; Byte-swap each accumulator dword, then write the result back over the input block:
; ebx -> offset 12, ecx -> 8, edx -> 4, ebp -> 0.
96 | bswap ebx\r | |
97 | bswap ecx\r | |
98 | bswap edx\r | |
99 | bswap ebp\r | |
; edi was used as the loop counter, so arg0 is reloaded from the stack.
100 | mov edi,DWORD [104+esp]\r | |
101 | mov DWORD [12+edi],ebx\r | |
102 | mov DWORD [8+edi],ecx\r | |
103 | mov DWORD [4+edi],edx\r | |
104 | mov DWORD [edi],ebp\r | |
; Release the frame and restore the saved registers in reverse push order.
105 | add esp,84\r | |
106 | pop edi\r | |
107 | pop esi\r | |
108 | pop ebx\r | |
109 | pop ebp\r | |
110 | ret\r | |
; _gcm_ghash_4bit_x86 -- integer-only variant that folds a run of 16-byte input blocks into a 16-byte state.
; Stack arguments (32-bit code, plain `ret`):
;   arg0 = pointer to the 16-byte state (read on entry, rewritten once at the end)
;   arg1 = pointer to a table of 16-byte entries, addressed as nibble*16
;   arg2 = pointer to the input data
;   arg3 = input length in bytes (added to arg2 to form the end pointer)
; NOTE(review): presumably Xi, Htable, inp, len of OpenSSL's gcm_ghash_4bit -- confirm against the C prototype.
; The loop body runs before the end-pointer check, so at least one 16-byte block is always consumed;
; the input advances 16 bytes per pass (NOTE(review): callers presumably pass a non-zero multiple of 16 -- confirm).
; Preserves ebp, ebx, esi, edi (pushed/popped); eax, ecx, edx are used as scratch.
; Frame (84 bytes): [esp..esp+15] = state XOR current block, [esp+16..esp+79] = 16 dword reduction constants.
; The arg2 and arg3 stack slots are overwritten (current input pointer / end pointer).
111 | global _gcm_ghash_4bit_x86\r | |
112 | align 16\r | |
113 | _gcm_ghash_4bit_x86:\r | |
114 | L$_gcm_ghash_4bit_x86_begin:\r | |
115 | push ebp\r | |
116 | push ebx\r | |
117 | push esi\r | |
118 | push edi\r | |
119 | sub esp,84\r | |
; 4 pushes (16) + return address (4) + 84 = 104, so arg0..arg3 live at [104+esp]..[116+esp].
120 | mov ebx,DWORD [104+esp]\r | |
121 | mov esi,DWORD [108+esp]\r | |
122 | mov edi,DWORD [112+esp]\r | |
123 | mov ecx,DWORD [116+esp]\r | |
; Turn the length into an end pointer and park it in the arg3 slot.
124 | add ecx,edi\r | |
125 | mov DWORD [116+esp],ecx\r | |
; Load the four dwords of the state; ebx is loaded last because it holds the state pointer.
126 | mov ebp,DWORD [ebx]\r | |
127 | mov edx,DWORD [4+ebx]\r | |
128 | mov ecx,DWORD [8+ebx]\r | |
129 | mov ebx,DWORD [12+ebx]\r | |
; Build the 16-entry dword constant table on the stack; the inner loop indexes it as [16+eax*4+esp].
130 | mov DWORD [16+esp],0\r | |
131 | mov DWORD [20+esp],471859200\r | |
132 | mov DWORD [24+esp],943718400\r | |
133 | mov DWORD [28+esp],610271232\r | |
134 | mov DWORD [32+esp],1887436800\r | |
135 | mov DWORD [36+esp],1822425088\r | |
136 | mov DWORD [40+esp],1220542464\r | |
137 | mov DWORD [44+esp],1423966208\r | |
138 | mov DWORD [48+esp],3774873600\r | |
139 | mov DWORD [52+esp],4246732800\r | |
140 | mov DWORD [56+esp],3644850176\r | |
141 | mov DWORD [60+esp],3311403008\r | |
142 | mov DWORD [64+esp],2441084928\r | |
143 | mov DWORD [68+esp],2376073216\r | |
144 | mov DWORD [72+esp],2847932416\r | |
145 | mov DWORD [76+esp],3051356160\r | |
146 | align 16\r | |
147 | L$002x86_outer_loop:\r | |
; XOR the next 16 input bytes (edi = input pointer here) into the running state.
148 | xor ebx,DWORD [12+edi]\r | |
149 | xor ecx,DWORD [8+edi]\r | |
150 | xor edx,DWORD [4+edi]\r | |
151 | xor ebp,DWORD [edi]\r | |
; Spill the combined block to [esp..esp+15]; the inner loop reads it one byte at a time.
152 | mov DWORD [12+esp],ebx\r | |
153 | mov DWORD [8+esp],ecx\r | |
154 | mov DWORD [4+esp],edx\r | |
155 | mov DWORD [esp],ebp\r | |
; (dword at offset 12 >> 20) & 0xF0 = low nibble of byte 15, already multiplied by 16 (the table entry size).
156 | shr ebx,20\r | |
157 | and ebx,240\r | |
; Seed the 128-bit accumulator ebp:edx:ecx:ebx from that table entry.
158 | mov ebp,DWORD [4+ebx*1+esi]\r | |
159 | mov edx,DWORD [ebx*1+esi]\r | |
160 | mov ecx,DWORD [12+ebx*1+esi]\r | |
161 | mov ebx,DWORD [8+ebx*1+esi]\r | |
; eax is cleared; only al is written afterwards, so eax is always a zero-extended index.
162 | xor eax,eax\r | |
; edi is reused as the byte index (15 down to 0); the input pointer is reloaded from [112+esp] later.
163 | mov edi,15\r | |
164 | jmp NEAR L$003x86_loop\r | |
165 | align 16\r | |
166 | L$003x86_loop:\r | |
; Shift ebp:edx:ecx:ebx right by 4 bits; al keeps the 4 bits that fall off the low end
; and selects the stack constant XORed into the top dword.
167 | mov al,bl\r | |
168 | shrd ebx,ecx,4\r | |
169 | and al,15\r | |
170 | shrd ecx,edx,4\r | |
171 | shrd edx,ebp,4\r | |
172 | shr ebp,4\r | |
173 | xor ebp,DWORD [16+eax*4+esp]\r | |
; High nibble of byte [edi] (the 0xF0 mask leaves it as nibble*16) selects the table entry to XOR in.
174 | mov al,BYTE [edi*1+esp]\r | |
175 | and al,240\r | |
176 | xor ebx,DWORD [8+eax*1+esi]\r | |
177 | xor ecx,DWORD [12+eax*1+esi]\r | |
178 | xor edx,DWORD [eax*1+esi]\r | |
179 | xor ebp,DWORD [4+eax*1+esi]\r | |
; Step to the previous byte; exit once the high nibble of byte 0 has been consumed.
180 | dec edi\r | |
181 | js NEAR L$004x86_break\r | |
; Same shift/reduce step again, followed by the LOW nibble of the new byte (shl al,4 = nibble*16).
182 | mov al,bl\r | |
183 | shrd ebx,ecx,4\r | |
184 | and al,15\r | |
185 | shrd ecx,edx,4\r | |
186 | shrd edx,ebp,4\r | |
187 | shr ebp,4\r | |
188 | xor ebp,DWORD [16+eax*4+esp]\r | |
189 | mov al,BYTE [edi*1+esp]\r | |
190 | shl al,4\r | |
191 | xor ebx,DWORD [8+eax*1+esi]\r | |
192 | xor ecx,DWORD [12+eax*1+esi]\r | |
193 | xor edx,DWORD [eax*1+esi]\r | |
194 | xor ebp,DWORD [4+eax*1+esi]\r | |
195 | jmp NEAR L$003x86_loop\r | |
196 | align 16\r | |
197 | L$004x86_break:\r | |
; Byte-swap each accumulator dword; the registers now hold the updated state for the next pass.
198 | bswap ebx\r | |
199 | bswap ecx\r | |
200 | bswap edx\r | |
201 | bswap ebp\r | |
; Advance the input pointer by one block and compare it (unsigned) with the end pointer.
; The `mov` between cmp and jb does not touch the flags.
202 | mov edi,DWORD [112+esp]\r | |
203 | lea edi,[16+edi]\r | |
204 | cmp edi,DWORD [116+esp]\r | |
205 | mov DWORD [112+esp],edi\r | |
206 | jb NEAR L$002x86_outer_loop\r | |
; All input consumed: store the state back through arg0 (ebx -> 12, ecx -> 8, edx -> 4, ebp -> 0).
207 | mov edi,DWORD [104+esp]\r | |
208 | mov DWORD [12+edi],ebx\r | |
209 | mov DWORD [8+edi],ecx\r | |
210 | mov DWORD [4+edi],edx\r | |
211 | mov DWORD [edi],ebp\r | |
; Release the frame and restore the saved registers in reverse push order.
212 | add esp,84\r | |
213 | pop edi\r | |
214 | pop esi\r | |
215 | pop ebx\r | |
216 | pop ebp\r | |
217 | ret\r | |
; __mmx_gmult_4bit_inner -- file-local (not declared global) MMX core shared by the two *_mmx entry points in this file.
; Fully unrolled: consumes the block one nibble at a time, from byte 15 down to byte 0.
; In:
;   esi = base of the table of 16-byte entries (addressed as nibble*16)
;   edi = pointer to the 16-byte block; bytes 14..0 are read from memory here
;   ebx = byte 15 of the block in its low 8 bits (only bl is used)
;   eax = base of a constant table read with an 8-byte stride ([idx*8+eax])
; Out:
;   ebx, ecx, edx, ebp = result dwords, each already byte-swapped (the callers store them at offsets 12, 8, 4, 0)
; Clobbers: edi, mm0, mm1, mm2, flags. eax and esi are left unchanged.
; Does not execute emms; both callers do so after the call returns.
; Working state: mm1:mm0 is the 128-bit accumulator (mm1 = entry bytes 0..7, mm0 = entry bytes 8..15).
; Each nibble step: movd/and 15 capture the low 4 bits of mm0, mm1:mm0 is shifted right by 4
; (mm2 = mm1 << 60 carries the bits across), a 16-byte table entry is XORed in, and the
; captured 4 bits index [idx*8+eax], which is XORed into mm1. ebp and ebx alternate as that index.
; The steps are interleaved (loads for the next nibble are issued inside the previous step), so the
; instruction order below must not be changed.
218 | align 16\r | |
219 | __mmx_gmult_4bit_inner:\r | |
; Split byte 15: ecx = low nibble * 16 (shl cl,4 drops the high nibble), edx = high nibble * 16.
220 | xor ecx,ecx\r | |
221 | mov edx,ebx\r | |
222 | mov cl,dl\r | |
223 | shl cl,4\r | |
224 | and edx,240\r | |
; Seed the accumulator from the entry for the low nibble, then start the first shift / high-nibble XOR.
225 | movq mm0,[8+ecx*1+esi]\r | |
226 | movq mm1,[ecx*1+esi]\r | |
227 | movd ebp,mm0\r | |
228 | psrlq mm0,4\r | |
229 | movq mm2,mm1\r | |
230 | psrlq mm1,4\r | |
231 | pxor mm0,[8+edx*1+esi]\r | |
; Next input byte: 14. (No [ebx*8+eax] XOR in this first group: ebx has not captured a shifted-out nibble yet.)
232 | mov cl,BYTE [14+edi]\r | |
233 | psllq mm2,60\r | |
234 | and ebp,15\r | |
235 | pxor mm1,[edx*1+esi]\r | |
236 | mov edx,ecx\r | |
237 | movd ebx,mm0\r | |
238 | pxor mm0,mm2\r | |
239 | shl cl,4\r | |
240 | psrlq mm0,4\r | |
241 | movq mm2,mm1\r | |
242 | psrlq mm1,4\r | |
243 | pxor mm0,[8+ecx*1+esi]\r | |
244 | psllq mm2,60\r | |
245 | and edx,240\r | |
246 | pxor mm1,[ebp*8+eax]\r | |
247 | and ebx,15\r | |
248 | pxor mm1,[ecx*1+esi]\r | |
249 | movd ebp,mm0\r | |
250 | pxor mm0,mm2\r | |
251 | psrlq mm0,4\r | |
252 | movq mm2,mm1\r | |
253 | psrlq mm1,4\r | |
254 | pxor mm0,[8+edx*1+esi]\r | |
; Next input byte: 13 (low nibble via shl cl,4, then high nibble via and edx,240).
255 | mov cl,BYTE [13+edi]\r | |
256 | psllq mm2,60\r | |
257 | pxor mm1,[ebx*8+eax]\r | |
258 | and ebp,15\r | |
259 | pxor mm1,[edx*1+esi]\r | |
260 | mov edx,ecx\r | |
261 | movd ebx,mm0\r | |
262 | pxor mm0,mm2\r | |
263 | shl cl,4\r | |
264 | psrlq mm0,4\r | |
265 | movq mm2,mm1\r | |
266 | psrlq mm1,4\r | |
267 | pxor mm0,[8+ecx*1+esi]\r | |
268 | psllq mm2,60\r | |
269 | and edx,240\r | |
270 | pxor mm1,[ebp*8+eax]\r | |
271 | and ebx,15\r | |
272 | pxor mm1,[ecx*1+esi]\r | |
273 | movd ebp,mm0\r | |
274 | pxor mm0,mm2\r | |
275 | psrlq mm0,4\r | |
276 | movq mm2,mm1\r | |
277 | psrlq mm1,4\r | |
278 | pxor mm0,[8+edx*1+esi]\r | |
; Next input byte: 12.
279 | mov cl,BYTE [12+edi]\r | |
280 | psllq mm2,60\r | |
281 | pxor mm1,[ebx*8+eax]\r | |
282 | and ebp,15\r | |
283 | pxor mm1,[edx*1+esi]\r | |
284 | mov edx,ecx\r | |
285 | movd ebx,mm0\r | |
286 | pxor mm0,mm2\r | |
287 | shl cl,4\r | |
288 | psrlq mm0,4\r | |
289 | movq mm2,mm1\r | |
290 | psrlq mm1,4\r | |
291 | pxor mm0,[8+ecx*1+esi]\r | |
292 | psllq mm2,60\r | |
293 | and edx,240\r | |
294 | pxor mm1,[ebp*8+eax]\r | |
295 | and ebx,15\r | |
296 | pxor mm1,[ecx*1+esi]\r | |
297 | movd ebp,mm0\r | |
298 | pxor mm0,mm2\r | |
299 | psrlq mm0,4\r | |
300 | movq mm2,mm1\r | |
301 | psrlq mm1,4\r | |
302 | pxor mm0,[8+edx*1+esi]\r | |
; Next input byte: 11.
303 | mov cl,BYTE [11+edi]\r | |
304 | psllq mm2,60\r | |
305 | pxor mm1,[ebx*8+eax]\r | |
306 | and ebp,15\r | |
307 | pxor mm1,[edx*1+esi]\r | |
308 | mov edx,ecx\r | |
309 | movd ebx,mm0\r | |
310 | pxor mm0,mm2\r | |
311 | shl cl,4\r | |
312 | psrlq mm0,4\r | |
313 | movq mm2,mm1\r | |
314 | psrlq mm1,4\r | |
315 | pxor mm0,[8+ecx*1+esi]\r | |
316 | psllq mm2,60\r | |
317 | and edx,240\r | |
318 | pxor mm1,[ebp*8+eax]\r | |
319 | and ebx,15\r | |
320 | pxor mm1,[ecx*1+esi]\r | |
321 | movd ebp,mm0\r | |
322 | pxor mm0,mm2\r | |
323 | psrlq mm0,4\r | |
324 | movq mm2,mm1\r | |
325 | psrlq mm1,4\r | |
326 | pxor mm0,[8+edx*1+esi]\r | |
; Next input byte: 10.
327 | mov cl,BYTE [10+edi]\r | |
328 | psllq mm2,60\r | |
329 | pxor mm1,[ebx*8+eax]\r | |
330 | and ebp,15\r | |
331 | pxor mm1,[edx*1+esi]\r | |
332 | mov edx,ecx\r | |
333 | movd ebx,mm0\r | |
334 | pxor mm0,mm2\r | |
335 | shl cl,4\r | |
336 | psrlq mm0,4\r | |
337 | movq mm2,mm1\r | |
338 | psrlq mm1,4\r | |
339 | pxor mm0,[8+ecx*1+esi]\r | |
340 | psllq mm2,60\r | |
341 | and edx,240\r | |
342 | pxor mm1,[ebp*8+eax]\r | |
343 | and ebx,15\r | |
344 | pxor mm1,[ecx*1+esi]\r | |
345 | movd ebp,mm0\r | |
346 | pxor mm0,mm2\r | |
347 | psrlq mm0,4\r | |
348 | movq mm2,mm1\r | |
349 | psrlq mm1,4\r | |
350 | pxor mm0,[8+edx*1+esi]\r | |
; Next input byte: 9.
351 | mov cl,BYTE [9+edi]\r | |
352 | psllq mm2,60\r | |
353 | pxor mm1,[ebx*8+eax]\r | |
354 | and ebp,15\r | |
355 | pxor mm1,[edx*1+esi]\r | |
356 | mov edx,ecx\r | |
357 | movd ebx,mm0\r | |
358 | pxor mm0,mm2\r | |
359 | shl cl,4\r | |
360 | psrlq mm0,4\r | |
361 | movq mm2,mm1\r | |
362 | psrlq mm1,4\r | |
363 | pxor mm0,[8+ecx*1+esi]\r | |
364 | psllq mm2,60\r | |
365 | and edx,240\r | |
366 | pxor mm1,[ebp*8+eax]\r | |
367 | and ebx,15\r | |
368 | pxor mm1,[ecx*1+esi]\r | |
369 | movd ebp,mm0\r | |
370 | pxor mm0,mm2\r | |
371 | psrlq mm0,4\r | |
372 | movq mm2,mm1\r | |
373 | psrlq mm1,4\r | |
374 | pxor mm0,[8+edx*1+esi]\r | |
; Next input byte: 8.
375 | mov cl,BYTE [8+edi]\r | |
376 | psllq mm2,60\r | |
377 | pxor mm1,[ebx*8+eax]\r | |
378 | and ebp,15\r | |
379 | pxor mm1,[edx*1+esi]\r | |
380 | mov edx,ecx\r | |
381 | movd ebx,mm0\r | |
382 | pxor mm0,mm2\r | |
383 | shl cl,4\r | |
384 | psrlq mm0,4\r | |
385 | movq mm2,mm1\r | |
386 | psrlq mm1,4\r | |
387 | pxor mm0,[8+ecx*1+esi]\r | |
388 | psllq mm2,60\r | |
389 | and edx,240\r | |
390 | pxor mm1,[ebp*8+eax]\r | |
391 | and ebx,15\r | |
392 | pxor mm1,[ecx*1+esi]\r | |
393 | movd ebp,mm0\r | |
394 | pxor mm0,mm2\r | |
395 | psrlq mm0,4\r | |
396 | movq mm2,mm1\r | |
397 | psrlq mm1,4\r | |
398 | pxor mm0,[8+edx*1+esi]\r | |
; Next input byte: 7.
399 | mov cl,BYTE [7+edi]\r | |
400 | psllq mm2,60\r | |
401 | pxor mm1,[ebx*8+eax]\r | |
402 | and ebp,15\r | |
403 | pxor mm1,[edx*1+esi]\r | |
404 | mov edx,ecx\r | |
405 | movd ebx,mm0\r | |
406 | pxor mm0,mm2\r | |
407 | shl cl,4\r | |
408 | psrlq mm0,4\r | |
409 | movq mm2,mm1\r | |
410 | psrlq mm1,4\r | |
411 | pxor mm0,[8+ecx*1+esi]\r | |
412 | psllq mm2,60\r | |
413 | and edx,240\r | |
414 | pxor mm1,[ebp*8+eax]\r | |
415 | and ebx,15\r | |
416 | pxor mm1,[ecx*1+esi]\r | |
417 | movd ebp,mm0\r | |
418 | pxor mm0,mm2\r | |
419 | psrlq mm0,4\r | |
420 | movq mm2,mm1\r | |
421 | psrlq mm1,4\r | |
422 | pxor mm0,[8+edx*1+esi]\r | |
; Next input byte: 6.
423 | mov cl,BYTE [6+edi]\r | |
424 | psllq mm2,60\r | |
425 | pxor mm1,[ebx*8+eax]\r | |
426 | and ebp,15\r | |
427 | pxor mm1,[edx*1+esi]\r | |
428 | mov edx,ecx\r | |
429 | movd ebx,mm0\r | |
430 | pxor mm0,mm2\r | |
431 | shl cl,4\r | |
432 | psrlq mm0,4\r | |
433 | movq mm2,mm1\r | |
434 | psrlq mm1,4\r | |
435 | pxor mm0,[8+ecx*1+esi]\r | |
436 | psllq mm2,60\r | |
437 | and edx,240\r | |
438 | pxor mm1,[ebp*8+eax]\r | |
439 | and ebx,15\r | |
440 | pxor mm1,[ecx*1+esi]\r | |
441 | movd ebp,mm0\r | |
442 | pxor mm0,mm2\r | |
443 | psrlq mm0,4\r | |
444 | movq mm2,mm1\r | |
445 | psrlq mm1,4\r | |
446 | pxor mm0,[8+edx*1+esi]\r | |
; Next input byte: 5.
447 | mov cl,BYTE [5+edi]\r | |
448 | psllq mm2,60\r | |
449 | pxor mm1,[ebx*8+eax]\r | |
450 | and ebp,15\r | |
451 | pxor mm1,[edx*1+esi]\r | |
452 | mov edx,ecx\r | |
453 | movd ebx,mm0\r | |
454 | pxor mm0,mm2\r | |
455 | shl cl,4\r | |
456 | psrlq mm0,4\r | |
457 | movq mm2,mm1\r | |
458 | psrlq mm1,4\r | |
459 | pxor mm0,[8+ecx*1+esi]\r | |
460 | psllq mm2,60\r | |
461 | and edx,240\r | |
462 | pxor mm1,[ebp*8+eax]\r | |
463 | and ebx,15\r | |
464 | pxor mm1,[ecx*1+esi]\r | |
465 | movd ebp,mm0\r | |
466 | pxor mm0,mm2\r | |
467 | psrlq mm0,4\r | |
468 | movq mm2,mm1\r | |
469 | psrlq mm1,4\r | |
470 | pxor mm0,[8+edx*1+esi]\r | |
; Next input byte: 4.
471 | mov cl,BYTE [4+edi]\r | |
472 | psllq mm2,60\r | |
473 | pxor mm1,[ebx*8+eax]\r | |
474 | and ebp,15\r | |
475 | pxor mm1,[edx*1+esi]\r | |
476 | mov edx,ecx\r | |
477 | movd ebx,mm0\r | |
478 | pxor mm0,mm2\r | |
479 | shl cl,4\r | |
480 | psrlq mm0,4\r | |
481 | movq mm2,mm1\r | |
482 | psrlq mm1,4\r | |
483 | pxor mm0,[8+ecx*1+esi]\r | |
484 | psllq mm2,60\r | |
485 | and edx,240\r | |
486 | pxor mm1,[ebp*8+eax]\r | |
487 | and ebx,15\r | |
488 | pxor mm1,[ecx*1+esi]\r | |
489 | movd ebp,mm0\r | |
490 | pxor mm0,mm2\r | |
491 | psrlq mm0,4\r | |
492 | movq mm2,mm1\r | |
493 | psrlq mm1,4\r | |
494 | pxor mm0,[8+edx*1+esi]\r | |
; Next input byte: 3.
495 | mov cl,BYTE [3+edi]\r | |
496 | psllq mm2,60\r | |
497 | pxor mm1,[ebx*8+eax]\r | |
498 | and ebp,15\r | |
499 | pxor mm1,[edx*1+esi]\r | |
500 | mov edx,ecx\r | |
501 | movd ebx,mm0\r | |
502 | pxor mm0,mm2\r | |
503 | shl cl,4\r | |
504 | psrlq mm0,4\r | |
505 | movq mm2,mm1\r | |
506 | psrlq mm1,4\r | |
507 | pxor mm0,[8+ecx*1+esi]\r | |
508 | psllq mm2,60\r | |
509 | and edx,240\r | |
510 | pxor mm1,[ebp*8+eax]\r | |
511 | and ebx,15\r | |
512 | pxor mm1,[ecx*1+esi]\r | |
513 | movd ebp,mm0\r | |
514 | pxor mm0,mm2\r | |
515 | psrlq mm0,4\r | |
516 | movq mm2,mm1\r | |
517 | psrlq mm1,4\r | |
518 | pxor mm0,[8+edx*1+esi]\r | |
; Next input byte: 2.
519 | mov cl,BYTE [2+edi]\r | |
520 | psllq mm2,60\r | |
521 | pxor mm1,[ebx*8+eax]\r | |
522 | and ebp,15\r | |
523 | pxor mm1,[edx*1+esi]\r | |
524 | mov edx,ecx\r | |
525 | movd ebx,mm0\r | |
526 | pxor mm0,mm2\r | |
527 | shl cl,4\r | |
528 | psrlq mm0,4\r | |
529 | movq mm2,mm1\r | |
530 | psrlq mm1,4\r | |
531 | pxor mm0,[8+ecx*1+esi]\r | |
532 | psllq mm2,60\r | |
533 | and edx,240\r | |
534 | pxor mm1,[ebp*8+eax]\r | |
535 | and ebx,15\r | |
536 | pxor mm1,[ecx*1+esi]\r | |
537 | movd ebp,mm0\r | |
538 | pxor mm0,mm2\r | |
539 | psrlq mm0,4\r | |
540 | movq mm2,mm1\r | |
541 | psrlq mm1,4\r | |
542 | pxor mm0,[8+edx*1+esi]\r | |
; Next input byte: 1.
543 | mov cl,BYTE [1+edi]\r | |
544 | psllq mm2,60\r | |
545 | pxor mm1,[ebx*8+eax]\r | |
546 | and ebp,15\r | |
547 | pxor mm1,[edx*1+esi]\r | |
548 | mov edx,ecx\r | |
549 | movd ebx,mm0\r | |
550 | pxor mm0,mm2\r | |
551 | shl cl,4\r | |
552 | psrlq mm0,4\r | |
553 | movq mm2,mm1\r | |
554 | psrlq mm1,4\r | |
555 | pxor mm0,[8+ecx*1+esi]\r | |
556 | psllq mm2,60\r | |
557 | and edx,240\r | |
558 | pxor mm1,[ebp*8+eax]\r | |
559 | and ebx,15\r | |
560 | pxor mm1,[ecx*1+esi]\r | |
561 | movd ebp,mm0\r | |
562 | pxor mm0,mm2\r | |
563 | psrlq mm0,4\r | |
564 | movq mm2,mm1\r | |
565 | psrlq mm1,4\r | |
566 | pxor mm0,[8+edx*1+esi]\r | |
; Last input byte: 0.
567 | mov cl,BYTE [edi]\r | |
568 | psllq mm2,60\r | |
569 | pxor mm1,[ebx*8+eax]\r | |
570 | and ebp,15\r | |
571 | pxor mm1,[edx*1+esi]\r | |
572 | mov edx,ecx\r | |
573 | movd ebx,mm0\r | |
574 | pxor mm0,mm2\r | |
575 | shl cl,4\r | |
576 | psrlq mm0,4\r | |
577 | movq mm2,mm1\r | |
578 | psrlq mm1,4\r | |
579 | pxor mm0,[8+ecx*1+esi]\r | |
580 | psllq mm2,60\r | |
581 | and edx,240\r | |
582 | pxor mm1,[ebp*8+eax]\r | |
583 | and ebx,15\r | |
584 | pxor mm1,[ecx*1+esi]\r | |
585 | movd ebp,mm0\r | |
586 | pxor mm0,mm2\r | |
587 | psrlq mm0,4\r | |
588 | movq mm2,mm1\r | |
589 | psrlq mm1,4\r | |
590 | pxor mm0,[8+edx*1+esi]\r | |
; Drain the pipeline: finish the last table XORs, with no further input byte to load.
591 | psllq mm2,60\r | |
592 | pxor mm1,[ebx*8+eax]\r | |
593 | and ebp,15\r | |
594 | pxor mm1,[edx*1+esi]\r | |
; Move the accumulator to integer registers: ebx/ecx = low/high dword of mm0, edx/ebp = low/high dword of mm1.
595 | movd ebx,mm0\r | |
596 | pxor mm0,mm2\r | |
; The final constant is fetched as a plain dword from the upper half of entry ebp (edi is free to reuse here),
; shifted left by 4 and XORed into the top dword (ebp) in integer registers.
597 | mov edi,DWORD [4+ebp*8+eax]\r | |
598 | psrlq mm0,32\r | |
599 | movd edx,mm1\r | |
600 | psrlq mm1,32\r | |
601 | movd ecx,mm0\r | |
602 | movd ebp,mm1\r | |
603 | shl edi,4\r | |
; Byte-swap every result dword before returning.
604 | bswap ebx\r | |
605 | bswap edx\r | |
606 | bswap ecx\r | |
607 | xor ebp,edi\r | |
608 | bswap ebp\r | |
609 | ret\r | |
; _gcm_gmult_4bit_mmx -- MMX variant: transforms one 16-byte block in place via __mmx_gmult_4bit_inner.
; Stack arguments (32-bit code, plain `ret`):
;   arg0 = pointer to the 16-byte block (read on entry, overwritten with the result)
;   arg1 = pointer to a table of 16-byte entries
; NOTE(review): presumably Xi and Htable of OpenSSL's gcm_gmult_4bit -- confirm against the C prototype.
; Preserves ebp, ebx, esi, edi (pushed/popped); eax, ecx, edx and mm0-mm2 are clobbered. Executes emms before returning.
610 | global _gcm_gmult_4bit_mmx\r | |
611 | align 16\r | |
612 | _gcm_gmult_4bit_mmx:\r | |
613 | L$_gcm_gmult_4bit_mmx_begin:\r | |
614 | push ebp\r | |
615 | push ebx\r | |
616 | push esi\r | |
617 | push edi\r | |
; 4 pushes (16) + return address (4) = 20, so [20+esp] is arg0 and [24+esp] is arg1.
618 | mov edi,DWORD [20+esp]\r | |
619 | mov esi,DWORD [24+esp]\r | |
; Position-independent address of L$rem_4bit: call pushes the address of the next label,
; pop retrieves it, and lea adds the assemble-time distance to the table.
620 | call L$005pic_point\r | |
621 | L$005pic_point:\r | |
622 | pop eax\r | |
623 | lea eax,[(L$rem_4bit-L$005pic_point)+eax]\r | |
; The inner routine takes byte 15 of the block in ebx and reads bytes 14..0 through edi.
624 | movzx ebx,BYTE [15+edi]\r | |
625 | call __mmx_gmult_4bit_inner\r | |
; edi was clobbered by the inner routine; reload arg0 and leave MMX state before returning.
626 | mov edi,DWORD [20+esp]\r | |
627 | emms\r | |
; Store the four (already byte-swapped) result dwords over the input block.
628 | mov DWORD [12+edi],ebx\r | |
629 | mov DWORD [4+edi],edx\r | |
630 | mov DWORD [8+edi],ecx\r | |
631 | mov DWORD [edi],ebp\r | |
632 | pop edi\r | |
633 | pop esi\r | |
634 | pop ebx\r | |
635 | pop ebp\r | |
636 | ret\r | |
; _gcm_ghash_4bit_mmx -- MMX variant that folds a run of 16-byte input blocks into a 16-byte state.
; Stack arguments (32-bit code, plain `ret`):
;   arg0 = pointer to the 16-byte state (read on entry, rewritten once at the end)
;   arg1 = pointer to a table of 16-byte entries
;   arg2 = pointer to the input data
;   arg3 = input length in bytes (added to arg2 to form the end pointer)
; NOTE(review): presumably Xi, Htable, inp, len of OpenSSL's gcm_ghash_4bit -- confirm against the C prototype.
; The loop body runs before the end-pointer check, so at least one 16-byte block is always consumed;
; the input advances 16 bytes per pass (NOTE(review): callers presumably pass a non-zero multiple of 16 -- confirm).
; Preserves ebp, ebx, esi, edi (pushed/popped); eax, ecx, edx and mm0-mm2 are clobbered. Executes emms before returning.
; The arg2 and arg3 stack slots are overwritten (current input pointer / end pointer).
637 | global _gcm_ghash_4bit_mmx\r | |
638 | align 16\r | |
639 | _gcm_ghash_4bit_mmx:\r | |
640 | L$_gcm_ghash_4bit_mmx_begin:\r | |
641 | push ebp\r | |
642 | push ebx\r | |
643 | push esi\r | |
644 | push edi\r | |
; 4 pushes (16) + return address (4) = 20, so arg0..arg3 live at [20+esp]..[32+esp] for now.
645 | mov ebp,DWORD [20+esp]\r | |
646 | mov esi,DWORD [24+esp]\r | |
647 | mov edi,DWORD [28+esp]\r | |
648 | mov ecx,DWORD [32+esp]\r | |
; Position-independent address of L$rem_4bit: call pushes the address of the next label,
; pop retrieves it, and lea adds the assemble-time distance to the table.
649 | call L$006pic_point\r | |
650 | L$006pic_point:\r | |
651 | pop eax\r | |
652 | lea eax,[(L$rem_4bit-L$006pic_point)+eax]\r | |
; Turn the length into an end pointer and park it in the arg3 slot.
653 | add ecx,edi\r | |
654 | mov DWORD [32+esp],ecx\r | |
; Reserve 20 bytes; [esp..esp+15] holds the block handed to the inner routine.
; From here on the argument slots are 20 bytes further away: arg0 = [40+esp], arg2 = [48+esp], arg3 = [52+esp].
655 | sub esp,20\r | |
; Load the four dwords of the state; ebp is loaded last because it holds the state pointer.
656 | mov ebx,DWORD [12+ebp]\r | |
657 | mov edx,DWORD [4+ebp]\r | |
658 | mov ecx,DWORD [8+ebp]\r | |
659 | mov ebp,DWORD [ebp]\r | |
660 | jmp NEAR L$007mmx_outer_loop\r | |
661 | align 16\r | |
662 | L$007mmx_outer_loop:\r | |
; XOR the next 16 input bytes (edi = input pointer) into the running state.
663 | xor ebx,DWORD [12+edi]\r | |
664 | xor edx,DWORD [4+edi]\r | |
665 | xor ecx,DWORD [8+edi]\r | |
666 | xor ebp,DWORD [edi]\r | |
; Save the input pointer in the arg2 slot, because edi is about to be repurposed.
667 | mov DWORD [48+esp],edi\r | |
; Spill the combined block to the local buffer and point edi at it for the inner routine.
668 | mov DWORD [12+esp],ebx\r | |
669 | mov DWORD [4+esp],edx\r | |
670 | mov DWORD [8+esp],ecx\r | |
671 | mov DWORD [esp],ebp\r | |
672 | mov edi,esp\r | |
; ebx = top byte of the dword at offset 12, i.e. byte 15 of the block, as the inner routine expects.
673 | shr ebx,24\r | |
674 | call __mmx_gmult_4bit_inner\r | |
; Advance the input pointer by one block and loop while it is below (unsigned) the end pointer.
675 | mov edi,DWORD [48+esp]\r | |
676 | lea edi,[16+edi]\r | |
677 | cmp edi,DWORD [52+esp]\r | |
678 | jb NEAR L$007mmx_outer_loop\r | |
; All input consumed: reload arg0, leave MMX state and store the four result dwords.
679 | mov edi,DWORD [40+esp]\r | |
680 | emms\r | |
681 | mov DWORD [12+edi],ebx\r | |
682 | mov DWORD [4+edi],edx\r | |
683 | mov DWORD [8+edi],ecx\r | |
684 | mov DWORD [edi],ebp\r | |
; Release the local buffer and restore the saved registers in reverse push order.
685 | add esp,20\r | |
686 | pop edi\r | |
687 | pop esi\r | |
688 | pop ebx\r | |
689 | pop ebp\r | |
690 | ret\r | |
; L$rem_4bit -- 64-byte-aligned constant table used by the MMX routines in this file.
; 16 entries of 8 bytes each: the low dword is 0 and the constant sits in the high dword.
; __mmx_gmult_4bit_inner reads it as [idx*8+eax], with eax set up by the *_mmx entry points.
; The constants are the stack-table values of the *_x86 routines divided by 16
; (e.g. 29491200 = 471859200 / 16, 190709760 = 3051356160 / 16).
691 | align 64\r | |
692 | L$rem_4bit:\r | |
693 | dd 0,0,0,29491200,0,58982400,0,38141952\r | |
694 | dd 0,117964800,0,113901568,0,76283904,0,88997888\r | |
695 | dd 0,235929600,0,265420800,0,227803136,0,206962688\r | |
696 | dd 0,152567808,0,148504576,0,177995776,0,190709760\r | |
; NUL-terminated ASCII identification string: "GHASH for x86, CRYPTOGAMS by <appro@openssl.org>".
697 | db 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67\r | |
698 | db 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112\r | |
699 | db 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62\r | |
700 | db 0\r |