--- /dev/null
+# WARNING: do not edit!\r
+# Generated from openssl/crypto/modes/asm/ghash-x86.pl\r
+#\r
+# Copyright 2010-2020 The OpenSSL Project Authors. All Rights Reserved.\r
+#\r
+# Licensed under the OpenSSL license (the "License"). You may not use\r
+# this file except in compliance with the License. You can obtain a copy\r
+# in the file LICENSE in the source distribution or at\r
+# https://www.openssl.org/source/license.html\r
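+#
+# This module implements GHASH, the GF(2^128) universal hash used by
+# GCM, with the 4-bit table-lookup method.  Two code paths are
+# provided: a plain integer x86 path (gcm_gmult_4bit_x86,
+# gcm_ghash_4bit_x86) and an MMX path (gcm_gmult_4bit_mmx,
+# gcm_ghash_4bit_mmx).  The C prototypes are assumed to match the ones
+# OpenSSL's gcm128.c uses for these routines:
+#
+#   void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]);
+#   void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16],
+#                           const u8 *inp, size_t len);
+#
+# and likewise for the _mmx variants.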
+\r
+.text\r
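+# gcm_gmult_4bit_x86(Xi, Htable): multiply Xi by the hash key H
+# (pre-expanded into the 16-entry Htable) in GF(2^128), consuming Xi
+# one 4-bit nibble at a time and folding the bits shifted out through
+# a reduction table built on the stack below.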
+.globl gcm_gmult_4bit_x86\r
+.type gcm_gmult_4bit_x86,@function\r
+.align 16\r
+gcm_gmult_4bit_x86:\r
+.L_gcm_gmult_4bit_x86_begin:\r
+ pushl %ebp\r
+ pushl %ebx\r
+ pushl %esi\r
+ pushl %edi\r
+ subl $84,%esp\r
+ movl 104(%esp),%edi\r
+ movl 108(%esp),%esi\r
+ movl (%edi),%ebp\r
+ movl 4(%edi),%edx\r
+ movl 8(%edi),%ecx\r
+ movl 12(%edi),%ebx\r
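+# Build the 16-entry reduction table at 16(%esp): each 32-bit entry
+# holds one rem_4bit constant (0x0000,0x1C20,0x3840,...) in its upper
+# 16 bits, indexed later by the nibble shifted out of the accumulator.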
+ movl $0,16(%esp)\r
+ movl $471859200,20(%esp)\r
+ movl $943718400,24(%esp)\r
+ movl $610271232,28(%esp)\r
+ movl $1887436800,32(%esp)\r
+ movl $1822425088,36(%esp)\r
+ movl $1220542464,40(%esp)\r
+ movl $1423966208,44(%esp)\r
+ movl $3774873600,48(%esp)\r
+ movl $4246732800,52(%esp)\r
+ movl $3644850176,56(%esp)\r
+ movl $3311403008,60(%esp)\r
+ movl $2441084928,64(%esp)\r
+ movl $2376073216,68(%esp)\r
+ movl $2847932416,72(%esp)\r
+ movl $3051356160,76(%esp)\r
+ movl %ebp,(%esp)\r
+ movl %edx,4(%esp)\r
+ movl %ecx,8(%esp)\r
+ movl %ebx,12(%esp)\r
+ shrl $20,%ebx\r
+ andl $240,%ebx\r
+ movl 4(%esi,%ebx,1),%ebp\r
+ movl (%esi,%ebx,1),%edx\r
+ movl 12(%esi,%ebx,1),%ecx\r
+ movl 8(%esi,%ebx,1),%ebx\r
+ xorl %eax,%eax\r
+ movl $15,%edi\r
+ jmp .L000x86_loop\r
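+# Main loop: each pass shifts the four-register accumulator
+# (%ebp,%edx,%ecx,%ebx) right by 4 bits, xors in the reduction constant
+# selected by the nibble that was shifted out, and xors in the Htable
+# entry selected by the next nibble of the saved Xi copy at (%esp)
+# (bytes are consumed from offset 15 down to 0).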
+.align 16\r
+.L000x86_loop:\r
+ movb %bl,%al\r
+ shrdl $4,%ecx,%ebx\r
+ andb $15,%al\r
+ shrdl $4,%edx,%ecx\r
+ shrdl $4,%ebp,%edx\r
+ shrl $4,%ebp\r
+ xorl 16(%esp,%eax,4),%ebp\r
+ movb (%esp,%edi,1),%al\r
+ andb $240,%al\r
+ xorl 8(%esi,%eax,1),%ebx\r
+ xorl 12(%esi,%eax,1),%ecx\r
+ xorl (%esi,%eax,1),%edx\r
+ xorl 4(%esi,%eax,1),%ebp\r
+ decl %edi\r
+ js .L001x86_break\r
+ movb %bl,%al\r
+ shrdl $4,%ecx,%ebx\r
+ andb $15,%al\r
+ shrdl $4,%edx,%ecx\r
+ shrdl $4,%ebp,%edx\r
+ shrl $4,%ebp\r
+ xorl 16(%esp,%eax,4),%ebp\r
+ movb (%esp,%edi,1),%al\r
+ shlb $4,%al\r
+ xorl 8(%esi,%eax,1),%ebx\r
+ xorl 12(%esi,%eax,1),%ecx\r
+ xorl (%esi,%eax,1),%edx\r
+ xorl 4(%esi,%eax,1),%ebp\r
+ jmp .L000x86_loop\r
+.align 16\r
+.L001x86_break:\r
+ bswap %ebx\r
+ bswap %ecx\r
+ bswap %edx\r
+ bswap %ebp\r
+ movl 104(%esp),%edi\r
+ movl %ebx,12(%edi)\r
+ movl %ecx,8(%edi)\r
+ movl %edx,4(%edi)\r
+ movl %ebp,(%edi)\r
+ addl $84,%esp\r
+ popl %edi\r
+ popl %esi\r
+ popl %ebx\r
+ popl %ebp\r
+ ret\r
+.size gcm_gmult_4bit_x86,.-.L_gcm_gmult_4bit_x86_begin\r
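+# gcm_ghash_4bit_x86(Xi, Htable, inp, len): hash "len" bytes (a
+# multiple of 16) at "inp" into Xi.  Each 16-byte block is xored into
+# Xi and the result is multiplied by H with the same per-nibble loop
+# as gcm_gmult_4bit_x86.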
+.globl gcm_ghash_4bit_x86\r
+.type gcm_ghash_4bit_x86,@function\r
+.align 16\r
+gcm_ghash_4bit_x86:\r
+.L_gcm_ghash_4bit_x86_begin:\r
+ pushl %ebp\r
+ pushl %ebx\r
+ pushl %esi\r
+ pushl %edi\r
+ subl $84,%esp\r
+ movl 104(%esp),%ebx\r
+ movl 108(%esp),%esi\r
+ movl 112(%esp),%edi\r
+ movl 116(%esp),%ecx\r
+ addl %edi,%ecx\r
+ movl %ecx,116(%esp)\r
+ movl (%ebx),%ebp\r
+ movl 4(%ebx),%edx\r
+ movl 8(%ebx),%ecx\r
+ movl 12(%ebx),%ebx\r
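+# Same on-stack reduction table as in gcm_gmult_4bit_x86.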
+ movl $0,16(%esp)\r
+ movl $471859200,20(%esp)\r
+ movl $943718400,24(%esp)\r
+ movl $610271232,28(%esp)\r
+ movl $1887436800,32(%esp)\r
+ movl $1822425088,36(%esp)\r
+ movl $1220542464,40(%esp)\r
+ movl $1423966208,44(%esp)\r
+ movl $3774873600,48(%esp)\r
+ movl $4246732800,52(%esp)\r
+ movl $3644850176,56(%esp)\r
+ movl $3311403008,60(%esp)\r
+ movl $2441084928,64(%esp)\r
+ movl $2376073216,68(%esp)\r
+ movl $2847932416,72(%esp)\r
+ movl $3051356160,76(%esp)\r
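+# Outer loop: fold the next 16-byte input block into Xi, then run the
+# per-nibble multiply; 112(%esp) tracks the input pointer and 116(%esp)
+# holds the end-of-input address.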
+.align 16\r
+.L002x86_outer_loop:\r
+ xorl 12(%edi),%ebx\r
+ xorl 8(%edi),%ecx\r
+ xorl 4(%edi),%edx\r
+ xorl (%edi),%ebp\r
+ movl %ebx,12(%esp)\r
+ movl %ecx,8(%esp)\r
+ movl %edx,4(%esp)\r
+ movl %ebp,(%esp)\r
+ shrl $20,%ebx\r
+ andl $240,%ebx\r
+ movl 4(%esi,%ebx,1),%ebp\r
+ movl (%esi,%ebx,1),%edx\r
+ movl 12(%esi,%ebx,1),%ecx\r
+ movl 8(%esi,%ebx,1),%ebx\r
+ xorl %eax,%eax\r
+ movl $15,%edi\r
+ jmp .L003x86_loop\r
+.align 16\r
+.L003x86_loop:\r
+ movb %bl,%al\r
+ shrdl $4,%ecx,%ebx\r
+ andb $15,%al\r
+ shrdl $4,%edx,%ecx\r
+ shrdl $4,%ebp,%edx\r
+ shrl $4,%ebp\r
+ xorl 16(%esp,%eax,4),%ebp\r
+ movb (%esp,%edi,1),%al\r
+ andb $240,%al\r
+ xorl 8(%esi,%eax,1),%ebx\r
+ xorl 12(%esi,%eax,1),%ecx\r
+ xorl (%esi,%eax,1),%edx\r
+ xorl 4(%esi,%eax,1),%ebp\r
+ decl %edi\r
+ js .L004x86_break\r
+ movb %bl,%al\r
+ shrdl $4,%ecx,%ebx\r
+ andb $15,%al\r
+ shrdl $4,%edx,%ecx\r
+ shrdl $4,%ebp,%edx\r
+ shrl $4,%ebp\r
+ xorl 16(%esp,%eax,4),%ebp\r
+ movb (%esp,%edi,1),%al\r
+ shlb $4,%al\r
+ xorl 8(%esi,%eax,1),%ebx\r
+ xorl 12(%esi,%eax,1),%ecx\r
+ xorl (%esi,%eax,1),%edx\r
+ xorl 4(%esi,%eax,1),%ebp\r
+ jmp .L003x86_loop\r
+.align 16\r
+.L004x86_break:\r
+ bswap %ebx\r
+ bswap %ecx\r
+ bswap %edx\r
+ bswap %ebp\r
+ movl 112(%esp),%edi\r
+ leal 16(%edi),%edi\r
+ cmpl 116(%esp),%edi\r
+ movl %edi,112(%esp)\r
+ jb .L002x86_outer_loop\r
+ movl 104(%esp),%edi\r
+ movl %ebx,12(%edi)\r
+ movl %ecx,8(%edi)\r
+ movl %edx,4(%edi)\r
+ movl %ebp,(%edi)\r
+ addl $84,%esp\r
+ popl %edi\r
+ popl %esi\r
+ popl %ebx\r
+ popl %ebp\r
+ ret\r
+.size gcm_ghash_4bit_x86,.-.L_gcm_ghash_4bit_x86_begin\r
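+# _mmx_gmult_4bit_inner: internal helper shared by the MMX entry points.
+# As set up by the callers below: %edi -> the 16-byte block to multiply,
+# %esi -> Htable, %eax -> .Lrem_4bit, %ebx = last byte (offset 15) of
+# the block.  The multiply is fully unrolled (two nibbles per input
+# byte) in %mm0/%mm1, and the byte-swapped 128-bit result is returned
+# in %ebp, %edx, %ecx, %ebx (dwords 0, 1, 2, 3 of Xi respectively).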
+.type _mmx_gmult_4bit_inner,@function\r
+.align 16\r
+_mmx_gmult_4bit_inner:\r
+ xorl %ecx,%ecx\r
+ movl %ebx,%edx\r
+ movb %dl,%cl\r
+ shlb $4,%cl\r
+ andl $240,%edx\r
+ movq 8(%esi,%ecx,1),%mm0\r
+ movq (%esi,%ecx,1),%mm1\r
+ movd %mm0,%ebp\r
+ psrlq $4,%mm0\r
+ movq %mm1,%mm2\r
+ psrlq $4,%mm1\r
+ pxor 8(%esi,%edx,1),%mm0\r
+ movb 14(%edi),%cl\r
+ psllq $60,%mm2\r
+ andl $15,%ebp\r
+ pxor (%esi,%edx,1),%mm1\r
+ movl %ecx,%edx\r
+ movd %mm0,%ebx\r
+ pxor %mm2,%mm0\r
+ shlb $4,%cl\r
+ psrlq $4,%mm0\r
+ movq %mm1,%mm2\r
+ psrlq $4,%mm1\r
+ pxor 8(%esi,%ecx,1),%mm0\r
+ psllq $60,%mm2\r
+ andl $240,%edx\r
+ pxor (%eax,%ebp,8),%mm1\r
+ andl $15,%ebx\r
+ pxor (%esi,%ecx,1),%mm1\r
+ movd %mm0,%ebp\r
+ pxor %mm2,%mm0\r
+ psrlq $4,%mm0\r
+ movq %mm1,%mm2\r
+ psrlq $4,%mm1\r
+ pxor 8(%esi,%edx,1),%mm0\r
+ movb 13(%edi),%cl\r
+ psllq $60,%mm2\r
+ pxor (%eax,%ebx,8),%mm1\r
+ andl $15,%ebp\r
+ pxor (%esi,%edx,1),%mm1\r
+ movl %ecx,%edx\r
+ movd %mm0,%ebx\r
+ pxor %mm2,%mm0\r
+ shlb $4,%cl\r
+ psrlq $4,%mm0\r
+ movq %mm1,%mm2\r
+ psrlq $4,%mm1\r
+ pxor 8(%esi,%ecx,1),%mm0\r
+ psllq $60,%mm2\r
+ andl $240,%edx\r
+ pxor (%eax,%ebp,8),%mm1\r
+ andl $15,%ebx\r
+ pxor (%esi,%ecx,1),%mm1\r
+ movd %mm0,%ebp\r
+ pxor %mm2,%mm0\r
+ psrlq $4,%mm0\r
+ movq %mm1,%mm2\r
+ psrlq $4,%mm1\r
+ pxor 8(%esi,%edx,1),%mm0\r
+ movb 12(%edi),%cl\r
+ psllq $60,%mm2\r
+ pxor (%eax,%ebx,8),%mm1\r
+ andl $15,%ebp\r
+ pxor (%esi,%edx,1),%mm1\r
+ movl %ecx,%edx\r
+ movd %mm0,%ebx\r
+ pxor %mm2,%mm0\r
+ shlb $4,%cl\r
+ psrlq $4,%mm0\r
+ movq %mm1,%mm2\r
+ psrlq $4,%mm1\r
+ pxor 8(%esi,%ecx,1),%mm0\r
+ psllq $60,%mm2\r
+ andl $240,%edx\r
+ pxor (%eax,%ebp,8),%mm1\r
+ andl $15,%ebx\r
+ pxor (%esi,%ecx,1),%mm1\r
+ movd %mm0,%ebp\r
+ pxor %mm2,%mm0\r
+ psrlq $4,%mm0\r
+ movq %mm1,%mm2\r
+ psrlq $4,%mm1\r
+ pxor 8(%esi,%edx,1),%mm0\r
+ movb 11(%edi),%cl\r
+ psllq $60,%mm2\r
+ pxor (%eax,%ebx,8),%mm1\r
+ andl $15,%ebp\r
+ pxor (%esi,%edx,1),%mm1\r
+ movl %ecx,%edx\r
+ movd %mm0,%ebx\r
+ pxor %mm2,%mm0\r
+ shlb $4,%cl\r
+ psrlq $4,%mm0\r
+ movq %mm1,%mm2\r
+ psrlq $4,%mm1\r
+ pxor 8(%esi,%ecx,1),%mm0\r
+ psllq $60,%mm2\r
+ andl $240,%edx\r
+ pxor (%eax,%ebp,8),%mm1\r
+ andl $15,%ebx\r
+ pxor (%esi,%ecx,1),%mm1\r
+ movd %mm0,%ebp\r
+ pxor %mm2,%mm0\r
+ psrlq $4,%mm0\r
+ movq %mm1,%mm2\r
+ psrlq $4,%mm1\r
+ pxor 8(%esi,%edx,1),%mm0\r
+ movb 10(%edi),%cl\r
+ psllq $60,%mm2\r
+ pxor (%eax,%ebx,8),%mm1\r
+ andl $15,%ebp\r
+ pxor (%esi,%edx,1),%mm1\r
+ movl %ecx,%edx\r
+ movd %mm0,%ebx\r
+ pxor %mm2,%mm0\r
+ shlb $4,%cl\r
+ psrlq $4,%mm0\r
+ movq %mm1,%mm2\r
+ psrlq $4,%mm1\r
+ pxor 8(%esi,%ecx,1),%mm0\r
+ psllq $60,%mm2\r
+ andl $240,%edx\r
+ pxor (%eax,%ebp,8),%mm1\r
+ andl $15,%ebx\r
+ pxor (%esi,%ecx,1),%mm1\r
+ movd %mm0,%ebp\r
+ pxor %mm2,%mm0\r
+ psrlq $4,%mm0\r
+ movq %mm1,%mm2\r
+ psrlq $4,%mm1\r
+ pxor 8(%esi,%edx,1),%mm0\r
+ movb 9(%edi),%cl\r
+ psllq $60,%mm2\r
+ pxor (%eax,%ebx,8),%mm1\r
+ andl $15,%ebp\r
+ pxor (%esi,%edx,1),%mm1\r
+ movl %ecx,%edx\r
+ movd %mm0,%ebx\r
+ pxor %mm2,%mm0\r
+ shlb $4,%cl\r
+ psrlq $4,%mm0\r
+ movq %mm1,%mm2\r
+ psrlq $4,%mm1\r
+ pxor 8(%esi,%ecx,1),%mm0\r
+ psllq $60,%mm2\r
+ andl $240,%edx\r
+ pxor (%eax,%ebp,8),%mm1\r
+ andl $15,%ebx\r
+ pxor (%esi,%ecx,1),%mm1\r
+ movd %mm0,%ebp\r
+ pxor %mm2,%mm0\r
+ psrlq $4,%mm0\r
+ movq %mm1,%mm2\r
+ psrlq $4,%mm1\r
+ pxor 8(%esi,%edx,1),%mm0\r
+ movb 8(%edi),%cl\r
+ psllq $60,%mm2\r
+ pxor (%eax,%ebx,8),%mm1\r
+ andl $15,%ebp\r
+ pxor (%esi,%edx,1),%mm1\r
+ movl %ecx,%edx\r
+ movd %mm0,%ebx\r
+ pxor %mm2,%mm0\r
+ shlb $4,%cl\r
+ psrlq $4,%mm0\r
+ movq %mm1,%mm2\r
+ psrlq $4,%mm1\r
+ pxor 8(%esi,%ecx,1),%mm0\r
+ psllq $60,%mm2\r
+ andl $240,%edx\r
+ pxor (%eax,%ebp,8),%mm1\r
+ andl $15,%ebx\r
+ pxor (%esi,%ecx,1),%mm1\r
+ movd %mm0,%ebp\r
+ pxor %mm2,%mm0\r
+ psrlq $4,%mm0\r
+ movq %mm1,%mm2\r
+ psrlq $4,%mm1\r
+ pxor 8(%esi,%edx,1),%mm0\r
+ movb 7(%edi),%cl\r
+ psllq $60,%mm2\r
+ pxor (%eax,%ebx,8),%mm1\r
+ andl $15,%ebp\r
+ pxor (%esi,%edx,1),%mm1\r
+ movl %ecx,%edx\r
+ movd %mm0,%ebx\r
+ pxor %mm2,%mm0\r
+ shlb $4,%cl\r
+ psrlq $4,%mm0\r
+ movq %mm1,%mm2\r
+ psrlq $4,%mm1\r
+ pxor 8(%esi,%ecx,1),%mm0\r
+ psllq $60,%mm2\r
+ andl $240,%edx\r
+ pxor (%eax,%ebp,8),%mm1\r
+ andl $15,%ebx\r
+ pxor (%esi,%ecx,1),%mm1\r
+ movd %mm0,%ebp\r
+ pxor %mm2,%mm0\r
+ psrlq $4,%mm0\r
+ movq %mm1,%mm2\r
+ psrlq $4,%mm1\r
+ pxor 8(%esi,%edx,1),%mm0\r
+ movb 6(%edi),%cl\r
+ psllq $60,%mm2\r
+ pxor (%eax,%ebx,8),%mm1\r
+ andl $15,%ebp\r
+ pxor (%esi,%edx,1),%mm1\r
+ movl %ecx,%edx\r
+ movd %mm0,%ebx\r
+ pxor %mm2,%mm0\r
+ shlb $4,%cl\r
+ psrlq $4,%mm0\r
+ movq %mm1,%mm2\r
+ psrlq $4,%mm1\r
+ pxor 8(%esi,%ecx,1),%mm0\r
+ psllq $60,%mm2\r
+ andl $240,%edx\r
+ pxor (%eax,%ebp,8),%mm1\r
+ andl $15,%ebx\r
+ pxor (%esi,%ecx,1),%mm1\r
+ movd %mm0,%ebp\r
+ pxor %mm2,%mm0\r
+ psrlq $4,%mm0\r
+ movq %mm1,%mm2\r
+ psrlq $4,%mm1\r
+ pxor 8(%esi,%edx,1),%mm0\r
+ movb 5(%edi),%cl\r
+ psllq $60,%mm2\r
+ pxor (%eax,%ebx,8),%mm1\r
+ andl $15,%ebp\r
+ pxor (%esi,%edx,1),%mm1\r
+ movl %ecx,%edx\r
+ movd %mm0,%ebx\r
+ pxor %mm2,%mm0\r
+ shlb $4,%cl\r
+ psrlq $4,%mm0\r
+ movq %mm1,%mm2\r
+ psrlq $4,%mm1\r
+ pxor 8(%esi,%ecx,1),%mm0\r
+ psllq $60,%mm2\r
+ andl $240,%edx\r
+ pxor (%eax,%ebp,8),%mm1\r
+ andl $15,%ebx\r
+ pxor (%esi,%ecx,1),%mm1\r
+ movd %mm0,%ebp\r
+ pxor %mm2,%mm0\r
+ psrlq $4,%mm0\r
+ movq %mm1,%mm2\r
+ psrlq $4,%mm1\r
+ pxor 8(%esi,%edx,1),%mm0\r
+ movb 4(%edi),%cl\r
+ psllq $60,%mm2\r
+ pxor (%eax,%ebx,8),%mm1\r
+ andl $15,%ebp\r
+ pxor (%esi,%edx,1),%mm1\r
+ movl %ecx,%edx\r
+ movd %mm0,%ebx\r
+ pxor %mm2,%mm0\r
+ shlb $4,%cl\r
+ psrlq $4,%mm0\r
+ movq %mm1,%mm2\r
+ psrlq $4,%mm1\r
+ pxor 8(%esi,%ecx,1),%mm0\r
+ psllq $60,%mm2\r
+ andl $240,%edx\r
+ pxor (%eax,%ebp,8),%mm1\r
+ andl $15,%ebx\r
+ pxor (%esi,%ecx,1),%mm1\r
+ movd %mm0,%ebp\r
+ pxor %mm2,%mm0\r
+ psrlq $4,%mm0\r
+ movq %mm1,%mm2\r
+ psrlq $4,%mm1\r
+ pxor 8(%esi,%edx,1),%mm0\r
+ movb 3(%edi),%cl\r
+ psllq $60,%mm2\r
+ pxor (%eax,%ebx,8),%mm1\r
+ andl $15,%ebp\r
+ pxor (%esi,%edx,1),%mm1\r
+ movl %ecx,%edx\r
+ movd %mm0,%ebx\r
+ pxor %mm2,%mm0\r
+ shlb $4,%cl\r
+ psrlq $4,%mm0\r
+ movq %mm1,%mm2\r
+ psrlq $4,%mm1\r
+ pxor 8(%esi,%ecx,1),%mm0\r
+ psllq $60,%mm2\r
+ andl $240,%edx\r
+ pxor (%eax,%ebp,8),%mm1\r
+ andl $15,%ebx\r
+ pxor (%esi,%ecx,1),%mm1\r
+ movd %mm0,%ebp\r
+ pxor %mm2,%mm0\r
+ psrlq $4,%mm0\r
+ movq %mm1,%mm2\r
+ psrlq $4,%mm1\r
+ pxor 8(%esi,%edx,1),%mm0\r
+ movb 2(%edi),%cl\r
+ psllq $60,%mm2\r
+ pxor (%eax,%ebx,8),%mm1\r
+ andl $15,%ebp\r
+ pxor (%esi,%edx,1),%mm1\r
+ movl %ecx,%edx\r
+ movd %mm0,%ebx\r
+ pxor %mm2,%mm0\r
+ shlb $4,%cl\r
+ psrlq $4,%mm0\r
+ movq %mm1,%mm2\r
+ psrlq $4,%mm1\r
+ pxor 8(%esi,%ecx,1),%mm0\r
+ psllq $60,%mm2\r
+ andl $240,%edx\r
+ pxor (%eax,%ebp,8),%mm1\r
+ andl $15,%ebx\r
+ pxor (%esi,%ecx,1),%mm1\r
+ movd %mm0,%ebp\r
+ pxor %mm2,%mm0\r
+ psrlq $4,%mm0\r
+ movq %mm1,%mm2\r
+ psrlq $4,%mm1\r
+ pxor 8(%esi,%edx,1),%mm0\r
+ movb 1(%edi),%cl\r
+ psllq $60,%mm2\r
+ pxor (%eax,%ebx,8),%mm1\r
+ andl $15,%ebp\r
+ pxor (%esi,%edx,1),%mm1\r
+ movl %ecx,%edx\r
+ movd %mm0,%ebx\r
+ pxor %mm2,%mm0\r
+ shlb $4,%cl\r
+ psrlq $4,%mm0\r
+ movq %mm1,%mm2\r
+ psrlq $4,%mm1\r
+ pxor 8(%esi,%ecx,1),%mm0\r
+ psllq $60,%mm2\r
+ andl $240,%edx\r
+ pxor (%eax,%ebp,8),%mm1\r
+ andl $15,%ebx\r
+ pxor (%esi,%ecx,1),%mm1\r
+ movd %mm0,%ebp\r
+ pxor %mm2,%mm0\r
+ psrlq $4,%mm0\r
+ movq %mm1,%mm2\r
+ psrlq $4,%mm1\r
+ pxor 8(%esi,%edx,1),%mm0\r
+ movb (%edi),%cl\r
+ psllq $60,%mm2\r
+ pxor (%eax,%ebx,8),%mm1\r
+ andl $15,%ebp\r
+ pxor (%esi,%edx,1),%mm1\r
+ movl %ecx,%edx\r
+ movd %mm0,%ebx\r
+ pxor %mm2,%mm0\r
+ shlb $4,%cl\r
+ psrlq $4,%mm0\r
+ movq %mm1,%mm2\r
+ psrlq $4,%mm1\r
+ pxor 8(%esi,%ecx,1),%mm0\r
+ psllq $60,%mm2\r
+ andl $240,%edx\r
+ pxor (%eax,%ebp,8),%mm1\r
+ andl $15,%ebx\r
+ pxor (%esi,%ecx,1),%mm1\r
+ movd %mm0,%ebp\r
+ pxor %mm2,%mm0\r
+ psrlq $4,%mm0\r
+ movq %mm1,%mm2\r
+ psrlq $4,%mm1\r
+ pxor 8(%esi,%edx,1),%mm0\r
+ psllq $60,%mm2\r
+ pxor (%eax,%ebx,8),%mm1\r
+ andl $15,%ebp\r
+ pxor (%esi,%edx,1),%mm1\r
+ movd %mm0,%ebx\r
+ pxor %mm2,%mm0\r
+ movl 4(%eax,%ebp,8),%edi\r
+ psrlq $32,%mm0\r
+ movd %mm1,%edx\r
+ psrlq $32,%mm1\r
+ movd %mm0,%ecx\r
+ movd %mm1,%ebp\r
+ shll $4,%edi\r
+ bswap %ebx\r
+ bswap %edx\r
+ bswap %ecx\r
+ xorl %edi,%ebp\r
+ bswap %ebp\r
+ ret\r
+.size _mmx_gmult_4bit_inner,.-_mmx_gmult_4bit_inner\r
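+# gcm_gmult_4bit_mmx(Xi, Htable): MMX version of gcm_gmult_4bit_x86.
+# The address of .Lrem_4bit is obtained position-independently with a
+# call/pop sequence before invoking the shared inner routine.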
+.globl gcm_gmult_4bit_mmx\r
+.type gcm_gmult_4bit_mmx,@function\r
+.align 16\r
+gcm_gmult_4bit_mmx:\r
+.L_gcm_gmult_4bit_mmx_begin:\r
+ pushl %ebp\r
+ pushl %ebx\r
+ pushl %esi\r
+ pushl %edi\r
+ movl 20(%esp),%edi\r
+ movl 24(%esp),%esi\r
+ call .L005pic_point\r
+.L005pic_point:\r
+ popl %eax\r
+ leal .Lrem_4bit-.L005pic_point(%eax),%eax\r
+ movzbl 15(%edi),%ebx\r
+ call _mmx_gmult_4bit_inner\r
+ movl 20(%esp),%edi\r
+ emms\r
+ movl %ebx,12(%edi)\r
+ movl %edx,4(%edi)\r
+ movl %ecx,8(%edi)\r
+ movl %ebp,(%edi)\r
+ popl %edi\r
+ popl %esi\r
+ popl %ebx\r
+ popl %ebp\r
+ ret\r
+.size gcm_gmult_4bit_mmx,.-.L_gcm_gmult_4bit_mmx_begin\r
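+# gcm_ghash_4bit_mmx(Xi, Htable, inp, len): MMX version of
+# gcm_ghash_4bit_x86.  Xi xor the current block is staged in a 16-byte
+# stack buffer, which is what the inner routine multiplies by H.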
+.globl gcm_ghash_4bit_mmx\r
+.type gcm_ghash_4bit_mmx,@function\r
+.align 16\r
+gcm_ghash_4bit_mmx:\r
+.L_gcm_ghash_4bit_mmx_begin:\r
+ pushl %ebp\r
+ pushl %ebx\r
+ pushl %esi\r
+ pushl %edi\r
+ movl 20(%esp),%ebp\r
+ movl 24(%esp),%esi\r
+ movl 28(%esp),%edi\r
+ movl 32(%esp),%ecx\r
+ call .L006pic_point\r
+.L006pic_point:\r
+ popl %eax\r
+ leal .Lrem_4bit-.L006pic_point(%eax),%eax\r
+ addl %edi,%ecx\r
+ movl %ecx,32(%esp)\r
+ subl $20,%esp\r
+ movl 12(%ebp),%ebx\r
+ movl 4(%ebp),%edx\r
+ movl 8(%ebp),%ecx\r
+ movl (%ebp),%ebp\r
+ jmp .L007mmx_outer_loop\r
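+# Outer loop: xor the next 16-byte block into Xi (held in
+# %ebp,%edx,%ecx,%ebx), spill it to the stack buffer at (%esp), and
+# call the inner multiply.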
+.align 16\r
+.L007mmx_outer_loop:\r
+ xorl 12(%edi),%ebx\r
+ xorl 4(%edi),%edx\r
+ xorl 8(%edi),%ecx\r
+ xorl (%edi),%ebp\r
+ movl %edi,48(%esp)\r
+ movl %ebx,12(%esp)\r
+ movl %edx,4(%esp)\r
+ movl %ecx,8(%esp)\r
+ movl %ebp,(%esp)\r
+ movl %esp,%edi\r
+ shrl $24,%ebx\r
+ call _mmx_gmult_4bit_inner\r
+ movl 48(%esp),%edi\r
+ leal 16(%edi),%edi\r
+ cmpl 52(%esp),%edi\r
+ jb .L007mmx_outer_loop\r
+ movl 40(%esp),%edi\r
+ emms\r
+ movl %ebx,12(%edi)\r
+ movl %edx,4(%edi)\r
+ movl %ecx,8(%edi)\r
+ movl %ebp,(%edi)\r
+ addl $20,%esp\r
+ popl %edi\r
+ popl %esi\r
+ popl %ebx\r
+ popl %ebp\r
+ ret\r
+.size gcm_ghash_4bit_mmx,.-.L_gcm_ghash_4bit_mmx_begin\r
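+# .Lrem_4bit: sixteen 64-bit reduction constants used by the MMX path,
+# indexed (scaled by 8) by the 4 bits shifted out of the low end of
+# the product on each step.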
+.align 64\r
+.Lrem_4bit:\r
+.long 0,0,0,29491200,0,58982400,0,38141952\r
+.long 0,117964800,0,113901568,0,76283904,0,88997888\r
+.long 0,235929600,0,265420800,0,227803136,0,206962688\r
+.long 0,152567808,0,148504576,0,177995776,0,190709760\r
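+# ASCII: "GHASH for x86, CRYPTOGAMS by <appro@openssl.org>", NUL-terminated.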
+.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67\r
+.byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112\r
+.byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62\r
+.byte 0\r