CryptoPkg/OpensslLib: Commit the auto-generated assembly files for IA32
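
As a reading aid for the diff below (not part of the committed file), here is a hedged sketch of the C-level view of the symbols this assembly exports, i.e. the prototypes OpenSSL's gcm128.c uses when GHASH_ASM is in effect. The typedefs and parameter names are assumptions borrowed from OpenSSL's generic code; the argument counts and their roles follow directly from the stack-offset loads visible in each function.

#include <stddef.h>

/* Assumed typedefs in the style of OpenSSL's gcm128.c; not taken from this file. */
typedef unsigned char      u8;
typedef unsigned long long u64;
typedef struct { u64 hi, lo; } u128;

/* Xi := Xi * H in GF(2^128), using the caller-built 16-entry 4-bit table Htable. */
void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]);
void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);

/* Fold len bytes at inp (len a multiple of 16) into Xi, one 16-byte block at
 * a time: Xi := (Xi ^ block) * H for each block. */
void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp, size_t len);
void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp, size_t len);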
diff --git a/CryptoPkg/Library/OpensslLib/IA32Gcc/crypto/modes/ghash-x86.S b/CryptoPkg/Library/OpensslLib/IA32Gcc/crypto/modes/ghash-x86.S
new file mode 100644 (file)
index 0000000..f52b445
--- /dev/null
@@ -0,0 +1,703 @@
+# WARNING: do not edit!\r
+# Generated from openssl/crypto/modes/asm/ghash-x86.pl\r
+#\r
+# Copyright 2010-2020 The OpenSSL Project Authors. All Rights Reserved.\r
+#\r
+# Licensed under the OpenSSL license (the "License").  You may not use\r
+# this file except in compliance with the License.  You can obtain a copy\r
+# in the file LICENSE in the source distribution or at\r
+# https://www.openssl.org/source/license.html\r
+\r
+.text\r
+.globl  gcm_gmult_4bit_x86\r
+.type   gcm_gmult_4bit_x86,@function\r
+.align  16\r
+gcm_gmult_4bit_x86:\r
+.L_gcm_gmult_4bit_x86_begin:\r
+        pushl   %ebp\r
+        pushl   %ebx\r
+        pushl   %esi\r
+        pushl   %edi\r
+        subl    $84,%esp\r
+        movl    104(%esp),%edi\r
+        movl    108(%esp),%esi\r
+        movl    (%edi),%ebp\r
+        movl    4(%edi),%edx\r
+        movl    8(%edi),%ecx\r
+        movl    12(%edi),%ebx\r
+        movl    $0,16(%esp)\r
+        movl    $471859200,20(%esp)\r
+        movl    $943718400,24(%esp)\r
+        movl    $610271232,28(%esp)\r
+        movl    $1887436800,32(%esp)\r
+        movl    $1822425088,36(%esp)\r
+        movl    $1220542464,40(%esp)\r
+        movl    $1423966208,44(%esp)\r
+        movl    $3774873600,48(%esp)\r
+        movl    $4246732800,52(%esp)\r
+        movl    $3644850176,56(%esp)\r
+        movl    $3311403008,60(%esp)\r
+        movl    $2441084928,64(%esp)\r
+        movl    $2376073216,68(%esp)\r
+        movl    $2847932416,72(%esp)\r
+        movl    $3051356160,76(%esp)\r
+        movl    %ebp,(%esp)\r
+        movl    %edx,4(%esp)\r
+        movl    %ecx,8(%esp)\r
+        movl    %ebx,12(%esp)\r
+        shrl    $20,%ebx\r
+        andl    $240,%ebx\r
+        movl    4(%esi,%ebx,1),%ebp\r
+        movl    (%esi,%ebx,1),%edx\r
+        movl    12(%esi,%ebx,1),%ecx\r
+        movl    8(%esi,%ebx,1),%ebx\r
+        xorl    %eax,%eax\r
+        movl    $15,%edi\r
+        jmp     .L000x86_loop\r
+.align  16\r
+.L000x86_loop:\r
+        movb    %bl,%al\r
+        shrdl   $4,%ecx,%ebx\r
+        andb    $15,%al\r
+        shrdl   $4,%edx,%ecx\r
+        shrdl   $4,%ebp,%edx\r
+        shrl    $4,%ebp\r
+        xorl    16(%esp,%eax,4),%ebp\r
+        movb    (%esp,%edi,1),%al\r
+        andb    $240,%al\r
+        xorl    8(%esi,%eax,1),%ebx\r
+        xorl    12(%esi,%eax,1),%ecx\r
+        xorl    (%esi,%eax,1),%edx\r
+        xorl    4(%esi,%eax,1),%ebp\r
+        decl    %edi\r
+        js      .L001x86_break\r
+        movb    %bl,%al\r
+        shrdl   $4,%ecx,%ebx\r
+        andb    $15,%al\r
+        shrdl   $4,%edx,%ecx\r
+        shrdl   $4,%ebp,%edx\r
+        shrl    $4,%ebp\r
+        xorl    16(%esp,%eax,4),%ebp\r
+        movb    (%esp,%edi,1),%al\r
+        shlb    $4,%al\r
+        xorl    8(%esi,%eax,1),%ebx\r
+        xorl    12(%esi,%eax,1),%ecx\r
+        xorl    (%esi,%eax,1),%edx\r
+        xorl    4(%esi,%eax,1),%ebp\r
+        jmp     .L000x86_loop\r
+.align  16\r
+.L001x86_break:\r
+        bswap   %ebx\r
+        bswap   %ecx\r
+        bswap   %edx\r
+        bswap   %ebp\r
+        movl    104(%esp),%edi\r
+        movl    %ebx,12(%edi)\r
+        movl    %ecx,8(%edi)\r
+        movl    %edx,4(%edi)\r
+        movl    %ebp,(%edi)\r
+        addl    $84,%esp\r
+        popl    %edi\r
+        popl    %esi\r
+        popl    %ebx\r
+        popl    %ebp\r
+        ret\r
+.size   gcm_gmult_4bit_x86,.-.L_gcm_gmult_4bit_x86_begin\r
+.globl  gcm_ghash_4bit_x86\r
+.type   gcm_ghash_4bit_x86,@function\r
+.align  16\r
+gcm_ghash_4bit_x86:\r
+.L_gcm_ghash_4bit_x86_begin:\r
+        pushl   %ebp\r
+        pushl   %ebx\r
+        pushl   %esi\r
+        pushl   %edi\r
+        subl    $84,%esp\r
+        movl    104(%esp),%ebx\r
+        movl    108(%esp),%esi\r
+        movl    112(%esp),%edi\r
+        movl    116(%esp),%ecx\r
+        addl    %edi,%ecx\r
+        movl    %ecx,116(%esp)\r
+        movl    (%ebx),%ebp\r
+        movl    4(%ebx),%edx\r
+        movl    8(%ebx),%ecx\r
+        movl    12(%ebx),%ebx\r
+        movl    $0,16(%esp)\r
+        movl    $471859200,20(%esp)\r
+        movl    $943718400,24(%esp)\r
+        movl    $610271232,28(%esp)\r
+        movl    $1887436800,32(%esp)\r
+        movl    $1822425088,36(%esp)\r
+        movl    $1220542464,40(%esp)\r
+        movl    $1423966208,44(%esp)\r
+        movl    $3774873600,48(%esp)\r
+        movl    $4246732800,52(%esp)\r
+        movl    $3644850176,56(%esp)\r
+        movl    $3311403008,60(%esp)\r
+        movl    $2441084928,64(%esp)\r
+        movl    $2376073216,68(%esp)\r
+        movl    $2847932416,72(%esp)\r
+        movl    $3051356160,76(%esp)\r
+.align  16\r
+.L002x86_outer_loop:\r
+        xorl    12(%edi),%ebx\r
+        xorl    8(%edi),%ecx\r
+        xorl    4(%edi),%edx\r
+        xorl    (%edi),%ebp\r
+        movl    %ebx,12(%esp)\r
+        movl    %ecx,8(%esp)\r
+        movl    %edx,4(%esp)\r
+        movl    %ebp,(%esp)\r
+        shrl    $20,%ebx\r
+        andl    $240,%ebx\r
+        movl    4(%esi,%ebx,1),%ebp\r
+        movl    (%esi,%ebx,1),%edx\r
+        movl    12(%esi,%ebx,1),%ecx\r
+        movl    8(%esi,%ebx,1),%ebx\r
+        xorl    %eax,%eax\r
+        movl    $15,%edi\r
+        jmp     .L003x86_loop\r
+.align  16\r
+.L003x86_loop:\r
+        movb    %bl,%al\r
+        shrdl   $4,%ecx,%ebx\r
+        andb    $15,%al\r
+        shrdl   $4,%edx,%ecx\r
+        shrdl   $4,%ebp,%edx\r
+        shrl    $4,%ebp\r
+        xorl    16(%esp,%eax,4),%ebp\r
+        movb    (%esp,%edi,1),%al\r
+        andb    $240,%al\r
+        xorl    8(%esi,%eax,1),%ebx\r
+        xorl    12(%esi,%eax,1),%ecx\r
+        xorl    (%esi,%eax,1),%edx\r
+        xorl    4(%esi,%eax,1),%ebp\r
+        decl    %edi\r
+        js      .L004x86_break\r
+        movb    %bl,%al\r
+        shrdl   $4,%ecx,%ebx\r
+        andb    $15,%al\r
+        shrdl   $4,%edx,%ecx\r
+        shrdl   $4,%ebp,%edx\r
+        shrl    $4,%ebp\r
+        xorl    16(%esp,%eax,4),%ebp\r
+        movb    (%esp,%edi,1),%al\r
+        shlb    $4,%al\r
+        xorl    8(%esi,%eax,1),%ebx\r
+        xorl    12(%esi,%eax,1),%ecx\r
+        xorl    (%esi,%eax,1),%edx\r
+        xorl    4(%esi,%eax,1),%ebp\r
+        jmp     .L003x86_loop\r
+.align  16\r
+.L004x86_break:\r
+        bswap   %ebx\r
+        bswap   %ecx\r
+        bswap   %edx\r
+        bswap   %ebp\r
+        movl    112(%esp),%edi\r
+        leal    16(%edi),%edi\r
+        cmpl    116(%esp),%edi\r
+        movl    %edi,112(%esp)\r
+        jb      .L002x86_outer_loop\r
+        movl    104(%esp),%edi\r
+        movl    %ebx,12(%edi)\r
+        movl    %ecx,8(%edi)\r
+        movl    %edx,4(%edi)\r
+        movl    %ebp,(%edi)\r
+        addl    $84,%esp\r
+        popl    %edi\r
+        popl    %esi\r
+        popl    %ebx\r
+        popl    %ebp\r
+        ret\r
+.size   gcm_ghash_4bit_x86,.-.L_gcm_ghash_4bit_x86_begin\r
+.type   _mmx_gmult_4bit_inner,@function\r
+.align  16\r
+_mmx_gmult_4bit_inner:\r
+        xorl    %ecx,%ecx\r
+        movl    %ebx,%edx\r
+        movb    %dl,%cl\r
+        shlb    $4,%cl\r
+        andl    $240,%edx\r
+        movq    8(%esi,%ecx,1),%mm0\r
+        movq    (%esi,%ecx,1),%mm1\r
+        movd    %mm0,%ebp\r
+        psrlq   $4,%mm0\r
+        movq    %mm1,%mm2\r
+        psrlq   $4,%mm1\r
+        pxor    8(%esi,%edx,1),%mm0\r
+        movb    14(%edi),%cl\r
+        psllq   $60,%mm2\r
+        andl    $15,%ebp\r
+        pxor    (%esi,%edx,1),%mm1\r
+        movl    %ecx,%edx\r
+        movd    %mm0,%ebx\r
+        pxor    %mm2,%mm0\r
+        shlb    $4,%cl\r
+        psrlq   $4,%mm0\r
+        movq    %mm1,%mm2\r
+        psrlq   $4,%mm1\r
+        pxor    8(%esi,%ecx,1),%mm0\r
+        psllq   $60,%mm2\r
+        andl    $240,%edx\r
+        pxor    (%eax,%ebp,8),%mm1\r
+        andl    $15,%ebx\r
+        pxor    (%esi,%ecx,1),%mm1\r
+        movd    %mm0,%ebp\r
+        pxor    %mm2,%mm0\r
+        psrlq   $4,%mm0\r
+        movq    %mm1,%mm2\r
+        psrlq   $4,%mm1\r
+        pxor    8(%esi,%edx,1),%mm0\r
+        movb    13(%edi),%cl\r
+        psllq   $60,%mm2\r
+        pxor    (%eax,%ebx,8),%mm1\r
+        andl    $15,%ebp\r
+        pxor    (%esi,%edx,1),%mm1\r
+        movl    %ecx,%edx\r
+        movd    %mm0,%ebx\r
+        pxor    %mm2,%mm0\r
+        shlb    $4,%cl\r
+        psrlq   $4,%mm0\r
+        movq    %mm1,%mm2\r
+        psrlq   $4,%mm1\r
+        pxor    8(%esi,%ecx,1),%mm0\r
+        psllq   $60,%mm2\r
+        andl    $240,%edx\r
+        pxor    (%eax,%ebp,8),%mm1\r
+        andl    $15,%ebx\r
+        pxor    (%esi,%ecx,1),%mm1\r
+        movd    %mm0,%ebp\r
+        pxor    %mm2,%mm0\r
+        psrlq   $4,%mm0\r
+        movq    %mm1,%mm2\r
+        psrlq   $4,%mm1\r
+        pxor    8(%esi,%edx,1),%mm0\r
+        movb    12(%edi),%cl\r
+        psllq   $60,%mm2\r
+        pxor    (%eax,%ebx,8),%mm1\r
+        andl    $15,%ebp\r
+        pxor    (%esi,%edx,1),%mm1\r
+        movl    %ecx,%edx\r
+        movd    %mm0,%ebx\r
+        pxor    %mm2,%mm0\r
+        shlb    $4,%cl\r
+        psrlq   $4,%mm0\r
+        movq    %mm1,%mm2\r
+        psrlq   $4,%mm1\r
+        pxor    8(%esi,%ecx,1),%mm0\r
+        psllq   $60,%mm2\r
+        andl    $240,%edx\r
+        pxor    (%eax,%ebp,8),%mm1\r
+        andl    $15,%ebx\r
+        pxor    (%esi,%ecx,1),%mm1\r
+        movd    %mm0,%ebp\r
+        pxor    %mm2,%mm0\r
+        psrlq   $4,%mm0\r
+        movq    %mm1,%mm2\r
+        psrlq   $4,%mm1\r
+        pxor    8(%esi,%edx,1),%mm0\r
+        movb    11(%edi),%cl\r
+        psllq   $60,%mm2\r
+        pxor    (%eax,%ebx,8),%mm1\r
+        andl    $15,%ebp\r
+        pxor    (%esi,%edx,1),%mm1\r
+        movl    %ecx,%edx\r
+        movd    %mm0,%ebx\r
+        pxor    %mm2,%mm0\r
+        shlb    $4,%cl\r
+        psrlq   $4,%mm0\r
+        movq    %mm1,%mm2\r
+        psrlq   $4,%mm1\r
+        pxor    8(%esi,%ecx,1),%mm0\r
+        psllq   $60,%mm2\r
+        andl    $240,%edx\r
+        pxor    (%eax,%ebp,8),%mm1\r
+        andl    $15,%ebx\r
+        pxor    (%esi,%ecx,1),%mm1\r
+        movd    %mm0,%ebp\r
+        pxor    %mm2,%mm0\r
+        psrlq   $4,%mm0\r
+        movq    %mm1,%mm2\r
+        psrlq   $4,%mm1\r
+        pxor    8(%esi,%edx,1),%mm0\r
+        movb    10(%edi),%cl\r
+        psllq   $60,%mm2\r
+        pxor    (%eax,%ebx,8),%mm1\r
+        andl    $15,%ebp\r
+        pxor    (%esi,%edx,1),%mm1\r
+        movl    %ecx,%edx\r
+        movd    %mm0,%ebx\r
+        pxor    %mm2,%mm0\r
+        shlb    $4,%cl\r
+        psrlq   $4,%mm0\r
+        movq    %mm1,%mm2\r
+        psrlq   $4,%mm1\r
+        pxor    8(%esi,%ecx,1),%mm0\r
+        psllq   $60,%mm2\r
+        andl    $240,%edx\r
+        pxor    (%eax,%ebp,8),%mm1\r
+        andl    $15,%ebx\r
+        pxor    (%esi,%ecx,1),%mm1\r
+        movd    %mm0,%ebp\r
+        pxor    %mm2,%mm0\r
+        psrlq   $4,%mm0\r
+        movq    %mm1,%mm2\r
+        psrlq   $4,%mm1\r
+        pxor    8(%esi,%edx,1),%mm0\r
+        movb    9(%edi),%cl\r
+        psllq   $60,%mm2\r
+        pxor    (%eax,%ebx,8),%mm1\r
+        andl    $15,%ebp\r
+        pxor    (%esi,%edx,1),%mm1\r
+        movl    %ecx,%edx\r
+        movd    %mm0,%ebx\r
+        pxor    %mm2,%mm0\r
+        shlb    $4,%cl\r
+        psrlq   $4,%mm0\r
+        movq    %mm1,%mm2\r
+        psrlq   $4,%mm1\r
+        pxor    8(%esi,%ecx,1),%mm0\r
+        psllq   $60,%mm2\r
+        andl    $240,%edx\r
+        pxor    (%eax,%ebp,8),%mm1\r
+        andl    $15,%ebx\r
+        pxor    (%esi,%ecx,1),%mm1\r
+        movd    %mm0,%ebp\r
+        pxor    %mm2,%mm0\r
+        psrlq   $4,%mm0\r
+        movq    %mm1,%mm2\r
+        psrlq   $4,%mm1\r
+        pxor    8(%esi,%edx,1),%mm0\r
+        movb    8(%edi),%cl\r
+        psllq   $60,%mm2\r
+        pxor    (%eax,%ebx,8),%mm1\r
+        andl    $15,%ebp\r
+        pxor    (%esi,%edx,1),%mm1\r
+        movl    %ecx,%edx\r
+        movd    %mm0,%ebx\r
+        pxor    %mm2,%mm0\r
+        shlb    $4,%cl\r
+        psrlq   $4,%mm0\r
+        movq    %mm1,%mm2\r
+        psrlq   $4,%mm1\r
+        pxor    8(%esi,%ecx,1),%mm0\r
+        psllq   $60,%mm2\r
+        andl    $240,%edx\r
+        pxor    (%eax,%ebp,8),%mm1\r
+        andl    $15,%ebx\r
+        pxor    (%esi,%ecx,1),%mm1\r
+        movd    %mm0,%ebp\r
+        pxor    %mm2,%mm0\r
+        psrlq   $4,%mm0\r
+        movq    %mm1,%mm2\r
+        psrlq   $4,%mm1\r
+        pxor    8(%esi,%edx,1),%mm0\r
+        movb    7(%edi),%cl\r
+        psllq   $60,%mm2\r
+        pxor    (%eax,%ebx,8),%mm1\r
+        andl    $15,%ebp\r
+        pxor    (%esi,%edx,1),%mm1\r
+        movl    %ecx,%edx\r
+        movd    %mm0,%ebx\r
+        pxor    %mm2,%mm0\r
+        shlb    $4,%cl\r
+        psrlq   $4,%mm0\r
+        movq    %mm1,%mm2\r
+        psrlq   $4,%mm1\r
+        pxor    8(%esi,%ecx,1),%mm0\r
+        psllq   $60,%mm2\r
+        andl    $240,%edx\r
+        pxor    (%eax,%ebp,8),%mm1\r
+        andl    $15,%ebx\r
+        pxor    (%esi,%ecx,1),%mm1\r
+        movd    %mm0,%ebp\r
+        pxor    %mm2,%mm0\r
+        psrlq   $4,%mm0\r
+        movq    %mm1,%mm2\r
+        psrlq   $4,%mm1\r
+        pxor    8(%esi,%edx,1),%mm0\r
+        movb    6(%edi),%cl\r
+        psllq   $60,%mm2\r
+        pxor    (%eax,%ebx,8),%mm1\r
+        andl    $15,%ebp\r
+        pxor    (%esi,%edx,1),%mm1\r
+        movl    %ecx,%edx\r
+        movd    %mm0,%ebx\r
+        pxor    %mm2,%mm0\r
+        shlb    $4,%cl\r
+        psrlq   $4,%mm0\r
+        movq    %mm1,%mm2\r
+        psrlq   $4,%mm1\r
+        pxor    8(%esi,%ecx,1),%mm0\r
+        psllq   $60,%mm2\r
+        andl    $240,%edx\r
+        pxor    (%eax,%ebp,8),%mm1\r
+        andl    $15,%ebx\r
+        pxor    (%esi,%ecx,1),%mm1\r
+        movd    %mm0,%ebp\r
+        pxor    %mm2,%mm0\r
+        psrlq   $4,%mm0\r
+        movq    %mm1,%mm2\r
+        psrlq   $4,%mm1\r
+        pxor    8(%esi,%edx,1),%mm0\r
+        movb    5(%edi),%cl\r
+        psllq   $60,%mm2\r
+        pxor    (%eax,%ebx,8),%mm1\r
+        andl    $15,%ebp\r
+        pxor    (%esi,%edx,1),%mm1\r
+        movl    %ecx,%edx\r
+        movd    %mm0,%ebx\r
+        pxor    %mm2,%mm0\r
+        shlb    $4,%cl\r
+        psrlq   $4,%mm0\r
+        movq    %mm1,%mm2\r
+        psrlq   $4,%mm1\r
+        pxor    8(%esi,%ecx,1),%mm0\r
+        psllq   $60,%mm2\r
+        andl    $240,%edx\r
+        pxor    (%eax,%ebp,8),%mm1\r
+        andl    $15,%ebx\r
+        pxor    (%esi,%ecx,1),%mm1\r
+        movd    %mm0,%ebp\r
+        pxor    %mm2,%mm0\r
+        psrlq   $4,%mm0\r
+        movq    %mm1,%mm2\r
+        psrlq   $4,%mm1\r
+        pxor    8(%esi,%edx,1),%mm0\r
+        movb    4(%edi),%cl\r
+        psllq   $60,%mm2\r
+        pxor    (%eax,%ebx,8),%mm1\r
+        andl    $15,%ebp\r
+        pxor    (%esi,%edx,1),%mm1\r
+        movl    %ecx,%edx\r
+        movd    %mm0,%ebx\r
+        pxor    %mm2,%mm0\r
+        shlb    $4,%cl\r
+        psrlq   $4,%mm0\r
+        movq    %mm1,%mm2\r
+        psrlq   $4,%mm1\r
+        pxor    8(%esi,%ecx,1),%mm0\r
+        psllq   $60,%mm2\r
+        andl    $240,%edx\r
+        pxor    (%eax,%ebp,8),%mm1\r
+        andl    $15,%ebx\r
+        pxor    (%esi,%ecx,1),%mm1\r
+        movd    %mm0,%ebp\r
+        pxor    %mm2,%mm0\r
+        psrlq   $4,%mm0\r
+        movq    %mm1,%mm2\r
+        psrlq   $4,%mm1\r
+        pxor    8(%esi,%edx,1),%mm0\r
+        movb    3(%edi),%cl\r
+        psllq   $60,%mm2\r
+        pxor    (%eax,%ebx,8),%mm1\r
+        andl    $15,%ebp\r
+        pxor    (%esi,%edx,1),%mm1\r
+        movl    %ecx,%edx\r
+        movd    %mm0,%ebx\r
+        pxor    %mm2,%mm0\r
+        shlb    $4,%cl\r
+        psrlq   $4,%mm0\r
+        movq    %mm1,%mm2\r
+        psrlq   $4,%mm1\r
+        pxor    8(%esi,%ecx,1),%mm0\r
+        psllq   $60,%mm2\r
+        andl    $240,%edx\r
+        pxor    (%eax,%ebp,8),%mm1\r
+        andl    $15,%ebx\r
+        pxor    (%esi,%ecx,1),%mm1\r
+        movd    %mm0,%ebp\r
+        pxor    %mm2,%mm0\r
+        psrlq   $4,%mm0\r
+        movq    %mm1,%mm2\r
+        psrlq   $4,%mm1\r
+        pxor    8(%esi,%edx,1),%mm0\r
+        movb    2(%edi),%cl\r
+        psllq   $60,%mm2\r
+        pxor    (%eax,%ebx,8),%mm1\r
+        andl    $15,%ebp\r
+        pxor    (%esi,%edx,1),%mm1\r
+        movl    %ecx,%edx\r
+        movd    %mm0,%ebx\r
+        pxor    %mm2,%mm0\r
+        shlb    $4,%cl\r
+        psrlq   $4,%mm0\r
+        movq    %mm1,%mm2\r
+        psrlq   $4,%mm1\r
+        pxor    8(%esi,%ecx,1),%mm0\r
+        psllq   $60,%mm2\r
+        andl    $240,%edx\r
+        pxor    (%eax,%ebp,8),%mm1\r
+        andl    $15,%ebx\r
+        pxor    (%esi,%ecx,1),%mm1\r
+        movd    %mm0,%ebp\r
+        pxor    %mm2,%mm0\r
+        psrlq   $4,%mm0\r
+        movq    %mm1,%mm2\r
+        psrlq   $4,%mm1\r
+        pxor    8(%esi,%edx,1),%mm0\r
+        movb    1(%edi),%cl\r
+        psllq   $60,%mm2\r
+        pxor    (%eax,%ebx,8),%mm1\r
+        andl    $15,%ebp\r
+        pxor    (%esi,%edx,1),%mm1\r
+        movl    %ecx,%edx\r
+        movd    %mm0,%ebx\r
+        pxor    %mm2,%mm0\r
+        shlb    $4,%cl\r
+        psrlq   $4,%mm0\r
+        movq    %mm1,%mm2\r
+        psrlq   $4,%mm1\r
+        pxor    8(%esi,%ecx,1),%mm0\r
+        psllq   $60,%mm2\r
+        andl    $240,%edx\r
+        pxor    (%eax,%ebp,8),%mm1\r
+        andl    $15,%ebx\r
+        pxor    (%esi,%ecx,1),%mm1\r
+        movd    %mm0,%ebp\r
+        pxor    %mm2,%mm0\r
+        psrlq   $4,%mm0\r
+        movq    %mm1,%mm2\r
+        psrlq   $4,%mm1\r
+        pxor    8(%esi,%edx,1),%mm0\r
+        movb    (%edi),%cl\r
+        psllq   $60,%mm2\r
+        pxor    (%eax,%ebx,8),%mm1\r
+        andl    $15,%ebp\r
+        pxor    (%esi,%edx,1),%mm1\r
+        movl    %ecx,%edx\r
+        movd    %mm0,%ebx\r
+        pxor    %mm2,%mm0\r
+        shlb    $4,%cl\r
+        psrlq   $4,%mm0\r
+        movq    %mm1,%mm2\r
+        psrlq   $4,%mm1\r
+        pxor    8(%esi,%ecx,1),%mm0\r
+        psllq   $60,%mm2\r
+        andl    $240,%edx\r
+        pxor    (%eax,%ebp,8),%mm1\r
+        andl    $15,%ebx\r
+        pxor    (%esi,%ecx,1),%mm1\r
+        movd    %mm0,%ebp\r
+        pxor    %mm2,%mm0\r
+        psrlq   $4,%mm0\r
+        movq    %mm1,%mm2\r
+        psrlq   $4,%mm1\r
+        pxor    8(%esi,%edx,1),%mm0\r
+        psllq   $60,%mm2\r
+        pxor    (%eax,%ebx,8),%mm1\r
+        andl    $15,%ebp\r
+        pxor    (%esi,%edx,1),%mm1\r
+        movd    %mm0,%ebx\r
+        pxor    %mm2,%mm0\r
+        movl    4(%eax,%ebp,8),%edi\r
+        psrlq   $32,%mm0\r
+        movd    %mm1,%edx\r
+        psrlq   $32,%mm1\r
+        movd    %mm0,%ecx\r
+        movd    %mm1,%ebp\r
+        shll    $4,%edi\r
+        bswap   %ebx\r
+        bswap   %edx\r
+        bswap   %ecx\r
+        xorl    %edi,%ebp\r
+        bswap   %ebp\r
+        ret\r
+.size   _mmx_gmult_4bit_inner,.-_mmx_gmult_4bit_inner\r
+.globl  gcm_gmult_4bit_mmx\r
+.type   gcm_gmult_4bit_mmx,@function\r
+.align  16\r
+gcm_gmult_4bit_mmx:\r
+.L_gcm_gmult_4bit_mmx_begin:\r
+        pushl   %ebp\r
+        pushl   %ebx\r
+        pushl   %esi\r
+        pushl   %edi\r
+        movl    20(%esp),%edi\r
+        movl    24(%esp),%esi\r
+        call    .L005pic_point\r
+.L005pic_point:\r
+        popl    %eax\r
+        leal    .Lrem_4bit-.L005pic_point(%eax),%eax\r
+        movzbl  15(%edi),%ebx\r
+        call    _mmx_gmult_4bit_inner\r
+        movl    20(%esp),%edi\r
+        emms\r
+        movl    %ebx,12(%edi)\r
+        movl    %edx,4(%edi)\r
+        movl    %ecx,8(%edi)\r
+        movl    %ebp,(%edi)\r
+        popl    %edi\r
+        popl    %esi\r
+        popl    %ebx\r
+        popl    %ebp\r
+        ret\r
+.size   gcm_gmult_4bit_mmx,.-.L_gcm_gmult_4bit_mmx_begin\r
+.globl  gcm_ghash_4bit_mmx\r
+.type   gcm_ghash_4bit_mmx,@function\r
+.align  16\r
+gcm_ghash_4bit_mmx:\r
+.L_gcm_ghash_4bit_mmx_begin:\r
+        pushl   %ebp\r
+        pushl   %ebx\r
+        pushl   %esi\r
+        pushl   %edi\r
+        movl    20(%esp),%ebp\r
+        movl    24(%esp),%esi\r
+        movl    28(%esp),%edi\r
+        movl    32(%esp),%ecx\r
+        call    .L006pic_point\r
+.L006pic_point:\r
+        popl    %eax\r
+        leal    .Lrem_4bit-.L006pic_point(%eax),%eax\r
+        addl    %edi,%ecx\r
+        movl    %ecx,32(%esp)\r
+        subl    $20,%esp\r
+        movl    12(%ebp),%ebx\r
+        movl    4(%ebp),%edx\r
+        movl    8(%ebp),%ecx\r
+        movl    (%ebp),%ebp\r
+        jmp     .L007mmx_outer_loop\r
+.align  16\r
+.L007mmx_outer_loop:\r
+        xorl    12(%edi),%ebx\r
+        xorl    4(%edi),%edx\r
+        xorl    8(%edi),%ecx\r
+        xorl    (%edi),%ebp\r
+        movl    %edi,48(%esp)\r
+        movl    %ebx,12(%esp)\r
+        movl    %edx,4(%esp)\r
+        movl    %ecx,8(%esp)\r
+        movl    %ebp,(%esp)\r
+        movl    %esp,%edi\r
+        shrl    $24,%ebx\r
+        call    _mmx_gmult_4bit_inner\r
+        movl    48(%esp),%edi\r
+        leal    16(%edi),%edi\r
+        cmpl    52(%esp),%edi\r
+        jb      .L007mmx_outer_loop\r
+        movl    40(%esp),%edi\r
+        emms\r
+        movl    %ebx,12(%edi)\r
+        movl    %edx,4(%edi)\r
+        movl    %ecx,8(%edi)\r
+        movl    %ebp,(%edi)\r
+        addl    $20,%esp\r
+        popl    %edi\r
+        popl    %esi\r
+        popl    %ebx\r
+        popl    %ebp\r
+        ret\r
+.size   gcm_ghash_4bit_mmx,.-.L_gcm_ghash_4bit_mmx_begin\r
+.align  64\r
+.Lrem_4bit:\r
+.long   0,0,0,29491200,0,58982400,0,38141952\r
+.long   0,117964800,0,113901568,0,76283904,0,88997888\r
+.long   0,235929600,0,265420800,0,227803136,0,206962688\r
+.long   0,152567808,0,148504576,0,177995776,0,190709760\r
+.byte   71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67\r
+.byte   82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112\r
+.byte   112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62\r
+.byte   0\r
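
For readers working through the unrolled loops above, the following is a minimal portable C sketch of the same 4-bit table-driven GHASH multiply, modeled on OpenSSL's generic gcm128.c code rather than taken from this generated file. The helper name reduce1bit, the u128 layout, and the sample key in main() are assumptions made for the demo; the per-nibble schedule and the rem_4bit reduction constants are the ones the assembly implements (the .Lrem_4bit table above stores the same constants, repositioned for the MMX addressing).

#include <stdint.h>
#include <stdio.h>
#include <string.h>

typedef struct { uint64_t hi, lo; } u128;   /* one element of GF(2^128) */

/* Per-nibble reduction constants; .Lrem_4bit above encodes these same values. */
static const uint16_t rem_4bit[16] = {
    0x0000, 0x1C20, 0x3840, 0x2460, 0x7080, 0x6CA0, 0x48C0, 0x54E0,
    0xE100, 0xFD20, 0xD940, 0xC560, 0x9180, 0x8DA0, 0xA9C0, 0xB5E0
};

/* Shift V right one bit in GHASH's reflected bit order, folding the
 * reduction polynomial (0xE1 || 0^120) back in when a 1 bit falls off. */
static void reduce1bit(u128 *v)
{
    uint64_t t = (0ULL - (v->lo & 1)) & 0xe100000000000000ULL;
    v->lo = (v->hi << 63) | (v->lo >> 1);
    v->hi = (v->hi >> 1) ^ t;
}

/* Precompute Htable[i] = i * H for every 4-bit multiplier i = 0..15. */
static void gcm_init_4bit(u128 Htable[16], const u128 *H)
{
    u128 v = *H;
    int i, j;

    Htable[0].hi = Htable[0].lo = 0;
    Htable[8] = v;
    reduce1bit(&v); Htable[4] = v;
    reduce1bit(&v); Htable[2] = v;
    reduce1bit(&v); Htable[1] = v;
    for (i = 2; i <= 8; i <<= 1)            /* remaining entries are XOR combinations */
        for (j = 1; j < i; j++) {
            Htable[i + j].hi = Htable[i].hi ^ Htable[j].hi;
            Htable[i + j].lo = Htable[i].lo ^ Htable[j].lo;
        }
}

/* Xi := Xi * H.  One nibble of Xi is consumed per half-step, low nibble of
 * byte 15 first and high nibble of byte 0 last, the same schedule the
 * unrolled x86 and MMX code above follows. */
static void gcm_gmult_4bit(uint8_t Xi[16], const u128 Htable[16])
{
    u128 z;
    int cnt = 15, i;
    unsigned int nlo, nhi, rem;

    nlo = Xi[15];
    nhi = nlo >> 4;
    nlo &= 0xf;
    z = Htable[nlo];

    for (;;) {
        rem = (unsigned int)z.lo & 0xf;      /* nibble about to be shifted out */
        z.lo = (z.hi << 60) | (z.lo >> 4);
        z.hi = (z.hi >> 4) ^ ((uint64_t)rem_4bit[rem] << 48);
        z.hi ^= Htable[nhi].hi;
        z.lo ^= Htable[nhi].lo;

        if (--cnt < 0)
            break;

        nlo = Xi[cnt];
        nhi = nlo >> 4;
        nlo &= 0xf;

        rem = (unsigned int)z.lo & 0xf;
        z.lo = (z.hi << 60) | (z.lo >> 4);
        z.hi = (z.hi >> 4) ^ ((uint64_t)rem_4bit[rem] << 48);
        z.hi ^= Htable[nlo].hi;
        z.lo ^= Htable[nlo].lo;
    }

    for (i = 0; i < 8; i++) {               /* store big-endian, cf. the bswap tail */
        Xi[i]     = (uint8_t)(z.hi >> (56 - 8 * i));
        Xi[8 + i] = (uint8_t)(z.lo >> (56 - 8 * i));
    }
}

int main(void)
{
    u128 H = { 0x66e94bd4ef8a2c3bULL, 0x884cfa59ca342b2eULL };  /* sample key */
    u128 Htable[16];
    uint8_t Xi[16] = { 0x80 };   /* 0x80 00..00 encodes the field element 1 */
    uint8_t expect[16];
    int i;

    for (i = 0; i < 8; i++) {
        expect[i]     = (uint8_t)(H.hi >> (56 - 8 * i));
        expect[8 + i] = (uint8_t)(H.lo >> (56 - 8 * i));
    }
    gcm_init_4bit(Htable, &H);
    gcm_gmult_4bit(Xi, Htable);  /* 1 * H should give H back */
    printf("1*H == H: %s\n", memcmp(Xi, expect, 16) == 0 ? "yes" : "no");
    return 0;
}

The gcm_ghash_4bit_* variants in the diff add only an outer loop (.L002x86_outer_loop and .L007mmx_outer_loop) that XORs each 16-byte input block into Xi before running this same per-nibble multiply.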