crypto: x86/salsa20 - remove x86 salsa20 implementations
author		Eric Biggers <ebiggers@google.com>
		Sat, 26 May 2018 07:08:58 +0000 (00:08 -0700)
committer	Kleber Sacilotto de Souza <kleber.souza@canonical.com>
		Mon, 14 Jan 2019 09:28:55 +0000 (09:28 +0000)
BugLink: http://bugs.launchpad.net/bugs/1810554
commit b7b73cd5d74694ed59abcdb4974dacb4ff8b2a2a upstream.

The x86 assembly implementations of Salsa20 use the frame base pointer
register (%ebp or %rbp), which breaks frame pointer convention and
breaks stack traces when unwinding from an interrupt in the crypto code.
Recent (v4.10+) kernels will warn about this, e.g.

WARNING: kernel stack regs at 00000000a8291e69 in syzkaller047086:4677 has bad 'bp' value 000000001077994c
[...]
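
For reference, here is a minimal sketch (not from this patch; the function
name is made up) of how kernel x86 asm normally stays compatible with the
frame-pointer unwinder: it keeps a real stack frame via the
FRAME_BEGIN/FRAME_END macros from <asm/frame.h>, which expand to
"push %rbp; mov %rsp,%rbp" and "pop %rbp" (on x86_64) when
CONFIG_FRAME_POINTER is enabled.

	#include <linux/linkage.h>
	#include <asm/frame.h>

	# hypothetical function, for illustration only
	ENTRY(example_encrypt_bytes)
		FRAME_BEGIN
		# the body may use other registers freely (saving any
		# callee-saved ones first), but %rbp stays a valid frame
		# pointer, so an interrupt taken here can still be unwound
		FRAME_END
		ret
	ENDPROC(example_encrypt_bytes)

The salsa20 asm can't take that route as-is, since it uses %ebp/%rbp as a
working register inside its main loop; fixing it would mean redoing the
register allocation, not just adding a prologue.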

But after looking into it, I believe there's very little reason to still
retain the x86 Salsa20 code.  First, these are *not* vectorized
(SSE2/SSSE3/AVX2) implementations, which would be needed to get anywhere
close to the best Salsa20 performance on any remotely modern x86
processor; they're just regular x86 assembly.  Second, it's still
unclear whether anyone is actually using the kernel's Salsa20 at all,
especially now that ChaCha20 is supported too, with much more efficient
SSSE3 and AVX2 implementations.  Finally, in benchmarks I did
on both Intel and AMD processors with both gcc 8.1.0 and gcc 4.9.4, the
x86_64 salsa20-asm is actually slightly *slower* than salsa20-generic
(~3% slower on Skylake, ~10% slower on Zen), while the i686 salsa20-asm
is only slightly faster than salsa20-generic (~15% faster on Skylake,
~20% faster on Zen).  The gcc version made little difference.

So, the x86_64 salsa20-asm is pretty clearly useless.  That leaves just
the i686 salsa20-asm, which based on my tests provides a 15-20% speed
boost.  But that's without updating the code to not use %ebp.  And given
the maintenance cost, the small speed difference vs. salsa20-generic,
the fact that few people still use i686 kernels, the doubt that anyone
is even using the kernel's Salsa20 at all, and the fact that an SSE2
implementation would almost certainly be much faster on any remotely
modern x86 processor yet no one has cared enough to add one, I don't
think it's worthwhile to keep.

Thus, just remove both the x86_64 and i686 salsa20-asm implementations.
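
(Illustrative only, not part of this patch: after the removal, a kernel
.config that wants Salsa20 or the faster ChaCha20 alternative can still
use the existing options below; only CRYPTO_SALSA20_586 and
CRYPTO_SALSA20_X86_64 go away.)

	# portable C Salsa20, unaffected by this patch
	CONFIG_CRYPTO_SALSA20=y
	# ChaCha20, including the SSSE3/AVX2-accelerated x86_64 module
	CONFIG_CRYPTO_CHACHA20=y
	CONFIG_CRYPTO_CHACHA20_X86_64=y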

Reported-by: syzbot+ffa3a158337bbc01ff09@syzkaller.appspotmail.com
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Kamal Mostafa <kamal@canonical.com>
Signed-off-by: Khalid Elmously <khalid.elmously@canonical.com>
arch/x86/crypto/Makefile
arch/x86/crypto/salsa20-i586-asm_32.S [deleted file]
arch/x86/crypto/salsa20-x86_64-asm_64.S [deleted file]
arch/x86/crypto/salsa20_glue.c [deleted file]
crypto/Kconfig

diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 5f07333bb224c12c8c8d23407faa1265db9afa7b..9c903a420cdab7e86f480f11335c10ceea75d512 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -15,7 +15,6 @@ obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o
 
 obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o
 obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
-obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
 obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o
 
 obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
@@ -24,7 +23,6 @@ obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o
 obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
 obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
 obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o
-obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o
 obj-$(CONFIG_CRYPTO_CHACHA20_X86_64) += chacha20-x86_64.o
 obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o
 obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o
@@ -59,7 +57,6 @@ endif
 
 aes-i586-y := aes-i586-asm_32.o aes_glue.o
 twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o
-salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o
 serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o
 
 aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
@@ -68,7 +65,6 @@ camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o
 blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
 twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
 twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o
-salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
 chacha20-x86_64-y := chacha20-ssse3-x86_64.o chacha20_glue.o
 serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o
 
diff --git a/arch/x86/crypto/salsa20-i586-asm_32.S b/arch/x86/crypto/salsa20-i586-asm_32.S
deleted file mode 100644
index 329452b..0000000
--- a/arch/x86/crypto/salsa20-i586-asm_32.S
+++ /dev/null
@@ -1,1114 +0,0 @@
-# salsa20_pm.s version 20051229
-# D. J. Bernstein
-# Public domain.
-
-#include <linux/linkage.h>
-
-.text
-
-# enter salsa20_encrypt_bytes
-ENTRY(salsa20_encrypt_bytes)
-       mov     %esp,%eax
-       and     $31,%eax
-       add     $256,%eax
-       sub     %eax,%esp
-       # eax_stack = eax
-       movl    %eax,80(%esp)
-       # ebx_stack = ebx
-       movl    %ebx,84(%esp)
-       # esi_stack = esi
-       movl    %esi,88(%esp)
-       # edi_stack = edi
-       movl    %edi,92(%esp)
-       # ebp_stack = ebp
-       movl    %ebp,96(%esp)
-       # x = arg1
-       movl    4(%esp,%eax),%edx
-       # m = arg2
-       movl    8(%esp,%eax),%esi
-       # out = arg3
-       movl    12(%esp,%eax),%edi
-       # bytes = arg4
-       movl    16(%esp,%eax),%ebx
-       # bytes -= 0
-       sub     $0,%ebx
-       # goto done if unsigned<=
-       jbe     ._done
-._start:
-       # in0 = *(uint32 *) (x + 0)
-       movl    0(%edx),%eax
-       # in1 = *(uint32 *) (x + 4)
-       movl    4(%edx),%ecx
-       # in2 = *(uint32 *) (x + 8)
-       movl    8(%edx),%ebp
-       # j0 = in0
-       movl    %eax,164(%esp)
-       # in3 = *(uint32 *) (x + 12)
-       movl    12(%edx),%eax
-       # j1 = in1
-       movl    %ecx,168(%esp)
-       # in4 = *(uint32 *) (x + 16)
-       movl    16(%edx),%ecx
-       # j2 = in2
-       movl    %ebp,172(%esp)
-       # in5 = *(uint32 *) (x + 20)
-       movl    20(%edx),%ebp
-       # j3 = in3
-       movl    %eax,176(%esp)
-       # in6 = *(uint32 *) (x + 24)
-       movl    24(%edx),%eax
-       # j4 = in4
-       movl    %ecx,180(%esp)
-       # in7 = *(uint32 *) (x + 28)
-       movl    28(%edx),%ecx
-       # j5 = in5
-       movl    %ebp,184(%esp)
-       # in8 = *(uint32 *) (x + 32)
-       movl    32(%edx),%ebp
-       # j6 = in6
-       movl    %eax,188(%esp)
-       # in9 = *(uint32 *) (x + 36)
-       movl    36(%edx),%eax
-       # j7 = in7
-       movl    %ecx,192(%esp)
-       # in10 = *(uint32 *) (x + 40)
-       movl    40(%edx),%ecx
-       # j8 = in8
-       movl    %ebp,196(%esp)
-       # in11 = *(uint32 *) (x + 44)
-       movl    44(%edx),%ebp
-       # j9 = in9
-       movl    %eax,200(%esp)
-       # in12 = *(uint32 *) (x + 48)
-       movl    48(%edx),%eax
-       # j10 = in10
-       movl    %ecx,204(%esp)
-       # in13 = *(uint32 *) (x + 52)
-       movl    52(%edx),%ecx
-       # j11 = in11
-       movl    %ebp,208(%esp)
-       # in14 = *(uint32 *) (x + 56)
-       movl    56(%edx),%ebp
-       # j12 = in12
-       movl    %eax,212(%esp)
-       # in15 = *(uint32 *) (x + 60)
-       movl    60(%edx),%eax
-       # j13 = in13
-       movl    %ecx,216(%esp)
-       # j14 = in14
-       movl    %ebp,220(%esp)
-       # j15 = in15
-       movl    %eax,224(%esp)
-       # x_backup = x
-       movl    %edx,64(%esp)
-._bytesatleast1:
-       #   bytes - 64
-       cmp     $64,%ebx
-       #   goto nocopy if unsigned>=
-       jae     ._nocopy
-       #     ctarget = out
-       movl    %edi,228(%esp)
-       #     out = &tmp
-       leal    0(%esp),%edi
-       #     i = bytes
-       mov     %ebx,%ecx
-       #     while (i) { *out++ = *m++; --i }
-       rep     movsb
-       #     out = &tmp
-       leal    0(%esp),%edi
-       #     m = &tmp
-       leal    0(%esp),%esi
-._nocopy:
-       #   out_backup = out
-       movl    %edi,72(%esp)
-       #   m_backup = m
-       movl    %esi,68(%esp)
-       #   bytes_backup = bytes
-       movl    %ebx,76(%esp)
-       #   in0 = j0
-       movl    164(%esp),%eax
-       #   in1 = j1
-       movl    168(%esp),%ecx
-       #   in2 = j2
-       movl    172(%esp),%edx
-       #   in3 = j3
-       movl    176(%esp),%ebx
-       #   x0 = in0
-       movl    %eax,100(%esp)
-       #   x1 = in1
-       movl    %ecx,104(%esp)
-       #   x2 = in2
-       movl    %edx,108(%esp)
-       #   x3 = in3
-       movl    %ebx,112(%esp)
-       #   in4 = j4
-       movl    180(%esp),%eax
-       #   in5 = j5
-       movl    184(%esp),%ecx
-       #   in6 = j6
-       movl    188(%esp),%edx
-       #   in7 = j7
-       movl    192(%esp),%ebx
-       #   x4 = in4
-       movl    %eax,116(%esp)
-       #   x5 = in5
-       movl    %ecx,120(%esp)
-       #   x6 = in6
-       movl    %edx,124(%esp)
-       #   x7 = in7
-       movl    %ebx,128(%esp)
-       #   in8 = j8
-       movl    196(%esp),%eax
-       #   in9 = j9
-       movl    200(%esp),%ecx
-       #   in10 = j10
-       movl    204(%esp),%edx
-       #   in11 = j11
-       movl    208(%esp),%ebx
-       #   x8 = in8
-       movl    %eax,132(%esp)
-       #   x9 = in9
-       movl    %ecx,136(%esp)
-       #   x10 = in10
-       movl    %edx,140(%esp)
-       #   x11 = in11
-       movl    %ebx,144(%esp)
-       #   in12 = j12
-       movl    212(%esp),%eax
-       #   in13 = j13
-       movl    216(%esp),%ecx
-       #   in14 = j14
-       movl    220(%esp),%edx
-       #   in15 = j15
-       movl    224(%esp),%ebx
-       #   x12 = in12
-       movl    %eax,148(%esp)
-       #   x13 = in13
-       movl    %ecx,152(%esp)
-       #   x14 = in14
-       movl    %edx,156(%esp)
-       #   x15 = in15
-       movl    %ebx,160(%esp)
-       #   i = 20
-       mov     $20,%ebp
-       # p = x0
-       movl    100(%esp),%eax
-       # s = x5
-       movl    120(%esp),%ecx
-       # t = x10
-       movl    140(%esp),%edx
-       # w = x15
-       movl    160(%esp),%ebx
-._mainloop:
-       # x0 = p
-       movl    %eax,100(%esp)
-       #                               x10 = t
-       movl    %edx,140(%esp)
-       # p += x12
-       addl    148(%esp),%eax
-       #               x5 = s
-       movl    %ecx,120(%esp)
-       #                               t += x6
-       addl    124(%esp),%edx
-       #                                               x15 = w
-       movl    %ebx,160(%esp)
-       #               r = x1
-       movl    104(%esp),%esi
-       #               r += s
-       add     %ecx,%esi
-       #                                               v = x11
-       movl    144(%esp),%edi
-       #                                               v += w
-       add     %ebx,%edi
-       # p <<<= 7
-       rol     $7,%eax
-       # p ^= x4
-       xorl    116(%esp),%eax
-       #                               t <<<= 7
-       rol     $7,%edx
-       #                               t ^= x14
-       xorl    156(%esp),%edx
-       #               r <<<= 7
-       rol     $7,%esi
-       #               r ^= x9
-       xorl    136(%esp),%esi
-       #                                               v <<<= 7
-       rol     $7,%edi
-       #                                               v ^= x3
-       xorl    112(%esp),%edi
-       # x4 = p
-       movl    %eax,116(%esp)
-       #                               x14 = t
-       movl    %edx,156(%esp)
-       # p += x0
-       addl    100(%esp),%eax
-       #               x9 = r
-       movl    %esi,136(%esp)
-       #                               t += x10
-       addl    140(%esp),%edx
-       #                                               x3 = v
-       movl    %edi,112(%esp)
-       # p <<<= 9
-       rol     $9,%eax
-       # p ^= x8
-       xorl    132(%esp),%eax
-       #                               t <<<= 9
-       rol     $9,%edx
-       #                               t ^= x2
-       xorl    108(%esp),%edx
-       #               s += r
-       add     %esi,%ecx
-       #               s <<<= 9
-       rol     $9,%ecx
-       #               s ^= x13
-       xorl    152(%esp),%ecx
-       #                                               w += v
-       add     %edi,%ebx
-       #                                               w <<<= 9
-       rol     $9,%ebx
-       #                                               w ^= x7
-       xorl    128(%esp),%ebx
-       # x8 = p
-       movl    %eax,132(%esp)
-       #                               x2 = t
-       movl    %edx,108(%esp)
-       # p += x4
-       addl    116(%esp),%eax
-       #               x13 = s
-       movl    %ecx,152(%esp)
-       #                               t += x14
-       addl    156(%esp),%edx
-       #                                               x7 = w
-       movl    %ebx,128(%esp)
-       # p <<<= 13
-       rol     $13,%eax
-       # p ^= x12
-       xorl    148(%esp),%eax
-       #                               t <<<= 13
-       rol     $13,%edx
-       #                               t ^= x6
-       xorl    124(%esp),%edx
-       #               r += s
-       add     %ecx,%esi
-       #               r <<<= 13
-       rol     $13,%esi
-       #               r ^= x1
-       xorl    104(%esp),%esi
-       #                                               v += w
-       add     %ebx,%edi
-       #                                               v <<<= 13
-       rol     $13,%edi
-       #                                               v ^= x11
-       xorl    144(%esp),%edi
-       # x12 = p
-       movl    %eax,148(%esp)
-       #                               x6 = t
-       movl    %edx,124(%esp)
-       # p += x8
-       addl    132(%esp),%eax
-       #               x1 = r
-       movl    %esi,104(%esp)
-       #                               t += x2
-       addl    108(%esp),%edx
-       #                                               x11 = v
-       movl    %edi,144(%esp)
-       # p <<<= 18
-       rol     $18,%eax
-       # p ^= x0
-       xorl    100(%esp),%eax
-       #                               t <<<= 18
-       rol     $18,%edx
-       #                               t ^= x10
-       xorl    140(%esp),%edx
-       #               s += r
-       add     %esi,%ecx
-       #               s <<<= 18
-       rol     $18,%ecx
-       #               s ^= x5
-       xorl    120(%esp),%ecx
-       #                                               w += v
-       add     %edi,%ebx
-       #                                               w <<<= 18
-       rol     $18,%ebx
-       #                                               w ^= x15
-       xorl    160(%esp),%ebx
-       # x0 = p
-       movl    %eax,100(%esp)
-       #                               x10 = t
-       movl    %edx,140(%esp)
-       # p += x3
-       addl    112(%esp),%eax
-       # p <<<= 7
-       rol     $7,%eax
-       #               x5 = s
-       movl    %ecx,120(%esp)
-       #                               t += x9
-       addl    136(%esp),%edx
-       #                                               x15 = w
-       movl    %ebx,160(%esp)
-       #               r = x4
-       movl    116(%esp),%esi
-       #               r += s
-       add     %ecx,%esi
-       #                                               v = x14
-       movl    156(%esp),%edi
-       #                                               v += w
-       add     %ebx,%edi
-       # p ^= x1
-       xorl    104(%esp),%eax
-       #                               t <<<= 7
-       rol     $7,%edx
-       #                               t ^= x11
-       xorl    144(%esp),%edx
-       #               r <<<= 7
-       rol     $7,%esi
-       #               r ^= x6
-       xorl    124(%esp),%esi
-       #                                               v <<<= 7
-       rol     $7,%edi
-       #                                               v ^= x12
-       xorl    148(%esp),%edi
-       # x1 = p
-       movl    %eax,104(%esp)
-       #                               x11 = t
-       movl    %edx,144(%esp)
-       # p += x0
-       addl    100(%esp),%eax
-       #               x6 = r
-       movl    %esi,124(%esp)
-       #                               t += x10
-       addl    140(%esp),%edx
-       #                                               x12 = v
-       movl    %edi,148(%esp)
-       # p <<<= 9
-       rol     $9,%eax
-       # p ^= x2
-       xorl    108(%esp),%eax
-       #                               t <<<= 9
-       rol     $9,%edx
-       #                               t ^= x8
-       xorl    132(%esp),%edx
-       #               s += r
-       add     %esi,%ecx
-       #               s <<<= 9
-       rol     $9,%ecx
-       #               s ^= x7
-       xorl    128(%esp),%ecx
-       #                                               w += v
-       add     %edi,%ebx
-       #                                               w <<<= 9
-       rol     $9,%ebx
-       #                                               w ^= x13
-       xorl    152(%esp),%ebx
-       # x2 = p
-       movl    %eax,108(%esp)
-       #                               x8 = t
-       movl    %edx,132(%esp)
-       # p += x1
-       addl    104(%esp),%eax
-       #               x7 = s
-       movl    %ecx,128(%esp)
-       #                               t += x11
-       addl    144(%esp),%edx
-       #                                               x13 = w
-       movl    %ebx,152(%esp)
-       # p <<<= 13
-       rol     $13,%eax
-       # p ^= x3
-       xorl    112(%esp),%eax
-       #                               t <<<= 13
-       rol     $13,%edx
-       #                               t ^= x9
-       xorl    136(%esp),%edx
-       #               r += s
-       add     %ecx,%esi
-       #               r <<<= 13
-       rol     $13,%esi
-       #               r ^= x4
-       xorl    116(%esp),%esi
-       #                                               v += w
-       add     %ebx,%edi
-       #                                               v <<<= 13
-       rol     $13,%edi
-       #                                               v ^= x14
-       xorl    156(%esp),%edi
-       # x3 = p
-       movl    %eax,112(%esp)
-       #                               x9 = t
-       movl    %edx,136(%esp)
-       # p += x2
-       addl    108(%esp),%eax
-       #               x4 = r
-       movl    %esi,116(%esp)
-       #                               t += x8
-       addl    132(%esp),%edx
-       #                                               x14 = v
-       movl    %edi,156(%esp)
-       # p <<<= 18
-       rol     $18,%eax
-       # p ^= x0
-       xorl    100(%esp),%eax
-       #                               t <<<= 18
-       rol     $18,%edx
-       #                               t ^= x10
-       xorl    140(%esp),%edx
-       #               s += r
-       add     %esi,%ecx
-       #               s <<<= 18
-       rol     $18,%ecx
-       #               s ^= x5
-       xorl    120(%esp),%ecx
-       #                                               w += v
-       add     %edi,%ebx
-       #                                               w <<<= 18
-       rol     $18,%ebx
-       #                                               w ^= x15
-       xorl    160(%esp),%ebx
-       # x0 = p
-       movl    %eax,100(%esp)
-       #                               x10 = t
-       movl    %edx,140(%esp)
-       # p += x12
-       addl    148(%esp),%eax
-       #               x5 = s
-       movl    %ecx,120(%esp)
-       #                               t += x6
-       addl    124(%esp),%edx
-       #                                               x15 = w
-       movl    %ebx,160(%esp)
-       #               r = x1
-       movl    104(%esp),%esi
-       #               r += s
-       add     %ecx,%esi
-       #                                               v = x11
-       movl    144(%esp),%edi
-       #                                               v += w
-       add     %ebx,%edi
-       # p <<<= 7
-       rol     $7,%eax
-       # p ^= x4
-       xorl    116(%esp),%eax
-       #                               t <<<= 7
-       rol     $7,%edx
-       #                               t ^= x14
-       xorl    156(%esp),%edx
-       #               r <<<= 7
-       rol     $7,%esi
-       #               r ^= x9
-       xorl    136(%esp),%esi
-       #                                               v <<<= 7
-       rol     $7,%edi
-       #                                               v ^= x3
-       xorl    112(%esp),%edi
-       # x4 = p
-       movl    %eax,116(%esp)
-       #                               x14 = t
-       movl    %edx,156(%esp)
-       # p += x0
-       addl    100(%esp),%eax
-       #               x9 = r
-       movl    %esi,136(%esp)
-       #                               t += x10
-       addl    140(%esp),%edx
-       #                                               x3 = v
-       movl    %edi,112(%esp)
-       # p <<<= 9
-       rol     $9,%eax
-       # p ^= x8
-       xorl    132(%esp),%eax
-       #                               t <<<= 9
-       rol     $9,%edx
-       #                               t ^= x2
-       xorl    108(%esp),%edx
-       #               s += r
-       add     %esi,%ecx
-       #               s <<<= 9
-       rol     $9,%ecx
-       #               s ^= x13
-       xorl    152(%esp),%ecx
-       #                                               w += v
-       add     %edi,%ebx
-       #                                               w <<<= 9
-       rol     $9,%ebx
-       #                                               w ^= x7
-       xorl    128(%esp),%ebx
-       # x8 = p
-       movl    %eax,132(%esp)
-       #                               x2 = t
-       movl    %edx,108(%esp)
-       # p += x4
-       addl    116(%esp),%eax
-       #               x13 = s
-       movl    %ecx,152(%esp)
-       #                               t += x14
-       addl    156(%esp),%edx
-       #                                               x7 = w
-       movl    %ebx,128(%esp)
-       # p <<<= 13
-       rol     $13,%eax
-       # p ^= x12
-       xorl    148(%esp),%eax
-       #                               t <<<= 13
-       rol     $13,%edx
-       #                               t ^= x6
-       xorl    124(%esp),%edx
-       #               r += s
-       add     %ecx,%esi
-       #               r <<<= 13
-       rol     $13,%esi
-       #               r ^= x1
-       xorl    104(%esp),%esi
-       #                                               v += w
-       add     %ebx,%edi
-       #                                               v <<<= 13
-       rol     $13,%edi
-       #                                               v ^= x11
-       xorl    144(%esp),%edi
-       # x12 = p
-       movl    %eax,148(%esp)
-       #                               x6 = t
-       movl    %edx,124(%esp)
-       # p += x8
-       addl    132(%esp),%eax
-       #               x1 = r
-       movl    %esi,104(%esp)
-       #                               t += x2
-       addl    108(%esp),%edx
-       #                                               x11 = v
-       movl    %edi,144(%esp)
-       # p <<<= 18
-       rol     $18,%eax
-       # p ^= x0
-       xorl    100(%esp),%eax
-       #                               t <<<= 18
-       rol     $18,%edx
-       #                               t ^= x10
-       xorl    140(%esp),%edx
-       #               s += r
-       add     %esi,%ecx
-       #               s <<<= 18
-       rol     $18,%ecx
-       #               s ^= x5
-       xorl    120(%esp),%ecx
-       #                                               w += v
-       add     %edi,%ebx
-       #                                               w <<<= 18
-       rol     $18,%ebx
-       #                                               w ^= x15
-       xorl    160(%esp),%ebx
-       # x0 = p
-       movl    %eax,100(%esp)
-       #                               x10 = t
-       movl    %edx,140(%esp)
-       # p += x3
-       addl    112(%esp),%eax
-       # p <<<= 7
-       rol     $7,%eax
-       #               x5 = s
-       movl    %ecx,120(%esp)
-       #                               t += x9
-       addl    136(%esp),%edx
-       #                                               x15 = w
-       movl    %ebx,160(%esp)
-       #               r = x4
-       movl    116(%esp),%esi
-       #               r += s
-       add     %ecx,%esi
-       #                                               v = x14
-       movl    156(%esp),%edi
-       #                                               v += w
-       add     %ebx,%edi
-       # p ^= x1
-       xorl    104(%esp),%eax
-       #                               t <<<= 7
-       rol     $7,%edx
-       #                               t ^= x11
-       xorl    144(%esp),%edx
-       #               r <<<= 7
-       rol     $7,%esi
-       #               r ^= x6
-       xorl    124(%esp),%esi
-       #                                               v <<<= 7
-       rol     $7,%edi
-       #                                               v ^= x12
-       xorl    148(%esp),%edi
-       # x1 = p
-       movl    %eax,104(%esp)
-       #                               x11 = t
-       movl    %edx,144(%esp)
-       # p += x0
-       addl    100(%esp),%eax
-       #               x6 = r
-       movl    %esi,124(%esp)
-       #                               t += x10
-       addl    140(%esp),%edx
-       #                                               x12 = v
-       movl    %edi,148(%esp)
-       # p <<<= 9
-       rol     $9,%eax
-       # p ^= x2
-       xorl    108(%esp),%eax
-       #                               t <<<= 9
-       rol     $9,%edx
-       #                               t ^= x8
-       xorl    132(%esp),%edx
-       #               s += r
-       add     %esi,%ecx
-       #               s <<<= 9
-       rol     $9,%ecx
-       #               s ^= x7
-       xorl    128(%esp),%ecx
-       #                                               w += v
-       add     %edi,%ebx
-       #                                               w <<<= 9
-       rol     $9,%ebx
-       #                                               w ^= x13
-       xorl    152(%esp),%ebx
-       # x2 = p
-       movl    %eax,108(%esp)
-       #                               x8 = t
-       movl    %edx,132(%esp)
-       # p += x1
-       addl    104(%esp),%eax
-       #               x7 = s
-       movl    %ecx,128(%esp)
-       #                               t += x11
-       addl    144(%esp),%edx
-       #                                               x13 = w
-       movl    %ebx,152(%esp)
-       # p <<<= 13
-       rol     $13,%eax
-       # p ^= x3
-       xorl    112(%esp),%eax
-       #                               t <<<= 13
-       rol     $13,%edx
-       #                               t ^= x9
-       xorl    136(%esp),%edx
-       #               r += s
-       add     %ecx,%esi
-       #               r <<<= 13
-       rol     $13,%esi
-       #               r ^= x4
-       xorl    116(%esp),%esi
-       #                                               v += w
-       add     %ebx,%edi
-       #                                               v <<<= 13
-       rol     $13,%edi
-       #                                               v ^= x14
-       xorl    156(%esp),%edi
-       # x3 = p
-       movl    %eax,112(%esp)
-       #                               x9 = t
-       movl    %edx,136(%esp)
-       # p += x2
-       addl    108(%esp),%eax
-       #               x4 = r
-       movl    %esi,116(%esp)
-       #                               t += x8
-       addl    132(%esp),%edx
-       #                                               x14 = v
-       movl    %edi,156(%esp)
-       # p <<<= 18
-       rol     $18,%eax
-       # p ^= x0
-       xorl    100(%esp),%eax
-       #                               t <<<= 18
-       rol     $18,%edx
-       #                               t ^= x10
-       xorl    140(%esp),%edx
-       #               s += r
-       add     %esi,%ecx
-       #               s <<<= 18
-       rol     $18,%ecx
-       #               s ^= x5
-       xorl    120(%esp),%ecx
-       #                                               w += v
-       add     %edi,%ebx
-       #                                               w <<<= 18
-       rol     $18,%ebx
-       #                                               w ^= x15
-       xorl    160(%esp),%ebx
-       # i -= 4
-       sub     $4,%ebp
-       # goto mainloop if unsigned >
-       ja      ._mainloop
-       # x0 = p
-       movl    %eax,100(%esp)
-       # x5 = s
-       movl    %ecx,120(%esp)
-       # x10 = t
-       movl    %edx,140(%esp)
-       # x15 = w
-       movl    %ebx,160(%esp)
-       #   out = out_backup
-       movl    72(%esp),%edi
-       #   m = m_backup
-       movl    68(%esp),%esi
-       #   in0 = x0
-       movl    100(%esp),%eax
-       #   in1 = x1
-       movl    104(%esp),%ecx
-       #   in0 += j0
-       addl    164(%esp),%eax
-       #   in1 += j1
-       addl    168(%esp),%ecx
-       #   in0 ^= *(uint32 *) (m + 0)
-       xorl    0(%esi),%eax
-       #   in1 ^= *(uint32 *) (m + 4)
-       xorl    4(%esi),%ecx
-       #   *(uint32 *) (out + 0) = in0
-       movl    %eax,0(%edi)
-       #   *(uint32 *) (out + 4) = in1
-       movl    %ecx,4(%edi)
-       #   in2 = x2
-       movl    108(%esp),%eax
-       #   in3 = x3
-       movl    112(%esp),%ecx
-       #   in2 += j2
-       addl    172(%esp),%eax
-       #   in3 += j3
-       addl    176(%esp),%ecx
-       #   in2 ^= *(uint32 *) (m + 8)
-       xorl    8(%esi),%eax
-       #   in3 ^= *(uint32 *) (m + 12)
-       xorl    12(%esi),%ecx
-       #   *(uint32 *) (out + 8) = in2
-       movl    %eax,8(%edi)
-       #   *(uint32 *) (out + 12) = in3
-       movl    %ecx,12(%edi)
-       #   in4 = x4
-       movl    116(%esp),%eax
-       #   in5 = x5
-       movl    120(%esp),%ecx
-       #   in4 += j4
-       addl    180(%esp),%eax
-       #   in5 += j5
-       addl    184(%esp),%ecx
-       #   in4 ^= *(uint32 *) (m + 16)
-       xorl    16(%esi),%eax
-       #   in5 ^= *(uint32 *) (m + 20)
-       xorl    20(%esi),%ecx
-       #   *(uint32 *) (out + 16) = in4
-       movl    %eax,16(%edi)
-       #   *(uint32 *) (out + 20) = in5
-       movl    %ecx,20(%edi)
-       #   in6 = x6
-       movl    124(%esp),%eax
-       #   in7 = x7
-       movl    128(%esp),%ecx
-       #   in6 += j6
-       addl    188(%esp),%eax
-       #   in7 += j7
-       addl    192(%esp),%ecx
-       #   in6 ^= *(uint32 *) (m + 24)
-       xorl    24(%esi),%eax
-       #   in7 ^= *(uint32 *) (m + 28)
-       xorl    28(%esi),%ecx
-       #   *(uint32 *) (out + 24) = in6
-       movl    %eax,24(%edi)
-       #   *(uint32 *) (out + 28) = in7
-       movl    %ecx,28(%edi)
-       #   in8 = x8
-       movl    132(%esp),%eax
-       #   in9 = x9
-       movl    136(%esp),%ecx
-       #   in8 += j8
-       addl    196(%esp),%eax
-       #   in9 += j9
-       addl    200(%esp),%ecx
-       #   in8 ^= *(uint32 *) (m + 32)
-       xorl    32(%esi),%eax
-       #   in9 ^= *(uint32 *) (m + 36)
-       xorl    36(%esi),%ecx
-       #   *(uint32 *) (out + 32) = in8
-       movl    %eax,32(%edi)
-       #   *(uint32 *) (out + 36) = in9
-       movl    %ecx,36(%edi)
-       #   in10 = x10
-       movl    140(%esp),%eax
-       #   in11 = x11
-       movl    144(%esp),%ecx
-       #   in10 += j10
-       addl    204(%esp),%eax
-       #   in11 += j11
-       addl    208(%esp),%ecx
-       #   in10 ^= *(uint32 *) (m + 40)
-       xorl    40(%esi),%eax
-       #   in11 ^= *(uint32 *) (m + 44)
-       xorl    44(%esi),%ecx
-       #   *(uint32 *) (out + 40) = in10
-       movl    %eax,40(%edi)
-       #   *(uint32 *) (out + 44) = in11
-       movl    %ecx,44(%edi)
-       #   in12 = x12
-       movl    148(%esp),%eax
-       #   in13 = x13
-       movl    152(%esp),%ecx
-       #   in12 += j12
-       addl    212(%esp),%eax
-       #   in13 += j13
-       addl    216(%esp),%ecx
-       #   in12 ^= *(uint32 *) (m + 48)
-       xorl    48(%esi),%eax
-       #   in13 ^= *(uint32 *) (m + 52)
-       xorl    52(%esi),%ecx
-       #   *(uint32 *) (out + 48) = in12
-       movl    %eax,48(%edi)
-       #   *(uint32 *) (out + 52) = in13
-       movl    %ecx,52(%edi)
-       #   in14 = x14
-       movl    156(%esp),%eax
-       #   in15 = x15
-       movl    160(%esp),%ecx
-       #   in14 += j14
-       addl    220(%esp),%eax
-       #   in15 += j15
-       addl    224(%esp),%ecx
-       #   in14 ^= *(uint32 *) (m + 56)
-       xorl    56(%esi),%eax
-       #   in15 ^= *(uint32 *) (m + 60)
-       xorl    60(%esi),%ecx
-       #   *(uint32 *) (out + 56) = in14
-       movl    %eax,56(%edi)
-       #   *(uint32 *) (out + 60) = in15
-       movl    %ecx,60(%edi)
-       #   bytes = bytes_backup
-       movl    76(%esp),%ebx
-       #   in8 = j8
-       movl    196(%esp),%eax
-       #   in9 = j9
-       movl    200(%esp),%ecx
-       #   in8 += 1
-       add     $1,%eax
-       #   in9 += 0 + carry
-       adc     $0,%ecx
-       #   j8 = in8
-       movl    %eax,196(%esp)
-       #   j9 = in9
-       movl    %ecx,200(%esp)
-       #   bytes - 64
-       cmp     $64,%ebx
-       #   goto bytesatleast65 if unsigned>
-       ja      ._bytesatleast65
-       #     goto bytesatleast64 if unsigned>=
-       jae     ._bytesatleast64
-       #       m = out
-       mov     %edi,%esi
-       #       out = ctarget
-       movl    228(%esp),%edi
-       #       i = bytes
-       mov     %ebx,%ecx
-       #       while (i) { *out++ = *m++; --i }
-       rep     movsb
-._bytesatleast64:
-       #     x = x_backup
-       movl    64(%esp),%eax
-       #     in8 = j8
-       movl    196(%esp),%ecx
-       #     in9 = j9
-       movl    200(%esp),%edx
-       #     *(uint32 *) (x + 32) = in8
-       movl    %ecx,32(%eax)
-       #     *(uint32 *) (x + 36) = in9
-       movl    %edx,36(%eax)
-._done:
-       #     eax = eax_stack
-       movl    80(%esp),%eax
-       #     ebx = ebx_stack
-       movl    84(%esp),%ebx
-       #     esi = esi_stack
-       movl    88(%esp),%esi
-       #     edi = edi_stack
-       movl    92(%esp),%edi
-       #     ebp = ebp_stack
-       movl    96(%esp),%ebp
-       #     leave
-       add     %eax,%esp
-       ret
-._bytesatleast65:
-       #   bytes -= 64
-       sub     $64,%ebx
-       #   out += 64
-       add     $64,%edi
-       #   m += 64
-       add     $64,%esi
-       # goto bytesatleast1
-       jmp     ._bytesatleast1
-ENDPROC(salsa20_encrypt_bytes)
-
-# enter salsa20_keysetup
-ENTRY(salsa20_keysetup)
-       mov     %esp,%eax
-       and     $31,%eax
-       add     $256,%eax
-       sub     %eax,%esp
-       #   eax_stack = eax
-       movl    %eax,64(%esp)
-       #   ebx_stack = ebx
-       movl    %ebx,68(%esp)
-       #   esi_stack = esi
-       movl    %esi,72(%esp)
-       #   edi_stack = edi
-       movl    %edi,76(%esp)
-       #   ebp_stack = ebp
-       movl    %ebp,80(%esp)
-       #   k = arg2
-       movl    8(%esp,%eax),%ecx
-       #   kbits = arg3
-       movl    12(%esp,%eax),%edx
-       #   x = arg1
-       movl    4(%esp,%eax),%eax
-       #   in1 = *(uint32 *) (k + 0)
-       movl    0(%ecx),%ebx
-       #   in2 = *(uint32 *) (k + 4)
-       movl    4(%ecx),%esi
-       #   in3 = *(uint32 *) (k + 8)
-       movl    8(%ecx),%edi
-       #   in4 = *(uint32 *) (k + 12)
-       movl    12(%ecx),%ebp
-       #   *(uint32 *) (x + 4) = in1
-       movl    %ebx,4(%eax)
-       #   *(uint32 *) (x + 8) = in2
-       movl    %esi,8(%eax)
-       #   *(uint32 *) (x + 12) = in3
-       movl    %edi,12(%eax)
-       #   *(uint32 *) (x + 16) = in4
-       movl    %ebp,16(%eax)
-       #   kbits - 256
-       cmp     $256,%edx
-       #   goto kbits128 if unsigned<
-       jb      ._kbits128
-._kbits256:
-       #     in11 = *(uint32 *) (k + 16)
-       movl    16(%ecx),%edx
-       #     in12 = *(uint32 *) (k + 20)
-       movl    20(%ecx),%ebx
-       #     in13 = *(uint32 *) (k + 24)
-       movl    24(%ecx),%esi
-       #     in14 = *(uint32 *) (k + 28)
-       movl    28(%ecx),%ecx
-       #     *(uint32 *) (x + 44) = in11
-       movl    %edx,44(%eax)
-       #     *(uint32 *) (x + 48) = in12
-       movl    %ebx,48(%eax)
-       #     *(uint32 *) (x + 52) = in13
-       movl    %esi,52(%eax)
-       #     *(uint32 *) (x + 56) = in14
-       movl    %ecx,56(%eax)
-       #     in0 = 1634760805
-       mov     $1634760805,%ecx
-       #     in5 = 857760878
-       mov     $857760878,%edx
-       #     in10 = 2036477234
-       mov     $2036477234,%ebx
-       #     in15 = 1797285236
-       mov     $1797285236,%esi
-       #     *(uint32 *) (x + 0) = in0
-       movl    %ecx,0(%eax)
-       #     *(uint32 *) (x + 20) = in5
-       movl    %edx,20(%eax)
-       #     *(uint32 *) (x + 40) = in10
-       movl    %ebx,40(%eax)
-       #     *(uint32 *) (x + 60) = in15
-       movl    %esi,60(%eax)
-       #   goto keysetupdone
-       jmp     ._keysetupdone
-._kbits128:
-       #     in11 = *(uint32 *) (k + 0)
-       movl    0(%ecx),%edx
-       #     in12 = *(uint32 *) (k + 4)
-       movl    4(%ecx),%ebx
-       #     in13 = *(uint32 *) (k + 8)
-       movl    8(%ecx),%esi
-       #     in14 = *(uint32 *) (k + 12)
-       movl    12(%ecx),%ecx
-       #     *(uint32 *) (x + 44) = in11
-       movl    %edx,44(%eax)
-       #     *(uint32 *) (x + 48) = in12
-       movl    %ebx,48(%eax)
-       #     *(uint32 *) (x + 52) = in13
-       movl    %esi,52(%eax)
-       #     *(uint32 *) (x + 56) = in14
-       movl    %ecx,56(%eax)
-       #     in0 = 1634760805
-       mov     $1634760805,%ecx
-       #     in5 = 824206446
-       mov     $824206446,%edx
-       #     in10 = 2036477238
-       mov     $2036477238,%ebx
-       #     in15 = 1797285236
-       mov     $1797285236,%esi
-       #     *(uint32 *) (x + 0) = in0
-       movl    %ecx,0(%eax)
-       #     *(uint32 *) (x + 20) = in5
-       movl    %edx,20(%eax)
-       #     *(uint32 *) (x + 40) = in10
-       movl    %ebx,40(%eax)
-       #     *(uint32 *) (x + 60) = in15
-       movl    %esi,60(%eax)
-._keysetupdone:
-       #   eax = eax_stack
-       movl    64(%esp),%eax
-       #   ebx = ebx_stack
-       movl    68(%esp),%ebx
-       #   esi = esi_stack
-       movl    72(%esp),%esi
-       #   edi = edi_stack
-       movl    76(%esp),%edi
-       #   ebp = ebp_stack
-       movl    80(%esp),%ebp
-       # leave
-       add     %eax,%esp
-       ret
-ENDPROC(salsa20_keysetup)
-
-# enter salsa20_ivsetup
-ENTRY(salsa20_ivsetup)
-       mov     %esp,%eax
-       and     $31,%eax
-       add     $256,%eax
-       sub     %eax,%esp
-       #   eax_stack = eax
-       movl    %eax,64(%esp)
-       #   ebx_stack = ebx
-       movl    %ebx,68(%esp)
-       #   esi_stack = esi
-       movl    %esi,72(%esp)
-       #   edi_stack = edi
-       movl    %edi,76(%esp)
-       #   ebp_stack = ebp
-       movl    %ebp,80(%esp)
-       #   iv = arg2
-       movl    8(%esp,%eax),%ecx
-       #   x = arg1
-       movl    4(%esp,%eax),%eax
-       #   in6 = *(uint32 *) (iv + 0)
-       movl    0(%ecx),%edx
-       #   in7 = *(uint32 *) (iv + 4)
-       movl    4(%ecx),%ecx
-       #   in8 = 0
-       mov     $0,%ebx
-       #   in9 = 0
-       mov     $0,%esi
-       #   *(uint32 *) (x + 24) = in6
-       movl    %edx,24(%eax)
-       #   *(uint32 *) (x + 28) = in7
-       movl    %ecx,28(%eax)
-       #   *(uint32 *) (x + 32) = in8
-       movl    %ebx,32(%eax)
-       #   *(uint32 *) (x + 36) = in9
-       movl    %esi,36(%eax)
-       #   eax = eax_stack
-       movl    64(%esp),%eax
-       #   ebx = ebx_stack
-       movl    68(%esp),%ebx
-       #   esi = esi_stack
-       movl    72(%esp),%esi
-       #   edi = edi_stack
-       movl    76(%esp),%edi
-       #   ebp = ebp_stack
-       movl    80(%esp),%ebp
-       # leave
-       add     %eax,%esp
-       ret
-ENDPROC(salsa20_ivsetup)
diff --git a/arch/x86/crypto/salsa20-x86_64-asm_64.S b/arch/x86/crypto/salsa20-x86_64-asm_64.S
deleted file mode 100644
index 10db30d..0000000
--- a/arch/x86/crypto/salsa20-x86_64-asm_64.S
+++ /dev/null
@@ -1,919 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#include <linux/linkage.h>
-
-# enter salsa20_encrypt_bytes
-ENTRY(salsa20_encrypt_bytes)
-       mov     %rsp,%r11
-       and     $31,%r11
-       add     $256,%r11
-       sub     %r11,%rsp
-       # x = arg1
-       mov     %rdi,%r8
-       # m = arg2
-       mov     %rsi,%rsi
-       # out = arg3
-       mov     %rdx,%rdi
-       # bytes = arg4
-       mov     %rcx,%rdx
-       #               unsigned>? bytes - 0
-       cmp     $0,%rdx
-       # comment:fp stack unchanged by jump
-       # goto done if !unsigned>
-       jbe     ._done
-       # comment:fp stack unchanged by fallthrough
-# start:
-._start:
-       # r11_stack = r11
-       movq    %r11,0(%rsp)
-       # r12_stack = r12
-       movq    %r12,8(%rsp)
-       # r13_stack = r13
-       movq    %r13,16(%rsp)
-       # r14_stack = r14
-       movq    %r14,24(%rsp)
-       # r15_stack = r15
-       movq    %r15,32(%rsp)
-       # rbx_stack = rbx
-       movq    %rbx,40(%rsp)
-       # rbp_stack = rbp
-       movq    %rbp,48(%rsp)
-       # in0 = *(uint64 *) (x + 0)
-       movq    0(%r8),%rcx
-       # in2 = *(uint64 *) (x + 8)
-       movq    8(%r8),%r9
-       # in4 = *(uint64 *) (x + 16)
-       movq    16(%r8),%rax
-       # in6 = *(uint64 *) (x + 24)
-       movq    24(%r8),%r10
-       # in8 = *(uint64 *) (x + 32)
-       movq    32(%r8),%r11
-       # in10 = *(uint64 *) (x + 40)
-       movq    40(%r8),%r12
-       # in12 = *(uint64 *) (x + 48)
-       movq    48(%r8),%r13
-       # in14 = *(uint64 *) (x + 56)
-       movq    56(%r8),%r14
-       # j0 = in0
-       movq    %rcx,56(%rsp)
-       # j2 = in2
-       movq    %r9,64(%rsp)
-       # j4 = in4
-       movq    %rax,72(%rsp)
-       # j6 = in6
-       movq    %r10,80(%rsp)
-       # j8 = in8
-       movq    %r11,88(%rsp)
-       # j10 = in10
-       movq    %r12,96(%rsp)
-       # j12 = in12
-       movq    %r13,104(%rsp)
-       # j14 = in14
-       movq    %r14,112(%rsp)
-       # x_backup = x
-       movq    %r8,120(%rsp)
-# bytesatleast1:
-._bytesatleast1:
-       #                   unsigned<? bytes - 64
-       cmp     $64,%rdx
-       # comment:fp stack unchanged by jump
-       #   goto nocopy if !unsigned<
-       jae     ._nocopy
-       #     ctarget = out
-       movq    %rdi,128(%rsp)
-       #     out = &tmp
-       leaq    192(%rsp),%rdi
-       #     i = bytes
-       mov     %rdx,%rcx
-       #     while (i) { *out++ = *m++; --i }
-       rep     movsb
-       #     out = &tmp
-       leaq    192(%rsp),%rdi
-       #     m = &tmp
-       leaq    192(%rsp),%rsi
-       # comment:fp stack unchanged by fallthrough
-#   nocopy:
-._nocopy:
-       #   out_backup = out
-       movq    %rdi,136(%rsp)
-       #   m_backup = m
-       movq    %rsi,144(%rsp)
-       #   bytes_backup = bytes
-       movq    %rdx,152(%rsp)
-       #   x1 = j0
-       movq    56(%rsp),%rdi
-       #   x0 = x1
-       mov     %rdi,%rdx
-       #   (uint64) x1 >>= 32
-       shr     $32,%rdi
-       #               x3 = j2
-       movq    64(%rsp),%rsi
-       #               x2 = x3
-       mov     %rsi,%rcx
-       #               (uint64) x3 >>= 32
-       shr     $32,%rsi
-       #   x5 = j4
-       movq    72(%rsp),%r8
-       #   x4 = x5
-       mov     %r8,%r9
-       #   (uint64) x5 >>= 32
-       shr     $32,%r8
-       #   x5_stack = x5
-       movq    %r8,160(%rsp)
-       #               x7 = j6
-       movq    80(%rsp),%r8
-       #               x6 = x7
-       mov     %r8,%rax
-       #               (uint64) x7 >>= 32
-       shr     $32,%r8
-       #   x9 = j8
-       movq    88(%rsp),%r10
-       #   x8 = x9
-       mov     %r10,%r11
-       #   (uint64) x9 >>= 32
-       shr     $32,%r10
-       #               x11 = j10
-       movq    96(%rsp),%r12
-       #               x10 = x11
-       mov     %r12,%r13
-       #               x10_stack = x10
-       movq    %r13,168(%rsp)
-       #               (uint64) x11 >>= 32
-       shr     $32,%r12
-       #   x13 = j12
-       movq    104(%rsp),%r13
-       #   x12 = x13
-       mov     %r13,%r14
-       #   (uint64) x13 >>= 32
-       shr     $32,%r13
-       #               x15 = j14
-       movq    112(%rsp),%r15
-       #               x14 = x15
-       mov     %r15,%rbx
-       #               (uint64) x15 >>= 32
-       shr     $32,%r15
-       #               x15_stack = x15
-       movq    %r15,176(%rsp)
-       #   i = 20
-       mov     $20,%r15
-#   mainloop:
-._mainloop:
-       #   i_backup = i
-       movq    %r15,184(%rsp)
-       #               x5 = x5_stack
-       movq    160(%rsp),%r15
-       # a = x12 + x0
-       lea     (%r14,%rdx),%rbp
-       # (uint32) a <<<= 7
-       rol     $7,%ebp
-       # x4 ^= a
-       xor     %rbp,%r9
-       #               b = x1 + x5
-       lea     (%rdi,%r15),%rbp
-       #               (uint32) b <<<= 7
-       rol     $7,%ebp
-       #               x9 ^= b
-       xor     %rbp,%r10
-       # a = x0 + x4
-       lea     (%rdx,%r9),%rbp
-       # (uint32) a <<<= 9
-       rol     $9,%ebp
-       # x8 ^= a
-       xor     %rbp,%r11
-       #               b = x5 + x9
-       lea     (%r15,%r10),%rbp
-       #               (uint32) b <<<= 9
-       rol     $9,%ebp
-       #               x13 ^= b
-       xor     %rbp,%r13
-       # a = x4 + x8
-       lea     (%r9,%r11),%rbp
-       # (uint32) a <<<= 13
-       rol     $13,%ebp
-       # x12 ^= a
-       xor     %rbp,%r14
-       #               b = x9 + x13
-       lea     (%r10,%r13),%rbp
-       #               (uint32) b <<<= 13
-       rol     $13,%ebp
-       #               x1 ^= b
-       xor     %rbp,%rdi
-       # a = x8 + x12
-       lea     (%r11,%r14),%rbp
-       # (uint32) a <<<= 18
-       rol     $18,%ebp
-       # x0 ^= a
-       xor     %rbp,%rdx
-       #               b = x13 + x1
-       lea     (%r13,%rdi),%rbp
-       #               (uint32) b <<<= 18
-       rol     $18,%ebp
-       #               x5 ^= b
-       xor     %rbp,%r15
-       #                               x10 = x10_stack
-       movq    168(%rsp),%rbp
-       #               x5_stack = x5
-       movq    %r15,160(%rsp)
-       #                               c = x6 + x10
-       lea     (%rax,%rbp),%r15
-       #                               (uint32) c <<<= 7
-       rol     $7,%r15d
-       #                               x14 ^= c
-       xor     %r15,%rbx
-       #                               c = x10 + x14
-       lea     (%rbp,%rbx),%r15
-       #                               (uint32) c <<<= 9
-       rol     $9,%r15d
-       #                               x2 ^= c
-       xor     %r15,%rcx
-       #                               c = x14 + x2
-       lea     (%rbx,%rcx),%r15
-       #                               (uint32) c <<<= 13
-       rol     $13,%r15d
-       #                               x6 ^= c
-       xor     %r15,%rax
-       #                               c = x2 + x6
-       lea     (%rcx,%rax),%r15
-       #                               (uint32) c <<<= 18
-       rol     $18,%r15d
-       #                               x10 ^= c
-       xor     %r15,%rbp
-       #                                               x15 = x15_stack
-       movq    176(%rsp),%r15
-       #                               x10_stack = x10
-       movq    %rbp,168(%rsp)
-       #                                               d = x11 + x15
-       lea     (%r12,%r15),%rbp
-       #                                               (uint32) d <<<= 7
-       rol     $7,%ebp
-       #                                               x3 ^= d
-       xor     %rbp,%rsi
-       #                                               d = x15 + x3
-       lea     (%r15,%rsi),%rbp
-       #                                               (uint32) d <<<= 9
-       rol     $9,%ebp
-       #                                               x7 ^= d
-       xor     %rbp,%r8
-       #                                               d = x3 + x7
-       lea     (%rsi,%r8),%rbp
-       #                                               (uint32) d <<<= 13
-       rol     $13,%ebp
-       #                                               x11 ^= d
-       xor     %rbp,%r12
-       #                                               d = x7 + x11
-       lea     (%r8,%r12),%rbp
-       #                                               (uint32) d <<<= 18
-       rol     $18,%ebp
-       #                                               x15 ^= d
-       xor     %rbp,%r15
-       #                                               x15_stack = x15
-       movq    %r15,176(%rsp)
-       #               x5 = x5_stack
-       movq    160(%rsp),%r15
-       # a = x3 + x0
-       lea     (%rsi,%rdx),%rbp
-       # (uint32) a <<<= 7
-       rol     $7,%ebp
-       # x1 ^= a
-       xor     %rbp,%rdi
-       #               b = x4 + x5
-       lea     (%r9,%r15),%rbp
-       #               (uint32) b <<<= 7
-       rol     $7,%ebp
-       #               x6 ^= b
-       xor     %rbp,%rax
-       # a = x0 + x1
-       lea     (%rdx,%rdi),%rbp
-       # (uint32) a <<<= 9
-       rol     $9,%ebp
-       # x2 ^= a
-       xor     %rbp,%rcx
-       #               b = x5 + x6
-       lea     (%r15,%rax),%rbp
-       #               (uint32) b <<<= 9
-       rol     $9,%ebp
-       #               x7 ^= b
-       xor     %rbp,%r8
-       # a = x1 + x2
-       lea     (%rdi,%rcx),%rbp
-       # (uint32) a <<<= 13
-       rol     $13,%ebp
-       # x3 ^= a
-       xor     %rbp,%rsi
-       #               b = x6 + x7
-       lea     (%rax,%r8),%rbp
-       #               (uint32) b <<<= 13
-       rol     $13,%ebp
-       #               x4 ^= b
-       xor     %rbp,%r9
-       # a = x2 + x3
-       lea     (%rcx,%rsi),%rbp
-       # (uint32) a <<<= 18
-       rol     $18,%ebp
-       # x0 ^= a
-       xor     %rbp,%rdx
-       #               b = x7 + x4
-       lea     (%r8,%r9),%rbp
-       #               (uint32) b <<<= 18
-       rol     $18,%ebp
-       #               x5 ^= b
-       xor     %rbp,%r15
-       #                               x10 = x10_stack
-       movq    168(%rsp),%rbp
-       #               x5_stack = x5
-       movq    %r15,160(%rsp)
-       #                               c = x9 + x10
-       lea     (%r10,%rbp),%r15
-       #                               (uint32) c <<<= 7
-       rol     $7,%r15d
-       #                               x11 ^= c
-       xor     %r15,%r12
-       #                               c = x10 + x11
-       lea     (%rbp,%r12),%r15
-       #                               (uint32) c <<<= 9
-       rol     $9,%r15d
-       #                               x8 ^= c
-       xor     %r15,%r11
-       #                               c = x11 + x8
-       lea     (%r12,%r11),%r15
-       #                               (uint32) c <<<= 13
-       rol     $13,%r15d
-       #                               x9 ^= c
-       xor     %r15,%r10
-       #                               c = x8 + x9
-       lea     (%r11,%r10),%r15
-       #                               (uint32) c <<<= 18
-       rol     $18,%r15d
-       #                               x10 ^= c
-       xor     %r15,%rbp
-       #                                               x15 = x15_stack
-       movq    176(%rsp),%r15
-       #                               x10_stack = x10
-       movq    %rbp,168(%rsp)
-       #                                               d = x14 + x15
-       lea     (%rbx,%r15),%rbp
-       #                                               (uint32) d <<<= 7
-       rol     $7,%ebp
-       #                                               x12 ^= d
-       xor     %rbp,%r14
-       #                                               d = x15 + x12
-       lea     (%r15,%r14),%rbp
-       #                                               (uint32) d <<<= 9
-       rol     $9,%ebp
-       #                                               x13 ^= d
-       xor     %rbp,%r13
-       #                                               d = x12 + x13
-       lea     (%r14,%r13),%rbp
-       #                                               (uint32) d <<<= 13
-       rol     $13,%ebp
-       #                                               x14 ^= d
-       xor     %rbp,%rbx
-       #                                               d = x13 + x14
-       lea     (%r13,%rbx),%rbp
-       #                                               (uint32) d <<<= 18
-       rol     $18,%ebp
-       #                                               x15 ^= d
-       xor     %rbp,%r15
-       #                                               x15_stack = x15
-       movq    %r15,176(%rsp)
-       #               x5 = x5_stack
-       movq    160(%rsp),%r15
-       # a = x12 + x0
-       lea     (%r14,%rdx),%rbp
-       # (uint32) a <<<= 7
-       rol     $7,%ebp
-       # x4 ^= a
-       xor     %rbp,%r9
-       #               b = x1 + x5
-       lea     (%rdi,%r15),%rbp
-       #               (uint32) b <<<= 7
-       rol     $7,%ebp
-       #               x9 ^= b
-       xor     %rbp,%r10
-       # a = x0 + x4
-       lea     (%rdx,%r9),%rbp
-       # (uint32) a <<<= 9
-       rol     $9,%ebp
-       # x8 ^= a
-       xor     %rbp,%r11
-       #               b = x5 + x9
-       lea     (%r15,%r10),%rbp
-       #               (uint32) b <<<= 9
-       rol     $9,%ebp
-       #               x13 ^= b
-       xor     %rbp,%r13
-       # a = x4 + x8
-       lea     (%r9,%r11),%rbp
-       # (uint32) a <<<= 13
-       rol     $13,%ebp
-       # x12 ^= a
-       xor     %rbp,%r14
-       #               b = x9 + x13
-       lea     (%r10,%r13),%rbp
-       #               (uint32) b <<<= 13
-       rol     $13,%ebp
-       #               x1 ^= b
-       xor     %rbp,%rdi
-       # a = x8 + x12
-       lea     (%r11,%r14),%rbp
-       # (uint32) a <<<= 18
-       rol     $18,%ebp
-       # x0 ^= a
-       xor     %rbp,%rdx
-       #               b = x13 + x1
-       lea     (%r13,%rdi),%rbp
-       #               (uint32) b <<<= 18
-       rol     $18,%ebp
-       #               x5 ^= b
-       xor     %rbp,%r15
-       #                               x10 = x10_stack
-       movq    168(%rsp),%rbp
-       #               x5_stack = x5
-       movq    %r15,160(%rsp)
-       #                               c = x6 + x10
-       lea     (%rax,%rbp),%r15
-       #                               (uint32) c <<<= 7
-       rol     $7,%r15d
-       #                               x14 ^= c
-       xor     %r15,%rbx
-       #                               c = x10 + x14
-       lea     (%rbp,%rbx),%r15
-       #                               (uint32) c <<<= 9
-       rol     $9,%r15d
-       #                               x2 ^= c
-       xor     %r15,%rcx
-       #                               c = x14 + x2
-       lea     (%rbx,%rcx),%r15
-       #                               (uint32) c <<<= 13
-       rol     $13,%r15d
-       #                               x6 ^= c
-       xor     %r15,%rax
-       #                               c = x2 + x6
-       lea     (%rcx,%rax),%r15
-       #                               (uint32) c <<<= 18
-       rol     $18,%r15d
-       #                               x10 ^= c
-       xor     %r15,%rbp
-       #                                               x15 = x15_stack
-       movq    176(%rsp),%r15
-       #                               x10_stack = x10
-       movq    %rbp,168(%rsp)
-       #                                               d = x11 + x15
-       lea     (%r12,%r15),%rbp
-       #                                               (uint32) d <<<= 7
-       rol     $7,%ebp
-       #                                               x3 ^= d
-       xor     %rbp,%rsi
-       #                                               d = x15 + x3
-       lea     (%r15,%rsi),%rbp
-       #                                               (uint32) d <<<= 9
-       rol     $9,%ebp
-       #                                               x7 ^= d
-       xor     %rbp,%r8
-       #                                               d = x3 + x7
-       lea     (%rsi,%r8),%rbp
-       #                                               (uint32) d <<<= 13
-       rol     $13,%ebp
-       #                                               x11 ^= d
-       xor     %rbp,%r12
-       #                                               d = x7 + x11
-       lea     (%r8,%r12),%rbp
-       #                                               (uint32) d <<<= 18
-       rol     $18,%ebp
-       #                                               x15 ^= d
-       xor     %rbp,%r15
-       #                                               x15_stack = x15
-       movq    %r15,176(%rsp)
-       #               x5 = x5_stack
-       movq    160(%rsp),%r15
-       # a = x3 + x0
-       lea     (%rsi,%rdx),%rbp
-       # (uint32) a <<<= 7
-       rol     $7,%ebp
-       # x1 ^= a
-       xor     %rbp,%rdi
-       #               b = x4 + x5
-       lea     (%r9,%r15),%rbp
-       #               (uint32) b <<<= 7
-       rol     $7,%ebp
-       #               x6 ^= b
-       xor     %rbp,%rax
-       # a = x0 + x1
-       lea     (%rdx,%rdi),%rbp
-       # (uint32) a <<<= 9
-       rol     $9,%ebp
-       # x2 ^= a
-       xor     %rbp,%rcx
-       #               b = x5 + x6
-       lea     (%r15,%rax),%rbp
-       #               (uint32) b <<<= 9
-       rol     $9,%ebp
-       #               x7 ^= b
-       xor     %rbp,%r8
-       # a = x1 + x2
-       lea     (%rdi,%rcx),%rbp
-       # (uint32) a <<<= 13
-       rol     $13,%ebp
-       # x3 ^= a
-       xor     %rbp,%rsi
-       #               b = x6 + x7
-       lea     (%rax,%r8),%rbp
-       #               (uint32) b <<<= 13
-       rol     $13,%ebp
-       #               x4 ^= b
-       xor     %rbp,%r9
-       # a = x2 + x3
-       lea     (%rcx,%rsi),%rbp
-       # (uint32) a <<<= 18
-       rol     $18,%ebp
-       # x0 ^= a
-       xor     %rbp,%rdx
-       #               b = x7 + x4
-       lea     (%r8,%r9),%rbp
-       #               (uint32) b <<<= 18
-       rol     $18,%ebp
-       #               x5 ^= b
-       xor     %rbp,%r15
-       #                               x10 = x10_stack
-       movq    168(%rsp),%rbp
-       #               x5_stack = x5
-       movq    %r15,160(%rsp)
-       #                               c = x9 + x10
-       lea     (%r10,%rbp),%r15
-       #                               (uint32) c <<<= 7
-       rol     $7,%r15d
-       #                               x11 ^= c
-       xor     %r15,%r12
-       #                               c = x10 + x11
-       lea     (%rbp,%r12),%r15
-       #                               (uint32) c <<<= 9
-       rol     $9,%r15d
-       #                               x8 ^= c
-       xor     %r15,%r11
-       #                               c = x11 + x8
-       lea     (%r12,%r11),%r15
-       #                               (uint32) c <<<= 13
-       rol     $13,%r15d
-       #                               x9 ^= c
-       xor     %r15,%r10
-       #                               c = x8 + x9
-       lea     (%r11,%r10),%r15
-       #                               (uint32) c <<<= 18
-       rol     $18,%r15d
-       #                               x10 ^= c
-       xor     %r15,%rbp
-       #                                               x15 = x15_stack
-       movq    176(%rsp),%r15
-       #                               x10_stack = x10
-       movq    %rbp,168(%rsp)
-       #                                               d = x14 + x15
-       lea     (%rbx,%r15),%rbp
-       #                                               (uint32) d <<<= 7
-       rol     $7,%ebp
-       #                                               x12 ^= d
-       xor     %rbp,%r14
-       #                                               d = x15 + x12
-       lea     (%r15,%r14),%rbp
-       #                                               (uint32) d <<<= 9
-       rol     $9,%ebp
-       #                                               x13 ^= d
-       xor     %rbp,%r13
-       #                                               d = x12 + x13
-       lea     (%r14,%r13),%rbp
-       #                                               (uint32) d <<<= 13
-       rol     $13,%ebp
-       #                                               x14 ^= d
-       xor     %rbp,%rbx
-       #                                               d = x13 + x14
-       lea     (%r13,%rbx),%rbp
-       #                                               (uint32) d <<<= 18
-       rol     $18,%ebp
-       #                                               x15 ^= d
-       xor     %rbp,%r15
-       #                                               x15_stack = x15
-       movq    %r15,176(%rsp)
-       #   i = i_backup
-       movq    184(%rsp),%r15
-       #                  unsigned>? i -= 4
-       sub     $4,%r15
-       # comment:fp stack unchanged by jump
-       # goto mainloop if unsigned>
-       ja      ._mainloop
-       #   (uint32) x2 += j2
-       addl    64(%rsp),%ecx
-       #   x3 <<= 32
-       shl     $32,%rsi
-       #   x3 += j2
-       addq    64(%rsp),%rsi
-       #   (uint64) x3 >>= 32
-       shr     $32,%rsi
-       #   x3 <<= 32
-       shl     $32,%rsi
-       #   x2 += x3
-       add     %rsi,%rcx
-       #   (uint32) x6 += j6
-       addl    80(%rsp),%eax
-       #   x7 <<= 32
-       shl     $32,%r8
-       #   x7 += j6
-       addq    80(%rsp),%r8
-       #   (uint64) x7 >>= 32
-       shr     $32,%r8
-       #   x7 <<= 32
-       shl     $32,%r8
-       #   x6 += x7
-       add     %r8,%rax
-       #   (uint32) x8 += j8
-       addl    88(%rsp),%r11d
-       #   x9 <<= 32
-       shl     $32,%r10
-       #   x9 += j8
-       addq    88(%rsp),%r10
-       #   (uint64) x9 >>= 32
-       shr     $32,%r10
-       #   x9 <<= 32
-       shl     $32,%r10
-       #   x8 += x9
-       add     %r10,%r11
-       #   (uint32) x12 += j12
-       addl    104(%rsp),%r14d
-       #   x13 <<= 32
-       shl     $32,%r13
-       #   x13 += j12
-       addq    104(%rsp),%r13
-       #   (uint64) x13 >>= 32
-       shr     $32,%r13
-       #   x13 <<= 32
-       shl     $32,%r13
-       #   x12 += x13
-       add     %r13,%r14
-       #   (uint32) x0 += j0
-       addl    56(%rsp),%edx
-       #   x1 <<= 32
-       shl     $32,%rdi
-       #   x1 += j0
-       addq    56(%rsp),%rdi
-       #   (uint64) x1 >>= 32
-       shr     $32,%rdi
-       #   x1 <<= 32
-       shl     $32,%rdi
-       #   x0 += x1
-       add     %rdi,%rdx
-       #   x5 = x5_stack
-       movq    160(%rsp),%rdi
-       #   (uint32) x4 += j4
-       addl    72(%rsp),%r9d
-       #   x5 <<= 32
-       shl     $32,%rdi
-       #   x5 += j4
-       addq    72(%rsp),%rdi
-       #   (uint64) x5 >>= 32
-       shr     $32,%rdi
-       #   x5 <<= 32
-       shl     $32,%rdi
-       #   x4 += x5
-       add     %rdi,%r9
-       #   x10 = x10_stack
-       movq    168(%rsp),%r8
-       #   (uint32) x10 += j10
-       addl    96(%rsp),%r8d
-       #   x11 <<= 32
-       shl     $32,%r12
-       #   x11 += j10
-       addq    96(%rsp),%r12
-       #   (uint64) x11 >>= 32
-       shr     $32,%r12
-       #   x11 <<= 32
-       shl     $32,%r12
-       #   x10 += x11
-       add     %r12,%r8
-       #   x15 = x15_stack
-       movq    176(%rsp),%rdi
-       #   (uint32) x14 += j14
-       addl    112(%rsp),%ebx
-       #   x15 <<= 32
-       shl     $32,%rdi
-       #   x15 += j14
-       addq    112(%rsp),%rdi
-       #   (uint64) x15 >>= 32
-       shr     $32,%rdi
-       #   x15 <<= 32
-       shl     $32,%rdi
-       #   x14 += x15
-       add     %rdi,%rbx
-       #   out = out_backup
-       movq    136(%rsp),%rdi
-       #   m = m_backup
-       movq    144(%rsp),%rsi
-       #   x0 ^= *(uint64 *) (m + 0)
-       xorq    0(%rsi),%rdx
-       #   *(uint64 *) (out + 0) = x0
-       movq    %rdx,0(%rdi)
-       #   x2 ^= *(uint64 *) (m + 8)
-       xorq    8(%rsi),%rcx
-       #   *(uint64 *) (out + 8) = x2
-       movq    %rcx,8(%rdi)
-       #   x4 ^= *(uint64 *) (m + 16)
-       xorq    16(%rsi),%r9
-       #   *(uint64 *) (out + 16) = x4
-       movq    %r9,16(%rdi)
-       #   x6 ^= *(uint64 *) (m + 24)
-       xorq    24(%rsi),%rax
-       #   *(uint64 *) (out + 24) = x6
-       movq    %rax,24(%rdi)
-       #   x8 ^= *(uint64 *) (m + 32)
-       xorq    32(%rsi),%r11
-       #   *(uint64 *) (out + 32) = x8
-       movq    %r11,32(%rdi)
-       #   x10 ^= *(uint64 *) (m + 40)
-       xorq    40(%rsi),%r8
-       #   *(uint64 *) (out + 40) = x10
-       movq    %r8,40(%rdi)
-       #   x12 ^= *(uint64 *) (m + 48)
-       xorq    48(%rsi),%r14
-       #   *(uint64 *) (out + 48) = x12
-       movq    %r14,48(%rdi)
-       #   x14 ^= *(uint64 *) (m + 56)
-       xorq    56(%rsi),%rbx
-       #   *(uint64 *) (out + 56) = x14
-       movq    %rbx,56(%rdi)
-       #   bytes = bytes_backup
-       movq    152(%rsp),%rdx
-       #   in8 = j8
-       movq    88(%rsp),%rcx
-       #   in8 += 1
-       add     $1,%rcx
-       #   j8 = in8
-       movq    %rcx,88(%rsp)
-       #                          unsigned>? unsigned<? bytes - 64
-       cmp     $64,%rdx
-       # comment:fp stack unchanged by jump
-       #   goto bytesatleast65 if unsigned>
-       ja      ._bytesatleast65
-       # comment:fp stack unchanged by jump
-       #     goto bytesatleast64 if !unsigned<
-       jae     ._bytesatleast64
-       #       m = out
-       mov     %rdi,%rsi
-       #       out = ctarget
-       movq    128(%rsp),%rdi
-       #       i = bytes
-       mov     %rdx,%rcx
-       #       while (i) { *out++ = *m++; --i }
-       rep     movsb
-       # comment:fp stack unchanged by fallthrough
-#     bytesatleast64:
-._bytesatleast64:
-       #     x = x_backup
-       movq    120(%rsp),%rdi
-       #     in8 = j8
-       movq    88(%rsp),%rsi
-       #     *(uint64 *) (x + 32) = in8
-       movq    %rsi,32(%rdi)
-       #     r11 = r11_stack
-       movq    0(%rsp),%r11
-       #     r12 = r12_stack
-       movq    8(%rsp),%r12
-       #     r13 = r13_stack
-       movq    16(%rsp),%r13
-       #     r14 = r14_stack
-       movq    24(%rsp),%r14
-       #     r15 = r15_stack
-       movq    32(%rsp),%r15
-       #     rbx = rbx_stack
-       movq    40(%rsp),%rbx
-       #     rbp = rbp_stack
-       movq    48(%rsp),%rbp
-       # comment:fp stack unchanged by fallthrough
-#     done:
-._done:
-       #     leave
-       add     %r11,%rsp
-       mov     %rdi,%rax
-       mov     %rsi,%rdx
-       ret
-#   bytesatleast65:
-._bytesatleast65:
-       #   bytes -= 64
-       sub     $64,%rdx
-       #   out += 64
-       add     $64,%rdi
-       #   m += 64
-       add     $64,%rsi
-       # comment:fp stack unchanged by jump
-       # goto bytesatleast1
-       jmp     ._bytesatleast1
-ENDPROC(salsa20_encrypt_bytes)
-
-# enter salsa20_keysetup
-ENTRY(salsa20_keysetup)
-       mov     %rsp,%r11
-       and     $31,%r11
-       add     $256,%r11
-       sub     %r11,%rsp
-       #   k = arg2
-       mov     %rsi,%rsi
-       #   kbits = arg3
-       mov     %rdx,%rdx
-       #   x = arg1
-       mov     %rdi,%rdi
-       #   in0 = *(uint64 *) (k + 0)
-       movq    0(%rsi),%r8
-       #   in2 = *(uint64 *) (k + 8)
-       movq    8(%rsi),%r9
-       #   *(uint64 *) (x + 4) = in0
-       movq    %r8,4(%rdi)
-       #   *(uint64 *) (x + 12) = in2
-       movq    %r9,12(%rdi)
-       #                    unsigned<? kbits - 256
-       cmp     $256,%rdx
-       # comment:fp stack unchanged by jump
-       #   goto kbits128 if unsigned<
-       jb      ._kbits128
-#   kbits256:
-._kbits256:
-       #     in10 = *(uint64 *) (k + 16)
-       movq    16(%rsi),%rdx
-       #     in12 = *(uint64 *) (k + 24)
-       movq    24(%rsi),%rsi
-       #     *(uint64 *) (x + 44) = in10
-       movq    %rdx,44(%rdi)
-       #     *(uint64 *) (x + 52) = in12
-       movq    %rsi,52(%rdi)
-       #     in0 = 1634760805
-       mov     $1634760805,%rsi
-       #     in4 = 857760878
-       mov     $857760878,%rdx
-       #     in10 = 2036477234
-       mov     $2036477234,%rcx
-       #     in14 = 1797285236
-       mov     $1797285236,%r8
-       #     *(uint32 *) (x + 0) = in0
-       movl    %esi,0(%rdi)
-       #     *(uint32 *) (x + 20) = in4
-       movl    %edx,20(%rdi)
-       #     *(uint32 *) (x + 40) = in10
-       movl    %ecx,40(%rdi)
-       #     *(uint32 *) (x + 60) = in14
-       movl    %r8d,60(%rdi)
-       # comment:fp stack unchanged by jump
-       #   goto keysetupdone
-       jmp     ._keysetupdone
-#   kbits128:
-._kbits128:
-       #     in10 = *(uint64 *) (k + 0)
-       movq    0(%rsi),%rdx
-       #     in12 = *(uint64 *) (k + 8)
-       movq    8(%rsi),%rsi
-       #     *(uint64 *) (x + 44) = in10
-       movq    %rdx,44(%rdi)
-       #     *(uint64 *) (x + 52) = in12
-       movq    %rsi,52(%rdi)
-       #     in0 = 1634760805
-       mov     $1634760805,%rsi
-       #     in4 = 824206446
-       mov     $824206446,%rdx
-       #     in10 = 2036477238
-       mov     $2036477238,%rcx
-       #     in14 = 1797285236
-       mov     $1797285236,%r8
-       #     *(uint32 *) (x + 0) = in0
-       movl    %esi,0(%rdi)
-       #     *(uint32 *) (x + 20) = in4
-       movl    %edx,20(%rdi)
-       #     *(uint32 *) (x + 40) = in10
-       movl    %ecx,40(%rdi)
-       #     *(uint32 *) (x + 60) = in14
-       movl    %r8d,60(%rdi)
-#   keysetupdone:
-._keysetupdone:
-       # leave
-       add     %r11,%rsp
-       mov     %rdi,%rax
-       mov     %rsi,%rdx
-       ret
-ENDPROC(salsa20_keysetup)
-
-# enter salsa20_ivsetup
-ENTRY(salsa20_ivsetup)
-       mov     %rsp,%r11
-       and     $31,%r11
-       add     $256,%r11
-       sub     %r11,%rsp
-       #   iv = arg2
-       mov     %rsi,%rsi
-       #   x = arg1
-       mov     %rdi,%rdi
-       #   in6 = *(uint64 *) (iv + 0)
-       movq    0(%rsi),%rsi
-       #   in8 = 0
-       mov     $0,%r8
-       #   *(uint64 *) (x + 24) = in6
-       movq    %rsi,24(%rdi)
-       #   *(uint64 *) (x + 32) = in8
-       movq    %r8,32(%rdi)
-       # leave
-       add     %r11,%rsp
-       mov     %rdi,%rax
-       mov     %rsi,%rdx
-       ret
-ENDPROC(salsa20_ivsetup)
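
[Editorial note] The deleted x86_64 routines are qhasm-generated and hard to follow from the interleaved "# ..." pseudocode alone. The following is a minimal C sketch, not taken from the kernel: salsa20_init_state and quarterround are illustrative names, only the 256-bit "expand 32-byte k" path of ._kbits256 is shown, and a little-endian host is assumed (as the deleted x86 code does). It restates the state layout that salsa20_keysetup/salsa20_ivsetup build at the byte offsets used above, and the add/rotate/xor pattern with rotation counts 7, 9, 13 and 18 that salsa20_encrypt_bytes repeats.

#include <stdint.h>
#include <string.h>

#define ROTL32(v, n) (((v) << (n)) | ((v) >> (32 - (n))))

/* 1634760805, 857760878, 2036477234, 1797285236 loaded in ._kbits256
 * above are the little-endian words of the string "expand 32-byte k". */
static const uint32_t sigma[4] = { 0x61707865, 0x3320646e,
                                   0x79622d32, 0x6b206574 };

/* Illustrative equivalent of salsa20_keysetup() + salsa20_ivsetup()
 * for a 256-bit key: constants at words 0/5/10/15 (byte offsets
 * 0/20/40/60), key at words 1..4 and 11..14, nonce at words 6..7,
 * and the 64-bit block counter zeroed in words 8..9 (byte offset 32). */
static void salsa20_init_state(uint32_t x[16], const uint8_t key[32],
                               const uint8_t iv[8])
{
        x[0]  = sigma[0];
        memcpy(&x[1], key, 16);
        x[5]  = sigma[1];
        memcpy(&x[6], iv, 8);
        x[8]  = 0;
        x[9]  = 0;
        x[10] = sigma[2];
        memcpy(&x[11], key + 16, 16);
        x[15] = sigma[3];
}

/* One Salsa20 quarter-round.  For the column (x0, x4, x8, x12) the
 * second line below is exactly the commented sequence "a = x0 + x4;
 * (uint32) a <<<= 9; x8 ^= a" seen in the assembly above. */
static void quarterround(uint32_t *y0, uint32_t *y1, uint32_t *y2,
                         uint32_t *y3)
{
        *y1 ^= ROTL32(*y0 + *y3,  7);
        *y2 ^= ROTL32(*y1 + *y0,  9);
        *y3 ^= ROTL32(*y2 + *y1, 13);
        *y0 ^= ROTL32(*y3 + *y2, 18);
}

Each pass of ._mainloop applies this quarter-round to the four columns and then the four rows while spilling x5, x10 and x15 to the stack, and decrements the round counter by 4. The shl/shr $32 sequences after the loop exist because the assembly pairs two 32-bit state words per 64-bit register when adding the saved j words back in, so that each 64-byte keystream block can be XORed over the message with eight xorq/movq pairs.
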
diff --git a/arch/x86/crypto/salsa20_glue.c b/arch/x86/crypto/salsa20_glue.c
deleted file mode 100644 (file)
index cb91a64..0000000
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Glue code for optimized assembly version of  Salsa20.
- *
- * Copyright (c) 2007 Tan Swee Heng <thesweeheng@gmail.com>
- *
- * The assembly codes are public domain assembly codes written by Daniel. J.
- * Bernstein <djb@cr.yp.to>. The codes are modified to include indentation
- * and to remove extraneous comments and functions that are not needed.
- * - i586 version, renamed as salsa20-i586-asm_32.S
- *   available from <http://cr.yp.to/snuffle/salsa20/x86-pm/salsa20.s>
- * - x86-64 version, renamed as salsa20-x86_64-asm_64.S
- *   available from <http://cr.yp.to/snuffle/salsa20/amd64-3/salsa20.s>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- */
-
-#include <crypto/algapi.h>
-#include <linux/module.h>
-#include <linux/crypto.h>
-
-#define SALSA20_IV_SIZE        8U
-#define SALSA20_MIN_KEY_SIZE  16U
-#define SALSA20_MAX_KEY_SIZE  32U
-
-struct salsa20_ctx
-{
-       u32 input[16];
-};
-
-asmlinkage void salsa20_keysetup(struct salsa20_ctx *ctx, const u8 *k,
-                                u32 keysize, u32 ivsize);
-asmlinkage void salsa20_ivsetup(struct salsa20_ctx *ctx, const u8 *iv);
-asmlinkage void salsa20_encrypt_bytes(struct salsa20_ctx *ctx,
-                                     const u8 *src, u8 *dst, u32 bytes);
-
-static int setkey(struct crypto_tfm *tfm, const u8 *key,
-                 unsigned int keysize)
-{
-       struct salsa20_ctx *ctx = crypto_tfm_ctx(tfm);
-       salsa20_keysetup(ctx, key, keysize*8, SALSA20_IV_SIZE*8);
-       return 0;
-}
-
-static int encrypt(struct blkcipher_desc *desc,
-                  struct scatterlist *dst, struct scatterlist *src,
-                  unsigned int nbytes)
-{
-       struct blkcipher_walk walk;
-       struct crypto_blkcipher *tfm = desc->tfm;
-       struct salsa20_ctx *ctx = crypto_blkcipher_ctx(tfm);
-       int err;
-
-       blkcipher_walk_init(&walk, dst, src, nbytes);
-       err = blkcipher_walk_virt_block(desc, &walk, 64);
-
-       salsa20_ivsetup(ctx, walk.iv);
-
-       while (walk.nbytes >= 64) {
-               salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
-                                     walk.dst.virt.addr,
-                                     walk.nbytes - (walk.nbytes % 64));
-               err = blkcipher_walk_done(desc, &walk, walk.nbytes % 64);
-       }
-
-       if (walk.nbytes) {
-               salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
-                                     walk.dst.virt.addr, walk.nbytes);
-               err = blkcipher_walk_done(desc, &walk, 0);
-       }
-
-       return err;
-}
-
-static struct crypto_alg alg = {
-       .cra_name           =   "salsa20",
-       .cra_driver_name    =   "salsa20-asm",
-       .cra_priority       =   200,
-       .cra_flags          =   CRYPTO_ALG_TYPE_BLKCIPHER,
-       .cra_type           =   &crypto_blkcipher_type,
-       .cra_blocksize      =   1,
-       .cra_ctxsize        =   sizeof(struct salsa20_ctx),
-       .cra_alignmask      =   3,
-       .cra_module         =   THIS_MODULE,
-       .cra_u              =   {
-               .blkcipher = {
-                       .setkey         =   setkey,
-                       .encrypt        =   encrypt,
-                       .decrypt        =   encrypt,
-                       .min_keysize    =   SALSA20_MIN_KEY_SIZE,
-                       .max_keysize    =   SALSA20_MAX_KEY_SIZE,
-                       .ivsize         =   SALSA20_IV_SIZE,
-               }
-       }
-};
-
-static int __init init(void)
-{
-       return crypto_register_alg(&alg);
-}
-
-static void __exit fini(void)
-{
-       crypto_unregister_alg(&alg);
-}
-
-module_init(init);
-module_exit(fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION ("Salsa20 stream cipher algorithm (optimized assembly version)");
-MODULE_ALIAS_CRYPTO("salsa20");
-MODULE_ALIAS_CRYPTO("salsa20-asm");
index 9327fbfccf5ac94ee62b32771e39c1e90d754554..556307f4936da54eea793b7f07fc970f01cd2c8b 100644 (file)
@@ -1335,32 +1335,6 @@ config CRYPTO_SALSA20
          The Salsa20 stream cipher algorithm is designed by Daniel J.
          Bernstein <djb@cr.yp.to>. See <http://cr.yp.to/snuffle.html>
 
-config CRYPTO_SALSA20_586
-       tristate "Salsa20 stream cipher algorithm (i586)"
-       depends on (X86 || UML_X86) && !64BIT
-       select CRYPTO_BLKCIPHER
-       help
-         Salsa20 stream cipher algorithm.
-
-         Salsa20 is a stream cipher submitted to eSTREAM, the ECRYPT
-         Stream Cipher Project. See <http://www.ecrypt.eu.org/stream/>
-
-         The Salsa20 stream cipher algorithm is designed by Daniel J.
-         Bernstein <djb@cr.yp.to>. See <http://cr.yp.to/snuffle.html>
-
-config CRYPTO_SALSA20_X86_64
-       tristate "Salsa20 stream cipher algorithm (x86_64)"
-       depends on (X86 || UML_X86) && 64BIT
-       select CRYPTO_BLKCIPHER
-       help
-         Salsa20 stream cipher algorithm.
-
-         Salsa20 is a stream cipher submitted to eSTREAM, the ECRYPT
-         Stream Cipher Project. See <http://www.ecrypt.eu.org/stream/>
-
-         The Salsa20 stream cipher algorithm is designed by Daniel J.
-         Bernstein <djb@cr.yp.to>. See <http://cr.yp.to/snuffle.html>
-
 config CRYPTO_CHACHA20
        tristate "ChaCha20 cipher algorithm"
        select CRYPTO_BLKCIPHER