git.proxmox.com Git - mirror_zfs.git/commitdiff
Remove old or redundant SHA2 files
author    Tino Reichardt <milky-zfs@mcmilk.de>
Mon, 27 Feb 2023 15:11:51 +0000 (16:11 +0100)
committer Brian Behlendorf <behlendorf1@llnl.gov>
Thu, 2 Mar 2023 21:50:21 +0000 (13:50 -0800)
We had three sha2.h headers in different places:
the FreeBSD version, the Linux version, and the generic Solaris version.

The only assembly used for acceleration was an old x86-64 OpenSSL
implementation of SHA256 within the icp module.

For FreeBSD, the complete set of FreeBSD SHA2 files had been copied into
OpenZFS; these files were removed as well.

Tested-by: Rich Ercolani <rincebrain@gmail.com>
Tested-by: Sebastian Gottschall <s.gottschall@dd-wrt.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Tino Reichardt <milky-zfs@mcmilk.de>
Closes #13741
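
For context, all three removed sha2.h variants declared the same
Init/Update/Final calling convention (visible in the deleted headers below),
which is what makes a single consolidated header possible. The sketch that
follows is illustrative only and not part of this commit; it assumes the
consolidated <sys/sha2.h> keeps the SHA2Init/SHA2Update/SHA2Final names,
the SHA256 mechanism constant, and SHA256_DIGEST_LENGTH as declared in the
removed headers.

/*
 * Illustrative only -- not part of this patch.  Shows the common
 * SHA2Init/SHA2Update/SHA2Final usage that every sha2.h variant exposed.
 */
#include <sys/sha2.h>	/* consolidated header (assumed location) */

static void
example_sha256(const void *buf, size_t len,
    uint8_t digest[SHA256_DIGEST_LENGTH])
{
	SHA2_CTX ctx;

	SHA2Init(SHA256, &ctx);		/* select the SHA256 mechanism */
	SHA2Update(&ctx, buf, len);	/* absorb the message */
	SHA2Final(digest, &ctx);	/* write the 32-byte digest */
}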

18 files changed:
include/os/freebsd/Makefile.am
include/os/freebsd/zfs/sys/sha2.h [deleted file]
include/os/linux/Makefile.am
include/os/linux/zfs/sys/sha2.h [deleted file]
lib/libspl/include/sys/sha2.h [deleted file]
module/Kbuild.in
module/Makefile.bsd
module/icp/algs/sha2/sha2.c [deleted file]
module/icp/asm-x86_64/sha2/sha256_impl.S [deleted file]
module/icp/asm-x86_64/sha2/sha512_impl.S [deleted file]
module/icp/include/sha2/sha2_consts.h [deleted file]
module/os/freebsd/spl/sha224.h [deleted file]
module/os/freebsd/spl/sha256.h [deleted file]
module/os/freebsd/spl/sha256c.c [deleted file]
module/os/freebsd/spl/sha384.h [deleted file]
module/os/freebsd/spl/sha512.h [deleted file]
module/os/freebsd/spl/sha512c.c [deleted file]
module/os/freebsd/spl/sha512t.h [deleted file]

index 89d4ef564d5fe9e0cdb8398f728111075bc32fda..c1ad40e2bc16f0b83f51f2ae98a51953451cde4d 100644 (file)
@@ -79,7 +79,6 @@ noinst_HEADERS = \
        %D%/spl/sys/zone.h \
        \
        %D%/zfs/sys/freebsd_crypto.h \
-       %D%/zfs/sys/sha2.h \
        %D%/zfs/sys/vdev_os.h \
        %D%/zfs/sys/zfs_bootenv_os.h \
        %D%/zfs/sys/zfs_context_os.h \
diff --git a/include/os/freebsd/zfs/sys/sha2.h b/include/os/freebsd/zfs/sys/sha2.h
deleted file mode 100644 (file)
index 1f520eb..0000000
+++ /dev/null
@@ -1,197 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or https://opensource.org/licenses/CDDL-1.0.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
- */
-/* Copyright 2013 Saso Kiselkov.  All rights reserved. */
-
-#ifndef _SYS_SHA2_H
-#define        _SYS_SHA2_H
-
-#include <sys/types.h>         /* for uint_* */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define        SHA256_DIGEST_LENGTH    32      /* SHA256 digest length in bytes */
-#define        SHA384_DIGEST_LENGTH    48      /* SHA384 digest length in bytes */
-#define        SHA512_DIGEST_LENGTH    64      /* SHA512 digest length in bytes */
-
-/* Truncated versions of SHA-512 according to FIPS-180-4, section 5.3.6 */
-#define        SHA512_224_DIGEST_LENGTH        28      /* SHA512/224 digest length */
-#define        SHA512_256_DIGEST_LENGTH        32      /* SHA512/256 digest length */
-
-#define        SHA256_HMAC_BLOCK_SIZE  64      /* SHA256-HMAC block size */
-#define        SHA512_HMAC_BLOCK_SIZE  128     /* SHA512-HMAC block size */
-
-#define        SHA256                  0
-#define        SHA256_HMAC             1
-#define        SHA256_HMAC_GEN         2
-#define        SHA384                  3
-#define        SHA384_HMAC             4
-#define        SHA384_HMAC_GEN         5
-#define        SHA512                  6
-#define        SHA512_HMAC             7
-#define        SHA512_HMAC_GEN         8
-#define        SHA512_224              9
-#define        SHA512_256              10
-
-/*
- * SHA2 context.
- * The contents of this structure are a private interface between the
- * Init/Update/Final calls of the functions defined below.
- * Callers must never attempt to read or write any of the fields
- * in this structure directly.
- */
-
-#include <crypto/sha2/sha256.h>
-#include <crypto/sha2/sha384.h>
-#include <crypto/sha2/sha512.h>
-#include <crypto/sha2/sha512t.h>
-typedef struct         {
-       uint32_t algotype;              /* Algorithm Type */
-       union {
-               SHA256_CTX SHA256_ctx;
-               SHA384_CTX SHA384_ctx;
-               SHA512_CTX SHA512_ctx;
-       };
-} SHA2_CTX;
-
-extern void SHA256Init(SHA256_CTX *);
-
-extern void SHA256Update(SHA256_CTX *, const void *, size_t);
-
-extern void SHA256Final(void *, SHA256_CTX *);
-
-extern void SHA384Init(SHA384_CTX *);
-
-extern void SHA384Update(SHA384_CTX *, const void *, size_t);
-
-extern void SHA384Final(void *, SHA384_CTX *);
-
-extern void SHA512Init(SHA512_CTX *);
-
-extern void SHA512Update(SHA512_CTX *, const void *, size_t);
-
-extern void SHA512Final(void *, SHA512_CTX *);
-
-
-static inline void
-SHA2Init(uint64_t mech, SHA2_CTX *c)
-{
-       switch (mech) {
-               case SHA256:
-                       SHA256_Init(&c->SHA256_ctx);
-                       break;
-               case SHA384:
-                       SHA384_Init(&c->SHA384_ctx);
-                       break;
-               case SHA512:
-                       SHA512_Init(&c->SHA512_ctx);
-                       break;
-               case SHA512_256:
-                       SHA512_256_Init(&c->SHA512_ctx);
-                       break;
-               default:
-                       panic("unknown mechanism %ju", (uintmax_t)mech);
-       }
-       c->algotype = (uint32_t)mech;
-}
-
-static inline void
-SHA2Update(SHA2_CTX *c, const void *p, size_t s)
-{
-       switch (c->algotype) {
-               case SHA256:
-                       SHA256_Update(&c->SHA256_ctx, p, s);
-                       break;
-               case SHA384:
-                       SHA384_Update(&c->SHA384_ctx, p, s);
-                       break;
-               case SHA512:
-                       SHA512_Update(&c->SHA512_ctx, p, s);
-                       break;
-               case SHA512_256:
-                       SHA512_256_Update(&c->SHA512_ctx, p, s);
-                       break;
-               default:
-                       panic("unknown mechanism %d", c->algotype);
-       }
-}
-
-static inline void
-SHA2Final(void *p, SHA2_CTX *c)
-{
-       switch (c->algotype) {
-               case SHA256:
-                       SHA256_Final(p, &c->SHA256_ctx);
-                       break;
-               case SHA384:
-                       SHA384_Final(p, &c->SHA384_ctx);
-                       break;
-               case SHA512:
-                       SHA512_Final(p, &c->SHA512_ctx);
-                       break;
-               case SHA512_256:
-                       SHA512_256_Final(p, &c->SHA512_ctx);
-                       break;
-               default:
-                       panic("unknown mechanism %d", c->algotype);
-       }
-}
-
-#ifdef _SHA2_IMPL
-/*
- * The following types/functions are all private to the implementation
- * of the SHA2 functions and must not be used by consumers of the interface
- */
-
-/*
- * List of support mechanisms in this module.
- *
- * It is important to note that in the module, division or modulus calculations
- * are used on the enumerated type to determine which mechanism is being used;
- * therefore, changing the order or additional mechanisms should be done
- * carefully
- */
-typedef enum sha2_mech_type {
-       SHA256_MECH_INFO_TYPE,          /* SUN_CKM_SHA256 */
-       SHA256_HMAC_MECH_INFO_TYPE,     /* SUN_CKM_SHA256_HMAC */
-       SHA256_HMAC_GEN_MECH_INFO_TYPE, /* SUN_CKM_SHA256_HMAC_GENERAL */
-       SHA384_MECH_INFO_TYPE,          /* SUN_CKM_SHA384 */
-       SHA384_HMAC_MECH_INFO_TYPE,     /* SUN_CKM_SHA384_HMAC */
-       SHA384_HMAC_GEN_MECH_INFO_TYPE, /* SUN_CKM_SHA384_HMAC_GENERAL */
-       SHA512_MECH_INFO_TYPE,          /* SUN_CKM_SHA512 */
-       SHA512_HMAC_MECH_INFO_TYPE,     /* SUN_CKM_SHA512_HMAC */
-       SHA512_HMAC_GEN_MECH_INFO_TYPE, /* SUN_CKM_SHA512_HMAC_GENERAL */
-       SHA512_224_MECH_INFO_TYPE,      /* SUN_CKM_SHA512_224 */
-       SHA512_256_MECH_INFO_TYPE       /* SUN_CKM_SHA512_256 */
-} sha2_mech_type_t;
-
-#endif /* _SHA2_IMPL */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_SHA2_H */
index e20702d332acec8924eca033ff3fdad99039175e..e821e075de4fbf0ddf4f9b5ad114802e8c3ce6c7 100644 (file)
@@ -19,7 +19,6 @@ kernel_linux_HEADERS = \
 kernel_sysdir = $(kerneldir)/sys
 kernel_sys_HEADERS = \
        %D%/zfs/sys/policy.h \
-       %D%/zfs/sys/sha2.h \
        %D%/zfs/sys/trace_acl.h \
        %D%/zfs/sys/trace_arc.h \
        %D%/zfs/sys/trace_common.h \
diff --git a/include/os/linux/zfs/sys/sha2.h b/include/os/linux/zfs/sys/sha2.h
deleted file mode 100644 (file)
index ef37139..0000000
+++ /dev/null
@@ -1,148 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or https://opensource.org/licenses/CDDL-1.0.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
- */
-/* Copyright 2013 Saso Kiselkov.  All rights reserved. */
-
-#ifndef _SYS_SHA2_H
-#define        _SYS_SHA2_H
-
-#include <sys/types.h>         /* for uint_* */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define        SHA256_DIGEST_LENGTH    32      /* SHA256 digest length in bytes */
-#define        SHA384_DIGEST_LENGTH    48      /* SHA384 digest length in bytes */
-#define        SHA512_DIGEST_LENGTH    64      /* SHA512 digest length in bytes */
-
-/* Truncated versions of SHA-512 according to FIPS-180-4, section 5.3.6 */
-#define        SHA512_224_DIGEST_LENGTH        28      /* SHA512/224 digest length */
-#define        SHA512_256_DIGEST_LENGTH        32      /* SHA512/256 digest length */
-
-#define        SHA256_HMAC_BLOCK_SIZE  64      /* SHA256-HMAC block size */
-#define        SHA512_HMAC_BLOCK_SIZE  128     /* SHA512-HMAC block size */
-
-#define        SHA256                  0
-#define        SHA256_HMAC             1
-#define        SHA256_HMAC_GEN         2
-#define        SHA384                  3
-#define        SHA384_HMAC             4
-#define        SHA384_HMAC_GEN         5
-#define        SHA512                  6
-#define        SHA512_HMAC             7
-#define        SHA512_HMAC_GEN         8
-#define        SHA512_224              9
-#define        SHA512_256              10
-
-/*
- * SHA2 context.
- * The contents of this structure are a private interface between the
- * Init/Update/Final calls of the functions defined below.
- * Callers must never attempt to read or write any of the fields
- * in this structure directly.
- */
-typedef struct         {
-       uint32_t algotype;              /* Algorithm Type */
-
-       /* state (ABCDEFGH) */
-       union {
-               uint32_t s32[8];        /* for SHA256 */
-               uint64_t s64[8];        /* for SHA384/512 */
-       } state;
-       /* number of bits */
-       union {
-               uint32_t c32[2];        /* for SHA256 , modulo 2^64 */
-               uint64_t c64[2];        /* for SHA384/512, modulo 2^128 */
-       } count;
-       union {
-               uint8_t         buf8[128];      /* undigested input */
-               uint32_t        buf32[32];      /* realigned input */
-               uint64_t        buf64[16];      /* realigned input */
-       } buf_un;
-} SHA2_CTX;
-
-typedef SHA2_CTX SHA256_CTX;
-typedef SHA2_CTX SHA384_CTX;
-typedef SHA2_CTX SHA512_CTX;
-
-extern void SHA2Init(uint64_t mech, SHA2_CTX *);
-
-extern void SHA2Update(SHA2_CTX *, const void *, size_t);
-
-extern void SHA2Final(void *, SHA2_CTX *);
-
-extern void SHA256Init(SHA256_CTX *);
-
-extern void SHA256Update(SHA256_CTX *, const void *, size_t);
-
-extern void SHA256Final(void *, SHA256_CTX *);
-
-extern void SHA384Init(SHA384_CTX *);
-
-extern void SHA384Update(SHA384_CTX *, const void *, size_t);
-
-extern void SHA384Final(void *, SHA384_CTX *);
-
-extern void SHA512Init(SHA512_CTX *);
-
-extern void SHA512Update(SHA512_CTX *, const void *, size_t);
-
-extern void SHA512Final(void *, SHA512_CTX *);
-
-#ifdef _SHA2_IMPL
-/*
- * The following types/functions are all private to the implementation
- * of the SHA2 functions and must not be used by consumers of the interface
- */
-
-/*
- * List of support mechanisms in this module.
- *
- * It is important to note that in the module, division or modulus calculations
- * are used on the enumerated type to determine which mechanism is being used;
- * therefore, changing the order or additional mechanisms should be done
- * carefully
- */
-typedef enum sha2_mech_type {
-       SHA256_MECH_INFO_TYPE,          /* SUN_CKM_SHA256 */
-       SHA256_HMAC_MECH_INFO_TYPE,     /* SUN_CKM_SHA256_HMAC */
-       SHA256_HMAC_GEN_MECH_INFO_TYPE, /* SUN_CKM_SHA256_HMAC_GENERAL */
-       SHA384_MECH_INFO_TYPE,          /* SUN_CKM_SHA384 */
-       SHA384_HMAC_MECH_INFO_TYPE,     /* SUN_CKM_SHA384_HMAC */
-       SHA384_HMAC_GEN_MECH_INFO_TYPE, /* SUN_CKM_SHA384_HMAC_GENERAL */
-       SHA512_MECH_INFO_TYPE,          /* SUN_CKM_SHA512 */
-       SHA512_HMAC_MECH_INFO_TYPE,     /* SUN_CKM_SHA512_HMAC */
-       SHA512_HMAC_GEN_MECH_INFO_TYPE, /* SUN_CKM_SHA512_HMAC_GENERAL */
-       SHA512_224_MECH_INFO_TYPE,      /* SUN_CKM_SHA512_224 */
-       SHA512_256_MECH_INFO_TYPE       /* SUN_CKM_SHA512_256 */
-} sha2_mech_type_t;
-
-#endif /* _SHA2_IMPL */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_SHA2_H */
diff --git a/lib/libspl/include/sys/sha2.h b/lib/libspl/include/sys/sha2.h
deleted file mode 100644 (file)
index 40db1a6..0000000
+++ /dev/null
@@ -1,148 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or https://opensource.org/licenses/CDDL-1.0.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
- */
-/* Copyright 2013 Saso Kiselkov.  All rights reserved. */
-
-#ifndef _SYS_SHA2_H
-#define        _SYS_SHA2_H
-
-#include <stdint.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define        SHA256_DIGEST_LENGTH    32      /* SHA256 digest length in bytes */
-#define        SHA384_DIGEST_LENGTH    48      /* SHA384 digest length in bytes */
-#define        SHA512_DIGEST_LENGTH    64      /* SHA512 digest length in bytes */
-
-/* Truncated versions of SHA-512 according to FIPS-180-4, section 5.3.6 */
-#define        SHA512_224_DIGEST_LENGTH        28      /* SHA512/224 digest length */
-#define        SHA512_256_DIGEST_LENGTH        32      /* SHA512/256 digest length */
-
-#define        SHA256_HMAC_BLOCK_SIZE  64      /* SHA256-HMAC block size */
-#define        SHA512_HMAC_BLOCK_SIZE  128     /* SHA512-HMAC block size */
-
-#define        SHA256                  0
-#define        SHA256_HMAC             1
-#define        SHA256_HMAC_GEN         2
-#define        SHA384                  3
-#define        SHA384_HMAC             4
-#define        SHA384_HMAC_GEN         5
-#define        SHA512                  6
-#define        SHA512_HMAC             7
-#define        SHA512_HMAC_GEN         8
-#define        SHA512_224              9
-#define        SHA512_256              10
-
-/*
- * SHA2 context.
- * The contents of this structure are a private interface between the
- * Init/Update/Final calls of the functions defined below.
- * Callers must never attempt to read or write any of the fields
- * in this structure directly.
- */
-typedef struct         {
-       uint32_t algotype;              /* Algorithm Type */
-
-       /* state (ABCDEFGH) */
-       union {
-               uint32_t s32[8];        /* for SHA256 */
-               uint64_t s64[8];        /* for SHA384/512 */
-       } state;
-       /* number of bits */
-       union {
-               uint32_t c32[2];        /* for SHA256 , modulo 2^64 */
-               uint64_t c64[2];        /* for SHA384/512, modulo 2^128 */
-       } count;
-       union {
-               uint8_t         buf8[128];      /* undigested input */
-               uint32_t        buf32[32];      /* realigned input */
-               uint64_t        buf64[16];      /* realigned input */
-       } buf_un;
-} SHA2_CTX;
-
-typedef SHA2_CTX SHA256_CTX;
-typedef SHA2_CTX SHA384_CTX;
-typedef SHA2_CTX SHA512_CTX;
-
-extern void SHA256Init(SHA256_CTX *);
-
-extern void SHA256Update(SHA256_CTX *, const void *, size_t);
-
-extern void SHA256Final(void *, SHA256_CTX *);
-
-extern void SHA384Init(SHA384_CTX *);
-
-extern void SHA384Update(SHA384_CTX *, const void *, size_t);
-
-extern void SHA384Final(void *, SHA384_CTX *);
-
-extern void SHA512Init(SHA512_CTX *);
-
-extern void SHA512Update(SHA512_CTX *, const void *, size_t);
-
-extern void SHA512Final(void *, SHA512_CTX *);
-
-extern void SHA2Init(uint64_t mech, SHA2_CTX *);
-
-extern void SHA2Update(SHA2_CTX *, const void *, size_t);
-
-extern void SHA2Final(void *, SHA2_CTX *);
-
-#ifdef _SHA2_IMPL
-/*
- * The following types/functions are all private to the implementation
- * of the SHA2 functions and must not be used by consumers of the interface
- */
-
-/*
- * List of support mechanisms in this module.
- *
- * It is important to note that in the module, division or modulus calculations
- * are used on the enumerated type to determine which mechanism is being used;
- * therefore, changing the order or additional mechanisms should be done
- * carefully
- */
-typedef enum sha2_mech_type {
-       SHA256_MECH_INFO_TYPE,          /* SUN_CKM_SHA256 */
-       SHA256_HMAC_MECH_INFO_TYPE,     /* SUN_CKM_SHA256_HMAC */
-       SHA256_HMAC_GEN_MECH_INFO_TYPE, /* SUN_CKM_SHA256_HMAC_GENERAL */
-       SHA384_MECH_INFO_TYPE,          /* SUN_CKM_SHA384 */
-       SHA384_HMAC_MECH_INFO_TYPE,     /* SUN_CKM_SHA384_HMAC */
-       SHA384_HMAC_GEN_MECH_INFO_TYPE, /* SUN_CKM_SHA384_HMAC_GENERAL */
-       SHA512_MECH_INFO_TYPE,          /* SUN_CKM_SHA512 */
-       SHA512_HMAC_MECH_INFO_TYPE,     /* SUN_CKM_SHA512_HMAC */
-       SHA512_HMAC_GEN_MECH_INFO_TYPE, /* SUN_CKM_SHA512_HMAC_GENERAL */
-       SHA512_224_MECH_INFO_TYPE,      /* SUN_CKM_SHA512_224 */
-       SHA512_256_MECH_INFO_TYPE       /* SUN_CKM_SHA512_256 */
-} sha2_mech_type_t;
-
-#endif /* _SHA2_IMPL */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_SHA2_H */
index a1ea08cd4348bfb89922c22b551f4194e2f1d9c5..6b1c9c48b1e6e2a26ffb2adb68d4a79dfdfacceb 100644 (file)
@@ -94,7 +94,6 @@ ICP_OBJS := \
        algs/modes/gcm.o \
        algs/modes/gcm_generic.o \
        algs/modes/modes.o \
-       algs/sha2/sha2.o \
        algs/skein/skein.o \
        algs/skein/skein_block.o \
        algs/skein/skein_iv.o \
@@ -122,9 +121,7 @@ ICP_OBJS_X86_64 := \
        asm-x86_64/blake3/blake3_sse41.o \
        asm-x86_64/modes/aesni-gcm-x86_64.o \
        asm-x86_64/modes/gcm_pclmulqdq.o \
-       asm-x86_64/modes/ghash-x86_64.o \
-       asm-x86_64/sha2/sha256_impl.o \
-       asm-x86_64/sha2/sha512_impl.o
+       asm-x86_64/modes/ghash-x86_64.o
 
 
 ICP_OBJS_X86 := \
@@ -159,13 +156,6 @@ $(addprefix $(obj)/icp/,$(ICP_OBJS) $(ICP_OBJS_X86) $(ICP_OBJS_X86_64) \
 # Suppress objtool "return with modified stack frame" warnings.
 OBJECT_FILES_NON_STANDARD_aesni-gcm-x86_64.o := y
 
-# Suppress objtool "unsupported stack pointer realignment" warnings. We are
-# not using a DRAP register while aligning the stack to a 64 byte boundary.
-# See #6950 for the reasoning.
-OBJECT_FILES_NON_STANDARD_sha256_impl.o := y
-OBJECT_FILES_NON_STANDARD_sha512_impl.o := y
-
-
 LUA_OBJS := \
        lapi.o \
        lauxlib.o \
@@ -344,7 +334,7 @@ ZFS_OBJS := \
        refcount.o \
        rrwlock.o \
        sa.o \
-       sha256.o \
+       sha2_zfs.o \
        skein_zfs.o \
        spa.o \
        spa_checkpoint.o \
index 999dc90ff59f5f5f35af3f5eae3fb87542e0244c..1663dcec63c7491a717fef37345c70cbd67c5be5 100644 (file)
@@ -141,8 +141,6 @@ SRCS+=      nvpair.c \
 SRCS+= acl_common.c \
        callb.c \
        list.c \
-       sha256c.c \
-       sha512c.c \
        spl_acl.c \
        spl_cmn_err.c \
        spl_dtrace.c \
@@ -268,7 +266,7 @@ SRCS+=      abd.c \
        refcount.c \
        rrwlock.c \
        sa.c \
-       sha256.c \
+       sha2_zfs.c \
        skein_zfs.c \
        spa.c \
        spa_checkpoint.c \
diff --git a/module/icp/algs/sha2/sha2.c b/module/icp/algs/sha2/sha2.c
deleted file mode 100644 (file)
index e6bbe34..0000000
+++ /dev/null
@@ -1,957 +0,0 @@
-/*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * Copyright 2013 Saso Kiselkov.  All rights reserved.
- */
-
-/*
- * The basic framework for this code came from the reference
- * implementation for MD5.  That implementation is Copyright (C)
- * 1991-2, RSA Data Security, Inc. Created 1991. All rights reserved.
- *
- * License to copy and use this software is granted provided that it
- * is identified as the "RSA Data Security, Inc. MD5 Message-Digest
- * Algorithm" in all material mentioning or referencing this software
- * or this function.
- *
- * License is also granted to make and use derivative works provided
- * that such works are identified as "derived from the RSA Data
- * Security, Inc. MD5 Message-Digest Algorithm" in all material
- * mentioning or referencing the derived work.
- *
- * RSA Data Security, Inc. makes no representations concerning either
- * the merchantability of this software or the suitability of this
- * software for any particular purpose. It is provided "as is"
- * without express or implied warranty of any kind.
- *
- * These notices must be retained in any copies of any part of this
- * documentation and/or software.
- *
- * NOTE: Cleaned-up and optimized, version of SHA2, based on the FIPS 180-2
- * standard, available at
- * http://csrc.nist.gov/publications/fips/fips180-2/fips180-2.pdf
- * Not as fast as one would like -- further optimizations are encouraged
- * and appreciated.
- */
-
-#include <sys/zfs_context.h>
-#define        _SHA2_IMPL
-#include <sys/sha2.h>
-#include <sha2/sha2_consts.h>
-
-#define        _RESTRICT_KYWD
-
-#ifdef _ZFS_LITTLE_ENDIAN
-#include <sys/byteorder.h>
-#define        HAVE_HTONL
-#endif
-#include <sys/isa_defs.h>      /* for _ILP32 */
-#include <sys/asm_linkage.h>
-
-static void Encode(uint8_t *, uint32_t *, size_t);
-static void Encode64(uint8_t *, uint64_t *, size_t);
-
-/* userspace only supports the generic version */
-#if    defined(__amd64) && defined(_KERNEL)
-#define        SHA512Transform(ctx, in) SHA512TransformBlocks((ctx), (in), 1)
-#define        SHA256Transform(ctx, in) SHA256TransformBlocks((ctx), (in), 1)
-
-void ASMABI SHA512TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num);
-void ASMABI SHA256TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num);
-
-#else
-static void SHA256Transform(SHA2_CTX *, const uint8_t *);
-static void SHA512Transform(SHA2_CTX *, const uint8_t *);
-#endif /* __amd64 && _KERNEL */
-
-static const uint8_t PADDING[128] = { 0x80, /* all zeros */ };
-
-/*
- * The low-level checksum routines use a lot of stack space. On systems where
- * small stacks are enforced (like 32-bit kernel builds), insert compiler memory
- * barriers to reduce stack frame size. This can reduce the SHA512Transform()
- * stack frame usage from 3k to <1k on ARM32, for example.
- */
-#if defined(_ILP32) || defined(__powerpc)      /* small stack */
-#define        SMALL_STACK_MEMORY_BARRIER      asm volatile("": : :"memory");
-#else
-#define        SMALL_STACK_MEMORY_BARRIER
-#endif
-
-/* Ch and Maj are the basic SHA2 functions. */
-#define        Ch(b, c, d)     (((b) & (c)) ^ ((~b) & (d)))
-#define        Maj(b, c, d)    (((b) & (c)) ^ ((b) & (d)) ^ ((c) & (d)))
-
-/* Rotates x right n bits. */
-#define        ROTR(x, n)      \
-       (((x) >> (n)) | ((x) << ((sizeof (x) * NBBY)-(n))))
-
-/* Shift x right n bits */
-#define        SHR(x, n)       ((x) >> (n))
-
-/* SHA256 Functions */
-#define        BIGSIGMA0_256(x)        (ROTR((x), 2) ^ ROTR((x), 13) ^ ROTR((x), 22))
-#define        BIGSIGMA1_256(x)        (ROTR((x), 6) ^ ROTR((x), 11) ^ ROTR((x), 25))
-#define        SIGMA0_256(x)           (ROTR((x), 7) ^ ROTR((x), 18) ^ SHR((x), 3))
-#define        SIGMA1_256(x)           (ROTR((x), 17) ^ ROTR((x), 19) ^ SHR((x), 10))
-
-#define        SHA256ROUND(a, b, c, d, e, f, g, h, i, w)                       \
-       T1 = h + BIGSIGMA1_256(e) + Ch(e, f, g) + SHA256_CONST(i) + w;  \
-       d += T1;                                                        \
-       T2 = BIGSIGMA0_256(a) + Maj(a, b, c);                           \
-       h = T1 + T2
-
-/* SHA384/512 Functions */
-#define        BIGSIGMA0(x)    (ROTR((x), 28) ^ ROTR((x), 34) ^ ROTR((x), 39))
-#define        BIGSIGMA1(x)    (ROTR((x), 14) ^ ROTR((x), 18) ^ ROTR((x), 41))
-#define        SIGMA0(x)       (ROTR((x), 1) ^ ROTR((x), 8) ^ SHR((x), 7))
-#define        SIGMA1(x)       (ROTR((x), 19) ^ ROTR((x), 61) ^ SHR((x), 6))
-#define        SHA512ROUND(a, b, c, d, e, f, g, h, i, w)                       \
-       T1 = h + BIGSIGMA1(e) + Ch(e, f, g) + SHA512_CONST(i) + w;      \
-       d += T1;                                                        \
-       T2 = BIGSIGMA0(a) + Maj(a, b, c);                               \
-       h = T1 + T2;                                                    \
-       SMALL_STACK_MEMORY_BARRIER;
-
-/*
- * sparc optimization:
- *
- * on the sparc, we can load big endian 32-bit data easily.  note that
- * special care must be taken to ensure the address is 32-bit aligned.
- * in the interest of speed, we don't check to make sure, since
- * careful programming can guarantee this for us.
- */
-
-#if    defined(_ZFS_BIG_ENDIAN)
-#define        LOAD_BIG_32(addr)       (*(uint32_t *)(addr))
-#define        LOAD_BIG_64(addr)       (*(uint64_t *)(addr))
-
-#elif  defined(HAVE_HTONL)
-#define        LOAD_BIG_32(addr) htonl(*((uint32_t *)(addr)))
-#define        LOAD_BIG_64(addr) htonll(*((uint64_t *)(addr)))
-
-#else
-/* little endian -- will work on big endian, but slowly */
-#define        LOAD_BIG_32(addr)       \
-       (((addr)[0] << 24) | ((addr)[1] << 16) | ((addr)[2] << 8) | (addr)[3])
-#define        LOAD_BIG_64(addr)       \
-       (((uint64_t)(addr)[0] << 56) | ((uint64_t)(addr)[1] << 48) |    \
-           ((uint64_t)(addr)[2] << 40) | ((uint64_t)(addr)[3] << 32) | \
-           ((uint64_t)(addr)[4] << 24) | ((uint64_t)(addr)[5] << 16) | \
-           ((uint64_t)(addr)[6] << 8) | (uint64_t)(addr)[7])
-#endif /* _BIG_ENDIAN */
-
-
-#if    !defined(__amd64) || !defined(_KERNEL)
-/* SHA256 Transform */
-
-static void
-SHA256Transform(SHA2_CTX *ctx, const uint8_t *blk)
-{
-       uint32_t a = ctx->state.s32[0];
-       uint32_t b = ctx->state.s32[1];
-       uint32_t c = ctx->state.s32[2];
-       uint32_t d = ctx->state.s32[3];
-       uint32_t e = ctx->state.s32[4];
-       uint32_t f = ctx->state.s32[5];
-       uint32_t g = ctx->state.s32[6];
-       uint32_t h = ctx->state.s32[7];
-
-       uint32_t w0, w1, w2, w3, w4, w5, w6, w7;
-       uint32_t w8, w9, w10, w11, w12, w13, w14, w15;
-       uint32_t T1, T2;
-
-#if    defined(__sparc)
-       static const uint32_t sha256_consts[] = {
-               SHA256_CONST_0, SHA256_CONST_1, SHA256_CONST_2,
-               SHA256_CONST_3, SHA256_CONST_4, SHA256_CONST_5,
-               SHA256_CONST_6, SHA256_CONST_7, SHA256_CONST_8,
-               SHA256_CONST_9, SHA256_CONST_10, SHA256_CONST_11,
-               SHA256_CONST_12, SHA256_CONST_13, SHA256_CONST_14,
-               SHA256_CONST_15, SHA256_CONST_16, SHA256_CONST_17,
-               SHA256_CONST_18, SHA256_CONST_19, SHA256_CONST_20,
-               SHA256_CONST_21, SHA256_CONST_22, SHA256_CONST_23,
-               SHA256_CONST_24, SHA256_CONST_25, SHA256_CONST_26,
-               SHA256_CONST_27, SHA256_CONST_28, SHA256_CONST_29,
-               SHA256_CONST_30, SHA256_CONST_31, SHA256_CONST_32,
-               SHA256_CONST_33, SHA256_CONST_34, SHA256_CONST_35,
-               SHA256_CONST_36, SHA256_CONST_37, SHA256_CONST_38,
-               SHA256_CONST_39, SHA256_CONST_40, SHA256_CONST_41,
-               SHA256_CONST_42, SHA256_CONST_43, SHA256_CONST_44,
-               SHA256_CONST_45, SHA256_CONST_46, SHA256_CONST_47,
-               SHA256_CONST_48, SHA256_CONST_49, SHA256_CONST_50,
-               SHA256_CONST_51, SHA256_CONST_52, SHA256_CONST_53,
-               SHA256_CONST_54, SHA256_CONST_55, SHA256_CONST_56,
-               SHA256_CONST_57, SHA256_CONST_58, SHA256_CONST_59,
-               SHA256_CONST_60, SHA256_CONST_61, SHA256_CONST_62,
-               SHA256_CONST_63
-       };
-#endif /* __sparc */
-
-       if ((uintptr_t)blk & 0x3) {             /* not 4-byte aligned? */
-               memcpy(ctx->buf_un.buf32, blk, sizeof (ctx->buf_un.buf32));
-               blk = (uint8_t *)ctx->buf_un.buf32;
-       }
-
-       /* LINTED E_BAD_PTR_CAST_ALIGN */
-       w0 =  LOAD_BIG_32(blk + 4 * 0);
-       SHA256ROUND(a, b, c, d, e, f, g, h, 0, w0);
-       /* LINTED E_BAD_PTR_CAST_ALIGN */
-       w1 =  LOAD_BIG_32(blk + 4 * 1);
-       SHA256ROUND(h, a, b, c, d, e, f, g, 1, w1);
-       /* LINTED E_BAD_PTR_CAST_ALIGN */
-       w2 =  LOAD_BIG_32(blk + 4 * 2);
-       SHA256ROUND(g, h, a, b, c, d, e, f, 2, w2);
-       /* LINTED E_BAD_PTR_CAST_ALIGN */
-       w3 =  LOAD_BIG_32(blk + 4 * 3);
-       SHA256ROUND(f, g, h, a, b, c, d, e, 3, w3);
-       /* LINTED E_BAD_PTR_CAST_ALIGN */
-       w4 =  LOAD_BIG_32(blk + 4 * 4);
-       SHA256ROUND(e, f, g, h, a, b, c, d, 4, w4);
-       /* LINTED E_BAD_PTR_CAST_ALIGN */
-       w5 =  LOAD_BIG_32(blk + 4 * 5);
-       SHA256ROUND(d, e, f, g, h, a, b, c, 5, w5);
-       /* LINTED E_BAD_PTR_CAST_ALIGN */
-       w6 =  LOAD_BIG_32(blk + 4 * 6);
-       SHA256ROUND(c, d, e, f, g, h, a, b, 6, w6);
-       /* LINTED E_BAD_PTR_CAST_ALIGN */
-       w7 =  LOAD_BIG_32(blk + 4 * 7);
-       SHA256ROUND(b, c, d, e, f, g, h, a, 7, w7);
-       /* LINTED E_BAD_PTR_CAST_ALIGN */
-       w8 =  LOAD_BIG_32(blk + 4 * 8);
-       SHA256ROUND(a, b, c, d, e, f, g, h, 8, w8);
-       /* LINTED E_BAD_PTR_CAST_ALIGN */
-       w9 =  LOAD_BIG_32(blk + 4 * 9);
-       SHA256ROUND(h, a, b, c, d, e, f, g, 9, w9);
-       /* LINTED E_BAD_PTR_CAST_ALIGN */
-       w10 =  LOAD_BIG_32(blk + 4 * 10);
-       SHA256ROUND(g, h, a, b, c, d, e, f, 10, w10);
-       /* LINTED E_BAD_PTR_CAST_ALIGN */
-       w11 =  LOAD_BIG_32(blk + 4 * 11);
-       SHA256ROUND(f, g, h, a, b, c, d, e, 11, w11);
-       /* LINTED E_BAD_PTR_CAST_ALIGN */
-       w12 =  LOAD_BIG_32(blk + 4 * 12);
-       SHA256ROUND(e, f, g, h, a, b, c, d, 12, w12);
-       /* LINTED E_BAD_PTR_CAST_ALIGN */
-       w13 =  LOAD_BIG_32(blk + 4 * 13);
-       SHA256ROUND(d, e, f, g, h, a, b, c, 13, w13);
-       /* LINTED E_BAD_PTR_CAST_ALIGN */
-       w14 =  LOAD_BIG_32(blk + 4 * 14);
-       SHA256ROUND(c, d, e, f, g, h, a, b, 14, w14);
-       /* LINTED E_BAD_PTR_CAST_ALIGN */
-       w15 =  LOAD_BIG_32(blk + 4 * 15);
-       SHA256ROUND(b, c, d, e, f, g, h, a, 15, w15);
-
-       w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;
-       SHA256ROUND(a, b, c, d, e, f, g, h, 16, w0);
-       w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;
-       SHA256ROUND(h, a, b, c, d, e, f, g, 17, w1);
-       w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;
-       SHA256ROUND(g, h, a, b, c, d, e, f, 18, w2);
-       w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;
-       SHA256ROUND(f, g, h, a, b, c, d, e, 19, w3);
-       w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;
-       SHA256ROUND(e, f, g, h, a, b, c, d, 20, w4);
-       w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;
-       SHA256ROUND(d, e, f, g, h, a, b, c, 21, w5);
-       w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;
-       SHA256ROUND(c, d, e, f, g, h, a, b, 22, w6);
-       w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;
-       SHA256ROUND(b, c, d, e, f, g, h, a, 23, w7);
-       w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;
-       SHA256ROUND(a, b, c, d, e, f, g, h, 24, w8);
-       w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;
-       SHA256ROUND(h, a, b, c, d, e, f, g, 25, w9);
-       w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;
-       SHA256ROUND(g, h, a, b, c, d, e, f, 26, w10);
-       w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;
-       SHA256ROUND(f, g, h, a, b, c, d, e, 27, w11);
-       w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;
-       SHA256ROUND(e, f, g, h, a, b, c, d, 28, w12);
-       w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;
-       SHA256ROUND(d, e, f, g, h, a, b, c, 29, w13);
-       w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;
-       SHA256ROUND(c, d, e, f, g, h, a, b, 30, w14);
-       w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;
-       SHA256ROUND(b, c, d, e, f, g, h, a, 31, w15);
-
-       w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;
-       SHA256ROUND(a, b, c, d, e, f, g, h, 32, w0);
-       w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;
-       SHA256ROUND(h, a, b, c, d, e, f, g, 33, w1);
-       w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;
-       SHA256ROUND(g, h, a, b, c, d, e, f, 34, w2);
-       w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;
-       SHA256ROUND(f, g, h, a, b, c, d, e, 35, w3);
-       w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;
-       SHA256ROUND(e, f, g, h, a, b, c, d, 36, w4);
-       w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;
-       SHA256ROUND(d, e, f, g, h, a, b, c, 37, w5);
-       w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;
-       SHA256ROUND(c, d, e, f, g, h, a, b, 38, w6);
-       w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;
-       SHA256ROUND(b, c, d, e, f, g, h, a, 39, w7);
-       w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;
-       SHA256ROUND(a, b, c, d, e, f, g, h, 40, w8);
-       w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;
-       SHA256ROUND(h, a, b, c, d, e, f, g, 41, w9);
-       w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;
-       SHA256ROUND(g, h, a, b, c, d, e, f, 42, w10);
-       w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;
-       SHA256ROUND(f, g, h, a, b, c, d, e, 43, w11);
-       w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;
-       SHA256ROUND(e, f, g, h, a, b, c, d, 44, w12);
-       w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;
-       SHA256ROUND(d, e, f, g, h, a, b, c, 45, w13);
-       w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;
-       SHA256ROUND(c, d, e, f, g, h, a, b, 46, w14);
-       w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;
-       SHA256ROUND(b, c, d, e, f, g, h, a, 47, w15);
-
-       w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;
-       SHA256ROUND(a, b, c, d, e, f, g, h, 48, w0);
-       w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;
-       SHA256ROUND(h, a, b, c, d, e, f, g, 49, w1);
-       w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;
-       SHA256ROUND(g, h, a, b, c, d, e, f, 50, w2);
-       w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;
-       SHA256ROUND(f, g, h, a, b, c, d, e, 51, w3);
-       w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;
-       SHA256ROUND(e, f, g, h, a, b, c, d, 52, w4);
-       w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;
-       SHA256ROUND(d, e, f, g, h, a, b, c, 53, w5);
-       w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;
-       SHA256ROUND(c, d, e, f, g, h, a, b, 54, w6);
-       w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;
-       SHA256ROUND(b, c, d, e, f, g, h, a, 55, w7);
-       w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;
-       SHA256ROUND(a, b, c, d, e, f, g, h, 56, w8);
-       w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;
-       SHA256ROUND(h, a, b, c, d, e, f, g, 57, w9);
-       w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;
-       SHA256ROUND(g, h, a, b, c, d, e, f, 58, w10);
-       w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;
-       SHA256ROUND(f, g, h, a, b, c, d, e, 59, w11);
-       w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;
-       SHA256ROUND(e, f, g, h, a, b, c, d, 60, w12);
-       w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;
-       SHA256ROUND(d, e, f, g, h, a, b, c, 61, w13);
-       w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;
-       SHA256ROUND(c, d, e, f, g, h, a, b, 62, w14);
-       w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;
-       SHA256ROUND(b, c, d, e, f, g, h, a, 63, w15);
-
-       ctx->state.s32[0] += a;
-       ctx->state.s32[1] += b;
-       ctx->state.s32[2] += c;
-       ctx->state.s32[3] += d;
-       ctx->state.s32[4] += e;
-       ctx->state.s32[5] += f;
-       ctx->state.s32[6] += g;
-       ctx->state.s32[7] += h;
-}
-
-
-/* SHA384 and SHA512 Transform */
-
-static void
-SHA512Transform(SHA2_CTX *ctx, const uint8_t *blk)
-{
-
-       uint64_t a = ctx->state.s64[0];
-       uint64_t b = ctx->state.s64[1];
-       uint64_t c = ctx->state.s64[2];
-       uint64_t d = ctx->state.s64[3];
-       uint64_t e = ctx->state.s64[4];
-       uint64_t f = ctx->state.s64[5];
-       uint64_t g = ctx->state.s64[6];
-       uint64_t h = ctx->state.s64[7];
-
-       uint64_t w0, w1, w2, w3, w4, w5, w6, w7;
-       uint64_t w8, w9, w10, w11, w12, w13, w14, w15;
-       uint64_t T1, T2;
-
-#if    defined(__sparc)
-       static const uint64_t sha512_consts[] = {
-               SHA512_CONST_0, SHA512_CONST_1, SHA512_CONST_2,
-               SHA512_CONST_3, SHA512_CONST_4, SHA512_CONST_5,
-               SHA512_CONST_6, SHA512_CONST_7, SHA512_CONST_8,
-               SHA512_CONST_9, SHA512_CONST_10, SHA512_CONST_11,
-               SHA512_CONST_12, SHA512_CONST_13, SHA512_CONST_14,
-               SHA512_CONST_15, SHA512_CONST_16, SHA512_CONST_17,
-               SHA512_CONST_18, SHA512_CONST_19, SHA512_CONST_20,
-               SHA512_CONST_21, SHA512_CONST_22, SHA512_CONST_23,
-               SHA512_CONST_24, SHA512_CONST_25, SHA512_CONST_26,
-               SHA512_CONST_27, SHA512_CONST_28, SHA512_CONST_29,
-               SHA512_CONST_30, SHA512_CONST_31, SHA512_CONST_32,
-               SHA512_CONST_33, SHA512_CONST_34, SHA512_CONST_35,
-               SHA512_CONST_36, SHA512_CONST_37, SHA512_CONST_38,
-               SHA512_CONST_39, SHA512_CONST_40, SHA512_CONST_41,
-               SHA512_CONST_42, SHA512_CONST_43, SHA512_CONST_44,
-               SHA512_CONST_45, SHA512_CONST_46, SHA512_CONST_47,
-               SHA512_CONST_48, SHA512_CONST_49, SHA512_CONST_50,
-               SHA512_CONST_51, SHA512_CONST_52, SHA512_CONST_53,
-               SHA512_CONST_54, SHA512_CONST_55, SHA512_CONST_56,
-               SHA512_CONST_57, SHA512_CONST_58, SHA512_CONST_59,
-               SHA512_CONST_60, SHA512_CONST_61, SHA512_CONST_62,
-               SHA512_CONST_63, SHA512_CONST_64, SHA512_CONST_65,
-               SHA512_CONST_66, SHA512_CONST_67, SHA512_CONST_68,
-               SHA512_CONST_69, SHA512_CONST_70, SHA512_CONST_71,
-               SHA512_CONST_72, SHA512_CONST_73, SHA512_CONST_74,
-               SHA512_CONST_75, SHA512_CONST_76, SHA512_CONST_77,
-               SHA512_CONST_78, SHA512_CONST_79
-       };
-#endif /* __sparc */
-
-
-       if ((uintptr_t)blk & 0x7) {             /* not 8-byte aligned? */
-               memcpy(ctx->buf_un.buf64, blk, sizeof (ctx->buf_un.buf64));
-               blk = (uint8_t *)ctx->buf_un.buf64;
-       }
-
-       /* LINTED E_BAD_PTR_CAST_ALIGN */
-       w0 =  LOAD_BIG_64(blk + 8 * 0);
-       SHA512ROUND(a, b, c, d, e, f, g, h, 0, w0);
-       /* LINTED E_BAD_PTR_CAST_ALIGN */
-       w1 =  LOAD_BIG_64(blk + 8 * 1);
-       SHA512ROUND(h, a, b, c, d, e, f, g, 1, w1);
-       /* LINTED E_BAD_PTR_CAST_ALIGN */
-       w2 =  LOAD_BIG_64(blk + 8 * 2);
-       SHA512ROUND(g, h, a, b, c, d, e, f, 2, w2);
-       /* LINTED E_BAD_PTR_CAST_ALIGN */
-       w3 =  LOAD_BIG_64(blk + 8 * 3);
-       SHA512ROUND(f, g, h, a, b, c, d, e, 3, w3);
-       /* LINTED E_BAD_PTR_CAST_ALIGN */
-       w4 =  LOAD_BIG_64(blk + 8 * 4);
-       SHA512ROUND(e, f, g, h, a, b, c, d, 4, w4);
-       /* LINTED E_BAD_PTR_CAST_ALIGN */
-       w5 =  LOAD_BIG_64(blk + 8 * 5);
-       SHA512ROUND(d, e, f, g, h, a, b, c, 5, w5);
-       /* LINTED E_BAD_PTR_CAST_ALIGN */
-       w6 =  LOAD_BIG_64(blk + 8 * 6);
-       SHA512ROUND(c, d, e, f, g, h, a, b, 6, w6);
-       /* LINTED E_BAD_PTR_CAST_ALIGN */
-       w7 =  LOAD_BIG_64(blk + 8 * 7);
-       SHA512ROUND(b, c, d, e, f, g, h, a, 7, w7);
-       /* LINTED E_BAD_PTR_CAST_ALIGN */
-       w8 =  LOAD_BIG_64(blk + 8 * 8);
-       SHA512ROUND(a, b, c, d, e, f, g, h, 8, w8);
-       /* LINTED E_BAD_PTR_CAST_ALIGN */
-       w9 =  LOAD_BIG_64(blk + 8 * 9);
-       SHA512ROUND(h, a, b, c, d, e, f, g, 9, w9);
-       /* LINTED E_BAD_PTR_CAST_ALIGN */
-       w10 =  LOAD_BIG_64(blk + 8 * 10);
-       SHA512ROUND(g, h, a, b, c, d, e, f, 10, w10);
-       /* LINTED E_BAD_PTR_CAST_ALIGN */
-       w11 =  LOAD_BIG_64(blk + 8 * 11);
-       SHA512ROUND(f, g, h, a, b, c, d, e, 11, w11);
-       /* LINTED E_BAD_PTR_CAST_ALIGN */
-       w12 =  LOAD_BIG_64(blk + 8 * 12);
-       SHA512ROUND(e, f, g, h, a, b, c, d, 12, w12);
-       /* LINTED E_BAD_PTR_CAST_ALIGN */
-       w13 =  LOAD_BIG_64(blk + 8 * 13);
-       SHA512ROUND(d, e, f, g, h, a, b, c, 13, w13);
-       /* LINTED E_BAD_PTR_CAST_ALIGN */
-       w14 =  LOAD_BIG_64(blk + 8 * 14);
-       SHA512ROUND(c, d, e, f, g, h, a, b, 14, w14);
-       /* LINTED E_BAD_PTR_CAST_ALIGN */
-       w15 =  LOAD_BIG_64(blk + 8 * 15);
-       SHA512ROUND(b, c, d, e, f, g, h, a, 15, w15);
-
-       w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
-       SHA512ROUND(a, b, c, d, e, f, g, h, 16, w0);
-       w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
-       SHA512ROUND(h, a, b, c, d, e, f, g, 17, w1);
-       w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
-       SHA512ROUND(g, h, a, b, c, d, e, f, 18, w2);
-       w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
-       SHA512ROUND(f, g, h, a, b, c, d, e, 19, w3);
-       w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
-       SHA512ROUND(e, f, g, h, a, b, c, d, 20, w4);
-       w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
-       SHA512ROUND(d, e, f, g, h, a, b, c, 21, w5);
-       w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
-       SHA512ROUND(c, d, e, f, g, h, a, b, 22, w6);
-       w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
-       SHA512ROUND(b, c, d, e, f, g, h, a, 23, w7);
-       w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
-       SHA512ROUND(a, b, c, d, e, f, g, h, 24, w8);
-       w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
-       SHA512ROUND(h, a, b, c, d, e, f, g, 25, w9);
-       w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
-       SHA512ROUND(g, h, a, b, c, d, e, f, 26, w10);
-       w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
-       SHA512ROUND(f, g, h, a, b, c, d, e, 27, w11);
-       w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
-       SHA512ROUND(e, f, g, h, a, b, c, d, 28, w12);
-       w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
-       SHA512ROUND(d, e, f, g, h, a, b, c, 29, w13);
-       w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
-       SHA512ROUND(c, d, e, f, g, h, a, b, 30, w14);
-       w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
-       SHA512ROUND(b, c, d, e, f, g, h, a, 31, w15);
-
-       w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
-       SHA512ROUND(a, b, c, d, e, f, g, h, 32, w0);
-       w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
-       SHA512ROUND(h, a, b, c, d, e, f, g, 33, w1);
-       w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
-       SHA512ROUND(g, h, a, b, c, d, e, f, 34, w2);
-       w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
-       SHA512ROUND(f, g, h, a, b, c, d, e, 35, w3);
-       w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
-       SHA512ROUND(e, f, g, h, a, b, c, d, 36, w4);
-       w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
-       SHA512ROUND(d, e, f, g, h, a, b, c, 37, w5);
-       w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
-       SHA512ROUND(c, d, e, f, g, h, a, b, 38, w6);
-       w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
-       SHA512ROUND(b, c, d, e, f, g, h, a, 39, w7);
-       w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
-       SHA512ROUND(a, b, c, d, e, f, g, h, 40, w8);
-       w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
-       SHA512ROUND(h, a, b, c, d, e, f, g, 41, w9);
-       w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
-       SHA512ROUND(g, h, a, b, c, d, e, f, 42, w10);
-       w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
-       SHA512ROUND(f, g, h, a, b, c, d, e, 43, w11);
-       w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
-       SHA512ROUND(e, f, g, h, a, b, c, d, 44, w12);
-       w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
-       SHA512ROUND(d, e, f, g, h, a, b, c, 45, w13);
-       w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
-       SHA512ROUND(c, d, e, f, g, h, a, b, 46, w14);
-       w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
-       SHA512ROUND(b, c, d, e, f, g, h, a, 47, w15);
-
-       w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
-       SHA512ROUND(a, b, c, d, e, f, g, h, 48, w0);
-       w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
-       SHA512ROUND(h, a, b, c, d, e, f, g, 49, w1);
-       w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
-       SHA512ROUND(g, h, a, b, c, d, e, f, 50, w2);
-       w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
-       SHA512ROUND(f, g, h, a, b, c, d, e, 51, w3);
-       w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
-       SHA512ROUND(e, f, g, h, a, b, c, d, 52, w4);
-       w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
-       SHA512ROUND(d, e, f, g, h, a, b, c, 53, w5);
-       w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
-       SHA512ROUND(c, d, e, f, g, h, a, b, 54, w6);
-       w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
-       SHA512ROUND(b, c, d, e, f, g, h, a, 55, w7);
-       w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
-       SHA512ROUND(a, b, c, d, e, f, g, h, 56, w8);
-       w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
-       SHA512ROUND(h, a, b, c, d, e, f, g, 57, w9);
-       w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
-       SHA512ROUND(g, h, a, b, c, d, e, f, 58, w10);
-       w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
-       SHA512ROUND(f, g, h, a, b, c, d, e, 59, w11);
-       w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
-       SHA512ROUND(e, f, g, h, a, b, c, d, 60, w12);
-       w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
-       SHA512ROUND(d, e, f, g, h, a, b, c, 61, w13);
-       w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
-       SHA512ROUND(c, d, e, f, g, h, a, b, 62, w14);
-       w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
-       SHA512ROUND(b, c, d, e, f, g, h, a, 63, w15);
-
-       w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
-       SHA512ROUND(a, b, c, d, e, f, g, h, 64, w0);
-       w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
-       SHA512ROUND(h, a, b, c, d, e, f, g, 65, w1);
-       w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
-       SHA512ROUND(g, h, a, b, c, d, e, f, 66, w2);
-       w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
-       SHA512ROUND(f, g, h, a, b, c, d, e, 67, w3);
-       w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
-       SHA512ROUND(e, f, g, h, a, b, c, d, 68, w4);
-       w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
-       SHA512ROUND(d, e, f, g, h, a, b, c, 69, w5);
-       w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
-       SHA512ROUND(c, d, e, f, g, h, a, b, 70, w6);
-       w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
-       SHA512ROUND(b, c, d, e, f, g, h, a, 71, w7);
-       w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
-       SHA512ROUND(a, b, c, d, e, f, g, h, 72, w8);
-       w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
-       SHA512ROUND(h, a, b, c, d, e, f, g, 73, w9);
-       w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
-       SHA512ROUND(g, h, a, b, c, d, e, f, 74, w10);
-       w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
-       SHA512ROUND(f, g, h, a, b, c, d, e, 75, w11);
-       w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
-       SHA512ROUND(e, f, g, h, a, b, c, d, 76, w12);
-       w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
-       SHA512ROUND(d, e, f, g, h, a, b, c, 77, w13);
-       w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
-       SHA512ROUND(c, d, e, f, g, h, a, b, 78, w14);
-       w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
-       SHA512ROUND(b, c, d, e, f, g, h, a, 79, w15);
-
-       ctx->state.s64[0] += a;
-       ctx->state.s64[1] += b;
-       ctx->state.s64[2] += c;
-       ctx->state.s64[3] += d;
-       ctx->state.s64[4] += e;
-       ctx->state.s64[5] += f;
-       ctx->state.s64[6] += g;
-       ctx->state.s64[7] += h;
-
-}
-#endif /* !__amd64 || !_KERNEL */
-
-
-/*
- * Encode()
- *
- * purpose: to convert a list of numbers from little endian to big endian
- *   input: uint8_t *  : place to store the converted big endian numbers
- *         uint32_t *  : place to get numbers to convert from
- *          size_t     : the length of the input in bytes
- *  output: void
- */
-
-static void
-Encode(uint8_t *_RESTRICT_KYWD output, uint32_t *_RESTRICT_KYWD input,
-    size_t len)
-{
-       size_t          i, j;
-
-#if    defined(__sparc)
-       if (IS_P2ALIGNED(output, sizeof (uint32_t))) {
-               for (i = 0, j = 0; j < len; i++, j += 4) {
-                       /* LINTED E_BAD_PTR_CAST_ALIGN */
-                       *((uint32_t *)(output + j)) = input[i];
-               }
-       } else {
-#endif /* little endian -- will work on big endian, but slowly */
-               for (i = 0, j = 0; j < len; i++, j += 4) {
-                       output[j]       = (input[i] >> 24) & 0xff;
-                       output[j + 1]   = (input[i] >> 16) & 0xff;
-                       output[j + 2]   = (input[i] >>  8) & 0xff;
-                       output[j + 3]   = input[i] & 0xff;
-               }
-#if    defined(__sparc)
-       }
-#endif
-}
-
-static void
-Encode64(uint8_t *_RESTRICT_KYWD output, uint64_t *_RESTRICT_KYWD input,
-    size_t len)
-{
-       size_t          i, j;
-
-#if    defined(__sparc)
-       if (IS_P2ALIGNED(output, sizeof (uint64_t))) {
-               for (i = 0, j = 0; j < len; i++, j += 8) {
-                       /* LINTED E_BAD_PTR_CAST_ALIGN */
-                       *((uint64_t *)(output + j)) = input[i];
-               }
-       } else {
-#endif /* little endian -- will work on big endian, but slowly */
-               for (i = 0, j = 0; j < len; i++, j += 8) {
-
-                       output[j]       = (input[i] >> 56) & 0xff;
-                       output[j + 1]   = (input[i] >> 48) & 0xff;
-                       output[j + 2]   = (input[i] >> 40) & 0xff;
-                       output[j + 3]   = (input[i] >> 32) & 0xff;
-                       output[j + 4]   = (input[i] >> 24) & 0xff;
-                       output[j + 5]   = (input[i] >> 16) & 0xff;
-                       output[j + 6]   = (input[i] >>  8) & 0xff;
-                       output[j + 7]   = input[i] & 0xff;
-               }
-#if    defined(__sparc)
-       }
-#endif
-}
-
-
-void
-SHA2Init(uint64_t mech, SHA2_CTX *ctx)
-{
-
-       switch (mech) {
-       case SHA256_MECH_INFO_TYPE:
-       case SHA256_HMAC_MECH_INFO_TYPE:
-       case SHA256_HMAC_GEN_MECH_INFO_TYPE:
-               ctx->state.s32[0] = 0x6a09e667U;
-               ctx->state.s32[1] = 0xbb67ae85U;
-               ctx->state.s32[2] = 0x3c6ef372U;
-               ctx->state.s32[3] = 0xa54ff53aU;
-               ctx->state.s32[4] = 0x510e527fU;
-               ctx->state.s32[5] = 0x9b05688cU;
-               ctx->state.s32[6] = 0x1f83d9abU;
-               ctx->state.s32[7] = 0x5be0cd19U;
-               break;
-       case SHA384_MECH_INFO_TYPE:
-       case SHA384_HMAC_MECH_INFO_TYPE:
-       case SHA384_HMAC_GEN_MECH_INFO_TYPE:
-               ctx->state.s64[0] = 0xcbbb9d5dc1059ed8ULL;
-               ctx->state.s64[1] = 0x629a292a367cd507ULL;
-               ctx->state.s64[2] = 0x9159015a3070dd17ULL;
-               ctx->state.s64[3] = 0x152fecd8f70e5939ULL;
-               ctx->state.s64[4] = 0x67332667ffc00b31ULL;
-               ctx->state.s64[5] = 0x8eb44a8768581511ULL;
-               ctx->state.s64[6] = 0xdb0c2e0d64f98fa7ULL;
-               ctx->state.s64[7] = 0x47b5481dbefa4fa4ULL;
-               break;
-       case SHA512_MECH_INFO_TYPE:
-       case SHA512_HMAC_MECH_INFO_TYPE:
-       case SHA512_HMAC_GEN_MECH_INFO_TYPE:
-               ctx->state.s64[0] = 0x6a09e667f3bcc908ULL;
-               ctx->state.s64[1] = 0xbb67ae8584caa73bULL;
-               ctx->state.s64[2] = 0x3c6ef372fe94f82bULL;
-               ctx->state.s64[3] = 0xa54ff53a5f1d36f1ULL;
-               ctx->state.s64[4] = 0x510e527fade682d1ULL;
-               ctx->state.s64[5] = 0x9b05688c2b3e6c1fULL;
-               ctx->state.s64[6] = 0x1f83d9abfb41bd6bULL;
-               ctx->state.s64[7] = 0x5be0cd19137e2179ULL;
-               break;
-       case SHA512_224_MECH_INFO_TYPE:
-               ctx->state.s64[0] = 0x8C3D37C819544DA2ULL;
-               ctx->state.s64[1] = 0x73E1996689DCD4D6ULL;
-               ctx->state.s64[2] = 0x1DFAB7AE32FF9C82ULL;
-               ctx->state.s64[3] = 0x679DD514582F9FCFULL;
-               ctx->state.s64[4] = 0x0F6D2B697BD44DA8ULL;
-               ctx->state.s64[5] = 0x77E36F7304C48942ULL;
-               ctx->state.s64[6] = 0x3F9D85A86A1D36C8ULL;
-               ctx->state.s64[7] = 0x1112E6AD91D692A1ULL;
-               break;
-       case SHA512_256_MECH_INFO_TYPE:
-               ctx->state.s64[0] = 0x22312194FC2BF72CULL;
-               ctx->state.s64[1] = 0x9F555FA3C84C64C2ULL;
-               ctx->state.s64[2] = 0x2393B86B6F53B151ULL;
-               ctx->state.s64[3] = 0x963877195940EABDULL;
-               ctx->state.s64[4] = 0x96283EE2A88EFFE3ULL;
-               ctx->state.s64[5] = 0xBE5E1E2553863992ULL;
-               ctx->state.s64[6] = 0x2B0199FC2C85B8AAULL;
-               ctx->state.s64[7] = 0x0EB72DDC81C52CA2ULL;
-               break;
-#ifdef _KERNEL
-       default:
-               cmn_err(CE_PANIC,
-                   "sha2_init: failed to find a supported algorithm: 0x%x",
-                   (uint32_t)mech);
-
-#endif /* _KERNEL */
-       }
-
-       ctx->algotype = (uint32_t)mech;
-       ctx->count.c64[0] = ctx->count.c64[1] = 0;
-}
-
-#ifndef _KERNEL
-
-// #pragma inline(SHA256Init, SHA384Init, SHA512Init)
-void
-SHA256Init(SHA256_CTX *ctx)
-{
-       SHA2Init(SHA256, ctx);
-}
-
-void
-SHA384Init(SHA384_CTX *ctx)
-{
-       SHA2Init(SHA384, ctx);
-}
-
-void
-SHA512Init(SHA512_CTX *ctx)
-{
-       SHA2Init(SHA512, ctx);
-}
-
-#endif /* _KERNEL */
-
-/*
- * SHA2Update()
- *
- * purpose: continues an sha2 digest operation, using the message block
- *          to update the context.
- *   input: SHA2_CTX * : the context to update
- *          void *     : the message block
- *          size_t      : the length of the message block, in bytes
- *  output: void
- */
-
-void
-SHA2Update(SHA2_CTX *ctx, const void *inptr, size_t input_len)
-{
-       uint32_t        i, buf_index, buf_len, buf_limit;
-       const uint8_t   *input = inptr;
-       uint32_t        algotype = ctx->algotype;
-
-       /* check for noop */
-       if (input_len == 0)
-               return;
-
-       if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
-               buf_limit = 64;
-
-               /* compute number of bytes mod 64 */
-               buf_index = (ctx->count.c32[1] >> 3) & 0x3F;
-
-               /* update number of bits */
-               if ((ctx->count.c32[1] += (input_len << 3)) < (input_len << 3))
-                       ctx->count.c32[0]++;
-
-               ctx->count.c32[0] += (input_len >> 29);
-
-       } else {
-               buf_limit = 128;
-
-               /* compute number of bytes mod 128 */
-               buf_index = (ctx->count.c64[1] >> 3) & 0x7F;
-
-               /* update number of bits */
-               if ((ctx->count.c64[1] += (input_len << 3)) < (input_len << 3))
-                       ctx->count.c64[0]++;
-
-               ctx->count.c64[0] += (input_len >> 29);
-       }
-
-       buf_len = buf_limit - buf_index;
-
-       /* transform as many times as possible */
-       i = 0;
-       if (input_len >= buf_len) {
-
-               /*
-                * general optimization:
-                *
-                * only do initial memcpy() and SHA2Transform() if
-                * buf_index != 0.  if buf_index == 0, we're just
-                * wasting our time doing the memcpy() since there
-                * wasn't any data left over from a previous call to
-                * SHA2Update().
-                */
-               if (buf_index) {
-                       memcpy(&ctx->buf_un.buf8[buf_index], input, buf_len);
-                       if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE)
-                               SHA256Transform(ctx, ctx->buf_un.buf8);
-                       else
-                               SHA512Transform(ctx, ctx->buf_un.buf8);
-
-                       i = buf_len;
-               }
-
-#if !defined(__amd64) || !defined(_KERNEL)
-               if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
-                       for (; i + buf_limit - 1 < input_len; i += buf_limit) {
-                               SHA256Transform(ctx, &input[i]);
-                       }
-               } else {
-                       for (; i + buf_limit - 1 < input_len; i += buf_limit) {
-                               SHA512Transform(ctx, &input[i]);
-                       }
-               }
-
-#else
-               uint32_t block_count;
-               if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
-                       block_count = (input_len - i) >> 6;
-                       if (block_count > 0) {
-                               SHA256TransformBlocks(ctx, &input[i],
-                                   block_count);
-                               i += block_count << 6;
-                       }
-               } else {
-                       block_count = (input_len - i) >> 7;
-                       if (block_count > 0) {
-                               SHA512TransformBlocks(ctx, &input[i],
-                                   block_count);
-                               i += block_count << 7;
-                       }
-               }
-#endif /* !__amd64 || !_KERNEL */
-
-               /*
-                * general optimization:
-                *
-                * if i and input_len are the same, return now instead
-                * of calling memcpy(), since the memcpy() in this case
-                * will be an expensive noop.
-                */
-
-               if (input_len == i)
-                       return;
-
-               buf_index = 0;
-       }
-
-       /* buffer remaining input */
-       memcpy(&ctx->buf_un.buf8[buf_index], &input[i], input_len - i);
-}
-
-
-/*
- * SHA2Final()
- *
- * purpose: ends an sha2 digest operation, finalizing the message digest and
- *          zeroing the context.
- *   input: uchar_t *  : a buffer to store the digest
- *                     : The function actually uses void* because many
- *                     : callers pass things other than uchar_t here.
- *          SHA2_CTX *  : the context to finalize, save, and zero
- *  output: void
- */
-
-void
-SHA2Final(void *digest, SHA2_CTX *ctx)
-{
-       uint8_t         bitcount_be[sizeof (ctx->count.c32)];
-       uint8_t         bitcount_be64[sizeof (ctx->count.c64)];
-       uint32_t        index;
-       uint32_t        algotype = ctx->algotype;
-
-       if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
-               index  = (ctx->count.c32[1] >> 3) & 0x3f;
-               Encode(bitcount_be, ctx->count.c32, sizeof (bitcount_be));
-               SHA2Update(ctx, PADDING, ((index < 56) ? 56 : 120) - index);
-               SHA2Update(ctx, bitcount_be, sizeof (bitcount_be));
-               Encode(digest, ctx->state.s32, sizeof (ctx->state.s32));
-       } else {
-               index  = (ctx->count.c64[1] >> 3) & 0x7f;
-               Encode64(bitcount_be64, ctx->count.c64,
-                   sizeof (bitcount_be64));
-               SHA2Update(ctx, PADDING, ((index < 112) ? 112 : 240) - index);
-               SHA2Update(ctx, bitcount_be64, sizeof (bitcount_be64));
-               if (algotype <= SHA384_HMAC_GEN_MECH_INFO_TYPE) {
-                       ctx->state.s64[6] = ctx->state.s64[7] = 0;
-                       Encode64(digest, ctx->state.s64,
-                           sizeof (uint64_t) * 6);
-               } else if (algotype == SHA512_224_MECH_INFO_TYPE) {
-                       uint8_t last[sizeof (uint64_t)];
-                       /*
-                        * Since SHA-512/224 doesn't align well to 64-bit
-                        * boundaries, we must do the encoding in three steps:
-                        * 1) encode the three 64-bit words that fit neatly
-                        * 2) encode the last 64-bit word to a temp buffer
-                        * 3) chop out the lower 32-bits from the temp buffer
-                        *    and append them to the digest
-                        */
-                       Encode64(digest, ctx->state.s64, sizeof (uint64_t) * 3);
-                       Encode64(last, &ctx->state.s64[3], sizeof (uint64_t));
-                       memcpy((uint8_t *)digest + 24, last, 4);
-               } else if (algotype == SHA512_256_MECH_INFO_TYPE) {
-                       Encode64(digest, ctx->state.s64, sizeof (uint64_t) * 4);
-               } else {
-                       Encode64(digest, ctx->state.s64,
-                           sizeof (ctx->state.s64));
-               }
-       }
-
-       /* zeroize sensitive information */
-       memset(ctx, 0, sizeof (*ctx));
-}
-
-#ifdef _KERNEL
-EXPORT_SYMBOL(SHA2Init);
-EXPORT_SYMBOL(SHA2Update);
-EXPORT_SYMBOL(SHA2Final);
-#endif
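
The removed sha2.c above provided the generic C implementation behind the Solaris-style streaming interface documented in its comments: SHA2Init() selects a mechanism, SHA2Update() absorbs message data in any number of calls, and SHA2Final() pads, appends the bit count, and emits the digest while zeroing the context. A minimal caller sketch, assuming the now-deleted <sha2/sha2.h> declarations (SHA2_CTX, the SHA512 mechanism constant) and the standard 64-byte SHA-512 digest:

#include <sha2/sha2.h>	/* removed by this commit; declares SHA2_CTX and SHA2* */

static void
example_sha512(const void *msg, size_t len, uint8_t digest[64])
{
	SHA2_CTX ctx;

	SHA2Init(SHA512, &ctx);		/* select the mechanism up front */
	SHA2Update(&ctx, msg, len);	/* may be called repeatedly */
	SHA2Final(digest, &ctx);	/* writes 64 bytes and zeroes ctx */
}

The truncated variants handled in SHA2Final() fall out of the same state: SHA-384 encodes only the first six 64-bit state words (48 bytes), and SHA-512/224 appends just four of the eight bytes of its fourth word (3 * 8 + 4 = 28 bytes).
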
diff --git a/module/icp/asm-x86_64/sha2/sha256_impl.S b/module/icp/asm-x86_64/sha2/sha256_impl.S
deleted file mode 100644 (file)
index f1fde51..0000000
+++ /dev/null
@@ -1,2090 +0,0 @@
-/*
- * ====================================================================
- * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
- * project. Rights for redistribution and usage in source and binary
- * forms are granted according to the OpenSSL license.
- * ====================================================================
- *
- * sha256/512_block procedure for x86_64.
- *
- * 40% improvement over compiler-generated code on Opteron. On EM64T
- * sha256 was observed to run >80% faster and sha512 - >40%. No magical
- * tricks, just straight implementation... I really wonder why gcc
- * [being armed with inline assembler] fails to generate as fast code.
- * The only thing which is cool about this module is that it's very
- * same instruction sequence used for both SHA-256 and SHA-512. In
- * former case the instructions operate on 32-bit operands, while in
- * latter - on 64-bit ones. All I had to do is to get one flavor right,
- * the other one passed the test right away:-)
- *
- * sha256_block runs in ~1005 cycles on Opteron, which gives you
- * asymptotic performance of 64*1000/1005=63.7MBps times CPU clock
- * frequency in GHz. sha512_block runs in ~1275 cycles, which results
- * in 128*1000/1275=100MBps per GHz. Is there room for improvement?
- * Well, if you compare it to IA-64 implementation, which maintains
- * X[16] in register bank[!], tends to 4 instructions per CPU clock
- * cycle and runs in 1003 cycles, 1275 is very good result for 3-way
- * issue Opteron pipeline and X[16] maintained in memory. So that *if*
- * there is a way to improve it, *then* the only way would be to try to
- * offload X[16] updates to SSE unit, but that would require "deeper"
- * loop unroll, which in turn would naturally cause size blow-up, not
- * to mention increased complexity! And once again, only *if* it's
- * actually possible to noticeably improve overall ILP, instruction
- * level parallelism, on a given CPU implementation in this case.
- *
- * Special note on Intel EM64T. While Opteron CPU exhibits perfect
- * performance ratio of 1.5 between 64- and 32-bit flavors [see above],
- * [currently available] EM64T CPUs apparently are far from it. On the
- * contrary, 64-bit version, sha512_block, is ~30% *slower* than 32-bit
- * sha256_block:-( This is presumably because 64-bit shifts/rotates
- * apparently are not atomic instructions, but implemented in microcode.
- */
-
-/*
- * OpenSolaris OS modifications
- *
- * Sun elects to use this software under the BSD license.
- *
- * This source originates from OpenSSL file sha512-x86_64.pl at
- * ftp://ftp.openssl.org/snapshot/openssl-0.9.8-stable-SNAP-20080131.tar.gz
- * (presumably for future OpenSSL release 0.9.8h), with these changes:
- *
- * 1. Added perl "use strict" and declared variables.
- *
- * 2. Added OpenSolaris ENTRY_NP/SET_SIZE macros from
- * /usr/include/sys/asm_linkage.h, .ident keywords, and lint(1B) guards.
- *
- * 3. Removed x86_64-xlate.pl script (not needed for as(1) or gas(1)
- * assemblers).  Replaced the .picmeup macro with assembler code.
- *
- * 4. Added 8 to $ctx, as OpenSolaris OS has an extra 4-byte field, "algotype",
- * at the beginning of SHA2_CTX (the next field is 8-byte aligned).
- */
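
Change 4 in the list above is the subtle one: SHA2_CTX starts with a 32-bit algotype field, and the state union that follows is 8-byte aligned, so the hash state begins 8 bytes into the structure. That is why the entry code below does "add $8,%rdi". A standalone layout illustration (not the removed header itself, just a sketch of what the comment describes, with the s32/s64 names taken from the deleted sha2.c):

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

typedef struct {
	uint32_t algotype;		/* the extra 4-byte OpenSolaris field */
	union {
		uint32_t s32[8];	/* SHA-256 working state */
		uint64_t s64[8];	/* SHA-384/512 working state */
	} state;			/* 8-byte aligned, so 4 padding bytes precede it */
	/* the count and buffer members of the real SHA2_CTX follow here */
} sha2_ctx_layout_sketch_t;

static_assert(offsetof(sha2_ctx_layout_sketch_t, state) == 8,
	"hash state begins at byte offset 8, matching 'add $8,%rdi' in the entry code below");
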
-
-/*
- * This file was generated by a perl script (sha512-x86_64.pl) that was
- * used to generate sha256 and sha512 variants from the same code base.
- * The comments from the original file have been pasted above.
- */
-
-#if defined(lint) || defined(__lint)
-#include <sys/stdint.h>
-#include <sha2/sha2.h>
-
-void
-SHA256TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num)
-{
-       (void) ctx, (void) in, (void) num;
-}
-
-
-#else
-#define _ASM
-#include <sys/asm_linkage.h>
-
-ENTRY_NP(SHA256TransformBlocks)
-.cfi_startproc
-       ENDBR
-       movq    %rsp, %rax
-.cfi_def_cfa_register %rax
-       push    %rbx
-.cfi_offset    %rbx,-16
-       push    %rbp
-.cfi_offset    %rbp,-24
-       push    %r12
-.cfi_offset    %r12,-32
-       push    %r13
-.cfi_offset    %r13,-40
-       push    %r14
-.cfi_offset    %r14,-48
-       push    %r15
-.cfi_offset    %r15,-56
-       mov     %rsp,%rbp               # copy %rsp
-       shl     $4,%rdx         # num*16
-       sub     $16*4+4*8,%rsp
-       lea     (%rsi,%rdx,4),%rdx      # inp+num*16*4
-       and     $-64,%rsp               # align stack frame
-       add     $8,%rdi         # Skip OpenSolaris field, "algotype"
-       mov     %rdi,16*4+0*8(%rsp)             # save ctx, 1st arg
-       mov     %rsi,16*4+1*8(%rsp)             # save inp, 2nd arg
-       mov     %rdx,16*4+2*8(%rsp)             # save end pointer, "3rd" arg
-       mov     %rbp,16*4+3*8(%rsp)             # save copy of %rsp
-# echo ".cfi_cfa_expression %rsp+88,deref,+56" |
-#      openssl/crypto/perlasm/x86_64-xlate.pl
-.cfi_escape    0x0f,0x06,0x77,0xd8,0x00,0x06,0x23,0x38
-
-       #.picmeup %rbp
-       # The .picmeup pseudo-directive, from perlasm/x86_64_xlate.pl, puts
-       # the address of the "next" instruction into the target register
-       # (%rbp).  This generates these 2 instructions:
-       lea     .Llea(%rip),%rbp
-       #nop    # .picmeup generates a nop for mod 8 alignment--not needed here
-
-.Llea:
-       lea     K256-.(%rbp),%rbp
-
-       mov     4*0(%rdi),%eax
-       mov     4*1(%rdi),%ebx
-       mov     4*2(%rdi),%ecx
-       mov     4*3(%rdi),%edx
-       mov     4*4(%rdi),%r8d
-       mov     4*5(%rdi),%r9d
-       mov     4*6(%rdi),%r10d
-       mov     4*7(%rdi),%r11d
-       jmp     .Lloop
-
-.balign        16
-.Lloop:
-       xor     %rdi,%rdi
-       mov     4*0(%rsi),%r12d
-       bswap   %r12d
-       mov     %r8d,%r13d
-       mov     %r8d,%r14d
-       mov     %r9d,%r15d
-
-       ror     $6,%r13d
-       ror     $11,%r14d
-       xor     %r10d,%r15d                     # f^g
-
-       xor     %r14d,%r13d
-       ror     $14,%r14d
-       and     %r8d,%r15d                      # (f^g)&e
-       mov     %r12d,0(%rsp)
-
-       xor     %r14d,%r13d                     # Sigma1(e)
-       xor     %r10d,%r15d                     # Ch(e,f,g)=((f^g)&e)^g
-       add     %r11d,%r12d                     # T1+=h
-
-       mov     %eax,%r11d
-       add     %r13d,%r12d                     # T1+=Sigma1(e)
-
-       add     %r15d,%r12d                     # T1+=Ch(e,f,g)
-       mov     %eax,%r13d
-       mov     %eax,%r14d
-
-       ror     $2,%r11d
-       ror     $13,%r13d
-       mov     %eax,%r15d
-       add     (%rbp,%rdi,4),%r12d     # T1+=K[round]
-
-       xor     %r13d,%r11d
-       ror     $9,%r13d
-       or      %ecx,%r14d                      # a|c
-
-       xor     %r13d,%r11d                     # h=Sigma0(a)
-       and     %ecx,%r15d                      # a&c
-       add     %r12d,%edx                      # d+=T1
-
-       and     %ebx,%r14d                      # (a|c)&b
-       add     %r12d,%r11d                     # h+=T1
-
-       or      %r15d,%r14d                     # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14d,%r11d                     # h+=Maj(a,b,c)
-       mov     4*1(%rsi),%r12d
-       bswap   %r12d
-       mov     %edx,%r13d
-       mov     %edx,%r14d
-       mov     %r8d,%r15d
-
-       ror     $6,%r13d
-       ror     $11,%r14d
-       xor     %r9d,%r15d                      # f^g
-
-       xor     %r14d,%r13d
-       ror     $14,%r14d
-       and     %edx,%r15d                      # (f^g)&e
-       mov     %r12d,4(%rsp)
-
-       xor     %r14d,%r13d                     # Sigma1(e)
-       xor     %r9d,%r15d                      # Ch(e,f,g)=((f^g)&e)^g
-       add     %r10d,%r12d                     # T1+=h
-
-       mov     %r11d,%r10d
-       add     %r13d,%r12d                     # T1+=Sigma1(e)
-
-       add     %r15d,%r12d                     # T1+=Ch(e,f,g)
-       mov     %r11d,%r13d
-       mov     %r11d,%r14d
-
-       ror     $2,%r10d
-       ror     $13,%r13d
-       mov     %r11d,%r15d
-       add     (%rbp,%rdi,4),%r12d     # T1+=K[round]
-
-       xor     %r13d,%r10d
-       ror     $9,%r13d
-       or      %ebx,%r14d                      # a|c
-
-       xor     %r13d,%r10d                     # h=Sigma0(a)
-       and     %ebx,%r15d                      # a&c
-       add     %r12d,%ecx                      # d+=T1
-
-       and     %eax,%r14d                      # (a|c)&b
-       add     %r12d,%r10d                     # h+=T1
-
-       or      %r15d,%r14d                     # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14d,%r10d                     # h+=Maj(a,b,c)
-       mov     4*2(%rsi),%r12d
-       bswap   %r12d
-       mov     %ecx,%r13d
-       mov     %ecx,%r14d
-       mov     %edx,%r15d
-
-       ror     $6,%r13d
-       ror     $11,%r14d
-       xor     %r8d,%r15d                      # f^g
-
-       xor     %r14d,%r13d
-       ror     $14,%r14d
-       and     %ecx,%r15d                      # (f^g)&e
-       mov     %r12d,8(%rsp)
-
-       xor     %r14d,%r13d                     # Sigma1(e)
-       xor     %r8d,%r15d                      # Ch(e,f,g)=((f^g)&e)^g
-       add     %r9d,%r12d                      # T1+=h
-
-       mov     %r10d,%r9d
-       add     %r13d,%r12d                     # T1+=Sigma1(e)
-
-       add     %r15d,%r12d                     # T1+=Ch(e,f,g)
-       mov     %r10d,%r13d
-       mov     %r10d,%r14d
-
-       ror     $2,%r9d
-       ror     $13,%r13d
-       mov     %r10d,%r15d
-       add     (%rbp,%rdi,4),%r12d     # T1+=K[round]
-
-       xor     %r13d,%r9d
-       ror     $9,%r13d
-       or      %eax,%r14d                      # a|c
-
-       xor     %r13d,%r9d                      # h=Sigma0(a)
-       and     %eax,%r15d                      # a&c
-       add     %r12d,%ebx                      # d+=T1
-
-       and     %r11d,%r14d                     # (a|c)&b
-       add     %r12d,%r9d                      # h+=T1
-
-       or      %r15d,%r14d                     # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14d,%r9d                      # h+=Maj(a,b,c)
-       mov     4*3(%rsi),%r12d
-       bswap   %r12d
-       mov     %ebx,%r13d
-       mov     %ebx,%r14d
-       mov     %ecx,%r15d
-
-       ror     $6,%r13d
-       ror     $11,%r14d
-       xor     %edx,%r15d                      # f^g
-
-       xor     %r14d,%r13d
-       ror     $14,%r14d
-       and     %ebx,%r15d                      # (f^g)&e
-       mov     %r12d,12(%rsp)
-
-       xor     %r14d,%r13d                     # Sigma1(e)
-       xor     %edx,%r15d                      # Ch(e,f,g)=((f^g)&e)^g
-       add     %r8d,%r12d                      # T1+=h
-
-       mov     %r9d,%r8d
-       add     %r13d,%r12d                     # T1+=Sigma1(e)
-
-       add     %r15d,%r12d                     # T1+=Ch(e,f,g)
-       mov     %r9d,%r13d
-       mov     %r9d,%r14d
-
-       ror     $2,%r8d
-       ror     $13,%r13d
-       mov     %r9d,%r15d
-       add     (%rbp,%rdi,4),%r12d     # T1+=K[round]
-
-       xor     %r13d,%r8d
-       ror     $9,%r13d
-       or      %r11d,%r14d                     # a|c
-
-       xor     %r13d,%r8d                      # h=Sigma0(a)
-       and     %r11d,%r15d                     # a&c
-       add     %r12d,%eax                      # d+=T1
-
-       and     %r10d,%r14d                     # (a|c)&b
-       add     %r12d,%r8d                      # h+=T1
-
-       or      %r15d,%r14d                     # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14d,%r8d                      # h+=Maj(a,b,c)
-       mov     4*4(%rsi),%r12d
-       bswap   %r12d
-       mov     %eax,%r13d
-       mov     %eax,%r14d
-       mov     %ebx,%r15d
-
-       ror     $6,%r13d
-       ror     $11,%r14d
-       xor     %ecx,%r15d                      # f^g
-
-       xor     %r14d,%r13d
-       ror     $14,%r14d
-       and     %eax,%r15d                      # (f^g)&e
-       mov     %r12d,16(%rsp)
-
-       xor     %r14d,%r13d                     # Sigma1(e)
-       xor     %ecx,%r15d                      # Ch(e,f,g)=((f^g)&e)^g
-       add     %edx,%r12d                      # T1+=h
-
-       mov     %r8d,%edx
-       add     %r13d,%r12d                     # T1+=Sigma1(e)
-
-       add     %r15d,%r12d                     # T1+=Ch(e,f,g)
-       mov     %r8d,%r13d
-       mov     %r8d,%r14d
-
-       ror     $2,%edx
-       ror     $13,%r13d
-       mov     %r8d,%r15d
-       add     (%rbp,%rdi,4),%r12d     # T1+=K[round]
-
-       xor     %r13d,%edx
-       ror     $9,%r13d
-       or      %r10d,%r14d                     # a|c
-
-       xor     %r13d,%edx                      # h=Sigma0(a)
-       and     %r10d,%r15d                     # a&c
-       add     %r12d,%r11d                     # d+=T1
-
-       and     %r9d,%r14d                      # (a|c)&b
-       add     %r12d,%edx                      # h+=T1
-
-       or      %r15d,%r14d                     # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14d,%edx                      # h+=Maj(a,b,c)
-       mov     4*5(%rsi),%r12d
-       bswap   %r12d
-       mov     %r11d,%r13d
-       mov     %r11d,%r14d
-       mov     %eax,%r15d
-
-       ror     $6,%r13d
-       ror     $11,%r14d
-       xor     %ebx,%r15d                      # f^g
-
-       xor     %r14d,%r13d
-       ror     $14,%r14d
-       and     %r11d,%r15d                     # (f^g)&e
-       mov     %r12d,20(%rsp)
-
-       xor     %r14d,%r13d                     # Sigma1(e)
-       xor     %ebx,%r15d                      # Ch(e,f,g)=((f^g)&e)^g
-       add     %ecx,%r12d                      # T1+=h
-
-       mov     %edx,%ecx
-       add     %r13d,%r12d                     # T1+=Sigma1(e)
-
-       add     %r15d,%r12d                     # T1+=Ch(e,f,g)
-       mov     %edx,%r13d
-       mov     %edx,%r14d
-
-       ror     $2,%ecx
-       ror     $13,%r13d
-       mov     %edx,%r15d
-       add     (%rbp,%rdi,4),%r12d     # T1+=K[round]
-
-       xor     %r13d,%ecx
-       ror     $9,%r13d
-       or      %r9d,%r14d                      # a|c
-
-       xor     %r13d,%ecx                      # h=Sigma0(a)
-       and     %r9d,%r15d                      # a&c
-       add     %r12d,%r10d                     # d+=T1
-
-       and     %r8d,%r14d                      # (a|c)&b
-       add     %r12d,%ecx                      # h+=T1
-
-       or      %r15d,%r14d                     # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14d,%ecx                      # h+=Maj(a,b,c)
-       mov     4*6(%rsi),%r12d
-       bswap   %r12d
-       mov     %r10d,%r13d
-       mov     %r10d,%r14d
-       mov     %r11d,%r15d
-
-       ror     $6,%r13d
-       ror     $11,%r14d
-       xor     %eax,%r15d                      # f^g
-
-       xor     %r14d,%r13d
-       ror     $14,%r14d
-       and     %r10d,%r15d                     # (f^g)&e
-       mov     %r12d,24(%rsp)
-
-       xor     %r14d,%r13d                     # Sigma1(e)
-       xor     %eax,%r15d                      # Ch(e,f,g)=((f^g)&e)^g
-       add     %ebx,%r12d                      # T1+=h
-
-       mov     %ecx,%ebx
-       add     %r13d,%r12d                     # T1+=Sigma1(e)
-
-       add     %r15d,%r12d                     # T1+=Ch(e,f,g)
-       mov     %ecx,%r13d
-       mov     %ecx,%r14d
-
-       ror     $2,%ebx
-       ror     $13,%r13d
-       mov     %ecx,%r15d
-       add     (%rbp,%rdi,4),%r12d     # T1+=K[round]
-
-       xor     %r13d,%ebx
-       ror     $9,%r13d
-       or      %r8d,%r14d                      # a|c
-
-       xor     %r13d,%ebx                      # h=Sigma0(a)
-       and     %r8d,%r15d                      # a&c
-       add     %r12d,%r9d                      # d+=T1
-
-       and     %edx,%r14d                      # (a|c)&b
-       add     %r12d,%ebx                      # h+=T1
-
-       or      %r15d,%r14d                     # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14d,%ebx                      # h+=Maj(a,b,c)
-       mov     4*7(%rsi),%r12d
-       bswap   %r12d
-       mov     %r9d,%r13d
-       mov     %r9d,%r14d
-       mov     %r10d,%r15d
-
-       ror     $6,%r13d
-       ror     $11,%r14d
-       xor     %r11d,%r15d                     # f^g
-
-       xor     %r14d,%r13d
-       ror     $14,%r14d
-       and     %r9d,%r15d                      # (f^g)&e
-       mov     %r12d,28(%rsp)
-
-       xor     %r14d,%r13d                     # Sigma1(e)
-       xor     %r11d,%r15d                     # Ch(e,f,g)=((f^g)&e)^g
-       add     %eax,%r12d                      # T1+=h
-
-       mov     %ebx,%eax
-       add     %r13d,%r12d                     # T1+=Sigma1(e)
-
-       add     %r15d,%r12d                     # T1+=Ch(e,f,g)
-       mov     %ebx,%r13d
-       mov     %ebx,%r14d
-
-       ror     $2,%eax
-       ror     $13,%r13d
-       mov     %ebx,%r15d
-       add     (%rbp,%rdi,4),%r12d     # T1+=K[round]
-
-       xor     %r13d,%eax
-       ror     $9,%r13d
-       or      %edx,%r14d                      # a|c
-
-       xor     %r13d,%eax                      # h=Sigma0(a)
-       and     %edx,%r15d                      # a&c
-       add     %r12d,%r8d                      # d+=T1
-
-       and     %ecx,%r14d                      # (a|c)&b
-       add     %r12d,%eax                      # h+=T1
-
-       or      %r15d,%r14d                     # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14d,%eax                      # h+=Maj(a,b,c)
-       mov     4*8(%rsi),%r12d
-       bswap   %r12d
-       mov     %r8d,%r13d
-       mov     %r8d,%r14d
-       mov     %r9d,%r15d
-
-       ror     $6,%r13d
-       ror     $11,%r14d
-       xor     %r10d,%r15d                     # f^g
-
-       xor     %r14d,%r13d
-       ror     $14,%r14d
-       and     %r8d,%r15d                      # (f^g)&e
-       mov     %r12d,32(%rsp)
-
-       xor     %r14d,%r13d                     # Sigma1(e)
-       xor     %r10d,%r15d                     # Ch(e,f,g)=((f^g)&e)^g
-       add     %r11d,%r12d                     # T1+=h
-
-       mov     %eax,%r11d
-       add     %r13d,%r12d                     # T1+=Sigma1(e)
-
-       add     %r15d,%r12d                     # T1+=Ch(e,f,g)
-       mov     %eax,%r13d
-       mov     %eax,%r14d
-
-       ror     $2,%r11d
-       ror     $13,%r13d
-       mov     %eax,%r15d
-       add     (%rbp,%rdi,4),%r12d     # T1+=K[round]
-
-       xor     %r13d,%r11d
-       ror     $9,%r13d
-       or      %ecx,%r14d                      # a|c
-
-       xor     %r13d,%r11d                     # h=Sigma0(a)
-       and     %ecx,%r15d                      # a&c
-       add     %r12d,%edx                      # d+=T1
-
-       and     %ebx,%r14d                      # (a|c)&b
-       add     %r12d,%r11d                     # h+=T1
-
-       or      %r15d,%r14d                     # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14d,%r11d                     # h+=Maj(a,b,c)
-       mov     4*9(%rsi),%r12d
-       bswap   %r12d
-       mov     %edx,%r13d
-       mov     %edx,%r14d
-       mov     %r8d,%r15d
-
-       ror     $6,%r13d
-       ror     $11,%r14d
-       xor     %r9d,%r15d                      # f^g
-
-       xor     %r14d,%r13d
-       ror     $14,%r14d
-       and     %edx,%r15d                      # (f^g)&e
-       mov     %r12d,36(%rsp)
-
-       xor     %r14d,%r13d                     # Sigma1(e)
-       xor     %r9d,%r15d                      # Ch(e,f,g)=((f^g)&e)^g
-       add     %r10d,%r12d                     # T1+=h
-
-       mov     %r11d,%r10d
-       add     %r13d,%r12d                     # T1+=Sigma1(e)
-
-       add     %r15d,%r12d                     # T1+=Ch(e,f,g)
-       mov     %r11d,%r13d
-       mov     %r11d,%r14d
-
-       ror     $2,%r10d
-       ror     $13,%r13d
-       mov     %r11d,%r15d
-       add     (%rbp,%rdi,4),%r12d     # T1+=K[round]
-
-       xor     %r13d,%r10d
-       ror     $9,%r13d
-       or      %ebx,%r14d                      # a|c
-
-       xor     %r13d,%r10d                     # h=Sigma0(a)
-       and     %ebx,%r15d                      # a&c
-       add     %r12d,%ecx                      # d+=T1
-
-       and     %eax,%r14d                      # (a|c)&b
-       add     %r12d,%r10d                     # h+=T1
-
-       or      %r15d,%r14d                     # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14d,%r10d                     # h+=Maj(a,b,c)
-       mov     4*10(%rsi),%r12d
-       bswap   %r12d
-       mov     %ecx,%r13d
-       mov     %ecx,%r14d
-       mov     %edx,%r15d
-
-       ror     $6,%r13d
-       ror     $11,%r14d
-       xor     %r8d,%r15d                      # f^g
-
-       xor     %r14d,%r13d
-       ror     $14,%r14d
-       and     %ecx,%r15d                      # (f^g)&e
-       mov     %r12d,40(%rsp)
-
-       xor     %r14d,%r13d                     # Sigma1(e)
-       xor     %r8d,%r15d                      # Ch(e,f,g)=((f^g)&e)^g
-       add     %r9d,%r12d                      # T1+=h
-
-       mov     %r10d,%r9d
-       add     %r13d,%r12d                     # T1+=Sigma1(e)
-
-       add     %r15d,%r12d                     # T1+=Ch(e,f,g)
-       mov     %r10d,%r13d
-       mov     %r10d,%r14d
-
-       ror     $2,%r9d
-       ror     $13,%r13d
-       mov     %r10d,%r15d
-       add     (%rbp,%rdi,4),%r12d     # T1+=K[round]
-
-       xor     %r13d,%r9d
-       ror     $9,%r13d
-       or      %eax,%r14d                      # a|c
-
-       xor     %r13d,%r9d                      # h=Sigma0(a)
-       and     %eax,%r15d                      # a&c
-       add     %r12d,%ebx                      # d+=T1
-
-       and     %r11d,%r14d                     # (a|c)&b
-       add     %r12d,%r9d                      # h+=T1
-
-       or      %r15d,%r14d                     # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14d,%r9d                      # h+=Maj(a,b,c)
-       mov     4*11(%rsi),%r12d
-       bswap   %r12d
-       mov     %ebx,%r13d
-       mov     %ebx,%r14d
-       mov     %ecx,%r15d
-
-       ror     $6,%r13d
-       ror     $11,%r14d
-       xor     %edx,%r15d                      # f^g
-
-       xor     %r14d,%r13d
-       ror     $14,%r14d
-       and     %ebx,%r15d                      # (f^g)&e
-       mov     %r12d,44(%rsp)
-
-       xor     %r14d,%r13d                     # Sigma1(e)
-       xor     %edx,%r15d                      # Ch(e,f,g)=((f^g)&e)^g
-       add     %r8d,%r12d                      # T1+=h
-
-       mov     %r9d,%r8d
-       add     %r13d,%r12d                     # T1+=Sigma1(e)
-
-       add     %r15d,%r12d                     # T1+=Ch(e,f,g)
-       mov     %r9d,%r13d
-       mov     %r9d,%r14d
-
-       ror     $2,%r8d
-       ror     $13,%r13d
-       mov     %r9d,%r15d
-       add     (%rbp,%rdi,4),%r12d     # T1+=K[round]
-
-       xor     %r13d,%r8d
-       ror     $9,%r13d
-       or      %r11d,%r14d                     # a|c
-
-       xor     %r13d,%r8d                      # h=Sigma0(a)
-       and     %r11d,%r15d                     # a&c
-       add     %r12d,%eax                      # d+=T1
-
-       and     %r10d,%r14d                     # (a|c)&b
-       add     %r12d,%r8d                      # h+=T1
-
-       or      %r15d,%r14d                     # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14d,%r8d                      # h+=Maj(a,b,c)
-       mov     4*12(%rsi),%r12d
-       bswap   %r12d
-       mov     %eax,%r13d
-       mov     %eax,%r14d
-       mov     %ebx,%r15d
-
-       ror     $6,%r13d
-       ror     $11,%r14d
-       xor     %ecx,%r15d                      # f^g
-
-       xor     %r14d,%r13d
-       ror     $14,%r14d
-       and     %eax,%r15d                      # (f^g)&e
-       mov     %r12d,48(%rsp)
-
-       xor     %r14d,%r13d                     # Sigma1(e)
-       xor     %ecx,%r15d                      # Ch(e,f,g)=((f^g)&e)^g
-       add     %edx,%r12d                      # T1+=h
-
-       mov     %r8d,%edx
-       add     %r13d,%r12d                     # T1+=Sigma1(e)
-
-       add     %r15d,%r12d                     # T1+=Ch(e,f,g)
-       mov     %r8d,%r13d
-       mov     %r8d,%r14d
-
-       ror     $2,%edx
-       ror     $13,%r13d
-       mov     %r8d,%r15d
-       add     (%rbp,%rdi,4),%r12d     # T1+=K[round]
-
-       xor     %r13d,%edx
-       ror     $9,%r13d
-       or      %r10d,%r14d                     # a|c
-
-       xor     %r13d,%edx                      # h=Sigma0(a)
-       and     %r10d,%r15d                     # a&c
-       add     %r12d,%r11d                     # d+=T1
-
-       and     %r9d,%r14d                      # (a|c)&b
-       add     %r12d,%edx                      # h+=T1
-
-       or      %r15d,%r14d                     # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14d,%edx                      # h+=Maj(a,b,c)
-       mov     4*13(%rsi),%r12d
-       bswap   %r12d
-       mov     %r11d,%r13d
-       mov     %r11d,%r14d
-       mov     %eax,%r15d
-
-       ror     $6,%r13d
-       ror     $11,%r14d
-       xor     %ebx,%r15d                      # f^g
-
-       xor     %r14d,%r13d
-       ror     $14,%r14d
-       and     %r11d,%r15d                     # (f^g)&e
-       mov     %r12d,52(%rsp)
-
-       xor     %r14d,%r13d                     # Sigma1(e)
-       xor     %ebx,%r15d                      # Ch(e,f,g)=((f^g)&e)^g
-       add     %ecx,%r12d                      # T1+=h
-
-       mov     %edx,%ecx
-       add     %r13d,%r12d                     # T1+=Sigma1(e)
-
-       add     %r15d,%r12d                     # T1+=Ch(e,f,g)
-       mov     %edx,%r13d
-       mov     %edx,%r14d
-
-       ror     $2,%ecx
-       ror     $13,%r13d
-       mov     %edx,%r15d
-       add     (%rbp,%rdi,4),%r12d     # T1+=K[round]
-
-       xor     %r13d,%ecx
-       ror     $9,%r13d
-       or      %r9d,%r14d                      # a|c
-
-       xor     %r13d,%ecx                      # h=Sigma0(a)
-       and     %r9d,%r15d                      # a&c
-       add     %r12d,%r10d                     # d+=T1
-
-       and     %r8d,%r14d                      # (a|c)&b
-       add     %r12d,%ecx                      # h+=T1
-
-       or      %r15d,%r14d                     # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14d,%ecx                      # h+=Maj(a,b,c)
-       mov     4*14(%rsi),%r12d
-       bswap   %r12d
-       mov     %r10d,%r13d
-       mov     %r10d,%r14d
-       mov     %r11d,%r15d
-
-       ror     $6,%r13d
-       ror     $11,%r14d
-       xor     %eax,%r15d                      # f^g
-
-       xor     %r14d,%r13d
-       ror     $14,%r14d
-       and     %r10d,%r15d                     # (f^g)&e
-       mov     %r12d,56(%rsp)
-
-       xor     %r14d,%r13d                     # Sigma1(e)
-       xor     %eax,%r15d                      # Ch(e,f,g)=((f^g)&e)^g
-       add     %ebx,%r12d                      # T1+=h
-
-       mov     %ecx,%ebx
-       add     %r13d,%r12d                     # T1+=Sigma1(e)
-
-       add     %r15d,%r12d                     # T1+=Ch(e,f,g)
-       mov     %ecx,%r13d
-       mov     %ecx,%r14d
-
-       ror     $2,%ebx
-       ror     $13,%r13d
-       mov     %ecx,%r15d
-       add     (%rbp,%rdi,4),%r12d     # T1+=K[round]
-
-       xor     %r13d,%ebx
-       ror     $9,%r13d
-       or      %r8d,%r14d                      # a|c
-
-       xor     %r13d,%ebx                      # h=Sigma0(a)
-       and     %r8d,%r15d                      # a&c
-       add     %r12d,%r9d                      # d+=T1
-
-       and     %edx,%r14d                      # (a|c)&b
-       add     %r12d,%ebx                      # h+=T1
-
-       or      %r15d,%r14d                     # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14d,%ebx                      # h+=Maj(a,b,c)
-       mov     4*15(%rsi),%r12d
-       bswap   %r12d
-       mov     %r9d,%r13d
-       mov     %r9d,%r14d
-       mov     %r10d,%r15d
-
-       ror     $6,%r13d
-       ror     $11,%r14d
-       xor     %r11d,%r15d                     # f^g
-
-       xor     %r14d,%r13d
-       ror     $14,%r14d
-       and     %r9d,%r15d                      # (f^g)&e
-       mov     %r12d,60(%rsp)
-
-       xor     %r14d,%r13d                     # Sigma1(e)
-       xor     %r11d,%r15d                     # Ch(e,f,g)=((f^g)&e)^g
-       add     %eax,%r12d                      # T1+=h
-
-       mov     %ebx,%eax
-       add     %r13d,%r12d                     # T1+=Sigma1(e)
-
-       add     %r15d,%r12d                     # T1+=Ch(e,f,g)
-       mov     %ebx,%r13d
-       mov     %ebx,%r14d
-
-       ror     $2,%eax
-       ror     $13,%r13d
-       mov     %ebx,%r15d
-       add     (%rbp,%rdi,4),%r12d     # T1+=K[round]
-
-       xor     %r13d,%eax
-       ror     $9,%r13d
-       or      %edx,%r14d                      # a|c
-
-       xor     %r13d,%eax                      # h=Sigma0(a)
-       and     %edx,%r15d                      # a&c
-       add     %r12d,%r8d                      # d+=T1
-
-       and     %ecx,%r14d                      # (a|c)&b
-       add     %r12d,%eax                      # h+=T1
-
-       or      %r15d,%r14d                     # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14d,%eax                      # h+=Maj(a,b,c)
-       jmp     .Lrounds_16_xx
-.balign        16
-.Lrounds_16_xx:
-       mov     4(%rsp),%r13d
-       mov     56(%rsp),%r12d
-
-       mov     %r13d,%r15d
-
-       shr     $3,%r13d
-       ror     $7,%r15d
-
-       xor     %r15d,%r13d
-       ror     $11,%r15d
-
-       xor     %r15d,%r13d                     # sigma0(X[(i+1)&0xf])
-       mov     %r12d,%r14d
-
-       shr     $10,%r12d
-       ror     $17,%r14d
-
-       xor     %r14d,%r12d
-       ror     $2,%r14d
-
-       xor     %r14d,%r12d                     # sigma1(X[(i+14)&0xf])
-
-       add     %r13d,%r12d
-
-       add     36(%rsp),%r12d
-
-       add     0(%rsp),%r12d
-       mov     %r8d,%r13d
-       mov     %r8d,%r14d
-       mov     %r9d,%r15d
-
-       ror     $6,%r13d
-       ror     $11,%r14d
-       xor     %r10d,%r15d                     # f^g
-
-       xor     %r14d,%r13d
-       ror     $14,%r14d
-       and     %r8d,%r15d                      # (f^g)&e
-       mov     %r12d,0(%rsp)
-
-       xor     %r14d,%r13d                     # Sigma1(e)
-       xor     %r10d,%r15d                     # Ch(e,f,g)=((f^g)&e)^g
-       add     %r11d,%r12d                     # T1+=h
-
-       mov     %eax,%r11d
-       add     %r13d,%r12d                     # T1+=Sigma1(e)
-
-       add     %r15d,%r12d                     # T1+=Ch(e,f,g)
-       mov     %eax,%r13d
-       mov     %eax,%r14d
-
-       ror     $2,%r11d
-       ror     $13,%r13d
-       mov     %eax,%r15d
-       add     (%rbp,%rdi,4),%r12d     # T1+=K[round]
-
-       xor     %r13d,%r11d
-       ror     $9,%r13d
-       or      %ecx,%r14d                      # a|c
-
-       xor     %r13d,%r11d                     # h=Sigma0(a)
-       and     %ecx,%r15d                      # a&c
-       add     %r12d,%edx                      # d+=T1
-
-       and     %ebx,%r14d                      # (a|c)&b
-       add     %r12d,%r11d                     # h+=T1
-
-       or      %r15d,%r14d                     # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14d,%r11d                     # h+=Maj(a,b,c)
-       mov     8(%rsp),%r13d
-       mov     60(%rsp),%r12d
-
-       mov     %r13d,%r15d
-
-       shr     $3,%r13d
-       ror     $7,%r15d
-
-       xor     %r15d,%r13d
-       ror     $11,%r15d
-
-       xor     %r15d,%r13d                     # sigma0(X[(i+1)&0xf])
-       mov     %r12d,%r14d
-
-       shr     $10,%r12d
-       ror     $17,%r14d
-
-       xor     %r14d,%r12d
-       ror     $2,%r14d
-
-       xor     %r14d,%r12d                     # sigma1(X[(i+14)&0xf])
-
-       add     %r13d,%r12d
-
-       add     40(%rsp),%r12d
-
-       add     4(%rsp),%r12d
-       mov     %edx,%r13d
-       mov     %edx,%r14d
-       mov     %r8d,%r15d
-
-       ror     $6,%r13d
-       ror     $11,%r14d
-       xor     %r9d,%r15d                      # f^g
-
-       xor     %r14d,%r13d
-       ror     $14,%r14d
-       and     %edx,%r15d                      # (f^g)&e
-       mov     %r12d,4(%rsp)
-
-       xor     %r14d,%r13d                     # Sigma1(e)
-       xor     %r9d,%r15d                      # Ch(e,f,g)=((f^g)&e)^g
-       add     %r10d,%r12d                     # T1+=h
-
-       mov     %r11d,%r10d
-       add     %r13d,%r12d                     # T1+=Sigma1(e)
-
-       add     %r15d,%r12d                     # T1+=Ch(e,f,g)
-       mov     %r11d,%r13d
-       mov     %r11d,%r14d
-
-       ror     $2,%r10d
-       ror     $13,%r13d
-       mov     %r11d,%r15d
-       add     (%rbp,%rdi,4),%r12d     # T1+=K[round]
-
-       xor     %r13d,%r10d
-       ror     $9,%r13d
-       or      %ebx,%r14d                      # a|c
-
-       xor     %r13d,%r10d                     # h=Sigma0(a)
-       and     %ebx,%r15d                      # a&c
-       add     %r12d,%ecx                      # d+=T1
-
-       and     %eax,%r14d                      # (a|c)&b
-       add     %r12d,%r10d                     # h+=T1
-
-       or      %r15d,%r14d                     # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14d,%r10d                     # h+=Maj(a,b,c)
-       mov     12(%rsp),%r13d
-       mov     0(%rsp),%r12d
-
-       mov     %r13d,%r15d
-
-       shr     $3,%r13d
-       ror     $7,%r15d
-
-       xor     %r15d,%r13d
-       ror     $11,%r15d
-
-       xor     %r15d,%r13d                     # sigma0(X[(i+1)&0xf])
-       mov     %r12d,%r14d
-
-       shr     $10,%r12d
-       ror     $17,%r14d
-
-       xor     %r14d,%r12d
-       ror     $2,%r14d
-
-       xor     %r14d,%r12d                     # sigma1(X[(i+14)&0xf])
-
-       add     %r13d,%r12d
-
-       add     44(%rsp),%r12d
-
-       add     8(%rsp),%r12d
-       mov     %ecx,%r13d
-       mov     %ecx,%r14d
-       mov     %edx,%r15d
-
-       ror     $6,%r13d
-       ror     $11,%r14d
-       xor     %r8d,%r15d                      # f^g
-
-       xor     %r14d,%r13d
-       ror     $14,%r14d
-       and     %ecx,%r15d                      # (f^g)&e
-       mov     %r12d,8(%rsp)
-
-       xor     %r14d,%r13d                     # Sigma1(e)
-       xor     %r8d,%r15d                      # Ch(e,f,g)=((f^g)&e)^g
-       add     %r9d,%r12d                      # T1+=h
-
-       mov     %r10d,%r9d
-       add     %r13d,%r12d                     # T1+=Sigma1(e)
-
-       add     %r15d,%r12d                     # T1+=Ch(e,f,g)
-       mov     %r10d,%r13d
-       mov     %r10d,%r14d
-
-       ror     $2,%r9d
-       ror     $13,%r13d
-       mov     %r10d,%r15d
-       add     (%rbp,%rdi,4),%r12d     # T1+=K[round]
-
-       xor     %r13d,%r9d
-       ror     $9,%r13d
-       or      %eax,%r14d                      # a|c
-
-       xor     %r13d,%r9d                      # h=Sigma0(a)
-       and     %eax,%r15d                      # a&c
-       add     %r12d,%ebx                      # d+=T1
-
-       and     %r11d,%r14d                     # (a|c)&b
-       add     %r12d,%r9d                      # h+=T1
-
-       or      %r15d,%r14d                     # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14d,%r9d                      # h+=Maj(a,b,c)
-       mov     16(%rsp),%r13d
-       mov     4(%rsp),%r12d
-
-       mov     %r13d,%r15d
-
-       shr     $3,%r13d
-       ror     $7,%r15d
-
-       xor     %r15d,%r13d
-       ror     $11,%r15d
-
-       xor     %r15d,%r13d                     # sigma0(X[(i+1)&0xf])
-       mov     %r12d,%r14d
-
-       shr     $10,%r12d
-       ror     $17,%r14d
-
-       xor     %r14d,%r12d
-       ror     $2,%r14d
-
-       xor     %r14d,%r12d                     # sigma1(X[(i+14)&0xf])
-
-       add     %r13d,%r12d
-
-       add     48(%rsp),%r12d
-
-       add     12(%rsp),%r12d
-       mov     %ebx,%r13d
-       mov     %ebx,%r14d
-       mov     %ecx,%r15d
-
-       ror     $6,%r13d
-       ror     $11,%r14d
-       xor     %edx,%r15d                      # f^g
-
-       xor     %r14d,%r13d
-       ror     $14,%r14d
-       and     %ebx,%r15d                      # (f^g)&e
-       mov     %r12d,12(%rsp)
-
-       xor     %r14d,%r13d                     # Sigma1(e)
-       xor     %edx,%r15d                      # Ch(e,f,g)=((f^g)&e)^g
-       add     %r8d,%r12d                      # T1+=h
-
-       mov     %r9d,%r8d
-       add     %r13d,%r12d                     # T1+=Sigma1(e)
-
-       add     %r15d,%r12d                     # T1+=Ch(e,f,g)
-       mov     %r9d,%r13d
-       mov     %r9d,%r14d
-
-       ror     $2,%r8d
-       ror     $13,%r13d
-       mov     %r9d,%r15d
-       add     (%rbp,%rdi,4),%r12d     # T1+=K[round]
-
-       xor     %r13d,%r8d
-       ror     $9,%r13d
-       or      %r11d,%r14d                     # a|c
-
-       xor     %r13d,%r8d                      # h=Sigma0(a)
-       and     %r11d,%r15d                     # a&c
-       add     %r12d,%eax                      # d+=T1
-
-       and     %r10d,%r14d                     # (a|c)&b
-       add     %r12d,%r8d                      # h+=T1
-
-       or      %r15d,%r14d                     # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14d,%r8d                      # h+=Maj(a,b,c)
-       mov     20(%rsp),%r13d
-       mov     8(%rsp),%r12d
-
-       mov     %r13d,%r15d
-
-       shr     $3,%r13d
-       ror     $7,%r15d
-
-       xor     %r15d,%r13d
-       ror     $11,%r15d
-
-       xor     %r15d,%r13d                     # sigma0(X[(i+1)&0xf])
-       mov     %r12d,%r14d
-
-       shr     $10,%r12d
-       ror     $17,%r14d
-
-       xor     %r14d,%r12d
-       ror     $2,%r14d
-
-       xor     %r14d,%r12d                     # sigma1(X[(i+14)&0xf])
-
-       add     %r13d,%r12d
-
-       add     52(%rsp),%r12d
-
-       add     16(%rsp),%r12d
-       mov     %eax,%r13d
-       mov     %eax,%r14d
-       mov     %ebx,%r15d
-
-       ror     $6,%r13d
-       ror     $11,%r14d
-       xor     %ecx,%r15d                      # f^g
-
-       xor     %r14d,%r13d
-       ror     $14,%r14d
-       and     %eax,%r15d                      # (f^g)&e
-       mov     %r12d,16(%rsp)
-
-       xor     %r14d,%r13d                     # Sigma1(e)
-       xor     %ecx,%r15d                      # Ch(e,f,g)=((f^g)&e)^g
-       add     %edx,%r12d                      # T1+=h
-
-       mov     %r8d,%edx
-       add     %r13d,%r12d                     # T1+=Sigma1(e)
-
-       add     %r15d,%r12d                     # T1+=Ch(e,f,g)
-       mov     %r8d,%r13d
-       mov     %r8d,%r14d
-
-       ror     $2,%edx
-       ror     $13,%r13d
-       mov     %r8d,%r15d
-       add     (%rbp,%rdi,4),%r12d     # T1+=K[round]
-
-       xor     %r13d,%edx
-       ror     $9,%r13d
-       or      %r10d,%r14d                     # a|c
-
-       xor     %r13d,%edx                      # h=Sigma0(a)
-       and     %r10d,%r15d                     # a&c
-       add     %r12d,%r11d                     # d+=T1
-
-       and     %r9d,%r14d                      # (a|c)&b
-       add     %r12d,%edx                      # h+=T1
-
-       or      %r15d,%r14d                     # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14d,%edx                      # h+=Maj(a,b,c)
-       mov     24(%rsp),%r13d
-       mov     12(%rsp),%r12d
-
-       mov     %r13d,%r15d
-
-       shr     $3,%r13d
-       ror     $7,%r15d
-
-       xor     %r15d,%r13d
-       ror     $11,%r15d
-
-       xor     %r15d,%r13d                     # sigma0(X[(i+1)&0xf])
-       mov     %r12d,%r14d
-
-       shr     $10,%r12d
-       ror     $17,%r14d
-
-       xor     %r14d,%r12d
-       ror     $2,%r14d
-
-       xor     %r14d,%r12d                     # sigma1(X[(i+14)&0xf])
-
-       add     %r13d,%r12d
-
-       add     56(%rsp),%r12d
-
-       add     20(%rsp),%r12d
-       mov     %r11d,%r13d
-       mov     %r11d,%r14d
-       mov     %eax,%r15d
-
-       ror     $6,%r13d
-       ror     $11,%r14d
-       xor     %ebx,%r15d                      # f^g
-
-       xor     %r14d,%r13d
-       ror     $14,%r14d
-       and     %r11d,%r15d                     # (f^g)&e
-       mov     %r12d,20(%rsp)
-
-       xor     %r14d,%r13d                     # Sigma1(e)
-       xor     %ebx,%r15d                      # Ch(e,f,g)=((f^g)&e)^g
-       add     %ecx,%r12d                      # T1+=h
-
-       mov     %edx,%ecx
-       add     %r13d,%r12d                     # T1+=Sigma1(e)
-
-       add     %r15d,%r12d                     # T1+=Ch(e,f,g)
-       mov     %edx,%r13d
-       mov     %edx,%r14d
-
-       ror     $2,%ecx
-       ror     $13,%r13d
-       mov     %edx,%r15d
-       add     (%rbp,%rdi,4),%r12d     # T1+=K[round]
-
-       xor     %r13d,%ecx
-       ror     $9,%r13d
-       or      %r9d,%r14d                      # a|c
-
-       xor     %r13d,%ecx                      # h=Sigma0(a)
-       and     %r9d,%r15d                      # a&c
-       add     %r12d,%r10d                     # d+=T1
-
-       and     %r8d,%r14d                      # (a|c)&b
-       add     %r12d,%ecx                      # h+=T1
-
-       or      %r15d,%r14d                     # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14d,%ecx                      # h+=Maj(a,b,c)
-       mov     28(%rsp),%r13d
-       mov     16(%rsp),%r12d
-
-       mov     %r13d,%r15d
-
-       shr     $3,%r13d
-       ror     $7,%r15d
-
-       xor     %r15d,%r13d
-       ror     $11,%r15d
-
-       xor     %r15d,%r13d                     # sigma0(X[(i+1)&0xf])
-       mov     %r12d,%r14d
-
-       shr     $10,%r12d
-       ror     $17,%r14d
-
-       xor     %r14d,%r12d
-       ror     $2,%r14d
-
-       xor     %r14d,%r12d                     # sigma1(X[(i+14)&0xf])
-
-       add     %r13d,%r12d
-
-       add     60(%rsp),%r12d
-
-       add     24(%rsp),%r12d
-       mov     %r10d,%r13d
-       mov     %r10d,%r14d
-       mov     %r11d,%r15d
-
-       ror     $6,%r13d
-       ror     $11,%r14d
-       xor     %eax,%r15d                      # f^g
-
-       xor     %r14d,%r13d
-       ror     $14,%r14d
-       and     %r10d,%r15d                     # (f^g)&e
-       mov     %r12d,24(%rsp)
-
-       xor     %r14d,%r13d                     # Sigma1(e)
-       xor     %eax,%r15d                      # Ch(e,f,g)=((f^g)&e)^g
-       add     %ebx,%r12d                      # T1+=h
-
-       mov     %ecx,%ebx
-       add     %r13d,%r12d                     # T1+=Sigma1(e)
-
-       add     %r15d,%r12d                     # T1+=Ch(e,f,g)
-       mov     %ecx,%r13d
-       mov     %ecx,%r14d
-
-       ror     $2,%ebx
-       ror     $13,%r13d
-       mov     %ecx,%r15d
-       add     (%rbp,%rdi,4),%r12d     # T1+=K[round]
-
-       xor     %r13d,%ebx
-       ror     $9,%r13d
-       or      %r8d,%r14d                      # a|c
-
-       xor     %r13d,%ebx                      # h=Sigma0(a)
-       and     %r8d,%r15d                      # a&c
-       add     %r12d,%r9d                      # d+=T1
-
-       and     %edx,%r14d                      # (a|c)&b
-       add     %r12d,%ebx                      # h+=T1
-
-       or      %r15d,%r14d                     # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14d,%ebx                      # h+=Maj(a,b,c)
-       mov     32(%rsp),%r13d
-       mov     20(%rsp),%r12d
-
-       mov     %r13d,%r15d
-
-       shr     $3,%r13d
-       ror     $7,%r15d
-
-       xor     %r15d,%r13d
-       ror     $11,%r15d
-
-       xor     %r15d,%r13d                     # sigma0(X[(i+1)&0xf])
-       mov     %r12d,%r14d
-
-       shr     $10,%r12d
-       ror     $17,%r14d
-
-       xor     %r14d,%r12d
-       ror     $2,%r14d
-
-       xor     %r14d,%r12d                     # sigma1(X[(i+14)&0xf])
-
-       add     %r13d,%r12d
-
-       add     0(%rsp),%r12d
-
-       add     28(%rsp),%r12d
-       mov     %r9d,%r13d
-       mov     %r9d,%r14d
-       mov     %r10d,%r15d
-
-       ror     $6,%r13d
-       ror     $11,%r14d
-       xor     %r11d,%r15d                     # f^g
-
-       xor     %r14d,%r13d
-       ror     $14,%r14d
-       and     %r9d,%r15d                      # (f^g)&e
-       mov     %r12d,28(%rsp)
-
-       xor     %r14d,%r13d                     # Sigma1(e)
-       xor     %r11d,%r15d                     # Ch(e,f,g)=((f^g)&e)^g
-       add     %eax,%r12d                      # T1+=h
-
-       mov     %ebx,%eax
-       add     %r13d,%r12d                     # T1+=Sigma1(e)
-
-       add     %r15d,%r12d                     # T1+=Ch(e,f,g)
-       mov     %ebx,%r13d
-       mov     %ebx,%r14d
-
-       ror     $2,%eax
-       ror     $13,%r13d
-       mov     %ebx,%r15d
-       add     (%rbp,%rdi,4),%r12d     # T1+=K[round]
-
-       xor     %r13d,%eax
-       ror     $9,%r13d
-       or      %edx,%r14d                      # a|c
-
-       xor     %r13d,%eax                      # h=Sigma0(a)
-       and     %edx,%r15d                      # a&c
-       add     %r12d,%r8d                      # d+=T1
-
-       and     %ecx,%r14d                      # (a|c)&b
-       add     %r12d,%eax                      # h+=T1
-
-       or      %r15d,%r14d                     # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14d,%eax                      # h+=Maj(a,b,c)
-       mov     36(%rsp),%r13d
-       mov     24(%rsp),%r12d
-
-       mov     %r13d,%r15d
-
-       shr     $3,%r13d
-       ror     $7,%r15d
-
-       xor     %r15d,%r13d
-       ror     $11,%r15d
-
-       xor     %r15d,%r13d                     # sigma0(X[(i+1)&0xf])
-       mov     %r12d,%r14d
-
-       shr     $10,%r12d
-       ror     $17,%r14d
-
-       xor     %r14d,%r12d
-       ror     $2,%r14d
-
-       xor     %r14d,%r12d                     # sigma1(X[(i+14)&0xf])
-
-       add     %r13d,%r12d
-
-       add     4(%rsp),%r12d
-
-       add     32(%rsp),%r12d
-       mov     %r8d,%r13d
-       mov     %r8d,%r14d
-       mov     %r9d,%r15d
-
-       ror     $6,%r13d
-       ror     $11,%r14d
-       xor     %r10d,%r15d                     # f^g
-
-       xor     %r14d,%r13d
-       ror     $14,%r14d
-       and     %r8d,%r15d                      # (f^g)&e
-       mov     %r12d,32(%rsp)
-
-       xor     %r14d,%r13d                     # Sigma1(e)
-       xor     %r10d,%r15d                     # Ch(e,f,g)=((f^g)&e)^g
-       add     %r11d,%r12d                     # T1+=h
-
-       mov     %eax,%r11d
-       add     %r13d,%r12d                     # T1+=Sigma1(e)
-
-       add     %r15d,%r12d                     # T1+=Ch(e,f,g)
-       mov     %eax,%r13d
-       mov     %eax,%r14d
-
-       ror     $2,%r11d
-       ror     $13,%r13d
-       mov     %eax,%r15d
-       add     (%rbp,%rdi,4),%r12d     # T1+=K[round]
-
-       xor     %r13d,%r11d
-       ror     $9,%r13d
-       or      %ecx,%r14d                      # a|c
-
-       xor     %r13d,%r11d                     # h=Sigma0(a)
-       and     %ecx,%r15d                      # a&c
-       add     %r12d,%edx                      # d+=T1
-
-       and     %ebx,%r14d                      # (a|c)&b
-       add     %r12d,%r11d                     # h+=T1
-
-       or      %r15d,%r14d                     # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14d,%r11d                     # h+=Maj(a,b,c)
-       mov     40(%rsp),%r13d
-       mov     28(%rsp),%r12d
-
-       mov     %r13d,%r15d
-
-       shr     $3,%r13d
-       ror     $7,%r15d
-
-       xor     %r15d,%r13d
-       ror     $11,%r15d
-
-       xor     %r15d,%r13d                     # sigma0(X[(i+1)&0xf])
-       mov     %r12d,%r14d
-
-       shr     $10,%r12d
-       ror     $17,%r14d
-
-       xor     %r14d,%r12d
-       ror     $2,%r14d
-
-       xor     %r14d,%r12d                     # sigma1(X[(i+14)&0xf])
-
-       add     %r13d,%r12d
-
-       add     8(%rsp),%r12d
-
-       add     36(%rsp),%r12d
-       mov     %edx,%r13d
-       mov     %edx,%r14d
-       mov     %r8d,%r15d
-
-       ror     $6,%r13d
-       ror     $11,%r14d
-       xor     %r9d,%r15d                      # f^g
-
-       xor     %r14d,%r13d
-       ror     $14,%r14d
-       and     %edx,%r15d                      # (f^g)&e
-       mov     %r12d,36(%rsp)
-
-       xor     %r14d,%r13d                     # Sigma1(e)
-       xor     %r9d,%r15d                      # Ch(e,f,g)=((f^g)&e)^g
-       add     %r10d,%r12d                     # T1+=h
-
-       mov     %r11d,%r10d
-       add     %r13d,%r12d                     # T1+=Sigma1(e)
-
-       add     %r15d,%r12d                     # T1+=Ch(e,f,g)
-       mov     %r11d,%r13d
-       mov     %r11d,%r14d
-
-       ror     $2,%r10d
-       ror     $13,%r13d
-       mov     %r11d,%r15d
-       add     (%rbp,%rdi,4),%r12d     # T1+=K[round]
-
-       xor     %r13d,%r10d
-       ror     $9,%r13d
-       or      %ebx,%r14d                      # a|c
-
-       xor     %r13d,%r10d                     # h=Sigma0(a)
-       and     %ebx,%r15d                      # a&c
-       add     %r12d,%ecx                      # d+=T1
-
-       and     %eax,%r14d                      # (a|c)&b
-       add     %r12d,%r10d                     # h+=T1
-
-       or      %r15d,%r14d                     # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14d,%r10d                     # h+=Maj(a,b,c)
-       mov     44(%rsp),%r13d
-       mov     32(%rsp),%r12d
-
-       mov     %r13d,%r15d
-
-       shr     $3,%r13d
-       ror     $7,%r15d
-
-       xor     %r15d,%r13d
-       ror     $11,%r15d
-
-       xor     %r15d,%r13d                     # sigma0(X[(i+1)&0xf])
-       mov     %r12d,%r14d
-
-       shr     $10,%r12d
-       ror     $17,%r14d
-
-       xor     %r14d,%r12d
-       ror     $2,%r14d
-
-       xor     %r14d,%r12d                     # sigma1(X[(i+14)&0xf])
-
-       add     %r13d,%r12d
-
-       add     12(%rsp),%r12d
-
-       add     40(%rsp),%r12d
-       mov     %ecx,%r13d
-       mov     %ecx,%r14d
-       mov     %edx,%r15d
-
-       ror     $6,%r13d
-       ror     $11,%r14d
-       xor     %r8d,%r15d                      # f^g
-
-       xor     %r14d,%r13d
-       ror     $14,%r14d
-       and     %ecx,%r15d                      # (f^g)&e
-       mov     %r12d,40(%rsp)
-
-       xor     %r14d,%r13d                     # Sigma1(e)
-       xor     %r8d,%r15d                      # Ch(e,f,g)=((f^g)&e)^g
-       add     %r9d,%r12d                      # T1+=h
-
-       mov     %r10d,%r9d
-       add     %r13d,%r12d                     # T1+=Sigma1(e)
-
-       add     %r15d,%r12d                     # T1+=Ch(e,f,g)
-       mov     %r10d,%r13d
-       mov     %r10d,%r14d
-
-       ror     $2,%r9d
-       ror     $13,%r13d
-       mov     %r10d,%r15d
-       add     (%rbp,%rdi,4),%r12d     # T1+=K[round]
-
-       xor     %r13d,%r9d
-       ror     $9,%r13d
-       or      %eax,%r14d                      # a|c
-
-       xor     %r13d,%r9d                      # h=Sigma0(a)
-       and     %eax,%r15d                      # a&c
-       add     %r12d,%ebx                      # d+=T1
-
-       and     %r11d,%r14d                     # (a|c)&b
-       add     %r12d,%r9d                      # h+=T1
-
-       or      %r15d,%r14d                     # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14d,%r9d                      # h+=Maj(a,b,c)
-       mov     48(%rsp),%r13d
-       mov     36(%rsp),%r12d
-
-       mov     %r13d,%r15d
-
-       shr     $3,%r13d
-       ror     $7,%r15d
-
-       xor     %r15d,%r13d
-       ror     $11,%r15d
-
-       xor     %r15d,%r13d                     # sigma0(X[(i+1)&0xf])
-       mov     %r12d,%r14d
-
-       shr     $10,%r12d
-       ror     $17,%r14d
-
-       xor     %r14d,%r12d
-       ror     $2,%r14d
-
-       xor     %r14d,%r12d                     # sigma1(X[(i+14)&0xf])
-
-       add     %r13d,%r12d
-
-       add     16(%rsp),%r12d
-
-       add     44(%rsp),%r12d
-       mov     %ebx,%r13d
-       mov     %ebx,%r14d
-       mov     %ecx,%r15d
-
-       ror     $6,%r13d
-       ror     $11,%r14d
-       xor     %edx,%r15d                      # f^g
-
-       xor     %r14d,%r13d
-       ror     $14,%r14d
-       and     %ebx,%r15d                      # (f^g)&e
-       mov     %r12d,44(%rsp)
-
-       xor     %r14d,%r13d                     # Sigma1(e)
-       xor     %edx,%r15d                      # Ch(e,f,g)=((f^g)&e)^g
-       add     %r8d,%r12d                      # T1+=h
-
-       mov     %r9d,%r8d
-       add     %r13d,%r12d                     # T1+=Sigma1(e)
-
-       add     %r15d,%r12d                     # T1+=Ch(e,f,g)
-       mov     %r9d,%r13d
-       mov     %r9d,%r14d
-
-       ror     $2,%r8d
-       ror     $13,%r13d
-       mov     %r9d,%r15d
-       add     (%rbp,%rdi,4),%r12d     # T1+=K[round]
-
-       xor     %r13d,%r8d
-       ror     $9,%r13d
-       or      %r11d,%r14d                     # a|c
-
-       xor     %r13d,%r8d                      # h=Sigma0(a)
-       and     %r11d,%r15d                     # a&c
-       add     %r12d,%eax                      # d+=T1
-
-       and     %r10d,%r14d                     # (a|c)&b
-       add     %r12d,%r8d                      # h+=T1
-
-       or      %r15d,%r14d                     # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14d,%r8d                      # h+=Maj(a,b,c)
-       mov     52(%rsp),%r13d
-       mov     40(%rsp),%r12d
-
-       mov     %r13d,%r15d
-
-       shr     $3,%r13d
-       ror     $7,%r15d
-
-       xor     %r15d,%r13d
-       ror     $11,%r15d
-
-       xor     %r15d,%r13d                     # sigma0(X[(i+1)&0xf])
-       mov     %r12d,%r14d
-
-       shr     $10,%r12d
-       ror     $17,%r14d
-
-       xor     %r14d,%r12d
-       ror     $2,%r14d
-
-       xor     %r14d,%r12d                     # sigma1(X[(i+14)&0xf])
-
-       add     %r13d,%r12d
-
-       add     20(%rsp),%r12d
-
-       add     48(%rsp),%r12d
-       mov     %eax,%r13d
-       mov     %eax,%r14d
-       mov     %ebx,%r15d
-
-       ror     $6,%r13d
-       ror     $11,%r14d
-       xor     %ecx,%r15d                      # f^g
-
-       xor     %r14d,%r13d
-       ror     $14,%r14d
-       and     %eax,%r15d                      # (f^g)&e
-       mov     %r12d,48(%rsp)
-
-       xor     %r14d,%r13d                     # Sigma1(e)
-       xor     %ecx,%r15d                      # Ch(e,f,g)=((f^g)&e)^g
-       add     %edx,%r12d                      # T1+=h
-
-       mov     %r8d,%edx
-       add     %r13d,%r12d                     # T1+=Sigma1(e)
-
-       add     %r15d,%r12d                     # T1+=Ch(e,f,g)
-       mov     %r8d,%r13d
-       mov     %r8d,%r14d
-
-       ror     $2,%edx
-       ror     $13,%r13d
-       mov     %r8d,%r15d
-       add     (%rbp,%rdi,4),%r12d     # T1+=K[round]
-
-       xor     %r13d,%edx
-       ror     $9,%r13d
-       or      %r10d,%r14d                     # a|c
-
-       xor     %r13d,%edx                      # h=Sigma0(a)
-       and     %r10d,%r15d                     # a&c
-       add     %r12d,%r11d                     # d+=T1
-
-       and     %r9d,%r14d                      # (a|c)&b
-       add     %r12d,%edx                      # h+=T1
-
-       or      %r15d,%r14d                     # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14d,%edx                      # h+=Maj(a,b,c)
-       mov     56(%rsp),%r13d
-       mov     44(%rsp),%r12d
-
-       mov     %r13d,%r15d
-
-       shr     $3,%r13d
-       ror     $7,%r15d
-
-       xor     %r15d,%r13d
-       ror     $11,%r15d
-
-       xor     %r15d,%r13d                     # sigma0(X[(i+1)&0xf])
-       mov     %r12d,%r14d
-
-       shr     $10,%r12d
-       ror     $17,%r14d
-
-       xor     %r14d,%r12d
-       ror     $2,%r14d
-
-       xor     %r14d,%r12d                     # sigma1(X[(i+14)&0xf])
-
-       add     %r13d,%r12d
-
-       add     24(%rsp),%r12d
-
-       add     52(%rsp),%r12d
-       mov     %r11d,%r13d
-       mov     %r11d,%r14d
-       mov     %eax,%r15d
-
-       ror     $6,%r13d
-       ror     $11,%r14d
-       xor     %ebx,%r15d                      # f^g
-
-       xor     %r14d,%r13d
-       ror     $14,%r14d
-       and     %r11d,%r15d                     # (f^g)&e
-       mov     %r12d,52(%rsp)
-
-       xor     %r14d,%r13d                     # Sigma1(e)
-       xor     %ebx,%r15d                      # Ch(e,f,g)=((f^g)&e)^g
-       add     %ecx,%r12d                      # T1+=h
-
-       mov     %edx,%ecx
-       add     %r13d,%r12d                     # T1+=Sigma1(e)
-
-       add     %r15d,%r12d                     # T1+=Ch(e,f,g)
-       mov     %edx,%r13d
-       mov     %edx,%r14d
-
-       ror     $2,%ecx
-       ror     $13,%r13d
-       mov     %edx,%r15d
-       add     (%rbp,%rdi,4),%r12d     # T1+=K[round]
-
-       xor     %r13d,%ecx
-       ror     $9,%r13d
-       or      %r9d,%r14d                      # a|c
-
-       xor     %r13d,%ecx                      # h=Sigma0(a)
-       and     %r9d,%r15d                      # a&c
-       add     %r12d,%r10d                     # d+=T1
-
-       and     %r8d,%r14d                      # (a|c)&b
-       add     %r12d,%ecx                      # h+=T1
-
-       or      %r15d,%r14d                     # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14d,%ecx                      # h+=Maj(a,b,c)
-       mov     60(%rsp),%r13d
-       mov     48(%rsp),%r12d
-
-       mov     %r13d,%r15d
-
-       shr     $3,%r13d
-       ror     $7,%r15d
-
-       xor     %r15d,%r13d
-       ror     $11,%r15d
-
-       xor     %r15d,%r13d                     # sigma0(X[(i+1)&0xf])
-       mov     %r12d,%r14d
-
-       shr     $10,%r12d
-       ror     $17,%r14d
-
-       xor     %r14d,%r12d
-       ror     $2,%r14d
-
-       xor     %r14d,%r12d                     # sigma1(X[(i+14)&0xf])
-
-       add     %r13d,%r12d
-
-       add     28(%rsp),%r12d
-
-       add     56(%rsp),%r12d
-       mov     %r10d,%r13d
-       mov     %r10d,%r14d
-       mov     %r11d,%r15d
-
-       ror     $6,%r13d
-       ror     $11,%r14d
-       xor     %eax,%r15d                      # f^g
-
-       xor     %r14d,%r13d
-       ror     $14,%r14d
-       and     %r10d,%r15d                     # (f^g)&e
-       mov     %r12d,56(%rsp)
-
-       xor     %r14d,%r13d                     # Sigma1(e)
-       xor     %eax,%r15d                      # Ch(e,f,g)=((f^g)&e)^g
-       add     %ebx,%r12d                      # T1+=h
-
-       mov     %ecx,%ebx
-       add     %r13d,%r12d                     # T1+=Sigma1(e)
-
-       add     %r15d,%r12d                     # T1+=Ch(e,f,g)
-       mov     %ecx,%r13d
-       mov     %ecx,%r14d
-
-       ror     $2,%ebx
-       ror     $13,%r13d
-       mov     %ecx,%r15d
-       add     (%rbp,%rdi,4),%r12d     # T1+=K[round]
-
-       xor     %r13d,%ebx
-       ror     $9,%r13d
-       or      %r8d,%r14d                      # a|c
-
-       xor     %r13d,%ebx                      # h=Sigma0(a)
-       and     %r8d,%r15d                      # a&c
-       add     %r12d,%r9d                      # d+=T1
-
-       and     %edx,%r14d                      # (a|c)&b
-       add     %r12d,%ebx                      # h+=T1
-
-       or      %r15d,%r14d                     # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14d,%ebx                      # h+=Maj(a,b,c)
-       mov     0(%rsp),%r13d
-       mov     52(%rsp),%r12d
-
-       mov     %r13d,%r15d
-
-       shr     $3,%r13d
-       ror     $7,%r15d
-
-       xor     %r15d,%r13d
-       ror     $11,%r15d
-
-       xor     %r15d,%r13d                     # sigma0(X[(i+1)&0xf])
-       mov     %r12d,%r14d
-
-       shr     $10,%r12d
-       ror     $17,%r14d
-
-       xor     %r14d,%r12d
-       ror     $2,%r14d
-
-       xor     %r14d,%r12d                     # sigma1(X[(i+14)&0xf])
-
-       add     %r13d,%r12d
-
-       add     32(%rsp),%r12d
-
-       add     60(%rsp),%r12d
-       mov     %r9d,%r13d
-       mov     %r9d,%r14d
-       mov     %r10d,%r15d
-
-       ror     $6,%r13d
-       ror     $11,%r14d
-       xor     %r11d,%r15d                     # f^g
-
-       xor     %r14d,%r13d
-       ror     $14,%r14d
-       and     %r9d,%r15d                      # (f^g)&e
-       mov     %r12d,60(%rsp)
-
-       xor     %r14d,%r13d                     # Sigma1(e)
-       xor     %r11d,%r15d                     # Ch(e,f,g)=((f^g)&e)^g
-       add     %eax,%r12d                      # T1+=h
-
-       mov     %ebx,%eax
-       add     %r13d,%r12d                     # T1+=Sigma1(e)
-
-       add     %r15d,%r12d                     # T1+=Ch(e,f,g)
-       mov     %ebx,%r13d
-       mov     %ebx,%r14d
-
-       ror     $2,%eax
-       ror     $13,%r13d
-       mov     %ebx,%r15d
-       add     (%rbp,%rdi,4),%r12d     # T1+=K[round]
-
-       xor     %r13d,%eax
-       ror     $9,%r13d
-       or      %edx,%r14d                      # a|c
-
-       xor     %r13d,%eax                      # h=Sigma0(a)
-       and     %edx,%r15d                      # a&c
-       add     %r12d,%r8d                      # d+=T1
-
-       and     %ecx,%r14d                      # (a|c)&b
-       add     %r12d,%eax                      # h+=T1
-
-       or      %r15d,%r14d                     # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14d,%eax                      # h+=Maj(a,b,c)
-       cmp     $64,%rdi
-       jb      .Lrounds_16_xx
-
-       mov     16*4+0*8(%rsp),%rdi
-       lea     16*4(%rsi),%rsi
-
-       add     4*0(%rdi),%eax
-       add     4*1(%rdi),%ebx
-       add     4*2(%rdi),%ecx
-       add     4*3(%rdi),%edx
-       add     4*4(%rdi),%r8d
-       add     4*5(%rdi),%r9d
-       add     4*6(%rdi),%r10d
-       add     4*7(%rdi),%r11d
-
-       cmp     16*4+2*8(%rsp),%rsi
-
-       mov     %eax,4*0(%rdi)
-       mov     %ebx,4*1(%rdi)
-       mov     %ecx,4*2(%rdi)
-       mov     %edx,4*3(%rdi)
-       mov     %r8d,4*4(%rdi)
-       mov     %r9d,4*5(%rdi)
-       mov     %r10d,4*6(%rdi)
-       mov     %r11d,4*7(%rdi)
-       jb      .Lloop
-
-       mov     16*4+3*8(%rsp),%rsp
-.cfi_def_cfa   %rsp,56
-       pop     %r15
-.cfi_adjust_cfa_offset -8
-.cfi_restore   %r15
-       pop     %r14
-.cfi_adjust_cfa_offset -8
-.cfi_restore   %r14
-       pop     %r13
-.cfi_adjust_cfa_offset -8
-.cfi_restore   %r13
-       pop     %r12
-.cfi_adjust_cfa_offset -8
-.cfi_restore   %r12
-       pop     %rbp
-.cfi_adjust_cfa_offset -8
-.cfi_restore   %rbp
-       pop     %rbx
-.cfi_adjust_cfa_offset -8
-.cfi_restore   %rbx
-
-       RET
-.cfi_endproc
-SET_SIZE(SHA256TransformBlocks)
-
-SECTION_STATIC
-.balign        64
-SET_OBJ(K256)
-K256:
-       .long   0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
-       .long   0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
-       .long   0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
-       .long   0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
-       .long   0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
-       .long   0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
-       .long   0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
-       .long   0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
-       .long   0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
-       .long   0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
-       .long   0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
-       .long   0xd192e819,0xd6990624,0xf40e3585,0x106aa070
-       .long   0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
-       .long   0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
-       .long   0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
-       .long   0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
-#endif /* !lint && !__lint */
-
-#ifdef __ELF__
-.section .note.GNU-stack,"",%progbits
-#endif
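
For reference, the removed sha256_impl.S above is a fully unrolled form of the standard SHA-256 compression rounds. The following C sketch is illustrative only and is not part of this patch; it restates the per-round logic that the assembly comments label Sigma1(e), Ch(e,f,g), Sigma0(a), Maj(a,b,c) and the sigma0/sigma1 schedule updates, with X[] assumed to already hold the 16 big-endian-loaded words of the current 64-byte block and K[] being the K256 table shown above.

    #include <stdint.h>

    #define ROTR32(x, n)  (((x) >> (n)) | ((x) << (32 - (n))))
    #define Ch(e, f, g)   ((((f) ^ (g)) & (e)) ^ (g))           /* ((f^g)&e)^g */
    #define Maj(a, b, c)  ((((a) | (c)) & (b)) | ((a) & (c)))   /* ((a|c)&b)|(a&c) */
    #define Sigma0(a)     (ROTR32(a, 2) ^ ROTR32(a, 13) ^ ROTR32(a, 22))
    #define Sigma1(e)     (ROTR32(e, 6) ^ ROTR32(e, 11) ^ ROTR32(e, 25))
    #define sigma0(x)     (ROTR32(x, 7) ^ ROTR32(x, 18) ^ ((x) >> 3))
    #define sigma1(x)     (ROTR32(x, 17) ^ ROTR32(x, 19) ^ ((x) >> 10))

    /* One round; the assembly unrolls 64 of these and rotates register roles. */
    static inline void
    sha256_round_sketch(uint32_t st[8], uint32_t X[16], const uint32_t K[64], int i)
    {
            uint32_t T1, T2;

            if (i >= 16)    /* rounds 16..63 extend the message schedule in place */
                    X[i & 0xf] += sigma0(X[(i + 1) & 0xf]) +
                        sigma1(X[(i + 14) & 0xf]) + X[(i + 9) & 0xf];

            T1 = st[7] + Sigma1(st[4]) + Ch(st[4], st[5], st[6]) +
                K[i] + X[i & 0xf];                        /* T1 = h+Sigma1+Ch+K+W */
            T2 = Sigma0(st[0]) + Maj(st[0], st[1], st[2]);

            st[7] = st[6]; st[6] = st[5]; st[5] = st[4]; st[4] = st[3] + T1; /* d+=T1 */
            st[3] = st[2]; st[2] = st[1]; st[1] = st[0]; st[0] = T1 + T2;
    }

The unrolled assembly keeps the eight working variables in registers and the 16 schedule words on the stack (0(%rsp)..60(%rsp)), which corresponds to the X[i & 0xf] indexing in the sketch.
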
diff --git a/module/icp/asm-x86_64/sha2/sha512_impl.S b/module/icp/asm-x86_64/sha2/sha512_impl.S
deleted file mode 100644 (file)
index b2f7d48..0000000
+++ /dev/null
@@ -1,2115 +0,0 @@
-/*
- * ====================================================================
- * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
- * project. Rights for redistribution and usage in source and binary
- * forms are granted according to the OpenSSL license.
- * ====================================================================
- *
- * sha256/512_block procedure for x86_64.
- *
- * 40% improvement over compiler-generated code on Opteron. On EM64T,
- * sha256 was observed to run >80% faster and sha512 >40% faster. No
- * magical tricks, just a straight implementation... I really wonder why
- * gcc [being armed with inline assembler] fails to generate code as fast.
- * The only thing which is cool about this module is that the very same
- * instruction sequence is used for both SHA-256 and SHA-512. In the
- * former case the instructions operate on 32-bit operands, while in the
- * latter they operate on 64-bit ones. All I had to do was get one flavor
- * right; the other one passed the test right away :-)
- *
- * sha256_block runs in ~1005 cycles on Opteron, which gives you
- * asymptotic performance of 64*1000/1005=63.7MBps times CPU clock
- * frequency in GHz. sha512_block runs in ~1275 cycles, which results
- * in 128*1000/1275=100MBps per GHz. Is there room for improvement?
- * Well, if you compare it to the IA-64 implementation, which maintains
- * X[16] in the register bank[!], tends toward 4 instructions per CPU
- * clock cycle and runs in 1003 cycles, 1275 is a very good result for the
- * 3-way issue Opteron pipeline with X[16] maintained in memory. So *if*
- * there is a way to improve it, *then* the only way would be to try to
- * offload the X[16] updates to the SSE unit, but that would require a
- * "deeper" loop unroll, which in turn would naturally cause size blow-up,
- * not to mention increased complexity! And once again, that is only *if*
- * it's actually possible to noticeably improve overall ILP, instruction
- * level parallelism, on the given CPU implementation in this case.
- *
- * Special note on Intel EM64T. While the Opteron CPU exhibits a perfect
- * performance ratio of 1.5 between the 64- and 32-bit flavors [see above],
- * [currently available] EM64T CPUs apparently are far from it. On the
- * contrary, the 64-bit version, sha512_block, is ~30% *slower* than the
- * 32-bit sha256_block :-( This is presumably because 64-bit shifts/rotates
- * are not atomic instructions, but are implemented in microcode.
- */
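
Concretely (an illustrative aside, not part of the patch), the two flavors differ only in word width, round count, and rotate/shift amounts. The 64-bit counterparts of the 32-bit helpers sketched at the end of the previous file above are:

    #include <stdint.h>

    /* SHA-512 helper sketch; the rotate amounts are what the composed
     * ror/shr sequences in the removed assembly below add up to. */
    #define ROTR64(x, n)    (((x) >> (n)) | ((x) << (64 - (n))))
    #define Sigma0_512(a)   (ROTR64(a, 28) ^ ROTR64(a, 34) ^ ROTR64(a, 39))
    #define Sigma1_512(e)   (ROTR64(e, 14) ^ ROTR64(e, 18) ^ ROTR64(e, 41))
    #define sigma0_512(x)   (ROTR64(x, 1) ^ ROTR64(x, 8) ^ ((x) >> 7))
    #define sigma1_512(x)   (ROTR64(x, 19) ^ ROTR64(x, 61) ^ ((x) >> 6))
    /* Ch() and Maj() are unchanged apart from operating on uint64_t words,
     * and the compression runs 80 rounds over 16 64-bit schedule words. */
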
-
-/*
- * OpenSolaris OS modifications
- *
- * Sun elects to use this software under the BSD license.
- *
- * This source originates from OpenSSL file sha512-x86_64.pl at
- * ftp://ftp.openssl.org/snapshot/openssl-0.9.8-stable-SNAP-20080131.tar.gz
- * (presumably for future OpenSSL release 0.9.8h), with these changes:
- *
- * 1. Added perl "use strict" and declared variables.
- *
- * 2. Added OpenSolaris ENTRY_NP/SET_SIZE macros from
- * /usr/include/sys/asm_linkage.h, .ident keywords, and lint(1B) guards.
- *
- * 3. Removed x86_64-xlate.pl script (not needed for as(1) or gas(1)
- * assemblers).  Replaced the .picmeup macro with assembler code.
- *
- * 4. Added 8 to $ctx, as OpenSolaris OS has an extra 4-byte field, "algotype",
- * at the beginning of SHA2_CTX (the next field is 8-byte aligned).
- */
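
Modification 4 above is why the prologue below does "add $8,%rdi" before loading the state. A hedged sketch of the context layout this assumes follows; the field names are illustrative and this is not the actual SHA2_CTX declaration.

    #include <stdint.h>

    /*
     * Sketch of the layout the assembly relies on: the 4-byte algotype plus
     * alignment padding put the hash state at offset 8, hence the
     * "add $8,%rdi" before the 8*0(%rdi)..8*7(%rdi) loads.
     */
    struct sha2_ctx_sketch {
            uint32_t algotype;      /* algorithm selector, 4 bytes */
                                    /* 4 bytes of padding to 8-byte alignment */
            uint64_t state[8];      /* a..h, loaded into %rax..%rdx,%r8..%r11 */
            /* ... bit count and message buffer follow ... */
    };
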
-
-/*
- * This file was generated by a perl script (sha512-x86_64.pl) that was
- * used to generate the sha256 and sha512 variants from the same code base.
- * The comments from the original file have been pasted above.
- */
-
-
-#if defined(lint) || defined(__lint)
-#include <sys/stdint.h>
-#include <sha2/sha2.h>
-
-void
-SHA512TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num)
-{
-       (void) ctx, (void) in, (void) num;
-}
-
-
-#else
-#define _ASM
-#include <sys/asm_linkage.h>
-
-ENTRY_NP(SHA512TransformBlocks)
-.cfi_startproc
-       ENDBR
-       movq    %rsp, %rax
-.cfi_def_cfa_register %rax
-       push    %rbx
-.cfi_offset    %rbx,-16
-       push    %rbp
-.cfi_offset    %rbp,-24
-       push    %r12
-.cfi_offset    %r12,-32
-       push    %r13
-.cfi_offset    %r13,-40
-       push    %r14
-.cfi_offset    %r14,-48
-       push    %r15
-.cfi_offset    %r15,-56
-       mov     %rsp,%rbp               # copy %rsp
-       shl     $4,%rdx         # num*16
-       sub     $16*8+4*8,%rsp
-       lea     (%rsi,%rdx,8),%rdx      # inp+num*16*8
-       and     $-64,%rsp               # align stack frame
-       add     $8,%rdi         # Skip OpenSolaris field, "algotype"
-       mov     %rdi,16*8+0*8(%rsp)             # save ctx, 1st arg
-       mov     %rsi,16*8+1*8(%rsp)             # save inp, 2nd arg
-       mov     %rdx,16*8+2*8(%rsp)             # save end pointer, "3rd" arg
-       mov     %rbp,16*8+3*8(%rsp)             # save copy of %rsp
-# echo ".cfi_cfa_expression %rsp+152,deref,+56" |
-#      openssl/crypto/perlasm/x86_64-xlate.pl
-.cfi_escape    0x0f,0x06,0x77,0x98,0x01,0x06,0x23,0x38
-
-       #.picmeup %rbp
-       # The .picmeup pseudo-directive, from perlasm/x86_64_xlate.pl, puts
-       # the address of the "next" instruction into the target register
-       # (%rbp).  This generates these 2 instructions:
-       lea     .Llea(%rip),%rbp
-       #nop    # .picmeup generates a nop for mod 8 alignment--not needed here
-
-.Llea:
-       lea     K512-.(%rbp),%rbp
-
-       mov     8*0(%rdi),%rax
-       mov     8*1(%rdi),%rbx
-       mov     8*2(%rdi),%rcx
-       mov     8*3(%rdi),%rdx
-       mov     8*4(%rdi),%r8
-       mov     8*5(%rdi),%r9
-       mov     8*6(%rdi),%r10
-       mov     8*7(%rdi),%r11
-       jmp     .Lloop
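
The prologue above turns the block count into an end pointer (inp + num*16*8, i.e. num 128-byte blocks), saves it on the stack, and loads the eight state words; the outer loop below then consumes one block per iteration until the input pointer reaches that end. Schematically, in illustrative C (the helper name is hypothetical, not a symbol in this file):

    #include <stddef.h>
    #include <stdint.h>

    /* Hypothetical stand-in for the unrolled 80-round block function below. */
    void sha512_block_sketch(uint64_t state[8], const uint8_t *block);

    void
    sha512_transform_blocks_sketch(uint64_t state[8], const uint8_t *in, size_t num)
    {
            const uint8_t *end = in + num * 16 * 8;         /* inp + num*16*8 */

            while (in < end) {
                    sha512_block_sketch(state, in);         /* rounds + state feedback */
                    in += 16 * 8;                           /* next 128-byte block */
            }
    }
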
-
-.balign        16
-.Lloop:
-       xor     %rdi,%rdi
-       mov     8*0(%rsi),%r12
-       bswap   %r12
-       mov     %r8,%r13
-       mov     %r8,%r14
-       mov     %r9,%r15
-
-       ror     $14,%r13
-       ror     $18,%r14
-       xor     %r10,%r15                       # f^g
-
-       xor     %r14,%r13
-       ror     $23,%r14
-       and     %r8,%r15                        # (f^g)&e
-       mov     %r12,0(%rsp)
-
-       xor     %r14,%r13                       # Sigma1(e)
-       xor     %r10,%r15                       # Ch(e,f,g)=((f^g)&e)^g
-       add     %r11,%r12                       # T1+=h
-
-       mov     %rax,%r11
-       add     %r13,%r12                       # T1+=Sigma1(e)
-
-       add     %r15,%r12                       # T1+=Ch(e,f,g)
-       mov     %rax,%r13
-       mov     %rax,%r14
-
-       ror     $28,%r11
-       ror     $34,%r13
-       mov     %rax,%r15
-       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
-
-       xor     %r13,%r11
-       ror     $5,%r13
-       or      %rcx,%r14                       # a|c
-
-       xor     %r13,%r11                       # h=Sigma0(a)
-       and     %rcx,%r15                       # a&c
-       add     %r12,%rdx                       # d+=T1
-
-       and     %rbx,%r14                       # (a|c)&b
-       add     %r12,%r11                       # h+=T1
-
-       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14,%r11                       # h+=Maj(a,b,c)
-       mov     8*1(%rsi),%r12
-       bswap   %r12
-       mov     %rdx,%r13
-       mov     %rdx,%r14
-       mov     %r8,%r15
-
-       ror     $14,%r13
-       ror     $18,%r14
-       xor     %r9,%r15                        # f^g
-
-       xor     %r14,%r13
-       ror     $23,%r14
-       and     %rdx,%r15                       # (f^g)&e
-       mov     %r12,8(%rsp)
-
-       xor     %r14,%r13                       # Sigma1(e)
-       xor     %r9,%r15                        # Ch(e,f,g)=((f^g)&e)^g
-       add     %r10,%r12                       # T1+=h
-
-       mov     %r11,%r10
-       add     %r13,%r12                       # T1+=Sigma1(e)
-
-       add     %r15,%r12                       # T1+=Ch(e,f,g)
-       mov     %r11,%r13
-       mov     %r11,%r14
-
-       ror     $28,%r10
-       ror     $34,%r13
-       mov     %r11,%r15
-       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
-
-       xor     %r13,%r10
-       ror     $5,%r13
-       or      %rbx,%r14                       # a|c
-
-       xor     %r13,%r10                       # h=Sigma0(a)
-       and     %rbx,%r15                       # a&c
-       add     %r12,%rcx                       # d+=T1
-
-       and     %rax,%r14                       # (a|c)&b
-       add     %r12,%r10                       # h+=T1
-
-       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14,%r10                       # h+=Maj(a,b,c)
-       mov     8*2(%rsi),%r12
-       bswap   %r12
-       mov     %rcx,%r13
-       mov     %rcx,%r14
-       mov     %rdx,%r15
-
-       ror     $14,%r13
-       ror     $18,%r14
-       xor     %r8,%r15                        # f^g
-
-       xor     %r14,%r13
-       ror     $23,%r14
-       and     %rcx,%r15                       # (f^g)&e
-       mov     %r12,16(%rsp)
-
-       xor     %r14,%r13                       # Sigma1(e)
-       xor     %r8,%r15                        # Ch(e,f,g)=((f^g)&e)^g
-       add     %r9,%r12                        # T1+=h
-
-       mov     %r10,%r9
-       add     %r13,%r12                       # T1+=Sigma1(e)
-
-       add     %r15,%r12                       # T1+=Ch(e,f,g)
-       mov     %r10,%r13
-       mov     %r10,%r14
-
-       ror     $28,%r9
-       ror     $34,%r13
-       mov     %r10,%r15
-       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
-
-       xor     %r13,%r9
-       ror     $5,%r13
-       or      %rax,%r14                       # a|c
-
-       xor     %r13,%r9                        # h=Sigma0(a)
-       and     %rax,%r15                       # a&c
-       add     %r12,%rbx                       # d+=T1
-
-       and     %r11,%r14                       # (a|c)&b
-       add     %r12,%r9                        # h+=T1
-
-       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14,%r9                        # h+=Maj(a,b,c)
-       mov     8*3(%rsi),%r12
-       bswap   %r12
-       mov     %rbx,%r13
-       mov     %rbx,%r14
-       mov     %rcx,%r15
-
-       ror     $14,%r13
-       ror     $18,%r14
-       xor     %rdx,%r15                       # f^g
-
-       xor     %r14,%r13
-       ror     $23,%r14
-       and     %rbx,%r15                       # (f^g)&e
-       mov     %r12,24(%rsp)
-
-       xor     %r14,%r13                       # Sigma1(e)
-       xor     %rdx,%r15                       # Ch(e,f,g)=((f^g)&e)^g
-       add     %r8,%r12                        # T1+=h
-
-       mov     %r9,%r8
-       add     %r13,%r12                       # T1+=Sigma1(e)
-
-       add     %r15,%r12                       # T1+=Ch(e,f,g)
-       mov     %r9,%r13
-       mov     %r9,%r14
-
-       ror     $28,%r8
-       ror     $34,%r13
-       mov     %r9,%r15
-       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
-
-       xor     %r13,%r8
-       ror     $5,%r13
-       or      %r11,%r14                       # a|c
-
-       xor     %r13,%r8                        # h=Sigma0(a)
-       and     %r11,%r15                       # a&c
-       add     %r12,%rax                       # d+=T1
-
-       and     %r10,%r14                       # (a|c)&b
-       add     %r12,%r8                        # h+=T1
-
-       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14,%r8                        # h+=Maj(a,b,c)
-       mov     8*4(%rsi),%r12
-       bswap   %r12
-       mov     %rax,%r13
-       mov     %rax,%r14
-       mov     %rbx,%r15
-
-       ror     $14,%r13
-       ror     $18,%r14
-       xor     %rcx,%r15                       # f^g
-
-       xor     %r14,%r13
-       ror     $23,%r14
-       and     %rax,%r15                       # (f^g)&e
-       mov     %r12,32(%rsp)
-
-       xor     %r14,%r13                       # Sigma1(e)
-       xor     %rcx,%r15                       # Ch(e,f,g)=((f^g)&e)^g
-       add     %rdx,%r12                       # T1+=h
-
-       mov     %r8,%rdx
-       add     %r13,%r12                       # T1+=Sigma1(e)
-
-       add     %r15,%r12                       # T1+=Ch(e,f,g)
-       mov     %r8,%r13
-       mov     %r8,%r14
-
-       ror     $28,%rdx
-       ror     $34,%r13
-       mov     %r8,%r15
-       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
-
-       xor     %r13,%rdx
-       ror     $5,%r13
-       or      %r10,%r14                       # a|c
-
-       xor     %r13,%rdx                       # h=Sigma0(a)
-       and     %r10,%r15                       # a&c
-       add     %r12,%r11                       # d+=T1
-
-       and     %r9,%r14                        # (a|c)&b
-       add     %r12,%rdx                       # h+=T1
-
-       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14,%rdx                       # h+=Maj(a,b,c)
-       mov     8*5(%rsi),%r12
-       bswap   %r12
-       mov     %r11,%r13
-       mov     %r11,%r14
-       mov     %rax,%r15
-
-       ror     $14,%r13
-       ror     $18,%r14
-       xor     %rbx,%r15                       # f^g
-
-       xor     %r14,%r13
-       ror     $23,%r14
-       and     %r11,%r15                       # (f^g)&e
-       mov     %r12,40(%rsp)
-
-       xor     %r14,%r13                       # Sigma1(e)
-       xor     %rbx,%r15                       # Ch(e,f,g)=((f^g)&e)^g
-       add     %rcx,%r12                       # T1+=h
-
-       mov     %rdx,%rcx
-       add     %r13,%r12                       # T1+=Sigma1(e)
-
-       add     %r15,%r12                       # T1+=Ch(e,f,g)
-       mov     %rdx,%r13
-       mov     %rdx,%r14
-
-       ror     $28,%rcx
-       ror     $34,%r13
-       mov     %rdx,%r15
-       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
-
-       xor     %r13,%rcx
-       ror     $5,%r13
-       or      %r9,%r14                        # a|c
-
-       xor     %r13,%rcx                       # h=Sigma0(a)
-       and     %r9,%r15                        # a&c
-       add     %r12,%r10                       # d+=T1
-
-       and     %r8,%r14                        # (a|c)&b
-       add     %r12,%rcx                       # h+=T1
-
-       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14,%rcx                       # h+=Maj(a,b,c)
-       mov     8*6(%rsi),%r12
-       bswap   %r12
-       mov     %r10,%r13
-       mov     %r10,%r14
-       mov     %r11,%r15
-
-       ror     $14,%r13
-       ror     $18,%r14
-       xor     %rax,%r15                       # f^g
-
-       xor     %r14,%r13
-       ror     $23,%r14
-       and     %r10,%r15                       # (f^g)&e
-       mov     %r12,48(%rsp)
-
-       xor     %r14,%r13                       # Sigma1(e)
-       xor     %rax,%r15                       # Ch(e,f,g)=((f^g)&e)^g
-       add     %rbx,%r12                       # T1+=h
-
-       mov     %rcx,%rbx
-       add     %r13,%r12                       # T1+=Sigma1(e)
-
-       add     %r15,%r12                       # T1+=Ch(e,f,g)
-       mov     %rcx,%r13
-       mov     %rcx,%r14
-
-       ror     $28,%rbx
-       ror     $34,%r13
-       mov     %rcx,%r15
-       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
-
-       xor     %r13,%rbx
-       ror     $5,%r13
-       or      %r8,%r14                        # a|c
-
-       xor     %r13,%rbx                       # h=Sigma0(a)
-       and     %r8,%r15                        # a&c
-       add     %r12,%r9                        # d+=T1
-
-       and     %rdx,%r14                       # (a|c)&b
-       add     %r12,%rbx                       # h+=T1
-
-       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14,%rbx                       # h+=Maj(a,b,c)
-       mov     8*7(%rsi),%r12
-       bswap   %r12
-       mov     %r9,%r13
-       mov     %r9,%r14
-       mov     %r10,%r15
-
-       ror     $14,%r13
-       ror     $18,%r14
-       xor     %r11,%r15                       # f^g
-
-       xor     %r14,%r13
-       ror     $23,%r14
-       and     %r9,%r15                        # (f^g)&e
-       mov     %r12,56(%rsp)
-
-       xor     %r14,%r13                       # Sigma1(e)
-       xor     %r11,%r15                       # Ch(e,f,g)=((f^g)&e)^g
-       add     %rax,%r12                       # T1+=h
-
-       mov     %rbx,%rax
-       add     %r13,%r12                       # T1+=Sigma1(e)
-
-       add     %r15,%r12                       # T1+=Ch(e,f,g)
-       mov     %rbx,%r13
-       mov     %rbx,%r14
-
-       ror     $28,%rax
-       ror     $34,%r13
-       mov     %rbx,%r15
-       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
-
-       xor     %r13,%rax
-       ror     $5,%r13
-       or      %rdx,%r14                       # a|c
-
-       xor     %r13,%rax                       # h=Sigma0(a)
-       and     %rdx,%r15                       # a&c
-       add     %r12,%r8                        # d+=T1
-
-       and     %rcx,%r14                       # (a|c)&b
-       add     %r12,%rax                       # h+=T1
-
-       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14,%rax                       # h+=Maj(a,b,c)
-       mov     8*8(%rsi),%r12
-       bswap   %r12
-       mov     %r8,%r13
-       mov     %r8,%r14
-       mov     %r9,%r15
-
-       ror     $14,%r13
-       ror     $18,%r14
-       xor     %r10,%r15                       # f^g
-
-       xor     %r14,%r13
-       ror     $23,%r14
-       and     %r8,%r15                        # (f^g)&e
-       mov     %r12,64(%rsp)
-
-       xor     %r14,%r13                       # Sigma1(e)
-       xor     %r10,%r15                       # Ch(e,f,g)=((f^g)&e)^g
-       add     %r11,%r12                       # T1+=h
-
-       mov     %rax,%r11
-       add     %r13,%r12                       # T1+=Sigma1(e)
-
-       add     %r15,%r12                       # T1+=Ch(e,f,g)
-       mov     %rax,%r13
-       mov     %rax,%r14
-
-       ror     $28,%r11
-       ror     $34,%r13
-       mov     %rax,%r15
-       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
-
-       xor     %r13,%r11
-       ror     $5,%r13
-       or      %rcx,%r14                       # a|c
-
-       xor     %r13,%r11                       # h=Sigma0(a)
-       and     %rcx,%r15                       # a&c
-       add     %r12,%rdx                       # d+=T1
-
-       and     %rbx,%r14                       # (a|c)&b
-       add     %r12,%r11                       # h+=T1
-
-       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14,%r11                       # h+=Maj(a,b,c)
-       mov     8*9(%rsi),%r12
-       bswap   %r12
-       mov     %rdx,%r13
-       mov     %rdx,%r14
-       mov     %r8,%r15
-
-       ror     $14,%r13
-       ror     $18,%r14
-       xor     %r9,%r15                        # f^g
-
-       xor     %r14,%r13
-       ror     $23,%r14
-       and     %rdx,%r15                       # (f^g)&e
-       mov     %r12,72(%rsp)
-
-       xor     %r14,%r13                       # Sigma1(e)
-       xor     %r9,%r15                        # Ch(e,f,g)=((f^g)&e)^g
-       add     %r10,%r12                       # T1+=h
-
-       mov     %r11,%r10
-       add     %r13,%r12                       # T1+=Sigma1(e)
-
-       add     %r15,%r12                       # T1+=Ch(e,f,g)
-       mov     %r11,%r13
-       mov     %r11,%r14
-
-       ror     $28,%r10
-       ror     $34,%r13
-       mov     %r11,%r15
-       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
-
-       xor     %r13,%r10
-       ror     $5,%r13
-       or      %rbx,%r14                       # a|c
-
-       xor     %r13,%r10                       # h=Sigma0(a)
-       and     %rbx,%r15                       # a&c
-       add     %r12,%rcx                       # d+=T1
-
-       and     %rax,%r14                       # (a|c)&b
-       add     %r12,%r10                       # h+=T1
-
-       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14,%r10                       # h+=Maj(a,b,c)
-       mov     8*10(%rsi),%r12
-       bswap   %r12
-       mov     %rcx,%r13
-       mov     %rcx,%r14
-       mov     %rdx,%r15
-
-       ror     $14,%r13
-       ror     $18,%r14
-       xor     %r8,%r15                        # f^g
-
-       xor     %r14,%r13
-       ror     $23,%r14
-       and     %rcx,%r15                       # (f^g)&e
-       mov     %r12,80(%rsp)
-
-       xor     %r14,%r13                       # Sigma1(e)
-       xor     %r8,%r15                        # Ch(e,f,g)=((f^g)&e)^g
-       add     %r9,%r12                        # T1+=h
-
-       mov     %r10,%r9
-       add     %r13,%r12                       # T1+=Sigma1(e)
-
-       add     %r15,%r12                       # T1+=Ch(e,f,g)
-       mov     %r10,%r13
-       mov     %r10,%r14
-
-       ror     $28,%r9
-       ror     $34,%r13
-       mov     %r10,%r15
-       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
-
-       xor     %r13,%r9
-       ror     $5,%r13
-       or      %rax,%r14                       # a|c
-
-       xor     %r13,%r9                        # h=Sigma0(a)
-       and     %rax,%r15                       # a&c
-       add     %r12,%rbx                       # d+=T1
-
-       and     %r11,%r14                       # (a|c)&b
-       add     %r12,%r9                        # h+=T1
-
-       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14,%r9                        # h+=Maj(a,b,c)
-       mov     8*11(%rsi),%r12
-       bswap   %r12
-       mov     %rbx,%r13
-       mov     %rbx,%r14
-       mov     %rcx,%r15
-
-       ror     $14,%r13
-       ror     $18,%r14
-       xor     %rdx,%r15                       # f^g
-
-       xor     %r14,%r13
-       ror     $23,%r14
-       and     %rbx,%r15                       # (f^g)&e
-       mov     %r12,88(%rsp)
-
-       xor     %r14,%r13                       # Sigma1(e)
-       xor     %rdx,%r15                       # Ch(e,f,g)=((f^g)&e)^g
-       add     %r8,%r12                        # T1+=h
-
-       mov     %r9,%r8
-       add     %r13,%r12                       # T1+=Sigma1(e)
-
-       add     %r15,%r12                       # T1+=Ch(e,f,g)
-       mov     %r9,%r13
-       mov     %r9,%r14
-
-       ror     $28,%r8
-       ror     $34,%r13
-       mov     %r9,%r15
-       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
-
-       xor     %r13,%r8
-       ror     $5,%r13
-       or      %r11,%r14                       # a|c
-
-       xor     %r13,%r8                        # h=Sigma0(a)
-       and     %r11,%r15                       # a&c
-       add     %r12,%rax                       # d+=T1
-
-       and     %r10,%r14                       # (a|c)&b
-       add     %r12,%r8                        # h+=T1
-
-       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14,%r8                        # h+=Maj(a,b,c)
-       mov     8*12(%rsi),%r12
-       bswap   %r12
-       mov     %rax,%r13
-       mov     %rax,%r14
-       mov     %rbx,%r15
-
-       ror     $14,%r13
-       ror     $18,%r14
-       xor     %rcx,%r15                       # f^g
-
-       xor     %r14,%r13
-       ror     $23,%r14
-       and     %rax,%r15                       # (f^g)&e
-       mov     %r12,96(%rsp)
-
-       xor     %r14,%r13                       # Sigma1(e)
-       xor     %rcx,%r15                       # Ch(e,f,g)=((f^g)&e)^g
-       add     %rdx,%r12                       # T1+=h
-
-       mov     %r8,%rdx
-       add     %r13,%r12                       # T1+=Sigma1(e)
-
-       add     %r15,%r12                       # T1+=Ch(e,f,g)
-       mov     %r8,%r13
-       mov     %r8,%r14
-
-       ror     $28,%rdx
-       ror     $34,%r13
-       mov     %r8,%r15
-       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
-
-       xor     %r13,%rdx
-       ror     $5,%r13
-       or      %r10,%r14                       # a|c
-
-       xor     %r13,%rdx                       # h=Sigma0(a)
-       and     %r10,%r15                       # a&c
-       add     %r12,%r11                       # d+=T1
-
-       and     %r9,%r14                        # (a|c)&b
-       add     %r12,%rdx                       # h+=T1
-
-       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14,%rdx                       # h+=Maj(a,b,c)
-       mov     8*13(%rsi),%r12
-       bswap   %r12
-       mov     %r11,%r13
-       mov     %r11,%r14
-       mov     %rax,%r15
-
-       ror     $14,%r13
-       ror     $18,%r14
-       xor     %rbx,%r15                       # f^g
-
-       xor     %r14,%r13
-       ror     $23,%r14
-       and     %r11,%r15                       # (f^g)&e
-       mov     %r12,104(%rsp)
-
-       xor     %r14,%r13                       # Sigma1(e)
-       xor     %rbx,%r15                       # Ch(e,f,g)=((f^g)&e)^g
-       add     %rcx,%r12                       # T1+=h
-
-       mov     %rdx,%rcx
-       add     %r13,%r12                       # T1+=Sigma1(e)
-
-       add     %r15,%r12                       # T1+=Ch(e,f,g)
-       mov     %rdx,%r13
-       mov     %rdx,%r14
-
-       ror     $28,%rcx
-       ror     $34,%r13
-       mov     %rdx,%r15
-       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
-
-       xor     %r13,%rcx
-       ror     $5,%r13
-       or      %r9,%r14                        # a|c
-
-       xor     %r13,%rcx                       # h=Sigma0(a)
-       and     %r9,%r15                        # a&c
-       add     %r12,%r10                       # d+=T1
-
-       and     %r8,%r14                        # (a|c)&b
-       add     %r12,%rcx                       # h+=T1
-
-       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14,%rcx                       # h+=Maj(a,b,c)
-       mov     8*14(%rsi),%r12
-       bswap   %r12
-       mov     %r10,%r13
-       mov     %r10,%r14
-       mov     %r11,%r15
-
-       ror     $14,%r13
-       ror     $18,%r14
-       xor     %rax,%r15                       # f^g
-
-       xor     %r14,%r13
-       ror     $23,%r14
-       and     %r10,%r15                       # (f^g)&e
-       mov     %r12,112(%rsp)
-
-       xor     %r14,%r13                       # Sigma1(e)
-       xor     %rax,%r15                       # Ch(e,f,g)=((f^g)&e)^g
-       add     %rbx,%r12                       # T1+=h
-
-       mov     %rcx,%rbx
-       add     %r13,%r12                       # T1+=Sigma1(e)
-
-       add     %r15,%r12                       # T1+=Ch(e,f,g)
-       mov     %rcx,%r13
-       mov     %rcx,%r14
-
-       ror     $28,%rbx
-       ror     $34,%r13
-       mov     %rcx,%r15
-       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
-
-       xor     %r13,%rbx
-       ror     $5,%r13
-       or      %r8,%r14                        # a|c
-
-       xor     %r13,%rbx                       # h=Sigma0(a)
-       and     %r8,%r15                        # a&c
-       add     %r12,%r9                        # d+=T1
-
-       and     %rdx,%r14                       # (a|c)&b
-       add     %r12,%rbx                       # h+=T1
-
-       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14,%rbx                       # h+=Maj(a,b,c)
-       mov     8*15(%rsi),%r12
-       bswap   %r12
-       mov     %r9,%r13
-       mov     %r9,%r14
-       mov     %r10,%r15
-
-       ror     $14,%r13
-       ror     $18,%r14
-       xor     %r11,%r15                       # f^g
-
-       xor     %r14,%r13
-       ror     $23,%r14
-       and     %r9,%r15                        # (f^g)&e
-       mov     %r12,120(%rsp)
-
-       xor     %r14,%r13                       # Sigma1(e)
-       xor     %r11,%r15                       # Ch(e,f,g)=((f^g)&e)^g
-       add     %rax,%r12                       # T1+=h
-
-       mov     %rbx,%rax
-       add     %r13,%r12                       # T1+=Sigma1(e)
-
-       add     %r15,%r12                       # T1+=Ch(e,f,g)
-       mov     %rbx,%r13
-       mov     %rbx,%r14
-
-       ror     $28,%rax
-       ror     $34,%r13
-       mov     %rbx,%r15
-       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
-
-       xor     %r13,%rax
-       ror     $5,%r13
-       or      %rdx,%r14                       # a|c
-
-       xor     %r13,%rax                       # h=Sigma0(a)
-       and     %rdx,%r15                       # a&c
-       add     %r12,%r8                        # d+=T1
-
-       and     %rcx,%r14                       # (a|c)&b
-       add     %r12,%rax                       # h+=T1
-
-       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14,%rax                       # h+=Maj(a,b,c)
-       jmp     .Lrounds_16_xx
-.balign        16
-.Lrounds_16_xx:
-       mov     8(%rsp),%r13
-       mov     112(%rsp),%r12
-
-       mov     %r13,%r15
-
-       shr     $7,%r13
-       ror     $1,%r15
-
-       xor     %r15,%r13
-       ror     $7,%r15
-
-       xor     %r15,%r13                       # sigma0(X[(i+1)&0xf])
-       mov     %r12,%r14
-
-       shr     $6,%r12
-       ror     $19,%r14
-
-       xor     %r14,%r12
-       ror     $42,%r14
-
-       xor     %r14,%r12                       # sigma1(X[(i+14)&0xf])
-
-       add     %r13,%r12
-
-       add     72(%rsp),%r12
-
-       add     0(%rsp),%r12
-       mov     %r8,%r13
-       mov     %r8,%r14
-       mov     %r9,%r15
-
-       ror     $14,%r13
-       ror     $18,%r14
-       xor     %r10,%r15                       # f^g
-
-       xor     %r14,%r13
-       ror     $23,%r14
-       and     %r8,%r15                        # (f^g)&e
-       mov     %r12,0(%rsp)
-
-       xor     %r14,%r13                       # Sigma1(e)
-       xor     %r10,%r15                       # Ch(e,f,g)=((f^g)&e)^g
-       add     %r11,%r12                       # T1+=h
-
-       mov     %rax,%r11
-       add     %r13,%r12                       # T1+=Sigma1(e)
-
-       add     %r15,%r12                       # T1+=Ch(e,f,g)
-       mov     %rax,%r13
-       mov     %rax,%r14
-
-       ror     $28,%r11
-       ror     $34,%r13
-       mov     %rax,%r15
-       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
-
-       xor     %r13,%r11
-       ror     $5,%r13
-       or      %rcx,%r14                       # a|c
-
-       xor     %r13,%r11                       # h=Sigma0(a)
-       and     %rcx,%r15                       # a&c
-       add     %r12,%rdx                       # d+=T1
-
-       and     %rbx,%r14                       # (a|c)&b
-       add     %r12,%r11                       # h+=T1
-
-       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14,%r11                       # h+=Maj(a,b,c)
-       mov     16(%rsp),%r13
-       mov     120(%rsp),%r12
-
-       mov     %r13,%r15
-
-       shr     $7,%r13
-       ror     $1,%r15
-
-       xor     %r15,%r13
-       ror     $7,%r15
-
-       xor     %r15,%r13                       # sigma0(X[(i+1)&0xf])
-       mov     %r12,%r14
-
-       shr     $6,%r12
-       ror     $19,%r14
-
-       xor     %r14,%r12
-       ror     $42,%r14
-
-       xor     %r14,%r12                       # sigma1(X[(i+14)&0xf])
-
-       add     %r13,%r12
-
-       add     80(%rsp),%r12
-
-       add     8(%rsp),%r12
-       mov     %rdx,%r13
-       mov     %rdx,%r14
-       mov     %r8,%r15
-
-       ror     $14,%r13
-       ror     $18,%r14
-       xor     %r9,%r15                        # f^g
-
-       xor     %r14,%r13
-       ror     $23,%r14
-       and     %rdx,%r15                       # (f^g)&e
-       mov     %r12,8(%rsp)
-
-       xor     %r14,%r13                       # Sigma1(e)
-       xor     %r9,%r15                        # Ch(e,f,g)=((f^g)&e)^g
-       add     %r10,%r12                       # T1+=h
-
-       mov     %r11,%r10
-       add     %r13,%r12                       # T1+=Sigma1(e)
-
-       add     %r15,%r12                       # T1+=Ch(e,f,g)
-       mov     %r11,%r13
-       mov     %r11,%r14
-
-       ror     $28,%r10
-       ror     $34,%r13
-       mov     %r11,%r15
-       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
-
-       xor     %r13,%r10
-       ror     $5,%r13
-       or      %rbx,%r14                       # a|c
-
-       xor     %r13,%r10                       # h=Sigma0(a)
-       and     %rbx,%r15                       # a&c
-       add     %r12,%rcx                       # d+=T1
-
-       and     %rax,%r14                       # (a|c)&b
-       add     %r12,%r10                       # h+=T1
-
-       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14,%r10                       # h+=Maj(a,b,c)
-       mov     24(%rsp),%r13
-       mov     0(%rsp),%r12
-
-       mov     %r13,%r15
-
-       shr     $7,%r13
-       ror     $1,%r15
-
-       xor     %r15,%r13
-       ror     $7,%r15
-
-       xor     %r15,%r13                       # sigma0(X[(i+1)&0xf])
-       mov     %r12,%r14
-
-       shr     $6,%r12
-       ror     $19,%r14
-
-       xor     %r14,%r12
-       ror     $42,%r14
-
-       xor     %r14,%r12                       # sigma1(X[(i+14)&0xf])
-
-       add     %r13,%r12
-
-       add     88(%rsp),%r12
-
-       add     16(%rsp),%r12
-       mov     %rcx,%r13
-       mov     %rcx,%r14
-       mov     %rdx,%r15
-
-       ror     $14,%r13
-       ror     $18,%r14
-       xor     %r8,%r15                        # f^g
-
-       xor     %r14,%r13
-       ror     $23,%r14
-       and     %rcx,%r15                       # (f^g)&e
-       mov     %r12,16(%rsp)
-
-       xor     %r14,%r13                       # Sigma1(e)
-       xor     %r8,%r15                        # Ch(e,f,g)=((f^g)&e)^g
-       add     %r9,%r12                        # T1+=h
-
-       mov     %r10,%r9
-       add     %r13,%r12                       # T1+=Sigma1(e)
-
-       add     %r15,%r12                       # T1+=Ch(e,f,g)
-       mov     %r10,%r13
-       mov     %r10,%r14
-
-       ror     $28,%r9
-       ror     $34,%r13
-       mov     %r10,%r15
-       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
-
-       xor     %r13,%r9
-       ror     $5,%r13
-       or      %rax,%r14                       # a|c
-
-       xor     %r13,%r9                        # h=Sigma0(a)
-       and     %rax,%r15                       # a&c
-       add     %r12,%rbx                       # d+=T1
-
-       and     %r11,%r14                       # (a|c)&b
-       add     %r12,%r9                        # h+=T1
-
-       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14,%r9                        # h+=Maj(a,b,c)
-       mov     32(%rsp),%r13
-       mov     8(%rsp),%r12
-
-       mov     %r13,%r15
-
-       shr     $7,%r13
-       ror     $1,%r15
-
-       xor     %r15,%r13
-       ror     $7,%r15
-
-       xor     %r15,%r13                       # sigma0(X[(i+1)&0xf])
-       mov     %r12,%r14
-
-       shr     $6,%r12
-       ror     $19,%r14
-
-       xor     %r14,%r12
-       ror     $42,%r14
-
-       xor     %r14,%r12                       # sigma1(X[(i+14)&0xf])
-
-       add     %r13,%r12
-
-       add     96(%rsp),%r12
-
-       add     24(%rsp),%r12
-       mov     %rbx,%r13
-       mov     %rbx,%r14
-       mov     %rcx,%r15
-
-       ror     $14,%r13
-       ror     $18,%r14
-       xor     %rdx,%r15                       # f^g
-
-       xor     %r14,%r13
-       ror     $23,%r14
-       and     %rbx,%r15                       # (f^g)&e
-       mov     %r12,24(%rsp)
-
-       xor     %r14,%r13                       # Sigma1(e)
-       xor     %rdx,%r15                       # Ch(e,f,g)=((f^g)&e)^g
-       add     %r8,%r12                        # T1+=h
-
-       mov     %r9,%r8
-       add     %r13,%r12                       # T1+=Sigma1(e)
-
-       add     %r15,%r12                       # T1+=Ch(e,f,g)
-       mov     %r9,%r13
-       mov     %r9,%r14
-
-       ror     $28,%r8
-       ror     $34,%r13
-       mov     %r9,%r15
-       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
-
-       xor     %r13,%r8
-       ror     $5,%r13
-       or      %r11,%r14                       # a|c
-
-       xor     %r13,%r8                        # h=Sigma0(a)
-       and     %r11,%r15                       # a&c
-       add     %r12,%rax                       # d+=T1
-
-       and     %r10,%r14                       # (a|c)&b
-       add     %r12,%r8                        # h+=T1
-
-       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14,%r8                        # h+=Maj(a,b,c)
-       mov     40(%rsp),%r13
-       mov     16(%rsp),%r12
-
-       mov     %r13,%r15
-
-       shr     $7,%r13
-       ror     $1,%r15
-
-       xor     %r15,%r13
-       ror     $7,%r15
-
-       xor     %r15,%r13                       # sigma0(X[(i+1)&0xf])
-       mov     %r12,%r14
-
-       shr     $6,%r12
-       ror     $19,%r14
-
-       xor     %r14,%r12
-       ror     $42,%r14
-
-       xor     %r14,%r12                       # sigma1(X[(i+14)&0xf])
-
-       add     %r13,%r12
-
-       add     104(%rsp),%r12
-
-       add     32(%rsp),%r12
-       mov     %rax,%r13
-       mov     %rax,%r14
-       mov     %rbx,%r15
-
-       ror     $14,%r13
-       ror     $18,%r14
-       xor     %rcx,%r15                       # f^g
-
-       xor     %r14,%r13
-       ror     $23,%r14
-       and     %rax,%r15                       # (f^g)&e
-       mov     %r12,32(%rsp)
-
-       xor     %r14,%r13                       # Sigma1(e)
-       xor     %rcx,%r15                       # Ch(e,f,g)=((f^g)&e)^g
-       add     %rdx,%r12                       # T1+=h
-
-       mov     %r8,%rdx
-       add     %r13,%r12                       # T1+=Sigma1(e)
-
-       add     %r15,%r12                       # T1+=Ch(e,f,g)
-       mov     %r8,%r13
-       mov     %r8,%r14
-
-       ror     $28,%rdx
-       ror     $34,%r13
-       mov     %r8,%r15
-       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
-
-       xor     %r13,%rdx
-       ror     $5,%r13
-       or      %r10,%r14                       # a|c
-
-       xor     %r13,%rdx                       # h=Sigma0(a)
-       and     %r10,%r15                       # a&c
-       add     %r12,%r11                       # d+=T1
-
-       and     %r9,%r14                        # (a|c)&b
-       add     %r12,%rdx                       # h+=T1
-
-       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14,%rdx                       # h+=Maj(a,b,c)
-       mov     48(%rsp),%r13
-       mov     24(%rsp),%r12
-
-       mov     %r13,%r15
-
-       shr     $7,%r13
-       ror     $1,%r15
-
-       xor     %r15,%r13
-       ror     $7,%r15
-
-       xor     %r15,%r13                       # sigma0(X[(i+1)&0xf])
-       mov     %r12,%r14
-
-       shr     $6,%r12
-       ror     $19,%r14
-
-       xor     %r14,%r12
-       ror     $42,%r14
-
-       xor     %r14,%r12                       # sigma1(X[(i+14)&0xf])
-
-       add     %r13,%r12
-
-       add     112(%rsp),%r12
-
-       add     40(%rsp),%r12
-       mov     %r11,%r13
-       mov     %r11,%r14
-       mov     %rax,%r15
-
-       ror     $14,%r13
-       ror     $18,%r14
-       xor     %rbx,%r15                       # f^g
-
-       xor     %r14,%r13
-       ror     $23,%r14
-       and     %r11,%r15                       # (f^g)&e
-       mov     %r12,40(%rsp)
-
-       xor     %r14,%r13                       # Sigma1(e)
-       xor     %rbx,%r15                       # Ch(e,f,g)=((f^g)&e)^g
-       add     %rcx,%r12                       # T1+=h
-
-       mov     %rdx,%rcx
-       add     %r13,%r12                       # T1+=Sigma1(e)
-
-       add     %r15,%r12                       # T1+=Ch(e,f,g)
-       mov     %rdx,%r13
-       mov     %rdx,%r14
-
-       ror     $28,%rcx
-       ror     $34,%r13
-       mov     %rdx,%r15
-       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
-
-       xor     %r13,%rcx
-       ror     $5,%r13
-       or      %r9,%r14                        # a|c
-
-       xor     %r13,%rcx                       # h=Sigma0(a)
-       and     %r9,%r15                        # a&c
-       add     %r12,%r10                       # d+=T1
-
-       and     %r8,%r14                        # (a|c)&b
-       add     %r12,%rcx                       # h+=T1
-
-       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14,%rcx                       # h+=Maj(a,b,c)
-       mov     56(%rsp),%r13
-       mov     32(%rsp),%r12
-
-       mov     %r13,%r15
-
-       shr     $7,%r13
-       ror     $1,%r15
-
-       xor     %r15,%r13
-       ror     $7,%r15
-
-       xor     %r15,%r13                       # sigma0(X[(i+1)&0xf])
-       mov     %r12,%r14
-
-       shr     $6,%r12
-       ror     $19,%r14
-
-       xor     %r14,%r12
-       ror     $42,%r14
-
-       xor     %r14,%r12                       # sigma1(X[(i+14)&0xf])
-
-       add     %r13,%r12
-
-       add     120(%rsp),%r12
-
-       add     48(%rsp),%r12
-       mov     %r10,%r13
-       mov     %r10,%r14
-       mov     %r11,%r15
-
-       ror     $14,%r13
-       ror     $18,%r14
-       xor     %rax,%r15                       # f^g
-
-       xor     %r14,%r13
-       ror     $23,%r14
-       and     %r10,%r15                       # (f^g)&e
-       mov     %r12,48(%rsp)
-
-       xor     %r14,%r13                       # Sigma1(e)
-       xor     %rax,%r15                       # Ch(e,f,g)=((f^g)&e)^g
-       add     %rbx,%r12                       # T1+=h
-
-       mov     %rcx,%rbx
-       add     %r13,%r12                       # T1+=Sigma1(e)
-
-       add     %r15,%r12                       # T1+=Ch(e,f,g)
-       mov     %rcx,%r13
-       mov     %rcx,%r14
-
-       ror     $28,%rbx
-       ror     $34,%r13
-       mov     %rcx,%r15
-       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
-
-       xor     %r13,%rbx
-       ror     $5,%r13
-       or      %r8,%r14                        # a|c
-
-       xor     %r13,%rbx                       # h=Sigma0(a)
-       and     %r8,%r15                        # a&c
-       add     %r12,%r9                        # d+=T1
-
-       and     %rdx,%r14                       # (a|c)&b
-       add     %r12,%rbx                       # h+=T1
-
-       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14,%rbx                       # h+=Maj(a,b,c)
-       mov     64(%rsp),%r13
-       mov     40(%rsp),%r12
-
-       mov     %r13,%r15
-
-       shr     $7,%r13
-       ror     $1,%r15
-
-       xor     %r15,%r13
-       ror     $7,%r15
-
-       xor     %r15,%r13                       # sigma0(X[(i+1)&0xf])
-       mov     %r12,%r14
-
-       shr     $6,%r12
-       ror     $19,%r14
-
-       xor     %r14,%r12
-       ror     $42,%r14
-
-       xor     %r14,%r12                       # sigma1(X[(i+14)&0xf])
-
-       add     %r13,%r12
-
-       add     0(%rsp),%r12
-
-       add     56(%rsp),%r12
-       mov     %r9,%r13
-       mov     %r9,%r14
-       mov     %r10,%r15
-
-       ror     $14,%r13
-       ror     $18,%r14
-       xor     %r11,%r15                       # f^g
-
-       xor     %r14,%r13
-       ror     $23,%r14
-       and     %r9,%r15                        # (f^g)&e
-       mov     %r12,56(%rsp)
-
-       xor     %r14,%r13                       # Sigma1(e)
-       xor     %r11,%r15                       # Ch(e,f,g)=((f^g)&e)^g
-       add     %rax,%r12                       # T1+=h
-
-       mov     %rbx,%rax
-       add     %r13,%r12                       # T1+=Sigma1(e)
-
-       add     %r15,%r12                       # T1+=Ch(e,f,g)
-       mov     %rbx,%r13
-       mov     %rbx,%r14
-
-       ror     $28,%rax
-       ror     $34,%r13
-       mov     %rbx,%r15
-       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
-
-       xor     %r13,%rax
-       ror     $5,%r13
-       or      %rdx,%r14                       # a|c
-
-       xor     %r13,%rax                       # h=Sigma0(a)
-       and     %rdx,%r15                       # a&c
-       add     %r12,%r8                        # d+=T1
-
-       and     %rcx,%r14                       # (a|c)&b
-       add     %r12,%rax                       # h+=T1
-
-       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14,%rax                       # h+=Maj(a,b,c)
-       mov     72(%rsp),%r13
-       mov     48(%rsp),%r12
-
-       mov     %r13,%r15
-
-       shr     $7,%r13
-       ror     $1,%r15
-
-       xor     %r15,%r13
-       ror     $7,%r15
-
-       xor     %r15,%r13                       # sigma0(X[(i+1)&0xf])
-       mov     %r12,%r14
-
-       shr     $6,%r12
-       ror     $19,%r14
-
-       xor     %r14,%r12
-       ror     $42,%r14
-
-       xor     %r14,%r12                       # sigma1(X[(i+14)&0xf])
-
-       add     %r13,%r12
-
-       add     8(%rsp),%r12
-
-       add     64(%rsp),%r12
-       mov     %r8,%r13
-       mov     %r8,%r14
-       mov     %r9,%r15
-
-       ror     $14,%r13
-       ror     $18,%r14
-       xor     %r10,%r15                       # f^g
-
-       xor     %r14,%r13
-       ror     $23,%r14
-       and     %r8,%r15                        # (f^g)&e
-       mov     %r12,64(%rsp)
-
-       xor     %r14,%r13                       # Sigma1(e)
-       xor     %r10,%r15                       # Ch(e,f,g)=((f^g)&e)^g
-       add     %r11,%r12                       # T1+=h
-
-       mov     %rax,%r11
-       add     %r13,%r12                       # T1+=Sigma1(e)
-
-       add     %r15,%r12                       # T1+=Ch(e,f,g)
-       mov     %rax,%r13
-       mov     %rax,%r14
-
-       ror     $28,%r11
-       ror     $34,%r13
-       mov     %rax,%r15
-       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
-
-       xor     %r13,%r11
-       ror     $5,%r13
-       or      %rcx,%r14                       # a|c
-
-       xor     %r13,%r11                       # h=Sigma0(a)
-       and     %rcx,%r15                       # a&c
-       add     %r12,%rdx                       # d+=T1
-
-       and     %rbx,%r14                       # (a|c)&b
-       add     %r12,%r11                       # h+=T1
-
-       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14,%r11                       # h+=Maj(a,b,c)
-       mov     80(%rsp),%r13
-       mov     56(%rsp),%r12
-
-       mov     %r13,%r15
-
-       shr     $7,%r13
-       ror     $1,%r15
-
-       xor     %r15,%r13
-       ror     $7,%r15
-
-       xor     %r15,%r13                       # sigma0(X[(i+1)&0xf])
-       mov     %r12,%r14
-
-       shr     $6,%r12
-       ror     $19,%r14
-
-       xor     %r14,%r12
-       ror     $42,%r14
-
-       xor     %r14,%r12                       # sigma1(X[(i+14)&0xf])
-
-       add     %r13,%r12
-
-       add     16(%rsp),%r12
-
-       add     72(%rsp),%r12
-       mov     %rdx,%r13
-       mov     %rdx,%r14
-       mov     %r8,%r15
-
-       ror     $14,%r13
-       ror     $18,%r14
-       xor     %r9,%r15                        # f^g
-
-       xor     %r14,%r13
-       ror     $23,%r14
-       and     %rdx,%r15                       # (f^g)&e
-       mov     %r12,72(%rsp)
-
-       xor     %r14,%r13                       # Sigma1(e)
-       xor     %r9,%r15                        # Ch(e,f,g)=((f^g)&e)^g
-       add     %r10,%r12                       # T1+=h
-
-       mov     %r11,%r10
-       add     %r13,%r12                       # T1+=Sigma1(e)
-
-       add     %r15,%r12                       # T1+=Ch(e,f,g)
-       mov     %r11,%r13
-       mov     %r11,%r14
-
-       ror     $28,%r10
-       ror     $34,%r13
-       mov     %r11,%r15
-       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
-
-       xor     %r13,%r10
-       ror     $5,%r13
-       or      %rbx,%r14                       # a|c
-
-       xor     %r13,%r10                       # h=Sigma0(a)
-       and     %rbx,%r15                       # a&c
-       add     %r12,%rcx                       # d+=T1
-
-       and     %rax,%r14                       # (a|c)&b
-       add     %r12,%r10                       # h+=T1
-
-       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14,%r10                       # h+=Maj(a,b,c)
-       mov     88(%rsp),%r13
-       mov     64(%rsp),%r12
-
-       mov     %r13,%r15
-
-       shr     $7,%r13
-       ror     $1,%r15
-
-       xor     %r15,%r13
-       ror     $7,%r15
-
-       xor     %r15,%r13                       # sigma0(X[(i+1)&0xf])
-       mov     %r12,%r14
-
-       shr     $6,%r12
-       ror     $19,%r14
-
-       xor     %r14,%r12
-       ror     $42,%r14
-
-       xor     %r14,%r12                       # sigma1(X[(i+14)&0xf])
-
-       add     %r13,%r12
-
-       add     24(%rsp),%r12
-
-       add     80(%rsp),%r12
-       mov     %rcx,%r13
-       mov     %rcx,%r14
-       mov     %rdx,%r15
-
-       ror     $14,%r13
-       ror     $18,%r14
-       xor     %r8,%r15                        # f^g
-
-       xor     %r14,%r13
-       ror     $23,%r14
-       and     %rcx,%r15                       # (f^g)&e
-       mov     %r12,80(%rsp)
-
-       xor     %r14,%r13                       # Sigma1(e)
-       xor     %r8,%r15                        # Ch(e,f,g)=((f^g)&e)^g
-       add     %r9,%r12                        # T1+=h
-
-       mov     %r10,%r9
-       add     %r13,%r12                       # T1+=Sigma1(e)
-
-       add     %r15,%r12                       # T1+=Ch(e,f,g)
-       mov     %r10,%r13
-       mov     %r10,%r14
-
-       ror     $28,%r9
-       ror     $34,%r13
-       mov     %r10,%r15
-       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
-
-       xor     %r13,%r9
-       ror     $5,%r13
-       or      %rax,%r14                       # a|c
-
-       xor     %r13,%r9                        # h=Sigma0(a)
-       and     %rax,%r15                       # a&c
-       add     %r12,%rbx                       # d+=T1
-
-       and     %r11,%r14                       # (a|c)&b
-       add     %r12,%r9                        # h+=T1
-
-       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14,%r9                        # h+=Maj(a,b,c)
-       mov     96(%rsp),%r13
-       mov     72(%rsp),%r12
-
-       mov     %r13,%r15
-
-       shr     $7,%r13
-       ror     $1,%r15
-
-       xor     %r15,%r13
-       ror     $7,%r15
-
-       xor     %r15,%r13                       # sigma0(X[(i+1)&0xf])
-       mov     %r12,%r14
-
-       shr     $6,%r12
-       ror     $19,%r14
-
-       xor     %r14,%r12
-       ror     $42,%r14
-
-       xor     %r14,%r12                       # sigma1(X[(i+14)&0xf])
-
-       add     %r13,%r12
-
-       add     32(%rsp),%r12
-
-       add     88(%rsp),%r12
-       mov     %rbx,%r13
-       mov     %rbx,%r14
-       mov     %rcx,%r15
-
-       ror     $14,%r13
-       ror     $18,%r14
-       xor     %rdx,%r15                       # f^g
-
-       xor     %r14,%r13
-       ror     $23,%r14
-       and     %rbx,%r15                       # (f^g)&e
-       mov     %r12,88(%rsp)
-
-       xor     %r14,%r13                       # Sigma1(e)
-       xor     %rdx,%r15                       # Ch(e,f,g)=((f^g)&e)^g
-       add     %r8,%r12                        # T1+=h
-
-       mov     %r9,%r8
-       add     %r13,%r12                       # T1+=Sigma1(e)
-
-       add     %r15,%r12                       # T1+=Ch(e,f,g)
-       mov     %r9,%r13
-       mov     %r9,%r14
-
-       ror     $28,%r8
-       ror     $34,%r13
-       mov     %r9,%r15
-       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
-
-       xor     %r13,%r8
-       ror     $5,%r13
-       or      %r11,%r14                       # a|c
-
-       xor     %r13,%r8                        # h=Sigma0(a)
-       and     %r11,%r15                       # a&c
-       add     %r12,%rax                       # d+=T1
-
-       and     %r10,%r14                       # (a|c)&b
-       add     %r12,%r8                        # h+=T1
-
-       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14,%r8                        # h+=Maj(a,b,c)
-       mov     104(%rsp),%r13
-       mov     80(%rsp),%r12
-
-       mov     %r13,%r15
-
-       shr     $7,%r13
-       ror     $1,%r15
-
-       xor     %r15,%r13
-       ror     $7,%r15
-
-       xor     %r15,%r13                       # sigma0(X[(i+1)&0xf])
-       mov     %r12,%r14
-
-       shr     $6,%r12
-       ror     $19,%r14
-
-       xor     %r14,%r12
-       ror     $42,%r14
-
-       xor     %r14,%r12                       # sigma1(X[(i+14)&0xf])
-
-       add     %r13,%r12
-
-       add     40(%rsp),%r12
-
-       add     96(%rsp),%r12
-       mov     %rax,%r13
-       mov     %rax,%r14
-       mov     %rbx,%r15
-
-       ror     $14,%r13
-       ror     $18,%r14
-       xor     %rcx,%r15                       # f^g
-
-       xor     %r14,%r13
-       ror     $23,%r14
-       and     %rax,%r15                       # (f^g)&e
-       mov     %r12,96(%rsp)
-
-       xor     %r14,%r13                       # Sigma1(e)
-       xor     %rcx,%r15                       # Ch(e,f,g)=((f^g)&e)^g
-       add     %rdx,%r12                       # T1+=h
-
-       mov     %r8,%rdx
-       add     %r13,%r12                       # T1+=Sigma1(e)
-
-       add     %r15,%r12                       # T1+=Ch(e,f,g)
-       mov     %r8,%r13
-       mov     %r8,%r14
-
-       ror     $28,%rdx
-       ror     $34,%r13
-       mov     %r8,%r15
-       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
-
-       xor     %r13,%rdx
-       ror     $5,%r13
-       or      %r10,%r14                       # a|c
-
-       xor     %r13,%rdx                       # h=Sigma0(a)
-       and     %r10,%r15                       # a&c
-       add     %r12,%r11                       # d+=T1
-
-       and     %r9,%r14                        # (a|c)&b
-       add     %r12,%rdx                       # h+=T1
-
-       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14,%rdx                       # h+=Maj(a,b,c)
-       mov     112(%rsp),%r13
-       mov     88(%rsp),%r12
-
-       mov     %r13,%r15
-
-       shr     $7,%r13
-       ror     $1,%r15
-
-       xor     %r15,%r13
-       ror     $7,%r15
-
-       xor     %r15,%r13                       # sigma0(X[(i+1)&0xf])
-       mov     %r12,%r14
-
-       shr     $6,%r12
-       ror     $19,%r14
-
-       xor     %r14,%r12
-       ror     $42,%r14
-
-       xor     %r14,%r12                       # sigma1(X[(i+14)&0xf])
-
-       add     %r13,%r12
-
-       add     48(%rsp),%r12
-
-       add     104(%rsp),%r12
-       mov     %r11,%r13
-       mov     %r11,%r14
-       mov     %rax,%r15
-
-       ror     $14,%r13
-       ror     $18,%r14
-       xor     %rbx,%r15                       # f^g
-
-       xor     %r14,%r13
-       ror     $23,%r14
-       and     %r11,%r15                       # (f^g)&e
-       mov     %r12,104(%rsp)
-
-       xor     %r14,%r13                       # Sigma1(e)
-       xor     %rbx,%r15                       # Ch(e,f,g)=((f^g)&e)^g
-       add     %rcx,%r12                       # T1+=h
-
-       mov     %rdx,%rcx
-       add     %r13,%r12                       # T1+=Sigma1(e)
-
-       add     %r15,%r12                       # T1+=Ch(e,f,g)
-       mov     %rdx,%r13
-       mov     %rdx,%r14
-
-       ror     $28,%rcx
-       ror     $34,%r13
-       mov     %rdx,%r15
-       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
-
-       xor     %r13,%rcx
-       ror     $5,%r13
-       or      %r9,%r14                        # a|c
-
-       xor     %r13,%rcx                       # h=Sigma0(a)
-       and     %r9,%r15                        # a&c
-       add     %r12,%r10                       # d+=T1
-
-       and     %r8,%r14                        # (a|c)&b
-       add     %r12,%rcx                       # h+=T1
-
-       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14,%rcx                       # h+=Maj(a,b,c)
-       mov     120(%rsp),%r13
-       mov     96(%rsp),%r12
-
-       mov     %r13,%r15
-
-       shr     $7,%r13
-       ror     $1,%r15
-
-       xor     %r15,%r13
-       ror     $7,%r15
-
-       xor     %r15,%r13                       # sigma0(X[(i+1)&0xf])
-       mov     %r12,%r14
-
-       shr     $6,%r12
-       ror     $19,%r14
-
-       xor     %r14,%r12
-       ror     $42,%r14
-
-       xor     %r14,%r12                       # sigma1(X[(i+14)&0xf])
-
-       add     %r13,%r12
-
-       add     56(%rsp),%r12
-
-       add     112(%rsp),%r12
-       mov     %r10,%r13
-       mov     %r10,%r14
-       mov     %r11,%r15
-
-       ror     $14,%r13
-       ror     $18,%r14
-       xor     %rax,%r15                       # f^g
-
-       xor     %r14,%r13
-       ror     $23,%r14
-       and     %r10,%r15                       # (f^g)&e
-       mov     %r12,112(%rsp)
-
-       xor     %r14,%r13                       # Sigma1(e)
-       xor     %rax,%r15                       # Ch(e,f,g)=((f^g)&e)^g
-       add     %rbx,%r12                       # T1+=h
-
-       mov     %rcx,%rbx
-       add     %r13,%r12                       # T1+=Sigma1(e)
-
-       add     %r15,%r12                       # T1+=Ch(e,f,g)
-       mov     %rcx,%r13
-       mov     %rcx,%r14
-
-       ror     $28,%rbx
-       ror     $34,%r13
-       mov     %rcx,%r15
-       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
-
-       xor     %r13,%rbx
-       ror     $5,%r13
-       or      %r8,%r14                        # a|c
-
-       xor     %r13,%rbx                       # h=Sigma0(a)
-       and     %r8,%r15                        # a&c
-       add     %r12,%r9                        # d+=T1
-
-       and     %rdx,%r14                       # (a|c)&b
-       add     %r12,%rbx                       # h+=T1
-
-       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14,%rbx                       # h+=Maj(a,b,c)
-       mov     0(%rsp),%r13
-       mov     104(%rsp),%r12
-
-       mov     %r13,%r15
-
-       shr     $7,%r13
-       ror     $1,%r15
-
-       xor     %r15,%r13
-       ror     $7,%r15
-
-       xor     %r15,%r13                       # sigma0(X[(i+1)&0xf])
-       mov     %r12,%r14
-
-       shr     $6,%r12
-       ror     $19,%r14
-
-       xor     %r14,%r12
-       ror     $42,%r14
-
-       xor     %r14,%r12                       # sigma1(X[(i+14)&0xf])
-
-       add     %r13,%r12
-
-       add     64(%rsp),%r12
-
-       add     120(%rsp),%r12
-       mov     %r9,%r13
-       mov     %r9,%r14
-       mov     %r10,%r15
-
-       ror     $14,%r13
-       ror     $18,%r14
-       xor     %r11,%r15                       # f^g
-
-       xor     %r14,%r13
-       ror     $23,%r14
-       and     %r9,%r15                        # (f^g)&e
-       mov     %r12,120(%rsp)
-
-       xor     %r14,%r13                       # Sigma1(e)
-       xor     %r11,%r15                       # Ch(e,f,g)=((f^g)&e)^g
-       add     %rax,%r12                       # T1+=h
-
-       mov     %rbx,%rax
-       add     %r13,%r12                       # T1+=Sigma1(e)
-
-       add     %r15,%r12                       # T1+=Ch(e,f,g)
-       mov     %rbx,%r13
-       mov     %rbx,%r14
-
-       ror     $28,%rax
-       ror     $34,%r13
-       mov     %rbx,%r15
-       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
-
-       xor     %r13,%rax
-       ror     $5,%r13
-       or      %rdx,%r14                       # a|c
-
-       xor     %r13,%rax                       # h=Sigma0(a)
-       and     %rdx,%r15                       # a&c
-       add     %r12,%r8                        # d+=T1
-
-       and     %rcx,%r14                       # (a|c)&b
-       add     %r12,%rax                       # h+=T1
-
-       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
-       lea     1(%rdi),%rdi    # round++
-
-       add     %r14,%rax                       # h+=Maj(a,b,c)
-       cmp     $80,%rdi
-       jb      .Lrounds_16_xx
-
-       mov     16*8+0*8(%rsp),%rdi
-       lea     16*8(%rsi),%rsi
-
-       add     8*0(%rdi),%rax
-       add     8*1(%rdi),%rbx
-       add     8*2(%rdi),%rcx
-       add     8*3(%rdi),%rdx
-       add     8*4(%rdi),%r8
-       add     8*5(%rdi),%r9
-       add     8*6(%rdi),%r10
-       add     8*7(%rdi),%r11
-
-       cmp     16*8+2*8(%rsp),%rsi
-
-       mov     %rax,8*0(%rdi)
-       mov     %rbx,8*1(%rdi)
-       mov     %rcx,8*2(%rdi)
-       mov     %rdx,8*3(%rdi)
-       mov     %r8,8*4(%rdi)
-       mov     %r9,8*5(%rdi)
-       mov     %r10,8*6(%rdi)
-       mov     %r11,8*7(%rdi)
-       jb      .Lloop
-
-       mov     16*8+3*8(%rsp),%rsp
-.cfi_def_cfa   %rsp,56
-       pop     %r15
-.cfi_adjust_cfa_offset -8
-.cfi_restore   %r15
-       pop     %r14
-.cfi_adjust_cfa_offset -8
-.cfi_restore   %r14
-       pop     %r13
-.cfi_adjust_cfa_offset -8
-.cfi_restore   %r13
-       pop     %r12
-.cfi_adjust_cfa_offset -8
-.cfi_restore   %r12
-       pop     %rbp
-.cfi_adjust_cfa_offset -8
-.cfi_restore   %rbp
-       pop     %rbx
-.cfi_adjust_cfa_offset -8
-.cfi_restore   %rbx
-
-       RET
-.cfi_endproc
-SET_SIZE(SHA512TransformBlocks)
-
-SECTION_STATIC
-.balign        64
-SET_OBJ(K512)
-K512:
-       .quad   0x428a2f98d728ae22,0x7137449123ef65cd
-       .quad   0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
-       .quad   0x3956c25bf348b538,0x59f111f1b605d019
-       .quad   0x923f82a4af194f9b,0xab1c5ed5da6d8118
-       .quad   0xd807aa98a3030242,0x12835b0145706fbe
-       .quad   0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
-       .quad   0x72be5d74f27b896f,0x80deb1fe3b1696b1
-       .quad   0x9bdc06a725c71235,0xc19bf174cf692694
-       .quad   0xe49b69c19ef14ad2,0xefbe4786384f25e3
-       .quad   0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
-       .quad   0x2de92c6f592b0275,0x4a7484aa6ea6e483
-       .quad   0x5cb0a9dcbd41fbd4,0x76f988da831153b5
-       .quad   0x983e5152ee66dfab,0xa831c66d2db43210
-       .quad   0xb00327c898fb213f,0xbf597fc7beef0ee4
-       .quad   0xc6e00bf33da88fc2,0xd5a79147930aa725
-       .quad   0x06ca6351e003826f,0x142929670a0e6e70
-       .quad   0x27b70a8546d22ffc,0x2e1b21385c26c926
-       .quad   0x4d2c6dfc5ac42aed,0x53380d139d95b3df
-       .quad   0x650a73548baf63de,0x766a0abb3c77b2a8
-       .quad   0x81c2c92e47edaee6,0x92722c851482353b
-       .quad   0xa2bfe8a14cf10364,0xa81a664bbc423001
-       .quad   0xc24b8b70d0f89791,0xc76c51a30654be30
-       .quad   0xd192e819d6ef5218,0xd69906245565a910
-       .quad   0xf40e35855771202a,0x106aa07032bbd1b8
-       .quad   0x19a4c116b8d2d0c8,0x1e376c085141ab53
-       .quad   0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
-       .quad   0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
-       .quad   0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
-       .quad   0x748f82ee5defb2fc,0x78a5636f43172f60
-       .quad   0x84c87814a1f0ab72,0x8cc702081a6439ec
-       .quad   0x90befffa23631e28,0xa4506cebde82bde9
-       .quad   0xbef9a3f7b2c67915,0xc67178f2e372532b
-       .quad   0xca273eceea26619c,0xd186b8c721c0c207
-       .quad   0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
-       .quad   0x06f067aa72176fba,0x0a637dc5a2c898a6
-       .quad   0x113f9804bef90dae,0x1b710b35131c471b
-       .quad   0x28db77f523047d84,0x32caab7b40c72493
-       .quad   0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
-       .quad   0x4cc5d4becb3e42b6,0x597f299cfc657e2a
-       .quad   0x5fcb6fab3ad6faec,0x6c44198c4a475817
-#endif /* !lint && !__lint */
-
-#if defined(__ELF__)
-.section .note.GNU-stack,"",%progbits
-#endif
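
Each repeated block of instructions in the deleted SHA512TransformBlocks assembly above is one round of the SHA-512 compression function; the shr/ror/xor runs build the sigma0/sigma1 message-schedule terms for rounds 16-79, and the ror $14/$18/$23 and ror $28/$34/$5 chains accumulate to Sigma1(e) (rotates 14, 18, 41) and Sigma0(a) (rotates 28, 34, 39). A minimal C sketch of that textbook round from FIPS 180-4, for orientation only (ROTR64, sha512_round and the s[8] layout are illustrative names, not code from the removed file):

    #include <stdint.h>

    #define ROTR64(x, n)    (((x) >> (n)) | ((x) << (64 - (n))))

    /* One SHA-512 round; Kt is K512[round], Wt is the message-schedule word. */
    static inline void
    sha512_round(uint64_t s[8], uint64_t Kt, uint64_t Wt)
    {
            uint64_t a = s[0], b = s[1], c = s[2], d = s[3];
            uint64_t e = s[4], f = s[5], g = s[6], h = s[7];

            /* Sigma1(e) and Ch(e,f,g) = ((f^g)&e)^g, as in the comments above. */
            uint64_t S1 = ROTR64(e, 14) ^ ROTR64(e, 18) ^ ROTR64(e, 41);
            uint64_t ch = ((f ^ g) & e) ^ g;
            uint64_t T1 = h + S1 + ch + Kt + Wt;

            /* Sigma0(a) and Maj(a,b,c) = ((a|c)&b)|(a&c), as commented above. */
            uint64_t S0 = ROTR64(a, 28) ^ ROTR64(a, 34) ^ ROTR64(a, 39);
            uint64_t maj = ((a | c) & b) | (a & c);

            /* d += T1; h = T1 + Sigma0(a) + Maj(a,b,c); then rotate the roles. */
            s[7] = g; s[6] = f; s[5] = e; s[4] = d + T1;
            s[3] = c; s[2] = b; s[1] = a; s[0] = T1 + S0 + maj;
    }

The assembly reaches the same result without moving any data by renaming which register plays a through h in each unrolled round.
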
diff --git a/module/icp/include/sha2/sha2_consts.h b/module/icp/include/sha2/sha2_consts.h
deleted file mode 100644 (file)
index b33ddf8..0000000
+++ /dev/null
@@ -1,219 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or https://opensource.org/licenses/CDDL-1.0.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
- */
-
-#ifndef        _SYS_SHA2_CONSTS_H
-#define        _SYS_SHA2_CONSTS_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * Loading 32-bit constants on SPARC is expensive since it involves both
- * a `sethi' and an `or'.  Thus, we instead use `ld' to load the constants
- * from an array called `sha2_consts'.  However, on Intel (and perhaps other
- * processors), it is cheaper to load the constant directly.  Thus, the C
- * code in the SHA transform functions uses the macro SHA2_CONST(), which
- * either expands to a constant or an array reference, depending on the
- * architecture the code is being compiled for.
- *
- * The SHA512 constants are also used for SHA384.
- */
-
-#include <sys/types.h>         /* uint32_t */
-
-extern const uint32_t  sha256_consts[];
-extern const uint64_t  sha512_consts[];
-
-#if    defined(__sparc)
-#define        SHA256_CONST(x)         (sha256_consts[x])
-#define        SHA512_CONST(x)         (sha512_consts[x])
-#else
-#define        SHA256_CONST(x)         (SHA256_CONST_ ## x)
-#define        SHA512_CONST(x)         (SHA512_CONST_ ## x)
-#endif
-
-/* constants, as provided in FIPS 180-2 */
-
-#define        SHA256_CONST_0          0x428a2f98U
-#define        SHA256_CONST_1          0x71374491U
-#define        SHA256_CONST_2          0xb5c0fbcfU
-#define        SHA256_CONST_3          0xe9b5dba5U
-#define        SHA256_CONST_4          0x3956c25bU
-#define        SHA256_CONST_5          0x59f111f1U
-#define        SHA256_CONST_6          0x923f82a4U
-#define        SHA256_CONST_7          0xab1c5ed5U
-
-#define        SHA256_CONST_8          0xd807aa98U
-#define        SHA256_CONST_9          0x12835b01U
-#define        SHA256_CONST_10         0x243185beU
-#define        SHA256_CONST_11         0x550c7dc3U
-#define        SHA256_CONST_12         0x72be5d74U
-#define        SHA256_CONST_13         0x80deb1feU
-#define        SHA256_CONST_14         0x9bdc06a7U
-#define        SHA256_CONST_15         0xc19bf174U
-
-#define        SHA256_CONST_16         0xe49b69c1U
-#define        SHA256_CONST_17         0xefbe4786U
-#define        SHA256_CONST_18         0x0fc19dc6U
-#define        SHA256_CONST_19         0x240ca1ccU
-#define        SHA256_CONST_20         0x2de92c6fU
-#define        SHA256_CONST_21         0x4a7484aaU
-#define        SHA256_CONST_22         0x5cb0a9dcU
-#define        SHA256_CONST_23         0x76f988daU
-
-#define        SHA256_CONST_24         0x983e5152U
-#define        SHA256_CONST_25         0xa831c66dU
-#define        SHA256_CONST_26         0xb00327c8U
-#define        SHA256_CONST_27         0xbf597fc7U
-#define        SHA256_CONST_28         0xc6e00bf3U
-#define        SHA256_CONST_29         0xd5a79147U
-#define        SHA256_CONST_30         0x06ca6351U
-#define        SHA256_CONST_31         0x14292967U
-
-#define        SHA256_CONST_32         0x27b70a85U
-#define        SHA256_CONST_33         0x2e1b2138U
-#define        SHA256_CONST_34         0x4d2c6dfcU
-#define        SHA256_CONST_35         0x53380d13U
-#define        SHA256_CONST_36         0x650a7354U
-#define        SHA256_CONST_37         0x766a0abbU
-#define        SHA256_CONST_38         0x81c2c92eU
-#define        SHA256_CONST_39         0x92722c85U
-
-#define        SHA256_CONST_40         0xa2bfe8a1U
-#define        SHA256_CONST_41         0xa81a664bU
-#define        SHA256_CONST_42         0xc24b8b70U
-#define        SHA256_CONST_43         0xc76c51a3U
-#define        SHA256_CONST_44         0xd192e819U
-#define        SHA256_CONST_45         0xd6990624U
-#define        SHA256_CONST_46         0xf40e3585U
-#define        SHA256_CONST_47         0x106aa070U
-
-#define        SHA256_CONST_48         0x19a4c116U
-#define        SHA256_CONST_49         0x1e376c08U
-#define        SHA256_CONST_50         0x2748774cU
-#define        SHA256_CONST_51         0x34b0bcb5U
-#define        SHA256_CONST_52         0x391c0cb3U
-#define        SHA256_CONST_53         0x4ed8aa4aU
-#define        SHA256_CONST_54         0x5b9cca4fU
-#define        SHA256_CONST_55         0x682e6ff3U
-
-#define        SHA256_CONST_56         0x748f82eeU
-#define        SHA256_CONST_57         0x78a5636fU
-#define        SHA256_CONST_58         0x84c87814U
-#define        SHA256_CONST_59         0x8cc70208U
-#define        SHA256_CONST_60         0x90befffaU
-#define        SHA256_CONST_61         0xa4506cebU
-#define        SHA256_CONST_62         0xbef9a3f7U
-#define        SHA256_CONST_63         0xc67178f2U
-
-#define        SHA512_CONST_0          0x428a2f98d728ae22ULL
-#define        SHA512_CONST_1          0x7137449123ef65cdULL
-#define        SHA512_CONST_2          0xb5c0fbcfec4d3b2fULL
-#define        SHA512_CONST_3          0xe9b5dba58189dbbcULL
-#define        SHA512_CONST_4          0x3956c25bf348b538ULL
-#define        SHA512_CONST_5          0x59f111f1b605d019ULL
-#define        SHA512_CONST_6          0x923f82a4af194f9bULL
-#define        SHA512_CONST_7          0xab1c5ed5da6d8118ULL
-#define        SHA512_CONST_8          0xd807aa98a3030242ULL
-#define        SHA512_CONST_9          0x12835b0145706fbeULL
-#define        SHA512_CONST_10         0x243185be4ee4b28cULL
-#define        SHA512_CONST_11         0x550c7dc3d5ffb4e2ULL
-#define        SHA512_CONST_12         0x72be5d74f27b896fULL
-#define        SHA512_CONST_13         0x80deb1fe3b1696b1ULL
-#define        SHA512_CONST_14         0x9bdc06a725c71235ULL
-#define        SHA512_CONST_15         0xc19bf174cf692694ULL
-#define        SHA512_CONST_16         0xe49b69c19ef14ad2ULL
-#define        SHA512_CONST_17         0xefbe4786384f25e3ULL
-#define        SHA512_CONST_18         0x0fc19dc68b8cd5b5ULL
-#define        SHA512_CONST_19         0x240ca1cc77ac9c65ULL
-#define        SHA512_CONST_20         0x2de92c6f592b0275ULL
-#define        SHA512_CONST_21         0x4a7484aa6ea6e483ULL
-#define        SHA512_CONST_22         0x5cb0a9dcbd41fbd4ULL
-#define        SHA512_CONST_23         0x76f988da831153b5ULL
-#define        SHA512_CONST_24         0x983e5152ee66dfabULL
-#define        SHA512_CONST_25         0xa831c66d2db43210ULL
-#define        SHA512_CONST_26         0xb00327c898fb213fULL
-#define        SHA512_CONST_27         0xbf597fc7beef0ee4ULL
-#define        SHA512_CONST_28         0xc6e00bf33da88fc2ULL
-#define        SHA512_CONST_29         0xd5a79147930aa725ULL
-#define        SHA512_CONST_30         0x06ca6351e003826fULL
-#define        SHA512_CONST_31         0x142929670a0e6e70ULL
-#define        SHA512_CONST_32         0x27b70a8546d22ffcULL
-#define        SHA512_CONST_33         0x2e1b21385c26c926ULL
-#define        SHA512_CONST_34         0x4d2c6dfc5ac42aedULL
-#define        SHA512_CONST_35         0x53380d139d95b3dfULL
-#define        SHA512_CONST_36         0x650a73548baf63deULL
-#define        SHA512_CONST_37         0x766a0abb3c77b2a8ULL
-#define        SHA512_CONST_38         0x81c2c92e47edaee6ULL
-#define        SHA512_CONST_39         0x92722c851482353bULL
-#define        SHA512_CONST_40         0xa2bfe8a14cf10364ULL
-#define        SHA512_CONST_41         0xa81a664bbc423001ULL
-#define        SHA512_CONST_42         0xc24b8b70d0f89791ULL
-#define        SHA512_CONST_43         0xc76c51a30654be30ULL
-#define        SHA512_CONST_44         0xd192e819d6ef5218ULL
-#define        SHA512_CONST_45         0xd69906245565a910ULL
-#define        SHA512_CONST_46         0xf40e35855771202aULL
-#define        SHA512_CONST_47         0x106aa07032bbd1b8ULL
-#define        SHA512_CONST_48         0x19a4c116b8d2d0c8ULL
-#define        SHA512_CONST_49         0x1e376c085141ab53ULL
-#define        SHA512_CONST_50         0x2748774cdf8eeb99ULL
-#define        SHA512_CONST_51         0x34b0bcb5e19b48a8ULL
-#define        SHA512_CONST_52         0x391c0cb3c5c95a63ULL
-#define        SHA512_CONST_53         0x4ed8aa4ae3418acbULL
-#define        SHA512_CONST_54         0x5b9cca4f7763e373ULL
-#define        SHA512_CONST_55         0x682e6ff3d6b2b8a3ULL
-#define        SHA512_CONST_56         0x748f82ee5defb2fcULL
-#define        SHA512_CONST_57         0x78a5636f43172f60ULL
-#define        SHA512_CONST_58         0x84c87814a1f0ab72ULL
-#define        SHA512_CONST_59         0x8cc702081a6439ecULL
-#define        SHA512_CONST_60         0x90befffa23631e28ULL
-#define        SHA512_CONST_61         0xa4506cebde82bde9ULL
-#define        SHA512_CONST_62         0xbef9a3f7b2c67915ULL
-#define        SHA512_CONST_63         0xc67178f2e372532bULL
-#define        SHA512_CONST_64         0xca273eceea26619cULL
-#define        SHA512_CONST_65         0xd186b8c721c0c207ULL
-#define        SHA512_CONST_66         0xeada7dd6cde0eb1eULL
-#define        SHA512_CONST_67         0xf57d4f7fee6ed178ULL
-#define        SHA512_CONST_68         0x06f067aa72176fbaULL
-#define        SHA512_CONST_69         0x0a637dc5a2c898a6ULL
-#define        SHA512_CONST_70         0x113f9804bef90daeULL
-#define        SHA512_CONST_71         0x1b710b35131c471bULL
-#define        SHA512_CONST_72         0x28db77f523047d84ULL
-#define        SHA512_CONST_73         0x32caab7b40c72493ULL
-#define        SHA512_CONST_74         0x3c9ebe0a15c9bebcULL
-#define        SHA512_CONST_75         0x431d67c49c100d4cULL
-#define        SHA512_CONST_76         0x4cc5d4becb3e42b6ULL
-#define        SHA512_CONST_77         0x597f299cfc657e2aULL
-#define        SHA512_CONST_78         0x5fcb6fab3ad6faecULL
-#define        SHA512_CONST_79         0x6c44198c4a475817ULL
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_SHA2_CONSTS_H */
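
The SHA256_CONST()/SHA512_CONST() macros in the header just removed are a plain token-paste switch: on SPARC they index the extern tables, everywhere else they paste the matching literal. A small illustration (the function name is hypothetical, and the include path assumes the old icp layout where sha512_consts[] was defined):

    #include <sha2/sha2_consts.h>   /* the header removed above */

    uint64_t
    round5_constant(void)
    {
            /*
             * SPARC:     expands to (sha512_consts[5]), one load from the table.
             * elsewhere: expands to (SHA512_CONST_5) == 0x59f111f1b605d019ULL.
             */
            return (SHA512_CONST(5));
    }
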
diff --git a/module/os/freebsd/spl/sha224.h b/module/os/freebsd/spl/sha224.h
deleted file mode 100644 (file)
index 0abd430..0000000
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright 2005 Colin Percival
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef        _SHA224_H_
-#define        _SHA224_H_
-
-#ifndef _KERNEL
-#include <sys/types.h>
-#endif
-
-#define        SHA224_BLOCK_LENGTH             64
-#define        SHA224_DIGEST_LENGTH            28
-#define        SHA224_DIGEST_STRING_LENGTH     (SHA224_DIGEST_LENGTH * 2 + 1)
-
-typedef struct SHA224Context {
-       uint32_t state[8];
-       uint64_t count;
-       uint8_t buf[SHA224_BLOCK_LENGTH];
-} SHA224_CTX;
-
-__BEGIN_DECLS
-
-/* Ensure libmd symbols do not clash with libcrypto */
-
-#ifndef SHA224_Init
-#define        SHA224_Init             _libmd_SHA224_Init
-#endif
-#ifndef SHA224_Update
-#define        SHA224_Update           _libmd_SHA224_Update
-#endif
-#ifndef SHA224_Final
-#define        SHA224_Final            _libmd_SHA224_Final
-#endif
-#ifndef SHA224_End
-#define        SHA224_End              _libmd_SHA224_End
-#endif
-#ifndef SHA224_Fd
-#define        SHA224_Fd               _libmd_SHA224_Fd
-#endif
-#ifndef SHA224_FdChunk
-#define        SHA224_FdChunk          _libmd_SHA224_FdChunk
-#endif
-#ifndef SHA224_File
-#define        SHA224_File             _libmd_SHA224_File
-#endif
-#ifndef SHA224_FileChunk
-#define        SHA224_FileChunk        _libmd_SHA224_FileChunk
-#endif
-#ifndef SHA224_Data
-#define        SHA224_Data             _libmd_SHA224_Data
-#endif
-
-#ifndef SHA224_version
-#define        SHA224_version          _libmd_SHA224_version
-#endif
-
-void   SHA224_Init(SHA224_CTX *);
-void   SHA224_Update(SHA224_CTX *, const void *, size_t);
-void   SHA224_Final(unsigned char [__min_size(SHA224_DIGEST_LENGTH)],
-    SHA224_CTX *);
-#ifndef _KERNEL
-char   *SHA224_End(SHA224_CTX *, char *);
-char   *SHA224_Data(const void *, unsigned int, char *);
-char   *SHA224_Fd(int, char *);
-char   *SHA224_FdChunk(int, char *, off_t, off_t);
-char   *SHA224_File(const char *, char *);
-char   *SHA224_FileChunk(const char *, char *, off_t, off_t);
-#endif
-__END_DECLS
-
-#endif /* !_SHA224_H_ */
diff --git a/module/os/freebsd/spl/sha256.h b/module/os/freebsd/spl/sha256.h
deleted file mode 100644 (file)
index 193c0c0..0000000
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Copyright 2005 Colin Percival
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef _SHA256_H_
-#define        _SHA256_H_
-
-#ifndef _KERNEL
-#include <sys/types.h>
-#endif
-
-#define        SHA256_BLOCK_LENGTH             64
-#define        SHA256_DIGEST_LENGTH            32
-#define        SHA256_DIGEST_STRING_LENGTH     (SHA256_DIGEST_LENGTH * 2 + 1)
-
-typedef struct SHA256Context {
-       uint32_t state[8];
-       uint64_t count;
-       uint8_t buf[SHA256_BLOCK_LENGTH];
-} SHA256_CTX;
-
-__BEGIN_DECLS
-
-/* Ensure libmd symbols do not clash with libcrypto */
-
-#ifndef SHA256_Init
-#define        SHA256_Init             _libmd_SHA256_Init
-#endif
-#ifndef SHA256_Update
-#define        SHA256_Update           _libmd_SHA256_Update
-#endif
-#ifndef SHA256_Final
-#define        SHA256_Final            _libmd_SHA256_Final
-#endif
-#ifndef SHA256_End
-#define        SHA256_End              _libmd_SHA256_End
-#endif
-#ifndef SHA256_Fd
-#define        SHA256_Fd               _libmd_SHA256_Fd
-#endif
-#ifndef SHA256_FdChunk
-#define        SHA256_FdChunk          _libmd_SHA256_FdChunk
-#endif
-#ifndef SHA256_File
-#define        SHA256_File             _libmd_SHA256_File
-#endif
-#ifndef SHA256_FileChunk
-#define        SHA256_FileChunk        _libmd_SHA256_FileChunk
-#endif
-#ifndef SHA256_Data
-#define        SHA256_Data             _libmd_SHA256_Data
-#endif
-
-#ifndef SHA256_Transform
-#define        SHA256_Transform        _libmd_SHA256_Transform
-#endif
-#ifndef SHA256_version
-#define        SHA256_version          _libmd_SHA256_version
-#endif
-
-void   SHA256_Init(SHA256_CTX *);
-void   SHA256_Update(SHA256_CTX *, const void *, size_t);
-void   SHA256_Final(unsigned char [__min_size(SHA256_DIGEST_LENGTH)],
-    SHA256_CTX *);
-#ifndef _KERNEL
-char   *SHA256_End(SHA256_CTX *, char *);
-char   *SHA256_Data(const void *, unsigned int, char *);
-char   *SHA256_Fd(int, char *);
-char   *SHA256_FdChunk(int, char *, off_t, off_t);
-char   *SHA256_File(const char *, char *);
-char   *SHA256_FileChunk(const char *, char *, off_t, off_t);
-#endif
-__END_DECLS
-
-#endif /* !_SHA256_H_ */
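
The removed FreeBSD headers (sha224.h above, sha256.h here, and the sha384.h/sha512.h ones further down) all declare the same Init/Update/Final triple; the _libmd_ defines exist only to keep libmd and libcrypto symbols from clashing. A hypothetical caller, using just the signatures declared above:

    #include <string.h>
    #include "sha256.h"     /* the header removed above */

    static void
    digest_abc(unsigned char md[SHA256_DIGEST_LENGTH])
    {
            SHA256_CTX ctx;

            SHA256_Init(&ctx);
            SHA256_Update(&ctx, "abc", strlen("abc"));
            SHA256_Final(md, &ctx);         /* md now holds the 32-byte digest */
    }
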
diff --git a/module/os/freebsd/spl/sha256c.c b/module/os/freebsd/spl/sha256c.c
deleted file mode 100644 (file)
index 52cf0df..0000000
+++ /dev/null
@@ -1,378 +0,0 @@
-/*
- * Copyright 2005 Colin Percival
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <sys/types.h>
-
-#ifdef _KERNEL
-#include <sys/systm.h>
-#else
-#include <string.h>
-#endif
-
-
-#include <sys/byteorder.h>
-#include <sys/endian.h>
-#include "sha224.h"
-#include "sha256.h"
-
-#if BYTE_ORDER == BIG_ENDIAN
-
-/* Copy a vector of big-endian uint32_t into a vector of bytes */
-#define        be32enc_vect(dst, src, len)     \
-       memcpy((void *)dst, (const void *)src, (size_t)len)
-
-/* Copy a vector of bytes into a vector of big-endian uint32_t */
-#define        be32dec_vect(dst, src, len)     \
-       memcpy((void *)dst, (const void *)src, (size_t)len)
-
-#else /* BYTE_ORDER != BIG_ENDIAN */
-
-/*
- * Encode a length len/4 vector of (uint32_t) into a length len vector of
- * (unsigned char) in big-endian form.  Assumes len is a multiple of 4.
- */
-static void
-be32enc_vect(unsigned char *dst, const uint32_t *src, size_t len)
-{
-       size_t i;
-
-       for (i = 0; i < len / 4; i++)
-               be32enc(dst + i * 4, src[i]);
-}
-
-/*
- * Decode a big-endian length len vector of (unsigned char) into a length
- * len/4 vector of (uint32_t).  Assumes len is a multiple of 4.
- */
-static void
-be32dec_vect(uint32_t *dst, const unsigned char *src, size_t len)
-{
-       size_t i;
-
-       for (i = 0; i < len / 4; i++)
-               dst[i] = be32dec(src + i * 4);
-}
-
-#endif /* BYTE_ORDER != BIG_ENDIAN */
-
-/* SHA256 round constants. */
-static const uint32_t K[64] = {
-       0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
-       0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
-       0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
-       0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
-       0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
-       0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
-       0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
-       0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
-       0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
-       0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
-       0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
-       0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
-       0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
-       0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
-       0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
-       0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
-};
-
-/* Elementary functions used by SHA256 */
-#define        Ch(x, y, z)     ((x & (y ^ z)) ^ z)
-#define        Maj(x, y, z)    ((x & (y | z)) | (y & z))
-#define        SHR(x, n)       (x >> n)
-#define        ROTR(x, n)      ((x >> n) | (x << (32 - n)))
-#define        S0(x)           (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22))
-#define        S1(x)           (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25))
-#define        s0(x)           (ROTR(x, 7) ^ ROTR(x, 18) ^ SHR(x, 3))
-#define        s1(x)           (ROTR(x, 17) ^ ROTR(x, 19) ^ SHR(x, 10))
-
-/* SHA256 round function */
-#define        RND(a, b, c, d, e, f, g, h, k)                  \
-       h += S1(e) + Ch(e, f, g) + k;                   \
-       d += h;                                         \
-       h += S0(a) + Maj(a, b, c);
-
-/* Adjusted round function for rotating state */
-#define        RNDr(S, W, i, ii)                       \
-       RND(S[(64 - i) % 8], S[(65 - i) % 8],   \
-           S[(66 - i) % 8], S[(67 - i) % 8],   \
-           S[(68 - i) % 8], S[(69 - i) % 8],   \
-           S[(70 - i) % 8], S[(71 - i) % 8],   \
-           W[i + ii] + K[i + ii])
-
-/* Message schedule computation */
-#define        MSCH(W, ii, i)                          \
-       W[i + ii + 16] = s1(W[i + ii + 14]) + W[i + ii + 9] +   \
-               s0(W[i + ii + 1]) + W[i + ii]
-
-/*
- * SHA256 block compression function.  The 256-bit state is transformed via
- * the 512-bit input block to produce a new state.
- */
-static void
-SHA256_Transform(uint32_t *state, const unsigned char block[64])
-{
-       uint32_t W[64];
-       uint32_t S[8];
-       int i;
-
-       /* 1. Prepare the first part of the message schedule W. */
-       be32dec_vect(W, block, 64);
-
-       /* 2. Initialize working variables. */
-       memcpy(S, state, 32);
-
-       /* 3. Mix. */
-       for (i = 0; i < 64; i += 16) {
-               RNDr(S, W, 0, i);
-               RNDr(S, W, 1, i);
-               RNDr(S, W, 2, i);
-               RNDr(S, W, 3, i);
-               RNDr(S, W, 4, i);
-               RNDr(S, W, 5, i);
-               RNDr(S, W, 6, i);
-               RNDr(S, W, 7, i);
-               RNDr(S, W, 8, i);
-               RNDr(S, W, 9, i);
-               RNDr(S, W, 10, i);
-               RNDr(S, W, 11, i);
-               RNDr(S, W, 12, i);
-               RNDr(S, W, 13, i);
-               RNDr(S, W, 14, i);
-               RNDr(S, W, 15, i);
-
-               if (i == 48)
-                       break;
-               MSCH(W, 0, i);
-               MSCH(W, 1, i);
-               MSCH(W, 2, i);
-               MSCH(W, 3, i);
-               MSCH(W, 4, i);
-               MSCH(W, 5, i);
-               MSCH(W, 6, i);
-               MSCH(W, 7, i);
-               MSCH(W, 8, i);
-               MSCH(W, 9, i);
-               MSCH(W, 10, i);
-               MSCH(W, 11, i);
-               MSCH(W, 12, i);
-               MSCH(W, 13, i);
-               MSCH(W, 14, i);
-               MSCH(W, 15, i);
-       }
-
-       /* 4. Mix local working variables into global state */
-       for (i = 0; i < 8; i++)
-               state[i] += S[i];
-}
-
-static unsigned char PAD[64] = {
-       0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-};
-
-/* Add padding and terminating bit-count. */
-static void
-SHA256_Pad(SHA256_CTX * ctx)
-{
-       size_t r;
-
-       /* Figure out how many bytes we have buffered. */
-       r = (ctx->count >> 3) & 0x3f;
-
-       /* Pad to 56 mod 64, transforming if we finish a block en route. */
-       if (r < 56) {
-               /* Pad to 56 mod 64. */
-               memcpy(&ctx->buf[r], PAD, 56 - r);
-       } else {
-               /* Finish the current block and mix. */
-               memcpy(&ctx->buf[r], PAD, 64 - r);
-               SHA256_Transform(ctx->state, ctx->buf);
-
-               /* The start of the final block is all zeroes. */
-               memset(&ctx->buf[0], 0, 56);
-       }
-
-       /* Add the terminating bit-count. */
-       be64enc(&ctx->buf[56], ctx->count);
-
-       /* Mix in the final block. */
-       SHA256_Transform(ctx->state, ctx->buf);
-}
-
-/* SHA-256 initialization.  Begins a SHA-256 operation. */
-void
-SHA256_Init(SHA256_CTX * ctx)
-{
-
-       /* Zero bits processed so far */
-       ctx->count = 0;
-
-       /* Magic initialization constants */
-       ctx->state[0] = 0x6A09E667;
-       ctx->state[1] = 0xBB67AE85;
-       ctx->state[2] = 0x3C6EF372;
-       ctx->state[3] = 0xA54FF53A;
-       ctx->state[4] = 0x510E527F;
-       ctx->state[5] = 0x9B05688C;
-       ctx->state[6] = 0x1F83D9AB;
-       ctx->state[7] = 0x5BE0CD19;
-}
-
-/* Add bytes into the hash */
-void
-SHA256_Update(SHA256_CTX * ctx, const void *in, size_t len)
-{
-       uint64_t bitlen;
-       uint32_t r;
-       const unsigned char *src = in;
-
-       /* Number of bytes left in the buffer from previous updates */
-       r = (ctx->count >> 3) & 0x3f;
-
-       /* Convert the length into a number of bits */
-       bitlen = len << 3;
-
-       /* Update number of bits */
-       ctx->count += bitlen;
-
-       /* Handle the case where we don't need to perform any transforms */
-       if (len < 64 - r) {
-               memcpy(&ctx->buf[r], src, len);
-               return;
-       }
-
-       /* Finish the current block */
-       memcpy(&ctx->buf[r], src, 64 - r);
-       SHA256_Transform(ctx->state, ctx->buf);
-       src += 64 - r;
-       len -= 64 - r;
-
-       /* Perform complete blocks */
-       while (len >= 64) {
-               SHA256_Transform(ctx->state, src);
-               src += 64;
-               len -= 64;
-       }
-
-       /* Copy left over data into buffer */
-       memcpy(ctx->buf, src, len);
-}
-
-/*
- * SHA-256 finalization.  Pads the input data, exports the hash value,
- * and clears the context state.
- */
-void
-SHA256_Final(unsigned char digest[static SHA256_DIGEST_LENGTH], SHA256_CTX *ctx)
-{
-
-       /* Add padding */
-       SHA256_Pad(ctx);
-
-       /* Write the hash */
-       be32enc_vect(digest, ctx->state, SHA256_DIGEST_LENGTH);
-
-       /* Clear the context state */
-       memset(ctx, 0, sizeof (*ctx));
-}
-
-/* SHA-224: ******************************************************* */
-/*
- * the SHA224 and SHA256 transforms are identical
- */
-
-/* SHA-224 initialization.  Begins a SHA-224 operation. */
-void
-SHA224_Init(SHA224_CTX * ctx)
-{
-
-       /* Zero bits processed so far */
-       ctx->count = 0;
-
-       /* Magic initialization constants */
-       ctx->state[0] = 0xC1059ED8;
-       ctx->state[1] = 0x367CD507;
-       ctx->state[2] = 0x3070DD17;
-       ctx->state[3] = 0xF70E5939;
-       ctx->state[4] = 0xFFC00B31;
-       ctx->state[5] = 0x68581511;
-       ctx->state[6] = 0x64f98FA7;
-       ctx->state[7] = 0xBEFA4FA4;
-}
-
-/* Add bytes into the SHA-224 hash */
-void
-SHA224_Update(SHA224_CTX * ctx, const void *in, size_t len)
-{
-
-       SHA256_Update((SHA256_CTX *)ctx, in, len);
-}
-
-/*
- * SHA-224 finalization.  Pads the input data, exports the hash value,
- * and clears the context state.
- */
-void
-SHA224_Final(unsigned char digest[static SHA224_DIGEST_LENGTH], SHA224_CTX *ctx)
-{
-
-       /* Add padding */
-       SHA256_Pad((SHA256_CTX *)ctx);
-
-       /* Write the hash */
-       be32enc_vect(digest, ctx->state, SHA224_DIGEST_LENGTH);
-
-       /* Clear the context state */
-       memset(ctx, 0, sizeof (*ctx));
-}
-
-#ifdef WEAK_REFS
-/*
- * When building libmd, provide weak references. Note: this is not
- * activated in the context of compiling these sources for internal
- * use in libcrypt.
- */
-#undef SHA256_Init
-__weak_reference(_libmd_SHA256_Init, SHA256_Init);
-#undef SHA256_Update
-__weak_reference(_libmd_SHA256_Update, SHA256_Update);
-#undef SHA256_Final
-__weak_reference(_libmd_SHA256_Final, SHA256_Final);
-#undef SHA256_Transform
-__weak_reference(_libmd_SHA256_Transform, SHA256_Transform);
-
-#undef SHA224_Init
-__weak_reference(_libmd_SHA224_Init, SHA224_Init);
-#undef SHA224_Update
-__weak_reference(_libmd_SHA224_Update, SHA224_Update);
-#undef SHA224_Final
-__weak_reference(_libmd_SHA224_Final, SHA224_Final);
-#endif
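
One non-obvious trick in the sha256c.c just removed: RNDr() never moves the eight working variables. The (64 - i) % 8 through (71 - i) % 8 subscripts rotate which slot of S[] plays a through h, so the value one round writes as h is read back as a in the next round. A standalone sketch of that index walk (not code from the removed file):

    #include <stdio.h>

    int
    main(void)
    {
            /* Same subscripts as RNDr(): a = S[(64 - i) % 8], h = S[(71 - i) % 8]. */
            for (int i = 0; i < 4; i++)
                    printf("round %d: a = S[%d], ..., h = S[%d]\n",
                        i, (64 - i) % 8, (71 - i) % 8);
            return (0);
    }

This prints the "a" role walking backwards through S[0], S[7], S[6], S[5], which is exactly the rotation the sixteen unrolled RNDr() calls rely on.
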
diff --git a/module/os/freebsd/spl/sha384.h b/module/os/freebsd/spl/sha384.h
deleted file mode 100644 (file)
index 67250ce..0000000
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright 2005 Colin Percival
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef _SHA384_H_
-#define        _SHA384_H_
-
-#ifndef _KERNEL
-#include <sys/types.h>
-#endif
-
-#define        SHA384_BLOCK_LENGTH             128
-#define        SHA384_DIGEST_LENGTH            48
-#define        SHA384_DIGEST_STRING_LENGTH     (SHA384_DIGEST_LENGTH * 2 + 1)
-
-typedef struct SHA384Context {
-       uint64_t state[8];
-       uint64_t count[2];
-       uint8_t buf[SHA384_BLOCK_LENGTH];
-} SHA384_CTX;
-
-__BEGIN_DECLS
-
-/* Ensure libmd symbols do not clash with libcrypto */
-#ifndef SHA384_Init
-#define        SHA384_Init             _libmd_SHA384_Init
-#endif
-#ifndef SHA384_Update
-#define        SHA384_Update           _libmd_SHA384_Update
-#endif
-#ifndef SHA384_Final
-#define        SHA384_Final            _libmd_SHA384_Final
-#endif
-#ifndef SHA384_End
-#define        SHA384_End              _libmd_SHA384_End
-#endif
-#ifndef SHA384_Fd
-#define        SHA384_Fd               _libmd_SHA384_Fd
-#endif
-#ifndef SHA384_FdChunk
-#define        SHA384_FdChunk          _libmd_SHA384_FdChunk
-#endif
-#ifndef SHA384_File
-#define        SHA384_File             _libmd_SHA384_File
-#endif
-#ifndef SHA384_FileChunk
-#define        SHA384_FileChunk        _libmd_SHA384_FileChunk
-#endif
-#ifndef SHA384_Data
-#define        SHA384_Data             _libmd_SHA384_Data
-#endif
-
-#ifndef SHA384_version
-#define        SHA384_version          _libmd_SHA384_version
-#endif
-
-void   SHA384_Init(SHA384_CTX *);
-void   SHA384_Update(SHA384_CTX *, const void *, size_t);
-void   SHA384_Final(unsigned char [__min_size(SHA384_DIGEST_LENGTH)],
-    SHA384_CTX *);
-#ifndef _KERNEL
-char   *SHA384_End(SHA384_CTX *, char *);
-char   *SHA384_Data(const void *, unsigned int, char *);
-char   *SHA384_Fd(int, char *);
-char   *SHA384_FdChunk(int, char *, off_t, off_t);
-char   *SHA384_File(const char *, char *);
-char   *SHA384_FileChunk(const char *, char *, off_t, off_t);
-#endif
-
-__END_DECLS
-
-#endif /* !_SHA384_H_ */
diff --git a/module/os/freebsd/spl/sha512.h b/module/os/freebsd/spl/sha512.h
deleted file mode 100644 (file)
index b6fb733..0000000
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Copyright 2005 Colin Percival
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef _SHA512_H_
-#define        _SHA512_H_
-
-#ifndef _KERNEL
-#include <sys/types.h>
-#endif
-
-#define        SHA512_BLOCK_LENGTH             128
-#define        SHA512_DIGEST_LENGTH            64
-#define        SHA512_DIGEST_STRING_LENGTH     (SHA512_DIGEST_LENGTH * 2 + 1)
-
-typedef struct SHA512Context {
-       uint64_t state[8];
-       uint64_t count[2];
-       uint8_t buf[SHA512_BLOCK_LENGTH];
-} SHA512_CTX;
-
-__BEGIN_DECLS
-
-/* Ensure libmd symbols do not clash with libcrypto */
-#if 0
-#ifndef SHA512_Init
-#define        SHA512_Init             _libmd_SHA512_Init
-#endif
-#ifndef SHA512_Update
-#define        SHA512_Update           _libmd_SHA512_Update
-#endif
-#ifndef SHA512_Final
-#define        SHA512_Final            _libmd_SHA512_Final
-#endif
-#endif
-#ifndef SHA512_End
-#define        SHA512_End              _libmd_SHA512_End
-#endif
-#ifndef SHA512_Fd
-#define        SHA512_Fd               _libmd_SHA512_Fd
-#endif
-#ifndef SHA512_FdChunk
-#define        SHA512_FdChunk          _libmd_SHA512_FdChunk
-#endif
-#ifndef SHA512_File
-#define        SHA512_File             _libmd_SHA512_File
-#endif
-#ifndef SHA512_FileChunk
-#define        SHA512_FileChunk        _libmd_SHA512_FileChunk
-#endif
-#ifndef SHA512_Data
-#define        SHA512_Data             _libmd_SHA512_Data
-#endif
-
-#ifndef SHA512_Transform
-#define        SHA512_Transform        _libmd_SHA512_Transform
-#endif
-#ifndef SHA512_version
-#define        SHA512_version          _libmd_SHA512_version
-#endif
-
-void   SHA512_Init(SHA512_CTX *);
-void   SHA512_Update(SHA512_CTX *, const void *, size_t);
-void   SHA512_Final(unsigned char [__min_size(SHA512_DIGEST_LENGTH)],
-    SHA512_CTX *);
-#ifndef _KERNEL
-char   *SHA512_End(SHA512_CTX *, char *);
-char   *SHA512_Data(const void *, unsigned int, char *);
-char   *SHA512_Fd(int, char *);
-char   *SHA512_FdChunk(int, char *, off_t, off_t);
-char   *SHA512_File(const char *, char *);
-char   *SHA512_FileChunk(const char *, char *, off_t, off_t);
-#endif
-
-__END_DECLS
-
-#endif /* !_SHA512_H_ */
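
For reference, a small sketch (hypothetical helper, not from this commit) of what SHA512_DIGEST_STRING_LENGTH in the deleted header above is sized for: a digest rendered as hex plus a terminating NUL, the buffer size the userland SHA512_End()/SHA512_Data() helpers conventionally expect.

	#include <stdio.h>
	#include "sha512.h"	/* the header deleted above */

	/*
	 * Hypothetical helper, for illustration only: format a raw SHA-512
	 * digest as a NUL-terminated hex string.  SHA512_DIGEST_STRING_LENGTH
	 * is 64 * 2 + 1 = 129 bytes: two hex characters per byte plus the NUL.
	 */
	static void
	sha512_hex(const unsigned char digest[SHA512_DIGEST_LENGTH],
	    char out[SHA512_DIGEST_STRING_LENGTH])
	{
		int i;

		for (i = 0; i < SHA512_DIGEST_LENGTH; i++)
			(void) snprintf(out + 2 * i, 3, "%02x", digest[i]);
	}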
diff --git a/module/os/freebsd/spl/sha512c.c b/module/os/freebsd/spl/sha512c.c
deleted file mode 100644 (file)
index 254cc21..0000000
+++ /dev/null
@@ -1,508 +0,0 @@
-/*
- * Copyright 2005 Colin Percival
- * Copyright (c) 2015 Allan Jude <allanjude@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <sys/endian.h>
-#include <sys/types.h>
-
-#ifdef _KERNEL
-#include <sys/systm.h>
-#else
-#include <string.h>
-#endif
-
-#include "sha512.h"
-#include "sha512t.h"
-#include "sha384.h"
-
-#if BYTE_ORDER == BIG_ENDIAN
-
-/* Copy a vector of big-endian uint64_t into a vector of bytes */
-#define        be64enc_vect(dst, src, len)     \
-       memcpy((void *)dst, (const void *)src, (size_t)len)
-
-/* Copy a vector of bytes into a vector of big-endian uint64_t */
-#define        be64dec_vect(dst, src, len)     \
-       memcpy((void *)dst, (const void *)src, (size_t)len)
-
-#else /* BYTE_ORDER != BIG_ENDIAN */
-
-/*
- * Encode a length len/8 vector of (uint64_t) into a length len vector of
- * (unsigned char) in big-endian form.  Assumes len is a multiple of 8.
- */
-static void
-be64enc_vect(unsigned char *dst, const uint64_t *src, size_t len)
-{
-       size_t i;
-
-       for (i = 0; i < len / 8; i++)
-               be64enc(dst + i * 8, src[i]);
-}
-
-/*
- * Decode a big-endian length len vector of (unsigned char) into a length
- * len/8 vector of (uint64_t).  Assumes len is a multiple of 8.
- */
-static void
-be64dec_vect(uint64_t *dst, const unsigned char *src, size_t len)
-{
-       size_t i;
-
-       for (i = 0; i < len / 8; i++)
-               dst[i] = be64dec(src + i * 8);
-}
-
-#endif /* BYTE_ORDER != BIG_ENDIAN */
-
-/* SHA512 round constants. */
-static const uint64_t K[80] = {
-       0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL,
-       0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL,
-       0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL,
-       0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL,
-       0xd807aa98a3030242ULL, 0x12835b0145706fbeULL,
-       0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL,
-       0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL,
-       0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL,
-       0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL,
-       0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL,
-       0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL,
-       0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL,
-       0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL,
-       0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL,
-       0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL,
-       0x06ca6351e003826fULL, 0x142929670a0e6e70ULL,
-       0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL,
-       0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL,
-       0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL,
-       0x81c2c92e47edaee6ULL, 0x92722c851482353bULL,
-       0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL,
-       0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL,
-       0xd192e819d6ef5218ULL, 0xd69906245565a910ULL,
-       0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL,
-       0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL,
-       0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL,
-       0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL,
-       0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL,
-       0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL,
-       0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL,
-       0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL,
-       0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL,
-       0xca273eceea26619cULL, 0xd186b8c721c0c207ULL,
-       0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL,
-       0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL,
-       0x113f9804bef90daeULL, 0x1b710b35131c471bULL,
-       0x28db77f523047d84ULL, 0x32caab7b40c72493ULL,
-       0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL,
-       0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL,
-       0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL
-};
-
-/* Elementary functions used by SHA512 */
-#define        Ch(x, y, z)     ((x & (y ^ z)) ^ z)
-#define        Maj(x, y, z)    ((x & (y | z)) | (y & z))
-#define        SHR(x, n)       (x >> n)
-#define        ROTR(x, n)      ((x >> n) | (x << (64 - n)))
-#define        S0(x)           (ROTR(x, 28) ^ ROTR(x, 34) ^ ROTR(x, 39))
-#define        S1(x)           (ROTR(x, 14) ^ ROTR(x, 18) ^ ROTR(x, 41))
-#define        s0(x)           (ROTR(x, 1) ^ ROTR(x, 8) ^ SHR(x, 7))
-#define        s1(x)           (ROTR(x, 19) ^ ROTR(x, 61) ^ SHR(x, 6))
-
-/* SHA512 round function */
-#define        RND(a, b, c, d, e, f, g, h, k)                  \
-       h += S1(e) + Ch(e, f, g) + k;                   \
-       d += h;                                         \
-       h += S0(a) + Maj(a, b, c);
-
-/* Adjusted round function for rotating state */
-#define        RNDr(S, W, i, ii)                       \
-       RND(S[(80 - i) % 8], S[(81 - i) % 8],   \
-           S[(82 - i) % 8], S[(83 - i) % 8],   \
-           S[(84 - i) % 8], S[(85 - i) % 8],   \
-           S[(86 - i) % 8], S[(87 - i) % 8],   \
-           W[i + ii] + K[i + ii])
-
-/* Message schedule computation */
-#define        MSCH(W, ii, i)                          \
-       W[i + ii + 16] = s1(W[i + ii + 14]) + W[i + ii + 9] +   \
-               s0(W[i + ii + 1]) + W[i + ii]
-
-/*
- * SHA512 block compression function.  The 512-bit state is transformed via
- * the 1024-bit input block to produce a new state.
- */
-static void
-SHA512_Transform(uint64_t *state,
-    const unsigned char block[SHA512_BLOCK_LENGTH])
-{
-       uint64_t W[80];
-       uint64_t S[8];
-       int i;
-
-       /* 1. Prepare the first part of the message schedule W. */
-       be64dec_vect(W, block, SHA512_BLOCK_LENGTH);
-
-       /* 2. Initialize working variables. */
-       memcpy(S, state, SHA512_DIGEST_LENGTH);
-
-       /* 3. Mix. */
-       for (i = 0; i < 80; i += 16) {
-               RNDr(S, W, 0, i);
-               RNDr(S, W, 1, i);
-               RNDr(S, W, 2, i);
-               RNDr(S, W, 3, i);
-               RNDr(S, W, 4, i);
-               RNDr(S, W, 5, i);
-               RNDr(S, W, 6, i);
-               RNDr(S, W, 7, i);
-               RNDr(S, W, 8, i);
-               RNDr(S, W, 9, i);
-               RNDr(S, W, 10, i);
-               RNDr(S, W, 11, i);
-               RNDr(S, W, 12, i);
-               RNDr(S, W, 13, i);
-               RNDr(S, W, 14, i);
-               RNDr(S, W, 15, i);
-
-               if (i == 64)
-                       break;
-               MSCH(W, 0, i);
-               MSCH(W, 1, i);
-               MSCH(W, 2, i);
-               MSCH(W, 3, i);
-               MSCH(W, 4, i);
-               MSCH(W, 5, i);
-               MSCH(W, 6, i);
-               MSCH(W, 7, i);
-               MSCH(W, 8, i);
-               MSCH(W, 9, i);
-               MSCH(W, 10, i);
-               MSCH(W, 11, i);
-               MSCH(W, 12, i);
-               MSCH(W, 13, i);
-               MSCH(W, 14, i);
-               MSCH(W, 15, i);
-       }
-
-       /* 4. Mix local working variables into global state */
-       for (i = 0; i < 8; i++)
-               state[i] += S[i];
-}
-
-static unsigned char PAD[SHA512_BLOCK_LENGTH] = {
-       0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-};
-
-/* Add padding and terminating bit-count. */
-static void
-SHA512_Pad(SHA512_CTX * ctx)
-{
-       size_t r;
-
-       /* Figure out how many bytes we have buffered. */
-       r = (ctx->count[1] >> 3) & 0x7f;
-
-       /* Pad to 112 mod 128, transforming if we finish a block en route. */
-       if (r < 112) {
-               /* Pad to 112 mod 128. */
-               memcpy(&ctx->buf[r], PAD, 112 - r);
-       } else {
-               /* Finish the current block and mix. */
-               memcpy(&ctx->buf[r], PAD, 128 - r);
-               SHA512_Transform(ctx->state, ctx->buf);
-
-               /* The start of the final block is all zeroes. */
-               memset(&ctx->buf[0], 0, 112);
-       }
-
-       /* Add the terminating bit-count. */
-       be64enc_vect(&ctx->buf[112], ctx->count, 16);
-
-       /* Mix in the final block. */
-       SHA512_Transform(ctx->state, ctx->buf);
-}
-
-/* SHA-512 initialization.  Begins a SHA-512 operation. */
-void
-SHA512_Init(SHA512_CTX * ctx)
-{
-
-       /* Zero bits processed so far */
-       ctx->count[0] = ctx->count[1] = 0;
-
-       /* Magic initialization constants */
-       ctx->state[0] = 0x6a09e667f3bcc908ULL;
-       ctx->state[1] = 0xbb67ae8584caa73bULL;
-       ctx->state[2] = 0x3c6ef372fe94f82bULL;
-       ctx->state[3] = 0xa54ff53a5f1d36f1ULL;
-       ctx->state[4] = 0x510e527fade682d1ULL;
-       ctx->state[5] = 0x9b05688c2b3e6c1fULL;
-       ctx->state[6] = 0x1f83d9abfb41bd6bULL;
-       ctx->state[7] = 0x5be0cd19137e2179ULL;
-}
-
-/* Add bytes into the hash */
-void
-SHA512_Update(SHA512_CTX * ctx, const void *in, size_t len)
-{
-       uint64_t bitlen[2];
-       uint64_t r;
-       const unsigned char *src = in;
-
-       /* Number of bytes left in the buffer from previous updates */
-       r = (ctx->count[1] >> 3) & 0x7f;
-
-       /* Convert the length into a number of bits */
-       bitlen[1] = ((uint64_t)len) << 3;
-       bitlen[0] = ((uint64_t)len) >> 61;
-
-       /* Update number of bits */
-       if ((ctx->count[1] += bitlen[1]) < bitlen[1])
-               ctx->count[0]++;
-       ctx->count[0] += bitlen[0];
-
-       /* Handle the case where we don't need to perform any transforms */
-       if (len < SHA512_BLOCK_LENGTH - r) {
-               memcpy(&ctx->buf[r], src, len);
-               return;
-       }
-
-       /* Finish the current block */
-       memcpy(&ctx->buf[r], src, SHA512_BLOCK_LENGTH - r);
-       SHA512_Transform(ctx->state, ctx->buf);
-       src += SHA512_BLOCK_LENGTH - r;
-       len -= SHA512_BLOCK_LENGTH - r;
-
-       /* Perform complete blocks */
-       while (len >= SHA512_BLOCK_LENGTH) {
-               SHA512_Transform(ctx->state, src);
-               src += SHA512_BLOCK_LENGTH;
-               len -= SHA512_BLOCK_LENGTH;
-       }
-
-       /* Copy left over data into buffer */
-       memcpy(ctx->buf, src, len);
-}
-
-/*
- * SHA-512 finalization.  Pads the input data, exports the hash value,
- * and clears the context state.
- */
-void
-SHA512_Final(unsigned char digest[static SHA512_DIGEST_LENGTH], SHA512_CTX *ctx)
-{
-
-       /* Add padding */
-       SHA512_Pad(ctx);
-
-       /* Write the hash */
-       be64enc_vect(digest, ctx->state, SHA512_DIGEST_LENGTH);
-
-       /* Clear the context state */
-       memset(ctx, 0, sizeof (*ctx));
-}
-
-/* SHA-512t: ******************************************************** */
-/*
- * The SHA-512t transforms are identical to SHA-512, so the existing
- * SHA-512 functions are reused.
- */
-void
-SHA512_224_Init(SHA512_CTX * ctx)
-{
-
-       /* Zero bits processed so far */
-       ctx->count[0] = ctx->count[1] = 0;
-
-       /* Magic initialization constants */
-       ctx->state[0] = 0x8c3d37c819544da2ULL;
-       ctx->state[1] = 0x73e1996689dcd4d6ULL;
-       ctx->state[2] = 0x1dfab7ae32ff9c82ULL;
-       ctx->state[3] = 0x679dd514582f9fcfULL;
-       ctx->state[4] = 0x0f6d2b697bd44da8ULL;
-       ctx->state[5] = 0x77e36f7304c48942ULL;
-       ctx->state[6] = 0x3f9d85a86a1d36c8ULL;
-       ctx->state[7] = 0x1112e6ad91d692a1ULL;
-}
-
-void
-SHA512_224_Update(SHA512_CTX * ctx, const void *in, size_t len)
-{
-
-       SHA512_Update(ctx, in, len);
-}
-
-void
-SHA512_224_Final(unsigned char digest[static SHA512_224_DIGEST_LENGTH],
-    SHA512_CTX *ctx)
-{
-
-       /* Add padding */
-       SHA512_Pad(ctx);
-
-       /* Write the hash */
-       be64enc_vect(digest, ctx->state, SHA512_224_DIGEST_LENGTH);
-
-       /* Clear the context state */
-       memset(ctx, 0, sizeof (*ctx));
-}
-
-void
-SHA512_256_Init(SHA512_CTX * ctx)
-{
-
-       /* Zero bits processed so far */
-       ctx->count[0] = ctx->count[1] = 0;
-
-       /* Magic initialization constants */
-       ctx->state[0] = 0x22312194fc2bf72cULL;
-       ctx->state[1] = 0x9f555fa3c84c64c2ULL;
-       ctx->state[2] = 0x2393b86b6f53b151ULL;
-       ctx->state[3] = 0x963877195940eabdULL;
-       ctx->state[4] = 0x96283ee2a88effe3ULL;
-       ctx->state[5] = 0xbe5e1e2553863992ULL;
-       ctx->state[6] = 0x2b0199fc2c85b8aaULL;
-       ctx->state[7] = 0x0eb72ddc81c52ca2ULL;
-}
-
-void
-SHA512_256_Update(SHA512_CTX * ctx, const void *in, size_t len)
-{
-
-       SHA512_Update(ctx, in, len);
-}
-
-void
-SHA512_256_Final(unsigned char digest[static SHA512_256_DIGEST_LENGTH],
-    SHA512_CTX * ctx)
-{
-
-       /* Add padding */
-       SHA512_Pad(ctx);
-
-       /* Write the hash */
-       be64enc_vect(digest, ctx->state, SHA512_256_DIGEST_LENGTH);
-
-       /* Clear the context state */
-       memset(ctx, 0, sizeof (*ctx));
-}
-
-/* SHA-384: ******************************************************** */
-/*
- * The SHA-384 and SHA-512 transforms are identical, so no separate
- * SHA-384 transform is provided.
- */
-
-/* SHA-384 initialization.  Begins a SHA-384 operation. */
-void
-SHA384_Init(SHA384_CTX * ctx)
-{
-
-       /* Zero bits processed so far */
-       ctx->count[0] = ctx->count[1] = 0;
-
-       /* Magic initialization constants */
-       ctx->state[0] = 0xcbbb9d5dc1059ed8ULL;
-       ctx->state[1] = 0x629a292a367cd507ULL;
-       ctx->state[2] = 0x9159015a3070dd17ULL;
-       ctx->state[3] = 0x152fecd8f70e5939ULL;
-       ctx->state[4] = 0x67332667ffc00b31ULL;
-       ctx->state[5] = 0x8eb44a8768581511ULL;
-       ctx->state[6] = 0xdb0c2e0d64f98fa7ULL;
-       ctx->state[7] = 0x47b5481dbefa4fa4ULL;
-}
-
-/* Add bytes into the SHA-384 hash */
-void
-SHA384_Update(SHA384_CTX * ctx, const void *in, size_t len)
-{
-
-       SHA512_Update((SHA512_CTX *)ctx, in, len);
-}
-
-/*
- * SHA-384 finalization.  Pads the input data, exports the hash value,
- * and clears the context state.
- */
-void
-SHA384_Final(unsigned char digest[static SHA384_DIGEST_LENGTH], SHA384_CTX *ctx)
-{
-
-       /* Add padding */
-       SHA512_Pad((SHA512_CTX *)ctx);
-
-       /* Write the hash */
-       be64enc_vect(digest, ctx->state, SHA384_DIGEST_LENGTH);
-
-       /* Clear the context state */
-       memset(ctx, 0, sizeof (*ctx));
-}
-
-#if 0
-/*
- * When building libmd, provide weak references. Note: this is not
- * activated in the context of compiling these sources for internal
- * use in libcrypt.
- */
-#undef SHA512_Init
-__weak_reference(_libmd_SHA512_Init, SHA512_Init);
-#undef SHA512_Update
-__weak_reference(_libmd_SHA512_Update, SHA512_Update);
-#undef SHA512_Final
-__weak_reference(_libmd_SHA512_Final, SHA512_Final);
-#undef SHA512_Transform
-__weak_reference(_libmd_SHA512_Transform, SHA512_Transform);
-
-#undef SHA512_224_Init
-__weak_reference(_libmd_SHA512_224_Init, SHA512_224_Init);
-#undef SHA512_224_Update
-__weak_reference(_libmd_SHA512_224_Update, SHA512_224_Update);
-#undef SHA512_224_Final
-__weak_reference(_libmd_SHA512_224_Final, SHA512_224_Final);
-
-#undef SHA512_256_Init
-__weak_reference(_libmd_SHA512_256_Init, SHA512_256_Init);
-#undef SHA512_256_Update
-__weak_reference(_libmd_SHA512_256_Update, SHA512_256_Update);
-#undef SHA512_256_Final
-__weak_reference(_libmd_SHA512_256_Final, SHA512_256_Final);
-
-#undef SHA384_Init
-__weak_reference(_libmd_SHA384_Init, SHA384_Init);
-#undef SHA384_Update
-__weak_reference(_libmd_SHA384_Update, SHA384_Update);
-#undef SHA384_Final
-__weak_reference(_libmd_SHA384_Final, SHA384_Final);
-#endif
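
For reference, the padding rule implemented by SHA512_Pad() above, restated as a standalone sketch (hypothetical helper, not part of the commit): an L-byte message is followed by a 0x80 byte, enough zero bytes to reach 112 mod 128, and a 16-byte big-endian bit count, so the padded length is always a multiple of the 128-byte block size.

	#include <stddef.h>

	/*
	 * Hypothetical helper, for illustration only: total length in bytes of
	 * an L-byte message after SHA-512 padding.  The 0x80 marker plus the
	 * zero fill bring the length to 112 mod 128; the 16-byte bit count
	 * then completes the final 128-byte block.
	 */
	static size_t
	sha512_padded_len(size_t msglen)
	{
		size_t rem = msglen % 128;	/* bytes in the final block */
		size_t fill = (rem < 112) ? 112 - rem : 240 - rem;

		return (msglen + fill + 16);	/* always a multiple of 128 */
	}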
diff --git a/module/os/freebsd/spl/sha512t.h b/module/os/freebsd/spl/sha512t.h
deleted file mode 100644 (file)
index 703867f..0000000
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * Copyright (c) 2015 Allan Jude <allanjude@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef _SHA512T_H_
-#define        _SHA512T_H_
-
-#include "sha512.h"
-
-#ifndef _KERNEL
-#include <sys/types.h>
-#endif
-
-#define        SHA512_224_DIGEST_LENGTH        28
-#define        SHA512_224_DIGEST_STRING_LENGTH (SHA512_224_DIGEST_LENGTH * 2 + 1)
-#define        SHA512_256_DIGEST_LENGTH        32
-#define        SHA512_256_DIGEST_STRING_LENGTH (SHA512_256_DIGEST_LENGTH * 2 + 1)
-
-__BEGIN_DECLS
-
-/* Ensure libmd symbols do not clash with libcrypto */
-#ifndef SHA512_224_Init
-#define        SHA512_224_Init         _libmd_SHA512_224_Init
-#endif
-#ifndef SHA512_224_Update
-#define        SHA512_224_Update       _libmd_SHA512_224_Update
-#endif
-#ifndef SHA512_224_Final
-#define        SHA512_224_Final        _libmd_SHA512_224_Final
-#endif
-#ifndef SHA512_224_End
-#define        SHA512_224_End          _libmd_SHA512_224_End
-#endif
-#ifndef SHA512_224_Fd
-#define        SHA512_224_Fd           _libmd_SHA512_224_Fd
-#endif
-#ifndef SHA512_224_FdChunk
-#define        SHA512_224_FdChunk      _libmd_SHA512_224_FdChunk
-#endif
-#ifndef SHA512_224_File
-#define        SHA512_224_File         _libmd_SHA512_224_File
-#endif
-#ifndef SHA512_224_FileChunk
-#define        SHA512_224_FileChunk    _libmd_SHA512_224_FileChunk
-#endif
-#ifndef SHA512_224_Data
-#define        SHA512_224_Data         _libmd_SHA512_224_Data
-#endif
-
-#ifndef SHA512_224_Transform
-#define        SHA512_224_Transform    _libmd_SHA512_224_Transform
-#endif
-#ifndef SHA512_224_version
-#define        SHA512_224_version      _libmd_SHA512_224_version
-#endif
-
-#ifndef SHA512_256_Init
-#define        SHA512_256_Init         _libmd_SHA512_256_Init
-#endif
-#ifndef SHA512_256_Update
-#define        SHA512_256_Update       _libmd_SHA512_256_Update
-#endif
-#ifndef SHA512_256_Final
-#define        SHA512_256_Final        _libmd_SHA512_256_Final
-#endif
-#ifndef SHA512_256_End
-#define        SHA512_256_End          _libmd_SHA512_256_End
-#endif
-#ifndef SHA512_256_Fd
-#define        SHA512_256_Fd           _libmd_SHA512_256_Fd
-#endif
-#ifndef SHA512_256_FdChunk
-#define        SHA512_256_FdChunk      _libmd_SHA512_256_FdChunk
-#endif
-#ifndef SHA512_256_File
-#define        SHA512_256_File         _libmd_SHA512_256_File
-#endif
-#ifndef SHA512_256_FileChunk
-#define        SHA512_256_FileChunk    _libmd_SHA512_256_FileChunk
-#endif
-#ifndef SHA512_256_Data
-#define        SHA512_256_Data         _libmd_SHA512_256_Data
-#endif
-
-#ifndef SHA512_256_Transform
-#define        SHA512_256_Transform    _libmd_SHA512_256_Transform
-#endif
-#ifndef SHA512_256_version
-#define        SHA512_256_version      _libmd_SHA512_256_version
-#endif
-
-void   SHA512_224_Init(SHA512_CTX *);
-void   SHA512_224_Update(SHA512_CTX *, const void *, size_t);
-void   SHA512_224_Final(unsigned char [__min_size(SHA512_224_DIGEST_LENGTH)],
-    SHA512_CTX *);
-#ifndef _KERNEL
-char   *SHA512_224_End(SHA512_CTX *, char *);
-char   *SHA512_224_Data(const void *, unsigned int, char *);
-char   *SHA512_224_Fd(int, char *);
-char   *SHA512_224_FdChunk(int, char *, off_t, off_t);
-char   *SHA512_224_File(const char *, char *);
-char   *SHA512_224_FileChunk(const char *, char *, off_t, off_t);
-#endif
-void   SHA512_256_Init(SHA512_CTX *);
-void   SHA512_256_Update(SHA512_CTX *, const void *, size_t);
-void   SHA512_256_Final(unsigned char [__min_size(SHA512_256_DIGEST_LENGTH)],
-    SHA512_CTX *);
-#ifndef _KERNEL
-char   *SHA512_256_End(SHA512_CTX *, char *);
-char   *SHA512_256_Data(const void *, unsigned int, char *);
-char   *SHA512_256_Fd(int, char *);
-char   *SHA512_256_FdChunk(int, char *, off_t, off_t);
-char   *SHA512_256_File(const char *, char *);
-char   *SHA512_256_FileChunk(const char *, char *, off_t, off_t);
-#endif
-
-__END_DECLS
-
-#endif /* !_SHA512T_H_ */
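
For reference, a minimal sketch (hypothetical caller, not taken from this commit) of the truncated-variant interface declared in the deleted header above: SHA-512/256 shares SHA512_CTX and the SHA-512 compression function, differing only in its initialization vector and in emitting the first 32 bytes of the final state.

	#include <stddef.h>
	#include "sha512t.h"	/* the header deleted above */

	/*
	 * Hypothetical caller, for illustration only: SHA-512/256 reuses the
	 * plain SHA-512 context; only the IV and the output length differ.
	 */
	static void
	sha512_256_digest(const void *msg, size_t len,
	    unsigned char digest[SHA512_256_DIGEST_LENGTH])
	{
		SHA512_CTX ctx;			/* same context as SHA-512 */

		SHA512_256_Init(&ctx);			/* SHA-512/256-specific IV */
		SHA512_256_Update(&ctx, msg, len);	/* identical compression */
		SHA512_256_Final(digest, &ctx);		/* emit only 32 bytes, wipe ctx */
	}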