]> git.proxmox.com Git - mirror_zfs.git/commitdiff
OpenZFS 4185 - add new cryptographic checksums to ZFS: SHA-512, Skein, Edon-R
authorTony Hutter <hutter2@llnl.gov>
Wed, 15 Jun 2016 22:47:05 +0000 (15:47 -0700)
committerTony Hutter <hutter2@llnl.gov>
Mon, 3 Oct 2016 21:51:15 +0000 (14:51 -0700)
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Prakash Surya <prakash.surya@delphix.com>
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
Reviewed by: Richard Lowe <richlowe@richlowe.net>
Approved by: Garrett D'Amore <garrett@damore.org>
Ported by: Tony Hutter <hutter2@llnl.gov>

OpenZFS-issue: https://www.illumos.org/issues/4185
OpenZFS-commit: https://github.com/openzfs/openzfs/commit/45818ee

Porting Notes:
This code is ported on top of the Illumos Crypto Framework code:

    https://github.com/zfsonlinux/zfs/pull/4329/commits/b5e030c8dbb9cd393d313571dee4756fbba8c22d

The list of porting changes includes:

- Copied module/icp/include/sha2/sha2.h directly from illumos

- Removed from module/icp/algs/sha2/sha2.c:
#pragma inline(SHA256Init, SHA384Init, SHA512Init)

- Added 'ctx' to lib/libzfs/libzfs_sendrecv.c:zio_checksum_SHA256() since
  it now takes in an extra parameter.

- Added CTASSERT() to assert.h for module/zfs/edonr_zfs.c

- Added skein & edonr to libicp/Makefile.am

- Added sha512.S.  It was generated from sha512-x86_64.pl in Illumos.

- Updated ztest.c with new fletcher_4_*() args; used NULL for new CTX argument.

- In icp/algs/edonr/edonr_byteorder.h, removed the #if defined(__linux) section
  so that it does not #include the non-existent endian.h.

- In skein_test.c, replace NULL with 0 in "no test vector" array entries to get
  around a compiler warning.

- Fixup test files:
- Rename <sys/varargs.h> -> <varargs.h>, <strings.h> -> <string.h>,
- Remove <note.h> and define NOTE() as NOP.
- Define u_longlong_t
- Rename "#!/usr/bin/ksh" -> "#!/bin/ksh -p"
- Rename NULL to 0 in "no test vector" array entries to get around a
  compiler warning.
- Remove "for isa in $($ISAINFO); do" stuff
- Add/update Makefiles
- Add some userspace headers like stdio.h/stdlib.h in places of
  sys/types.h.

- EXPORT_SYMBOL *_Init/*_Update/*_Final... routines in ICP modules.

- Update scripts/zfs2zol-patch.sed

- include <sys/sha2.h> in sha2_impl.h

- Add sha2.h to include/sys/Makefile.am

- Add skein and edonr dirs to icp Makefile

- Add new checksums to zpool_get.cfg

- Move checksum switch block from zfs_secpolicy_setprop() to
  zfs_check_settable()

- Fix -Wuninitialized error in edonr_byteorder.h on PPC

- Fix stack frame size errors on ARM32
   - Don't unroll loops in Skein on 32-bit to save stack space
   - Add memory barriers in sha2.c on 32-bit to save stack space

- Add filetest_001_pos.ksh checksum sanity test

- Add option to write pseudorandom data in file_write utility

79 files changed:
Makefile.am
cmd/ztest/ztest.c
configure.ac
include/sys/Makefile.am
include/sys/crypto/icp.h
include/sys/dmu.h
include/sys/edonr.h [new file with mode: 0644]
include/sys/sha2.h [new file with mode: 0644]
include/sys/skein.h [new file with mode: 0644]
include/sys/spa.h
include/sys/spa_impl.h
include/sys/zio.h
include/sys/zio_checksum.h
include/zfeature_common.h
include/zfs_fletcher.h
lib/libicp/Makefile.am
lib/libspl/include/assert.h
lib/libzfs/libzfs_dataset.c
lib/libzfs/libzfs_sendrecv.c
lib/libzpool/Makefile.am
man/man5/zpool-features.5
man/man8/zfs.8
module/icp/Makefile.in
module/icp/algs/edonr/edonr.c [new file with mode: 0644]
module/icp/algs/edonr/edonr_byteorder.h [new file with mode: 0644]
module/icp/algs/sha2/sha2.c
module/icp/algs/skein/THIRDPARTYLICENSE [new file with mode: 0644]
module/icp/algs/skein/THIRDPARTYLICENSE.descrip [new file with mode: 0644]
module/icp/algs/skein/skein.c [new file with mode: 0644]
module/icp/algs/skein/skein_block.c [new file with mode: 0644]
module/icp/algs/skein/skein_impl.h [new file with mode: 0644]
module/icp/algs/skein/skein_iv.c [new file with mode: 0644]
module/icp/algs/skein/skein_port.h [new file with mode: 0644]
module/icp/asm-x86_64/sha2/sha256_impl.S
module/icp/asm-x86_64/sha2/sha512_impl.S [new file with mode: 0644]
module/icp/illumos-crypto.c
module/icp/include/sha2/sha2.h [deleted file]
module/icp/include/sha2/sha2_impl.h
module/icp/io/edonr_mod.c [new file with mode: 0644]
module/icp/io/sha2_mod.c
module/icp/io/skein_mod.c [new file with mode: 0644]
module/zcommon/zfs_fletcher.c
module/zcommon/zfs_prop.c
module/zfs/Makefile.in
module/zfs/arc.c
module/zfs/dbuf.c
module/zfs/ddt.c
module/zfs/dmu.c
module/zfs/dmu_send.c
module/zfs/dsl_dataset.c
module/zfs/edonr_zfs.c [new file with mode: 0644]
module/zfs/sha256.c
module/zfs/skein_zfs.c [new file with mode: 0644]
module/zfs/spa.c
module/zfs/spa_misc.c
module/zfs/vdev_raidz.c
module/zfs/zfeature_common.c
module/zfs/zfs_ioctl.c
module/zfs/zio.c
module/zfs/zio_checksum.c
scripts/zfs2zol-patch.sed
tests/runfiles/linux.run
tests/zfs-tests/cmd/file_write/file_write.c
tests/zfs-tests/include/libtest.shlib
tests/zfs-tests/include/properties.shlib
tests/zfs-tests/tests/functional/Makefile.am
tests/zfs-tests/tests/functional/checksum/.gitignore [new file with mode: 0644]
tests/zfs-tests/tests/functional/checksum/Makefile.am [new file with mode: 0644]
tests/zfs-tests/tests/functional/checksum/cleanup.ksh [new file with mode: 0755]
tests/zfs-tests/tests/functional/checksum/edonr_test.c [new file with mode: 0644]
tests/zfs-tests/tests/functional/checksum/filetest_001_pos.ksh [new file with mode: 0755]
tests/zfs-tests/tests/functional/checksum/run_edonr_test.ksh [new file with mode: 0755]
tests/zfs-tests/tests/functional/checksum/run_sha2_test.ksh [new file with mode: 0755]
tests/zfs-tests/tests/functional/checksum/run_skein_test.ksh [new file with mode: 0755]
tests/zfs-tests/tests/functional/checksum/setup.ksh [new file with mode: 0755]
tests/zfs-tests/tests/functional/checksum/sha2_test.c [new file with mode: 0644]
tests/zfs-tests/tests/functional/checksum/skein_test.c [new file with mode: 0644]
tests/zfs-tests/tests/functional/cli_root/zfs_set/checksum_001_pos.ksh
tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg

index fe4285b3fd6231d306bd177d686d60f3614dc828..abc98e4ed5e78caf9ce8cc71156718e43c2248b6 100644 (file)
@@ -6,7 +6,7 @@ include config/tgz.am
 
 SUBDIRS = include rpm
 if CONFIG_USER
-SUBDIRS += udev etc man scripts tests lib cmd contrib
+SUBDIRS += udev etc man scripts lib tests cmd contrib
 endif
 if CONFIG_KERNEL
 SUBDIRS += module
index 1b77b6ceed971eda572c848076f39860a44e73a5..912a7f70ed0a1007ba1af9e9014f193f065d0fa2 100644 (file)
@@ -5654,16 +5654,16 @@ ztest_fletcher(ztest_ds_t *zd, uint64_t id)
                        *ptr = ztest_random(UINT_MAX);
 
                VERIFY0(fletcher_4_impl_set("scalar"));
-               fletcher_4_native(buf, size, &zc_ref);
-               fletcher_4_byteswap(buf, size, &zc_ref_byteswap);
+               fletcher_4_native(buf, size, NULL, &zc_ref);
+               fletcher_4_byteswap(buf, size, NULL, &zc_ref_byteswap);
 
                VERIFY0(fletcher_4_impl_set("cycle"));
                while (run_count-- > 0) {
                        zio_cksum_t zc;
                        zio_cksum_t zc_byteswap;
 
-                       fletcher_4_byteswap(buf, size, &zc_byteswap);
-                       fletcher_4_native(buf, size, &zc);
+                       fletcher_4_byteswap(buf, size, NULL, &zc_byteswap);
+                       fletcher_4_native(buf, size, NULL, &zc);
 
                        VERIFY0(bcmp(&zc, &zc_ref, sizeof (zc)));
                        VERIFY0(bcmp(&zc_byteswap, &zc_ref_byteswap,
index c7685550dff8ef929dd0af0d8ad6d01b6ed15c5c..edcf299585c51a7ba6a8ad239a3719df34998d50 100644 (file)
@@ -49,6 +49,7 @@ AC_PROG_INSTALL
 AC_PROG_CC
 AC_PROG_LIBTOOL
 AM_PROG_AS
+AM_PROG_CC_C_O
 
 ZFS_AC_LICENSE
 ZFS_AC_PACKAGE
@@ -178,6 +179,7 @@ AC_CONFIG_FILES([
        tests/zfs-tests/tests/functional/cache/Makefile
        tests/zfs-tests/tests/functional/cachefile/Makefile
        tests/zfs-tests/tests/functional/casenorm/Makefile
+       tests/zfs-tests/tests/functional/checksum/Makefile
        tests/zfs-tests/tests/functional/clean_mirror/Makefile
        tests/zfs-tests/tests/functional/cli_root/Makefile
        tests/zfs-tests/tests/functional/cli_root/zdb/Makefile
index 40cd0597c9b9178ce0ef701db803a42a5c070987..96d77c7b30779449fddaef077e3aec27fd708fc5 100644 (file)
@@ -31,6 +31,7 @@ COMMON_H = \
        $(top_srcdir)/include/sys/dsl_scan.h \
        $(top_srcdir)/include/sys/dsl_synctask.h \
        $(top_srcdir)/include/sys/dsl_userhold.h \
+       $(top_srcdir)/include/sys/edonr.h \
        $(top_srcdir)/include/sys/efi_partition.h \
        $(top_srcdir)/include/sys/metaslab.h \
        $(top_srcdir)/include/sys/metaslab_impl.h \
@@ -46,6 +47,8 @@ COMMON_H = \
        $(top_srcdir)/include/sys/sa.h \
        $(top_srcdir)/include/sys/sa_impl.h \
        $(top_srcdir)/include/sys/sdt.h \
+       $(top_srcdir)/include/sys/sha2.h \
+       $(top_srcdir)/include/sys/skein.h \
        $(top_srcdir)/include/sys/spa_boot.h \
        $(top_srcdir)/include/sys/space_map.h \
        $(top_srcdir)/include/sys/space_reftree.h \
index c7bb78e836233ff4f40c1c96700147443713eb6a..d8948e022a235bd735a68c27aa61b3b7fff72428 100644 (file)
 int aes_mod_init(void);
 int aes_mod_fini(void);
 
+int edonr_mod_init(void);
+int edonr_mod_fini(void);
+
 int sha1_mod_init(void);
 int sha1_mod_fini(void);
 
 int sha2_mod_init(void);
 int sha2_mod_fini(void);
 
+int skein_mod_init(void);
+int skein_mod_fini(void);
+
 int icp_init(void);
 void icp_fini(void);
 
index 4efab7c723cdaaaa2d54019c5b7252a7a382b970..b67acb52c932e6ba053b18f1be2cda9e9a8b2fbf 100644 (file)
@@ -25,6 +25,7 @@
  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  * Copyright 2014 HybridCluster. All rights reserved.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
+ * Copyright 2013 Saso Kiselkov. All rights reserved.
  */
 
 /* Portions Copyright 2010 Robert Milkowski */
@@ -317,6 +318,7 @@ typedef struct dmu_buf {
 #define        DMU_POOL_FREE_BPOBJ             "free_bpobj"
 #define        DMU_POOL_BPTREE_OBJ             "bptree_obj"
 #define        DMU_POOL_EMPTY_BPOBJ            "empty_bpobj"
+#define        DMU_POOL_CHECKSUM_SALT          "org.illumos:checksum_salt"
 #define        DMU_POOL_VDEV_ZAP_MAP           "com.delphix:vdev_zap_map"
 
 /*
diff --git a/include/sys/edonr.h b/include/sys/edonr.h
new file mode 100644 (file)
index 0000000..79b7cd8
--- /dev/null
@@ -0,0 +1,98 @@
+/*
+ * IDI,NTNU
+ *
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ *
+ * Copyright (C) 2009, 2010, Jorn Amundsen <jorn.amundsen@ntnu.no>
+ *
+ * Tweaked Edon-R implementation for SUPERCOP, based on NIST API.
+ *
+ * $Id: edonr.h 517 2013-02-17 20:34:39Z joern $
+ */
+/*
+ * Portions copyright (c) 2013, Saso Kiselkov, All rights reserved
+ */
+
+#ifndef        _SYS_EDONR_H_
+#define        _SYS_EDONR_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef  _KERNEL
+#include <sys/types.h>
+#else
+#include <stdint.h> /* uint32_t... */
+#include <stdlib.h> /* size_t ... */
+#endif
+
+/*
+ * EdonR allows EdonRUpdate() to be called consecutively only if the total
+ * length of stored unprocessed data and the newly supplied data is less than
+ * or equal to the BLOCK_SIZE on which the compression function operates.
+ * Otherwise an assertion failure is invoked.
+ */
+
+/* Specific algorithm definitions */
+#define        EdonR224_DIGEST_SIZE    28
+#define        EdonR224_BLOCK_SIZE     64
+#define        EdonR256_DIGEST_SIZE    32
+#define        EdonR256_BLOCK_SIZE     64
+#define        EdonR384_DIGEST_SIZE    48
+#define        EdonR384_BLOCK_SIZE     128
+#define        EdonR512_DIGEST_SIZE    64
+#define        EdonR512_BLOCK_SIZE     128
+
+#define        EdonR256_BLOCK_BITSIZE  512
+#define        EdonR512_BLOCK_BITSIZE  1024
+
+typedef struct {
+       uint32_t DoublePipe[16];
+       uint8_t LastPart[EdonR256_BLOCK_SIZE * 2];
+} EdonRData256;
+typedef struct {
+       uint64_t DoublePipe[16];
+       uint8_t LastPart[EdonR512_BLOCK_SIZE * 2];
+} EdonRData512;
+
+typedef struct {
+       size_t hashbitlen;
+
+       /* + algorithm specific parameters */
+       int unprocessed_bits;
+       uint64_t bits_processed;
+       union {
+               EdonRData256 p256[1];
+               EdonRData512 p512[1];
+       } pipe[1];
+} EdonRState;
+
+void EdonRInit(EdonRState *state, size_t hashbitlen);
+void EdonRUpdate(EdonRState *state, const uint8_t *data, size_t databitlen);
+void EdonRFinal(EdonRState *state, uint8_t *hashval);
+void EdonRHash(size_t hashbitlen, const uint8_t *data, size_t databitlen,
+    uint8_t *hashval);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_EDONR_H_ */
diff --git a/include/sys/sha2.h b/include/sys/sha2.h
new file mode 100644 (file)
index 0000000..9039835
--- /dev/null
@@ -0,0 +1,155 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+/* Copyright 2013 Saso Kiselkov.  All rights reserved. */
+
+#ifndef _SYS_SHA2_H
+#define        _SYS_SHA2_H
+
+#ifdef  _KERNEL
+#include <sys/types.h>         /* for uint_* */
+#else
+#include <stdint.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define        SHA2_HMAC_MIN_KEY_LEN   1       /* SHA2-HMAC min key length in bytes */
+#define        SHA2_HMAC_MAX_KEY_LEN   INT_MAX /* SHA2-HMAC max key length in bytes */
+
+#define        SHA256_DIGEST_LENGTH    32      /* SHA256 digest length in bytes */
+#define        SHA384_DIGEST_LENGTH    48      /* SHA384 digest length in bytes */
+#define        SHA512_DIGEST_LENGTH    64      /* SHA512 digest length in bytes */
+
+/* Truncated versions of SHA-512 according to FIPS-180-4, section 5.3.6 */
+#define        SHA512_224_DIGEST_LENGTH        28      /* SHA512/224 digest length */
+#define        SHA512_256_DIGEST_LENGTH        32      /* SHA512/256 digest length */
+
+#define        SHA256_HMAC_BLOCK_SIZE  64      /* SHA256-HMAC block size */
+#define        SHA512_HMAC_BLOCK_SIZE  128     /* SHA512-HMAC block size */
+
+#define        SHA256                  0
+#define        SHA256_HMAC             1
+#define        SHA256_HMAC_GEN         2
+#define        SHA384                  3
+#define        SHA384_HMAC             4
+#define        SHA384_HMAC_GEN         5
+#define        SHA512                  6
+#define        SHA512_HMAC             7
+#define        SHA512_HMAC_GEN         8
+#define        SHA512_224              9
+#define        SHA512_256              10
+
+/*
+ * SHA2 context.
+ * The contents of this structure are a private interface between the
+ * Init/Update/Final calls of the functions defined below.
+ * Callers must never attempt to read or write any of the fields
+ * in this structure directly.
+ */
+typedef struct         {
+       uint32_t algotype;              /* Algorithm Type */
+
+       /* state (ABCDEFGH) */
+       union {
+               uint32_t s32[8];        /* for SHA256 */
+               uint64_t s64[8];        /* for SHA384/512 */
+       } state;
+       /* number of bits */
+       union {
+               uint32_t c32[2];        /* for SHA256 , modulo 2^64 */
+               uint64_t c64[2];        /* for SHA384/512, modulo 2^128 */
+       } count;
+       union {
+               uint8_t         buf8[128];      /* undigested input */
+               uint32_t        buf32[32];      /* realigned input */
+               uint64_t        buf64[16];      /* realigned input */
+       } buf_un;
+} SHA2_CTX;
+
+typedef SHA2_CTX SHA256_CTX;
+typedef SHA2_CTX SHA384_CTX;
+typedef SHA2_CTX SHA512_CTX;
+
+extern void SHA2Init(uint64_t mech, SHA2_CTX *);
+
+extern void SHA2Update(SHA2_CTX *, const void *, size_t);
+
+extern void SHA2Final(void *, SHA2_CTX *);
+
+extern void SHA256Init(SHA256_CTX *);
+
+extern void SHA256Update(SHA256_CTX *, const void *, size_t);
+
+extern void SHA256Final(void *, SHA256_CTX *);
+
+extern void SHA384Init(SHA384_CTX *);
+
+extern void SHA384Update(SHA384_CTX *, const void *, size_t);
+
+extern void SHA384Final(void *, SHA384_CTX *);
+
+extern void SHA512Init(SHA512_CTX *);
+
+extern void SHA512Update(SHA512_CTX *, const void *, size_t);
+
+extern void SHA512Final(void *, SHA512_CTX *);
+
+#ifdef _SHA2_IMPL
+/*
+ * The following types/functions are all private to the implementation
+ * of the SHA2 functions and must not be used by consumers of the interface
+ */
+
+/*
+ * List of supported mechanisms in this module.
+ *
+ * It is important to note that in the module, division or modulus calculations
+ * are used on the enumerated type to determine which mechanism is being used;
+ * therefore, changing the order or adding mechanisms should be done
+ * carefully.
+ */
+typedef enum sha2_mech_type {
+       SHA256_MECH_INFO_TYPE,          /* SUN_CKM_SHA256 */
+       SHA256_HMAC_MECH_INFO_TYPE,     /* SUN_CKM_SHA256_HMAC */
+       SHA256_HMAC_GEN_MECH_INFO_TYPE, /* SUN_CKM_SHA256_HMAC_GENERAL */
+       SHA384_MECH_INFO_TYPE,          /* SUN_CKM_SHA384 */
+       SHA384_HMAC_MECH_INFO_TYPE,     /* SUN_CKM_SHA384_HMAC */
+       SHA384_HMAC_GEN_MECH_INFO_TYPE, /* SUN_CKM_SHA384_HMAC_GENERAL */
+       SHA512_MECH_INFO_TYPE,          /* SUN_CKM_SHA512 */
+       SHA512_HMAC_MECH_INFO_TYPE,     /* SUN_CKM_SHA512_HMAC */
+       SHA512_HMAC_GEN_MECH_INFO_TYPE, /* SUN_CKM_SHA512_HMAC_GENERAL */
+       SHA512_224_MECH_INFO_TYPE,      /* SUN_CKM_SHA512_224 */
+       SHA512_256_MECH_INFO_TYPE       /* SUN_CKM_SHA512_256 */
+} sha2_mech_type_t;
+
+#endif /* _SHA2_IMPL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_SHA2_H */
diff --git a/include/sys/skein.h b/include/sys/skein.h
new file mode 100644 (file)
index 0000000..2f649d6
--- /dev/null
@@ -0,0 +1,183 @@
+/*
+ * Interface declarations for Skein hashing.
+ * Source code author: Doug Whiting, 2008.
+ * This algorithm and source code is released to the public domain.
+ *
+ * The following compile-time switches may be defined to control some
+ * tradeoffs between speed, code size, error checking, and security.
+ *
+ * The "default" note explains what happens when the switch is not defined.
+ *
+ *  SKEIN_DEBUG            -- make callouts from inside Skein code
+ *                            to examine/display intermediate values.
+ *                            [default: no callouts (no overhead)]
+ *
+ *  SKEIN_ERR_CHECK        -- how error checking is handled inside Skein
+ *                            code. If not defined, most error checking
+ *                            is disabled (for performance). Otherwise,
+ *                            the switch value is interpreted as:
+ *                                0: use assert()      to flag errors
+ *                                1: return SKEIN_FAIL to flag errors
+ */
+/* Copyright 2013 Doug Whiting. This code is released to the public domain. */
+#ifndef        _SYS_SKEIN_H_
+#define        _SYS_SKEIN_H_
+
+#ifdef  _KERNEL
+#include <sys/types.h>         /* get size_t definition */
+#else
+#include <stdint.h>
+#include <stdlib.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum {
+       SKEIN_SUCCESS = 0,      /* return codes from Skein calls */
+       SKEIN_FAIL = 1,
+       SKEIN_BAD_HASHLEN = 2
+};
+
+#define        SKEIN_MODIFIER_WORDS    (2)     /* number of modifier (tweak) words */
+
+#define        SKEIN_256_STATE_WORDS   (4)
+#define        SKEIN_512_STATE_WORDS   (8)
+#define        SKEIN1024_STATE_WORDS   (16)
+#define        SKEIN_MAX_STATE_WORDS   (16)
+
+#define        SKEIN_256_STATE_BYTES   (8 * SKEIN_256_STATE_WORDS)
+#define        SKEIN_512_STATE_BYTES   (8 * SKEIN_512_STATE_WORDS)
+#define        SKEIN1024_STATE_BYTES   (8 * SKEIN1024_STATE_WORDS)
+
+#define        SKEIN_256_STATE_BITS    (64 * SKEIN_256_STATE_WORDS)
+#define        SKEIN_512_STATE_BITS    (64 * SKEIN_512_STATE_WORDS)
+#define        SKEIN1024_STATE_BITS    (64 * SKEIN1024_STATE_WORDS)
+
+#define        SKEIN_256_BLOCK_BYTES   (8 * SKEIN_256_STATE_WORDS)
+#define        SKEIN_512_BLOCK_BYTES   (8 * SKEIN_512_STATE_WORDS)
+#define        SKEIN1024_BLOCK_BYTES   (8 * SKEIN1024_STATE_WORDS)
+
+typedef struct {
+       size_t hashBitLen;      /* size of hash result, in bits */
+       size_t bCnt;            /* current byte count in buffer b[] */
+       /* tweak words: T[0]=byte cnt, T[1]=flags */
+       uint64_t T[SKEIN_MODIFIER_WORDS];
+} Skein_Ctxt_Hdr_t;
+
+typedef struct {               /*  256-bit Skein hash context structure */
+       Skein_Ctxt_Hdr_t h;     /* common header context variables */
+       uint64_t X[SKEIN_256_STATE_WORDS];      /* chaining variables */
+       /* partial block buffer (8-byte aligned) */
+       uint8_t b[SKEIN_256_BLOCK_BYTES];
+} Skein_256_Ctxt_t;
+
+typedef struct {               /*  512-bit Skein hash context structure */
+       Skein_Ctxt_Hdr_t h;     /* common header context variables */
+       uint64_t X[SKEIN_512_STATE_WORDS];      /* chaining variables */
+       /* partial block buffer (8-byte aligned) */
+       uint8_t b[SKEIN_512_BLOCK_BYTES];
+} Skein_512_Ctxt_t;
+
+typedef struct {               /* 1024-bit Skein hash context structure */
+       Skein_Ctxt_Hdr_t h;     /* common header context variables */
+       uint64_t X[SKEIN1024_STATE_WORDS];      /* chaining variables */
+       /* partial block buffer (8-byte aligned) */
+       uint8_t b[SKEIN1024_BLOCK_BYTES];
+} Skein1024_Ctxt_t;
+
+/*   Skein APIs for (incremental) "straight hashing" */
+int Skein_256_Init(Skein_256_Ctxt_t *ctx, size_t hashBitLen);
+int Skein_512_Init(Skein_512_Ctxt_t *ctx, size_t hashBitLen);
+int Skein1024_Init(Skein1024_Ctxt_t *ctx, size_t hashBitLen);
+
+int Skein_256_Update(Skein_256_Ctxt_t *ctx, const uint8_t *msg,
+    size_t msgByteCnt);
+int Skein_512_Update(Skein_512_Ctxt_t *ctx, const uint8_t *msg,
+    size_t msgByteCnt);
+int Skein1024_Update(Skein1024_Ctxt_t *ctx, const uint8_t *msg,
+    size_t msgByteCnt);
+
+int Skein_256_Final(Skein_256_Ctxt_t *ctx, uint8_t *hashVal);
+int Skein_512_Final(Skein_512_Ctxt_t *ctx, uint8_t *hashVal);
+int Skein1024_Final(Skein1024_Ctxt_t *ctx, uint8_t *hashVal);
+
+/*
+ * Skein APIs for "extended" initialization: MAC keys, tree hashing.
+ * After an InitExt() call, just use Update/Final calls as with Init().
+ *
+ * Notes: Same parameters as _Init() calls, plus treeInfo/key/keyBytes.
+ *          When keyBytes == 0 and treeInfo == SKEIN_SEQUENTIAL,
+ *              the results of InitExt() are identical to calling Init().
+ *          The function Init() may be called once to "precompute" the IV for
+ *              a given hashBitLen value, then by saving a copy of the context
+ *              the IV computation may be avoided in later calls.
+ *          Similarly, the function InitExt() may be called once per MAC key
+ *              to precompute the MAC IV, then a copy of the context saved and
+ *              reused for each new MAC computation.
+ */
+int Skein_256_InitExt(Skein_256_Ctxt_t *ctx, size_t hashBitLen,
+    uint64_t treeInfo, const uint8_t *key, size_t keyBytes);
+int Skein_512_InitExt(Skein_512_Ctxt_t *ctx, size_t hashBitLen,
+    uint64_t treeInfo, const uint8_t *key, size_t keyBytes);
+int Skein1024_InitExt(Skein1024_Ctxt_t *ctx, size_t hashBitLen,
+    uint64_t treeInfo, const uint8_t *key, size_t keyBytes);
+
+/*
+ * Skein APIs for MAC and tree hash:
+ *     Final_Pad: pad, do final block, but no OUTPUT type
+ *     Output:    do just the output stage
+ */
+int Skein_256_Final_Pad(Skein_256_Ctxt_t *ctx, uint8_t *hashVal);
+int Skein_512_Final_Pad(Skein_512_Ctxt_t *ctx, uint8_t *hashVal);
+int Skein1024_Final_Pad(Skein1024_Ctxt_t *ctx, uint8_t *hashVal);
+
+#ifndef        SKEIN_TREE_HASH
+#define        SKEIN_TREE_HASH (1)
+#endif
+#if    SKEIN_TREE_HASH
+int Skein_256_Output(Skein_256_Ctxt_t *ctx, uint8_t *hashVal);
+int Skein_512_Output(Skein_512_Ctxt_t *ctx, uint8_t *hashVal);
+int Skein1024_Output(Skein1024_Ctxt_t *ctx, uint8_t *hashVal);
+#endif
+
+/*
+ * When you initialize a Skein KCF hashing method you can pass this param
+ * structure in cm_param to fine-tune the algorithm's defaults.
+ */
+typedef struct skein_param {
+       size_t  sp_digest_bitlen;               /* length of digest in bits */
+} skein_param_t;
+
+/* Module definitions */
+#ifdef SKEIN_MODULE_IMPL
+#define        CKM_SKEIN_256                           "CKM_SKEIN_256"
+#define        CKM_SKEIN_512                           "CKM_SKEIN_512"
+#define        CKM_SKEIN1024                           "CKM_SKEIN1024"
+#define        CKM_SKEIN_256_MAC                       "CKM_SKEIN_256_MAC"
+#define        CKM_SKEIN_512_MAC                       "CKM_SKEIN_512_MAC"
+#define        CKM_SKEIN1024_MAC                       "CKM_SKEIN1024_MAC"
+
+typedef enum skein_mech_type {
+       SKEIN_256_MECH_INFO_TYPE,
+       SKEIN_512_MECH_INFO_TYPE,
+       SKEIN1024_MECH_INFO_TYPE,
+       SKEIN_256_MAC_MECH_INFO_TYPE,
+       SKEIN_512_MAC_MECH_INFO_TYPE,
+       SKEIN1024_MAC_MECH_INFO_TYPE
+} skein_mech_type_t;
+
+#define        VALID_SKEIN_DIGEST_MECH(__mech)                         \
+       ((int)(__mech) >= SKEIN_256_MECH_INFO_TYPE &&           \
+       (__mech) <= SKEIN1024_MECH_INFO_TYPE)
+#define        VALID_SKEIN_MAC_MECH(__mech)                            \
+       ((int)(__mech) >= SKEIN_256_MAC_MECH_INFO_TYPE &&       \
+       (__mech) <= SKEIN1024_MAC_MECH_INFO_TYPE)
+#endif /* SKEIN_MODULE_IMPL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_SKEIN_H_ */
index 0c71cca68d47bd2e32a48fcf717b4faa4dff18ab..3d0b962e68895379b4867bcbb4e249a06b1ec49b 100644 (file)
@@ -23,6 +23,7 @@
  * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
+ * Copyright 2013 Saso Kiselkov. All rights reserved.
  */
 
 #ifndef _SYS_SPA_H
@@ -147,6 +148,14 @@ typedef struct dva {
 } dva_t;
 
 
+/*
+ * Some checksums/hashes need a 256-bit initialization salt. This salt is kept
+ * secret and is suitable for use in MAC algorithms as the key.
+ */
+typedef struct zio_cksum_salt {
+       uint8_t         zcs_bytes[32];
+} zio_cksum_salt_t;
+
 /*
  * Each block is described by its DVAs, time of birth, checksum, etc.
  * The word-by-word, bit-by-bit layout of the blkptr is as follows:
index 59cb44de215d05990d95f8196338b2e28ec2ae79..7b9e1ee0c93e3f9a4de7914a2275ceafd0aa66fc 100644 (file)
@@ -23,6 +23,7 @@
  * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
+ * Copyright 2013 Saso Kiselkov. All rights reserved.
  * Copyright (c) 2016 Actifio, Inc. All rights reserved.
  */
 
@@ -172,6 +173,10 @@ struct spa {
        uint64_t        spa_syncing_txg;        /* txg currently syncing */
        bpobj_t         spa_deferred_bpobj;     /* deferred-free bplist */
        bplist_t        spa_free_bplist[TXG_SIZE]; /* bplist of stuff to free */
+       zio_cksum_salt_t spa_cksum_salt;        /* secret salt for cksum */
+       /* checksum context templates */
+       kmutex_t        spa_cksum_tmpls_lock;
+       void            *spa_cksum_tmpls[ZIO_CHECKSUM_FUNCTIONS];
        uberblock_t     spa_ubsync;             /* last synced uberblock */
        uberblock_t     spa_uberblock;          /* current uberblock */
        boolean_t       spa_extreme_rewind;     /* rewind past deferred frees */
index 51b51fbec1708fc7a64311ca85d9ba601f27264e..22001559cb5be39616542d2a6adb19453ade6a84 100644 (file)
@@ -80,6 +80,10 @@ enum zio_checksum {
        ZIO_CHECKSUM_FLETCHER_4,
        ZIO_CHECKSUM_SHA256,
        ZIO_CHECKSUM_ZILOG2,
+       ZIO_CHECKSUM_NOPARITY,
+       ZIO_CHECKSUM_SHA512,
+       ZIO_CHECKSUM_SKEIN,
+       ZIO_CHECKSUM_EDONR,
        ZIO_CHECKSUM_FUNCTIONS
 };
 
index 04573ba5456f8f7f93e59e9fc60c0cc54d6405f9..b4c2c8c08305bb15379433bda5afa713c0ac659e 100644 (file)
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2014, 2015 by Delphix. All rights reserved.
+ * Copyright Saso Kiselkov 2013, All rights reserved.
  */
 
 #ifndef _SYS_ZIO_CHECKSUM_H
 #define        _SYS_ZIO_CHECKSUM_H
 
 #include <sys/zio.h>
+#include <zfeature_common.h>
 
 #ifdef __cplusplus
 extern "C" {
@@ -35,17 +37,36 @@ extern "C" {
 /*
  * Signature for checksum functions.
  */
-typedef void zio_checksum_func_t(const void *, uint64_t, zio_cksum_t *);
+typedef void zio_checksum_func_t(const void *, uint64_t, const void *,
+    zio_cksum_t *);
+typedef void zio_checksum_t(const void *data, uint64_t size,
+    const void *ctx_template, zio_cksum_t *zcp);
+typedef void *zio_checksum_tmpl_init_t(const zio_cksum_salt_t *salt);
+typedef void zio_checksum_tmpl_free_t(void *ctx_template);
+
+typedef enum zio_checksum_flags {
+       /* Strong enough for metadata? */
+       ZCHECKSUM_FLAG_METADATA = (1 << 1),
+       /* ZIO embedded checksum */
+       ZCHECKSUM_FLAG_EMBEDDED = (1 << 2),
+       /* Strong enough for dedup (without verification)? */
+       ZCHECKSUM_FLAG_DEDUP = (1 << 3),
+       /* Uses salt value */
+       ZCHECKSUM_FLAG_SALTED = (1 << 4),
+       /* Strong enough for nopwrite? */
+       ZCHECKSUM_FLAG_NOPWRITE = (1 << 5)
+} zio_checksum_flags_t;
 
 /*
  * Information about each checksum function.
  */
 typedef const struct zio_checksum_info {
-       zio_checksum_func_t *ci_func[2]; /* checksum function per byteorder */
-       int             ci_correctable; /* number of correctable bits   */
-       int             ci_eck;         /* uses zio embedded checksum? */
-       boolean_t       ci_dedup;       /* strong enough for dedup? */
-       char            *ci_name;       /* descriptive name */
+       /* checksum function for each byteorder */
+       zio_checksum_t                  *ci_func[2];
+       zio_checksum_tmpl_init_t        *ci_tmpl_init;
+       zio_checksum_tmpl_free_t        *ci_tmpl_free;
+       zio_checksum_flags_t            ci_flags;
+       char                            *ci_name;       /* descriptive name */
 } zio_checksum_info_t;
 
 typedef struct zio_bad_cksum {
@@ -62,7 +83,21 @@ extern zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS];
 /*
  * Checksum routines.
  */
-extern zio_checksum_func_t zio_checksum_SHA256;
+extern zio_checksum_t zio_checksum_SHA256;
+extern zio_checksum_t zio_checksum_SHA512_native;
+extern zio_checksum_t zio_checksum_SHA512_byteswap;
+
+/* Skein */
+extern zio_checksum_t zio_checksum_skein_native;
+extern zio_checksum_t zio_checksum_skein_byteswap;
+extern zio_checksum_tmpl_init_t zio_checksum_skein_tmpl_init;
+extern zio_checksum_tmpl_free_t zio_checksum_skein_tmpl_free;
+
+/* Edon-R */
+extern zio_checksum_t zio_checksum_edonr_native;
+extern zio_checksum_t zio_checksum_edonr_byteswap;
+extern zio_checksum_tmpl_init_t zio_checksum_edonr_tmpl_init;
+extern zio_checksum_tmpl_free_t zio_checksum_edonr_tmpl_free;
 
 extern int zio_checksum_equal(spa_t *, blkptr_t *, enum zio_checksum,
     void *, uint64_t, uint64_t, zio_bad_cksum_t *);
@@ -72,6 +107,8 @@ extern int zio_checksum_error_impl(spa_t *, blkptr_t *, enum zio_checksum,
     void *, uint64_t, uint64_t, zio_bad_cksum_t *);
 extern int zio_checksum_error(zio_t *zio, zio_bad_cksum_t *out);
 extern enum zio_checksum spa_dedup_checksum(spa_t *spa);
+extern void zio_checksum_templates_free(spa_t *spa);
+extern spa_feature_t zio_checksum_to_feature(enum zio_checksum cksum);
 
 #ifdef __cplusplus
 }
index 41cfdf8070c5731fe7213c6e5fed788a35abca86..f0548018170fc173772eb5399492b321f5d47786 100644 (file)
@@ -51,6 +51,9 @@ typedef enum spa_feature {
        SPA_FEATURE_FS_SS_LIMIT,
        SPA_FEATURE_LARGE_BLOCKS,
        SPA_FEATURE_LARGE_DNODE,
+       SPA_FEATURE_SHA512,
+       SPA_FEATURE_SKEIN,
+       SPA_FEATURE_EDONR,
        SPA_FEATURES
 } spa_feature_t;
 
index f0cfbd57342acdab75a5e2eae515c146941deefd..83f92a09638d13372fa8212825005dd103c83093 100644 (file)
@@ -22,6 +22,9 @@
  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
+/*
+ * Copyright 2013 Saso Kiselkov. All rights reserved.
+ */
 
 #ifndef        _ZFS_FLETCHER_H
 #define        _ZFS_FLETCHER_H
@@ -45,11 +48,11 @@ extern "C" {
  * checksum method is added. This method will ignore last (size % 4) bytes of
  * the data buffer.
  */
-void fletcher_2_native(const void *, uint64_t, zio_cksum_t *);
-void fletcher_2_byteswap(const void *, uint64_t, zio_cksum_t *);
-void fletcher_4_native(const void *, uint64_t, zio_cksum_t *);
+void fletcher_2_native(const void *, uint64_t, const void *, zio_cksum_t *);
+void fletcher_2_byteswap(const void *, uint64_t, const void *, zio_cksum_t *);
+void fletcher_4_native(const void *, uint64_t, const void *, zio_cksum_t *);
 void fletcher_4_native_varsize(const void *, uint64_t, zio_cksum_t *);
-void fletcher_4_byteswap(const void *, uint64_t, zio_cksum_t *);
+void fletcher_4_byteswap(const void *, uint64_t, const void *, zio_cksum_t *);
 void fletcher_4_incremental_native(const void *, uint64_t,
     zio_cksum_t *);
 void fletcher_4_incremental_byteswap(const void *, uint64_t,
index 41457fd5276bd31c96cba892b742c2f9271ce518..0852a583a7c9b88f3a6458dfa1572b740b5f8219 100644 (file)
@@ -20,7 +20,8 @@ ASM_SOURCES_AS = \
        asm-x86_64/aes/aes_intel.S \
        asm-x86_64/modes/gcm_intel.S \
        asm-x86_64/sha1/sha1-x86_64.S \
-       asm-x86_64/sha2/sha256_impl.S
+       asm-x86_64/sha2/sha256_impl.S \
+       asm-x86_64/sha2/sha512_impl.S
 endif
 
 if TARGET_ASM_I386
@@ -46,6 +47,7 @@ KERNEL_C = \
        api/kcf_mac.c \
        algs/aes/aes_impl.c \
        algs/aes/aes_modes.c \
+       algs/edonr/edonr.c \
        algs/modes/modes.c \
        algs/modes/cbc.c \
        algs/modes/gcm.c \
@@ -54,10 +56,15 @@ KERNEL_C = \
        algs/modes/ecb.c \
        algs/sha1/sha1.c \
        algs/sha2/sha2.c \
+       algs/skein/skein.c \
+       algs/skein/skein_block.c \
+       algs/skein/skein_iv.c \
        illumos-crypto.c \
        io/aes.c \
+       io/edonr_mod.c \
        io/sha1_mod.c \
        io/sha2_mod.c \
+       io/skein_mod.c \
        os/modhash.c \
        os/modconf.c \
        core/kcf_sched.c \
index 6226872e5d67a2ce5fda68504b10444a73506231..bd89ad94fa1c81fbdd408e235a02fb5e1702425b 100644 (file)
@@ -73,6 +73,14 @@ do {                                                                 \
 #undef assert
 #endif
 
+/*
+ * Compile time assert.
+ *
+ * __CTASSERT() declares an unused char-array typedef whose length is 1 when
+ * `x' is true and -1 when it is false, so a false condition is rejected by
+ * the compiler.  `y' is pasted onto the typedef name; _CTASSERT() exists
+ * only as an extra expansion level so that __LINE__ is replaced by the line
+ * number before the paste happens.
+ *
+ * CTASSERT_GLOBAL(x) expands to the bare typedef (no trailing semicolon);
+ * CTASSERT(x) wraps the typedef in braces so that it forms its own block.
+ */
+#define        CTASSERT_GLOBAL(x)              _CTASSERT(x, __LINE__)
+#define        CTASSERT(x)                     { _CTASSERT(x, __LINE__); }
+#define        _CTASSERT(x, y)                 __CTASSERT(x, y)
+#define        __CTASSERT(x, y)                                                \
+       typedef char __attribute__((unused))                            \
+       __compile_time_assertion__ ## y[(x) ? 1 : -1]
+
 #ifdef NDEBUG
 #define        ASSERT3S(x, y, z)       ((void)0)
 #define        ASSERT3U(x, y, z)       ((void)0)
index 6b09cb6dab7c8738886c163fdd35eec0e6a5c851..5ecf96985377570553b34eb5d5ca805ec6a236f8 100755 (executable)
@@ -1477,6 +1477,12 @@ zfs_setprop_error(libzfs_handle_t *hdl, zfs_prop_t prop, int err,
                            "property setting is not allowed on "
                            "bootable datasets"));
                        (void) zfs_error(hdl, EZFS_NOTSUP, errbuf);
+               } else if (prop == ZFS_PROP_CHECKSUM ||
+                   prop == ZFS_PROP_DEDUP) {
+                       (void) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+                           "property setting is not allowed on "
+                           "root pools"));
+                       (void) zfs_error(hdl, EZFS_NOTSUP, errbuf);
                } else {
                        (void) zfs_standard_error(hdl, err, errbuf);
                }
index e409899a2deca03e56f481f026ba66ea38dcc3a8..f70e34107cb3d147c3eb81e18c4637e64944ac6b 100644 (file)
@@ -61,6 +61,7 @@
 #include <sys/zio_checksum.h>
 #include <sys/ddt.h>
 #include <sys/socket.h>
+#include <sys/sha2.h>
 
 /* in libzfs_dataset.c */
 extern void zfs_setprop_error(libzfs_handle_t *, zfs_prop_t, int, char *);
@@ -365,10 +366,11 @@ cksummer(void *arg)
                        if (ZIO_CHECKSUM_EQUAL(drrw->drr_key.ddk_cksum,
                            zero_cksum) ||
                            !DRR_IS_DEDUP_CAPABLE(drrw->drr_checksumflags)) {
+                               SHA256_CTX ctx;
                                zio_cksum_t tmpsha256;
 
                                zio_checksum_SHA256(buf,
-                                   payload_size, &tmpsha256);
+                                   payload_size, &ctx, &tmpsha256);
 
                                drrw->drr_key.ddk_cksum.zc_word[0] =
                                    BE_64(tmpsha256.zc_word[0]);
index 351ddfeac1935d8ec49e3209bb1dd0e83b8e2f06..c2f5a50b119ae1fd2804abd07278a8ae8dc6ca7c 100644 (file)
@@ -61,6 +61,7 @@ KERNEL_C = \
        dsl_synctask.c \
        dsl_destroy.c \
        dsl_userhold.c \
+       edonr_zfs.c \
        fm.c \
        gzip.c \
        lzjb.c \
@@ -73,6 +74,7 @@ KERNEL_C = \
        rrwlock.c \
        sa.c \
        sha256.c \
+       skein_zfs.c \
        spa.c \
        spa_boot.c \
        spa_config.c \
index fa04d6e8132effa9fb22dc2e94d00373970cee75..dcfb30d1874360187b0c0b28fe5af943178a4454 100644 (file)
@@ -1,5 +1,5 @@
 '\" te
-.\" Copyright (c) 2013 by Delphix. All rights reserved.
+.\" Copyright (c) 2012, 2015 by Delphix. All rights reserved.
 .\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
 .\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
 .\" The contents of this file are subject to the terms of the Common Development
@@ -457,5 +457,111 @@ allow more data to be stored in the bonus buffer, thus potentially
 improving performance by avoiding the use of spill blocks.
 .RE
 
+.sp
+.ne 2
+.na
+\fB\fBsha512\fR\fR
+.ad
+.RS 4n
+.TS
+l l .
+GUID   org.illumos:sha512
+READ\-ONLY COMPATIBLE  no
+DEPENDENCIES   none
+.TE
+
+This feature enables the use of the SHA-512/256 truncated hash algorithm
+(FIPS 180-4) for checksum and dedup. The native 64-bit arithmetic of
+SHA-512 provides an approximate 50% performance boost over SHA-256 on
+64-bit hardware and is thus a good minimum-change replacement candidate
+for systems where hash performance is important, but these systems
+cannot for whatever reason utilize the faster \fBskein\fR and
+\fBedonr\fR algorithms.
+
+When the \fBsha512\fR feature is set to \fBenabled\fR, the administrator
+can turn on the \fBsha512\fR checksum on any dataset using the
+\fBzfs set checksum=sha512\fR(1M) command.  This feature becomes
+\fBactive\fR once a \fBchecksum\fR property has been set to \fBsha512\fR,
+and will return to being \fBenabled\fR once all filesystems that have
+ever had their checksum set to \fBsha512\fR are destroyed.
+
+Booting off of pools utilizing SHA-512/256 is supported (provided that
+the updated GRUB stage2 module is installed).
+
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBskein\fR\fR
+.ad
+.RS 4n
+.TS
+l l .
+GUID   org.illumos:skein
+READ\-ONLY COMPATIBLE  no
+DEPENDENCIES   none
+.TE
+
+This feature enables the use of the Skein hash algorithm for checksum
+and dedup. Skein is a high-performance secure hash algorithm that was a
+finalist in the NIST SHA-3 competition. It provides a very high security
+margin and high performance on 64-bit hardware (80% faster than
+SHA-256). This implementation also utilizes the new salted checksumming
+functionality in ZFS, which means that the checksum is pre-seeded with a
+secret 256-bit random key (stored on the pool) before being fed the data
+block to be checksummed. Thus the produced checksums are unique to a
+given pool, preventing hash collision attacks on systems with dedup.
+
+When the \fBskein\fR feature is set to \fBenabled\fR, the administrator
+can turn on the \fBskein\fR checksum on any dataset using the
+\fBzfs set checksum=skein\fR(1M) command.  This feature becomes
+\fBactive\fR once a \fBchecksum\fR property has been set to \fBskein\fR,
+and will return to being \fBenabled\fR once all filesystems that have
+ever had their checksum set to \fBskein\fR are destroyed.
+
+Booting off of pools using \fBskein\fR is \fBNOT\fR supported
+-- any attempt to enable \fBskein\fR on a root pool will fail with an
+error.
+
+.RE
+
+.sp
+.ne 2
+.na
+\fB\fBedonr\fR\fR
+.ad
+.RS 4n
+.TS
+l l .
+GUID   org.illumos:edonr
+READ\-ONLY COMPATIBLE  no
+DEPENDENCIES   none
+.TE
+
+This feature enables the use of the Edon-R hash algorithm for checksum,
+including for nopwrite (if compression is also enabled, an overwrite of
+a block whose checksum matches the data being written will be ignored).
+In an abundance of caution, Edon-R can not be used with dedup
+(without verification).
+
+Edon-R is a very high-performance hash algorithm that was part
+of the NIST SHA-3 competition. It provides extremely high hash
+performance (over 350% faster than SHA-256), but was not selected
+because of its unsuitability as a general purpose secure hash algorithm.
+This implementation utilizes the new salted checksumming functionality
+in ZFS, which means that the checksum is pre-seeded with a secret
+256-bit random key (stored on the pool) before being fed the data block
+to be checksummed. Thus the produced checksums are unique to a given
+pool.
+
+When the \fBedonr\fR feature is set to \fBenabled\fR, the administrator
+can turn on the \fBedonr\fR checksum on any dataset using the
+\fBzfs set checksum=edonr\fR(1M) command.  This feature becomes
+\fBactive\fR once a \fBchecksum\fR property has been set to \fBedonr\fR,
+and will return to being \fBenabled\fR once all filesystems that have
+ever had their checksum set to \fBedonr\fR are destroyed.
+
+Booting off of pools using \fBedonr\fR is \fBNOT\fR supported
+-- any attempt to enable \fBedonr\fR on a root pool will fail with an
+error.
+
 .SH "SEE ALSO"
 \fBzpool\fR(8)
index e13fc1a52143db38f4ee4597255d03d2ecb032c6..e543ba51d58fe9cb265b280f5121486134aadef4 100644 (file)
@@ -837,12 +837,23 @@ The values \fBon\fR and \fBnoauto\fR are equivalent to the \fBauto\fR and \fBnoa
 .sp
 .ne 2
 .na
-\fB\fBchecksum\fR=\fBon\fR | \fBoff\fR | \fBfletcher2\fR | \fBfletcher4\fR | \fBsha256\fR\fR
+\fB\fBchecksum\fR=\fBon\fR | \fBoff\fR | \fBfletcher2\fR | \fBfletcher4\fR | \fBsha256\fR | \fBnoparity\fR | \fBsha512\fR | \fBskein\fR | \fBedonr\fR\fR
 .ad
 .sp .6
 .RS 4n
-Controls the checksum used to verify data integrity. The default value is \fBon\fR, which automatically selects an appropriate algorithm (currently, \fBfletcher4\fR, but this may change in future releases). The value \fBoff\fR disables integrity checking on user data. Disabling checksums is \fBNOT\fR a recommended practice.
+Controls the checksum used to verify data integrity. The default value is
+\fBon\fR, which automatically selects an appropriate algorithm (currently,
+\fBfletcher4\fR, but this may change in future releases). The value \fBoff\fR
+disables integrity checking on user data.  The value \fBnoparity\fR not only
+disables integrity checking but also disables maintaining parity for user data.
+This setting is used internally by a dump device residing on a RAID-Z pool and
+should not be used by any other dataset.  Disabling checksums is \fBNOT\fR a
+recommended practice.
 .sp
+The \fBsha512\fR, \fBskein\fR, and \fBedonr\fR checksum algorithms require
+enabling the appropriate features on the pool. Please see
+\fBzpool-features\fR(5) for more information on these algorithms.
+.sp
 Changing this property affects only newly-written data.
 .RE
 
index 4be03dbae50e081fd799dabd556df0f68336517c..b822635b786208d3a5b1a3698ef92b9a6f74abe4 100644 (file)
@@ -12,6 +12,7 @@ ASM_SOURCES += asm-x86_64/aes/aes_intel.o
 ASM_SOURCES += asm-x86_64/modes/gcm_intel.o
 ASM_SOURCES += asm-x86_64/sha1/sha1-x86_64.o
 ASM_SOURCES += asm-x86_64/sha2/sha256_impl.o
+ASM_SOURCES += asm-x86_64/sha2/sha512_impl.o
 endif
 
 ifeq ($(TARGET_ASM_DIR), asm-i386)
@@ -43,8 +44,10 @@ $(MODULE)-objs += core/kcf_mech_tabs.o
 $(MODULE)-objs += core/kcf_prov_lib.o
 $(MODULE)-objs += spi/kcf_spi.o
 $(MODULE)-objs += io/aes.o
+$(MODULE)-objs += io/edonr_mod.o
 $(MODULE)-objs += io/sha1_mod.o
 $(MODULE)-objs += io/sha2_mod.o
+$(MODULE)-objs += io/skein_mod.o
 $(MODULE)-objs += os/modhash.o
 $(MODULE)-objs += os/modconf.o
 $(MODULE)-objs += algs/modes/cbc.o
@@ -55,8 +58,13 @@ $(MODULE)-objs += algs/modes/gcm.o
 $(MODULE)-objs += algs/modes/modes.o
 $(MODULE)-objs += algs/aes/aes_impl.o
 $(MODULE)-objs += algs/aes/aes_modes.o
+$(MODULE)-objs += algs/edonr/edonr.o
 $(MODULE)-objs += algs/sha1/sha1.o
 $(MODULE)-objs += algs/sha2/sha2.o
+$(MODULE)-objs += algs/sha1/sha1.o
+$(MODULE)-objs += algs/skein/skein.o
+$(MODULE)-objs += algs/skein/skein_block.o
+$(MODULE)-objs += algs/skein/skein_iv.o
 $(MODULE)-objs += $(ASM_SOURCES)
 
 ICP_DIRS = \
@@ -67,9 +75,11 @@ ICP_DIRS = \
        os \
        algs \
        algs/aes \
+       algs/edonr \
        algs/modes \
        algs/sha1 \
        algs/sha2 \
+       algs/skein \
        asm-x86_64 \
        asm-x86_64/aes \
        asm-x86_64/modes \
diff --git a/module/icp/algs/edonr/edonr.c b/module/icp/algs/edonr/edonr.c
new file mode 100644 (file)
index 0000000..8ae9898
--- /dev/null
@@ -0,0 +1,751 @@
+/*
+ * IDI,NTNU
+ *
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ *
+ * Copyright (C) 2009, 2010, Jorn Amundsen <jorn.amundsen@ntnu.no>
+ * Tweaked Edon-R implementation for SUPERCOP, based on NIST API.
+ *
+ * $Id: edonr.c 517 2013-02-17 20:34:39Z joern $
+ */
+/*
+ * Portions copyright (c) 2013, Saso Kiselkov, All rights reserved
+ */
+
+/* determine where we can get bcopy/bzero declarations */
+#ifdef _KERNEL
+#include <sys/systm.h>
+#else
+#include <strings.h>
+#endif
+#include <sys/edonr.h>
+#include <sys/debug.h>
+
+/* big endian support, provides no-op's if run on little endian hosts */
+#include "edonr_byteorder.h"
+
+/*
+ * Select the member of the state's `pipe' that matches the digest size:
+ * the 224- and 256-bit variants both use p256, the 384- and 512-bit
+ * variants both use p512.  `x' must be a pointer to a state that has a
+ * `pipe' member.
+ */
+#define        hashState224(x) ((x)->pipe->p256)
+#define        hashState256(x) ((x)->pipe->p256)
+#define        hashState384(x) ((x)->pipe->p512)
+#define        hashState512(x) ((x)->pipe->p512)
+
+/*
+ * Shift and rotate shortcuts.  Every macro argument is parenthesized so
+ * that an expression passed as the count (e.g. `a | b') is applied as a
+ * whole rather than being split by operator precedence.
+ *
+ * rotl32/rotr32 expect a 32-bit unsigned x and 0 < n < 32; rotl64/rotr64
+ * expect a 64-bit unsigned x and 0 < n < 64.  A count of 0 would make the
+ * complementary shift as wide as the type.  In the rotate macros x and n
+ * are each evaluated twice, so pass side-effect-free expressions.
+ */
+#define        shl(x, n)       ((x) << (n))
+#define        shr(x, n)       ((x) >> (n))
+
+#define        rotl32(x, n)    (((x) << (n)) | ((x) >> (32 - (n))))
+#define        rotr32(x, n)    (((x) >> (n)) | ((x) << (32 - (n))))
+
+#define        rotl64(x, n)    (((x) << (n)) | ((x) >> (64 - (n))))
+#define        rotr64(x, n)    (((x) >> (n)) | ((x) << (64 - (n))))
+
+/*
+ * Unless __C99_RESTRICT is defined, `restrict' is defined away, so the
+ * qualifier on the Q256()/Q512() parameters compiles to nothing.
+ */
+#if !defined(__C99_RESTRICT)
+#define        restrict        /* restrict */
+#endif
+
+/*
+ * Non-zero when x is one of 512, 384, 256 or 224.  x may be evaluated up
+ * to four times.
+ */
+#define        EDONR_VALID_HASHBITLEN(x) \
+       ((x) == 512 || (x) == 384 || (x) == 256 || (x) == 224)
+
+/*
+ * Initial double chaining pipes, one 16-word table per digest size.  Each
+ * table is a run of consecutive byte values, written most-significant byte
+ * first within every word: 0x00..0x3f (224), 0x40..0x7f (256),
+ * 0x00..0x7f (384) and 0x80..0xff (512).
+ */
+
+/* EdonR224 initial double chaining pipe */
+static const uint32_t i224p2[16] = {
+       0x00010203ul, 0x04050607ul, 0x08090a0bul, 0x0c0d0e0ful,
+       0x10111213ul, 0x14151617ul, 0x18191a1bul, 0x1c1d1e1ful,
+       0x20212223ul, 0x24252627ul, 0x28292a2bul, 0x2c2d2e2ful,
+       0x30313233ul, 0x34353637ul, 0x38393a3bul, 0x3c3d3e3ful,
+};
+
+/* EdonR256 initial double chaining pipe */
+static const uint32_t i256p2[16] = {
+       0x40414243ul, 0x44454647ul, 0x48494a4bul, 0x4c4d4e4ful,
+       0x50515253ul, 0x54555657ul, 0x58595a5bul, 0x5c5d5e5ful,
+       0x60616263ul, 0x64656667ul, 0x68696a6bul, 0x6c6d6e6ful,
+       0x70717273ul, 0x74757677ul, 0x78797a7bul, 0x7c7d7e7ful,
+};
+
+/* EdonR384 initial double chaining pipe */
+static const uint64_t i384p2[16] = {
+       0x0001020304050607ull, 0x08090a0b0c0d0e0full,
+       0x1011121314151617ull, 0x18191a1b1c1d1e1full,
+       0x2021222324252627ull, 0x28292a2b2c2d2e2full,
+       0x3031323334353637ull, 0x38393a3b3c3d3e3full,
+       0x4041424344454647ull, 0x48494a4b4c4d4e4full,
+       0x5051525354555657ull, 0x58595a5b5c5d5e5full,
+       0x6061626364656667ull, 0x68696a6b6c6d6e6full,
+       0x7071727374757677ull, 0x78797a7b7c7d7e7full
+};
+
+/* EdonR512 initial double chaining pipe */
+static const uint64_t i512p2[16] = {
+       0x8081828384858687ull, 0x88898a8b8c8d8e8full,
+       0x9091929394959697ull, 0x98999a9b9c9d9e9full,
+       0xa0a1a2a3a4a5a6a7ull, 0xa8a9aaabacadaeafull,
+       0xb0b1b2b3b4b5b6b7ull, 0xb8b9babbbcbdbebfull,
+       0xc0c1c2c3c4c5c6c7ull, 0xc8c9cacbcccdcecfull,
+       0xd0d1d2d3d4d5d6d7ull, 0xd8d9dadbdcdddedfull,
+       0xe0e1e2e3e4e5e6e7ull, 0xe8e9eaebecedeeefull,
+       0xf0f1f2f3f4f5f6f7ull, 0xf8f9fafbfcfdfeffull
+};
+
+/*
+ * First Latin Square
+ * 0   7   1   3   2   4   6   5
+ * 4   1   7   6   3   0   5   2
+ * 7   0   4   2   5   3   1   6
+ * 1   4   0   5   6   2   7   3
+ * 2   3   6   7   1   5   0   4
+ * 5   2   3   1   7   6   4   0
+ * 3   6   5   0   4   7   2   1
+ * 6   5   2   4   0   1   3   7
+ */
+/*
+ * LS1_256 assigns s0..s7, which are NOT macro parameters: they must already
+ * be declared in the expanding scope (Q256() declares them as uint32_t).
+ * Each output is a sum of five of the x arguments; `c' is added into s0
+ * only, and s1..s7 are additionally rotated left by 4, 8, 13, 17, 22, 24
+ * and 29 bits respectively.  The arguments are pasted unparenthesized and
+ * used several times, so pass only simple, side-effect-free expressions.
+ */
+#define        LS1_256(c, x0, x1, x2, x3, x4, x5, x6, x7)                      \
+{                                                                      \
+       uint32_t x04, x17, x23, x56, x07, x26;                          \
+       x04 = x0+x4, x17 = x1+x7, x07 = x04+x17;                        \
+       s0 = c + x07 + x2;                                              \
+       s1 = rotl32(x07 + x3, 4);                                       \
+       s2 = rotl32(x07 + x6, 8);                                       \
+       x23 = x2 + x3;                                                  \
+       s5 = rotl32(x04 + x23 + x5, 22);                                \
+       x56 = x5 + x6;                                                  \
+       s6 = rotl32(x17 + x56 + x0, 24);                                \
+       x26 = x23+x56;                                                  \
+       s3 = rotl32(x26 + x7, 13);                                      \
+       s4 = rotl32(x26 + x1, 17);                                      \
+       s7 = rotl32(x26 + x4, 29);                                      \
+}
+
+/*
+ * 64-bit counterpart of LS1_256: the same sums, computed in uint64_t, with
+ * s1..s7 rotated left by 5, 15, 22, 31, 40, 50 and 59 bits respectively.
+ * s0..s7 are taken from the expanding scope, not from the parameter list.
+ * The arguments are pasted unparenthesized and used several times, so pass
+ * only simple, side-effect-free expressions.
+ */
+#define        LS1_512(c, x0, x1, x2, x3, x4, x5, x6, x7)                      \
+{                                                                      \
+       uint64_t x04, x17, x23, x56, x07, x26;                          \
+       x04 = x0+x4, x17 = x1+x7, x07 = x04+x17;                        \
+       s0 = c + x07 + x2;                                              \
+       s1 = rotl64(x07 + x3, 5);                                       \
+       s2 = rotl64(x07 + x6, 15);                                      \
+       x23 = x2 + x3;                                                  \
+       s5 = rotl64(x04 + x23 + x5, 40);                                \
+       x56 = x5 + x6;                                                  \
+       s6 = rotl64(x17 + x56 + x0, 50);                                \
+       x26 = x23+x56;                                                  \
+       s3 = rotl64(x26 + x7, 22);                                      \
+       s4 = rotl64(x26 + x1, 31);                                      \
+       s7 = rotl64(x26 + x4, 59);                                      \
+}
+
+/*
+ * Second Orthogonal Latin Square
+ * 0   4   2   3   1   6   5   7
+ * 7   6   3   2   5   4   1   0
+ * 5   3   1   6   0   2   7   4
+ * 1   0   5   4   3   7   2   6
+ * 2   1   0   7   4   5   6   3
+ * 3   5   7   0   6   1   4   2
+ * 4   7   6   1   2   0   3   5
+ * 6   2   4   5   7   3   0   1
+ */
+/*
+ * LS2_256 assigns t0..t7, which are NOT macro parameters: they must already
+ * be declared in the expanding scope (Q256() declares them as uint32_t).
+ * Each output is a sum of five of the y arguments; the bitwise complement
+ * of `c' is added into t0 only, and t1..t7 are additionally rotated left by
+ * 5, 9, 11, 15, 20, 25 and 27 bits respectively.  The arguments are pasted
+ * unparenthesized and used several times, so pass only simple,
+ * side-effect-free expressions.
+ */
+#define        LS2_256(c, y0, y1, y2, y3, y4, y5, y6, y7)                      \
+{                                                                      \
+       uint32_t y01, y25, y34, y67, y04, y05, y27, y37;                \
+       y01 = y0+y1, y25 = y2+y5, y05 = y01+y25;                        \
+       t0  = ~c + y05 + y7;                                            \
+       t2 = rotl32(y05 + y3, 9);                                       \
+       y34 = y3+y4, y04 = y01+y34;                                     \
+       t1 = rotl32(y04 + y6, 5);                                       \
+       t4 = rotl32(y04 + y5, 15);                                      \
+       y67 = y6+y7, y37 = y34+y67;                                     \
+       t3 = rotl32(y37 + y2, 11);                                      \
+       t7 = rotl32(y37 + y0, 27);                                      \
+       y27 = y25+y67;                                                  \
+       t5 = rotl32(y27 + y4, 20);                                      \
+       t6 = rotl32(y27 + y1, 25);                                      \
+}
+
+/*
+ * 64-bit counterpart of LS2_256: the same sums, computed in uint64_t, with
+ * t1..t7 rotated left by 10, 19, 29, 36, 44, 48 and 55 bits respectively.
+ * t0..t7 are taken from the expanding scope, not from the parameter list.
+ * The arguments are pasted unparenthesized and used several times, so pass
+ * only simple, side-effect-free expressions.
+ */
+#define        LS2_512(c, y0, y1, y2, y3, y4, y5, y6, y7)                      \
+{                                                                      \
+       uint64_t y01, y25, y34, y67, y04, y05, y27, y37;                \
+       y01 = y0+y1, y25 = y2+y5, y05 = y01+y25;                        \
+       t0  = ~c + y05 + y7;                                            \
+       t2 = rotl64(y05 + y3, 19);                                      \
+       y34 = y3+y4, y04 = y01+y34;                                     \
+       t1 = rotl64(y04 + y6, 10);                                      \
+       t4 = rotl64(y04 + y5, 36);                                      \
+       y67 = y6+y7, y37 = y34+y67;                                     \
+       t3 = rotl64(y37 + y2, 29);                                      \
+       t7 = rotl64(y37 + y0, 55);                                      \
+       y27 = y25+y67;                                                  \
+       t5 = rotl64(y27 + y4, 44);                                      \
+       t6 = rotl64(y27 + y1, 48);                                      \
+}
+
+/*
+ * Combine the s0..s7 and t0..t7 values left in the expanding scope by a
+ * preceding LS1_256/LS2_256 pair into the eight outputs r0..r7.  Each r is
+ * the sum of an XOR of three s values and an XOR of three t values.
+ *
+ * r0..r7 are only written, never read, so the same variables that were
+ * passed to the LS macros may be passed here as outputs (Q256() does this
+ * with p0..p7 and q0..q7).
+ */
+#define        quasi_exform256(r0, r1, r2, r3, r4, r5, r6, r7)                 \
+{                                                                      \
+       uint32_t s04, s17, s23, s56, t01, t25, t34, t67;                \
+       s04 = s0 ^ s4, t01 = t0 ^ t1;                                   \
+       r0 = (s04 ^ s1) + (t01 ^ t5);                                   \
+       t67 = t6 ^ t7;                                                  \
+       r1 = (s04 ^ s7) + (t2 ^ t67);                                   \
+       s23 = s2 ^ s3;                                                  \
+       r7 = (s23 ^ s5) + (t4 ^ t67);                                   \
+       t34 = t3 ^ t4;                                                  \
+       r3 = (s23 ^ s4) + (t0 ^ t34);                                   \
+       s56 = s5 ^ s6;                                                  \
+       r5 = (s3 ^ s56) + (t34 ^ t6);                                   \
+       t25 = t2 ^ t5;                                                  \
+       r6 = (s2 ^ s56) + (t25 ^ t7);                                   \
+       s17 = s1 ^ s7;                                                  \
+       r4 = (s0 ^ s17) + (t1 ^ t25);                                   \
+       r2 = (s17 ^ s6) + (t01 ^ t3);                                   \
+}
+
+/*
+ * 64-bit counterpart of quasi_exform256: combines the s0..s7 and t0..t7
+ * values found in the expanding scope into r0..r7, using uint64_t
+ * temporaries.  Each r is the sum of an XOR of three s values and an XOR of
+ * three t values.  r0..r7 are only written, never read.
+ */
+#define        quasi_exform512(r0, r1, r2, r3, r4, r5, r6, r7)                 \
+{                                                                      \
+       uint64_t s04, s17, s23, s56, t01, t25, t34, t67;                \
+       s04 = s0 ^ s4, t01 = t0 ^ t1;                                   \
+       r0 = (s04 ^ s1) + (t01 ^ t5);                                   \
+       t67 = t6 ^ t7;                                                  \
+       r1 = (s04 ^ s7) + (t2 ^ t67);                                   \
+       s23 = s2 ^ s3;                                                  \
+       r7 = (s23 ^ s5) + (t4 ^ t67);                                   \
+       t34 = t3 ^ t4;                                                  \
+       r3 = (s23 ^ s4) + (t0 ^ t34);                                   \
+       s56 = s5 ^ s6;                                                  \
+       r5 = (s3 ^ s56) + (t34 ^ t6);                                   \
+       t25 = t2 ^ t5;                                                  \
+       r6 = (s2 ^ s56) + (t25 ^ t7);                                   \
+       s17 = s1 ^ s7;                                                  \
+       r4 = (s0 ^ s17) + (t1 ^ t25);                                   \
+       r2 = (s17 ^ s6) + (t01 ^ t3);                                   \
+}
+
+/*
+ * Q256: fold whole input blocks into the 16-word chaining pipe `p'.
+ *
+ * bitlen  number of message bits available at `data'
+ * data    message words; every loop iteration reads data[0..15] and then
+ *         advances `data' by 16 words
+ * p       chaining pipe p[0..15]; read and updated in place
+ *
+ * The loop runs only while at least EdonR256_BLOCK_BITSIZE bits remain, so
+ * a trailing partial block is left untouched.  Returns the number of bits
+ * actually consumed (bitlen minus the unprocessed remainder).
+ *
+ * d(j) names the j-th message word of the current block.  When
+ * MACHINE_IS_BIG_ENDIAN is defined, the words are first loaded into the
+ * swp0..swp15 locals through ld_swap32() (presumably a byte-swapping load
+ * provided by edonr_byteorder.h); otherwise d(j) is simply data[j].
+ *
+ * s0..s7 and t0..t7 are written by the LS1_256/LS2_256 macros and read by
+ * quasi_exform256.  Those macros refer to them by name, so the locals must
+ * keep exactly these names.
+ */
+static size_t
+Q256(size_t bitlen, const uint32_t *data, uint32_t *restrict p)
+{
+       size_t bl;
+
+       for (bl = bitlen; bl >= EdonR256_BLOCK_BITSIZE;
+           bl -= EdonR256_BLOCK_BITSIZE, data += 16) {
+               uint32_t s0, s1, s2, s3, s4, s5, s6, s7, t0, t1, t2, t3, t4,
+                   t5, t6, t7;
+               uint32_t p0, p1, p2, p3, p4, p5, p6, p7, q0, q1, q2, q3, q4,
+                   q5, q6, q7;
+               const uint32_t defix = 0xaaaaaaaa;
+#if defined(MACHINE_IS_BIG_ENDIAN)
+               uint32_t swp0, swp1, swp2, swp3, swp4, swp5, swp6, swp7, swp8,
+                   swp9, swp10, swp11, swp12, swp13, swp14, swp15;
+#define        d(j)    swp ## j
+#define        s32(j)  ld_swap32((uint32_t *)data + j, swp ## j)
+#else
+#define        d(j)    data[j]
+#endif
+
+               /* First row of quasigroup e-transformations */
+#if defined(MACHINE_IS_BIG_ENDIAN)
+               /* load the upper eight words; they are used first, reversed */
+               s32(8);
+               s32(9);
+               s32(10);
+               s32(11);
+               s32(12);
+               s32(13);
+               s32(14);
+               s32(15);
+#endif
+               LS1_256(defix, d(15), d(14), d(13), d(12), d(11), d(10), d(9),
+                   d(8));
+#if defined(MACHINE_IS_BIG_ENDIAN)
+               /* load the lower eight words; s32 is not needed afterwards */
+               s32(0);
+               s32(1);
+               s32(2);
+               s32(3);
+               s32(4);
+               s32(5);
+               s32(6);
+               s32(7);
+#undef s32
+#endif
+               LS2_256(defix, d(0), d(1), d(2), d(3), d(4), d(5), d(6), d(7));
+               quasi_exform256(p0, p1, p2, p3, p4, p5, p6, p7);
+
+               LS1_256(defix, p0, p1, p2, p3, p4, p5, p6, p7);
+               LS2_256(defix, d(8), d(9), d(10), d(11), d(12), d(13), d(14),
+                   d(15));
+               quasi_exform256(q0, q1, q2, q3, q4, q5, q6, q7);
+
+               /* Second row of quasigroup e-transformations */
+               /* mixes in the upper half of the incoming pipe, p[8..15] */
+               LS1_256(defix, p[8], p[9], p[10], p[11], p[12], p[13], p[14],
+                   p[15]);
+               LS2_256(defix, p0, p1, p2, p3, p4, p5, p6, p7);
+               quasi_exform256(p0, p1, p2, p3, p4, p5, p6, p7);
+
+               LS1_256(defix, p0, p1, p2, p3, p4, p5, p6, p7);
+               LS2_256(defix, q0, q1, q2, q3, q4, q5, q6, q7);
+               quasi_exform256(q0, q1, q2, q3, q4, q5, q6, q7);
+
+               /* Third row of quasigroup e-transformations */
+               /* mixes in the lower half of the incoming pipe, p[0..7] */
+               LS1_256(defix, p0, p1, p2, p3, p4, p5, p6, p7);
+               LS2_256(defix, p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7]);
+               quasi_exform256(p0, p1, p2, p3, p4, p5, p6, p7);
+
+               LS1_256(defix, q0, q1, q2, q3, q4, q5, q6, q7);
+               LS2_256(defix, p0, p1, p2, p3, p4, p5, p6, p7);
+               quasi_exform256(q0, q1, q2, q3, q4, q5, q6, q7);
+
+               /* Fourth row of quasigroup e-transformations */
+               /* the lower eight message words again, in reverse order */
+               LS1_256(defix, d(7), d(6), d(5), d(4), d(3), d(2), d(1), d(0));
+               LS2_256(defix, p0, p1, p2, p3, p4, p5, p6, p7);
+               quasi_exform256(p0, p1, p2, p3, p4, p5, p6, p7);
+
+               LS1_256(defix, p0, p1, p2, p3, p4, p5, p6, p7);
+               LS2_256(defix, q0, q1, q2, q3, q4, q5, q6, q7);
+               quasi_exform256(q0, q1, q2, q3, q4, q5, q6, q7);
+
+               /* Edon-R tweak on the original SHA-3 Edon-R submission. */
+               /*
+                * The new pipe is the old pipe XORed with the message words
+                * (upper half into p[0..7], lower half into p[8..15]) and
+                * with the final p0..p7 / q0..q7 values.
+                */
+               p[0] ^= d(8) ^ p0;
+               p[1] ^= d(9) ^ p1;
+               p[2] ^= d(10) ^ p2;
+               p[3] ^= d(11) ^ p3;
+               p[4] ^= d(12) ^ p4;
+               p[5] ^= d(13) ^ p5;
+               p[6] ^= d(14) ^ p6;
+               p[7] ^= d(15) ^ p7;
+               p[8] ^= d(0) ^ q0;
+               p[9] ^= d(1) ^ q1;
+               p[10] ^= d(2) ^ q2;
+               p[11] ^= d(3) ^ q3;
+               p[12] ^= d(4) ^ q4;
+               p[13] ^= d(5) ^ q5;
+               p[14] ^= d(6) ^ q6;
+               p[15] ^= d(7) ^ q7;
+       }
+
+       /* d() is local to this function; Q512() defines its own version */
+#undef d
+       return (bitlen - bl);
+}
+
+/*
+ * Why is this #pragma here?
+ *
+ * Checksum functions like this one can go over the stack frame size check
+ * Linux imposes on 32-bit platforms (-Wframe-larger-than=1024).  We can
+ * safely ignore the compiler error since we know that, in ZoL,
+ * the function will be called from a worker thread that won't be using
+ * much stack.  The only function that goes over the 1k limit is Q512(),
+ * which only goes over it by a hair (1248 bytes on ARM32).
+ */
+#include <sys/isa_defs.h>      /* for _ILP32 */
+#ifdef _ILP32   /* We're 32-bit, assume small stack frames */
+#pragma GCC diagnostic ignored "-Wframe-larger-than="
+#endif
+
+/*
+ * Q512 - block compression step used for the 384- and 512-bit digests.
+ *
+ * Consumes every complete EdonR512_BLOCK_BITSIZE-bit block available at
+ * `data' (16 64-bit words per block) and folds each one into the 16-word
+ * state `p', which is updated in place.
+ *
+ *   bitlen - number of input bits available at `data'
+ *   data   - input words; on big-endian machines each word is byte-swapped
+ *            into a local as it is loaded
+ *   p      - 16-word chaining state, read and written
+ *
+ * Returns the number of bits consumed (bitlen minus the leftover that is
+ * smaller than one block).  The leftover is not touched; the caller is
+ * expected to buffer it.
+ *
+ * The function is only marked inline for IBM XL C on 64-bit AIX.
+ */
+#if defined(__IBMC__) && defined(_AIX) && defined(__64BIT__)
+static inline size_t
+#else
+static size_t
+#endif
+Q512(size_t bitlen, const uint64_t *data, uint64_t *restrict p)
+{
+       size_t bl;
+
+       /* One iteration per complete block; a block is 16 64-bit words. */
+       for (bl = bitlen; bl >= EdonR512_BLOCK_BITSIZE;
+           bl -= EdonR512_BLOCK_BITSIZE, data += 16) {
+               /*
+                * NOTE(review): s0..s7 and t0..t7 are never named in this
+                * function; presumably they are scratch variables that the
+                * LS1_512/LS2_512/quasi_exform512 macros (defined elsewhere)
+                * expand to -- confirm against those definitions.
+                */
+               uint64_t s0, s1, s2, s3, s4, s5, s6, s7, t0, t1, t2, t3, t4,
+                   t5, t6, t7;
+               uint64_t p0, p1, p2, p3, p4, p5, p6, p7, q0, q1, q2, q3, q4,
+                   q5, q6, q7;
+               const uint64_t defix = 0xaaaaaaaaaaaaaaaaull;
+               /*
+                * d(j) names input word j of the current block.  On
+                * big-endian hosts the words are first byte-swapped into
+                * swp0..swp15 by s64(j); otherwise d(j) reads data[j] directly.
+                */
+#if defined(MACHINE_IS_BIG_ENDIAN)
+               uint64_t swp0, swp1, swp2, swp3, swp4, swp5, swp6, swp7, swp8,
+                   swp9, swp10, swp11, swp12, swp13, swp14, swp15;
+#define        d(j)    swp##j
+#define        s64(j)  ld_swap64((uint64_t *)data+j, swp##j)
+#else
+#define        d(j)    data[j]
+#endif
+
+               /* First row of quasigroup e-transformations */
+#if defined(MACHINE_IS_BIG_ENDIAN)
+               /* Load the upper half of the block just before first use. */
+               s64(8);
+               s64(9);
+               s64(10);
+               s64(11);
+               s64(12);
+               s64(13);
+               s64(14);
+               s64(15);
+#endif
+               LS1_512(defix, d(15), d(14), d(13), d(12), d(11), d(10), d(9),
+                   d(8));
+#if defined(MACHINE_IS_BIG_ENDIAN)
+               /* Load the lower half; s64 is not needed after this point. */
+               s64(0);
+               s64(1);
+               s64(2);
+               s64(3);
+               s64(4);
+               s64(5);
+               s64(6);
+               s64(7);
+#undef s64
+#endif
+               LS2_512(defix, d(0), d(1), d(2), d(3), d(4), d(5), d(6), d(7));
+               quasi_exform512(p0, p1, p2, p3, p4, p5, p6, p7);
+
+               LS1_512(defix, p0, p1, p2, p3, p4, p5, p6, p7);
+               LS2_512(defix, d(8), d(9), d(10), d(11), d(12), d(13), d(14),
+                   d(15));
+               quasi_exform512(q0, q1, q2, q3, q4, q5, q6, q7);
+
+               /* Second row of quasigroup e-transformations */
+               LS1_512(defix, p[8], p[9], p[10], p[11], p[12], p[13], p[14],
+                   p[15]);
+               LS2_512(defix, p0, p1, p2, p3, p4, p5, p6, p7);
+               quasi_exform512(p0, p1, p2, p3, p4, p5, p6, p7);
+
+               LS1_512(defix, p0, p1, p2, p3, p4, p5, p6, p7);
+               LS2_512(defix, q0, q1, q2, q3, q4, q5, q6, q7);
+               quasi_exform512(q0, q1, q2, q3, q4, q5, q6, q7);
+
+               /* Third row of quasigroup e-transformations */
+               LS1_512(defix, p0, p1, p2, p3, p4, p5, p6, p7);
+               LS2_512(defix, p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7]);
+               quasi_exform512(p0, p1, p2, p3, p4, p5, p6, p7);
+
+               LS1_512(defix, q0, q1, q2, q3, q4, q5, q6, q7);
+               LS2_512(defix, p0, p1, p2, p3, p4, p5, p6, p7);
+               quasi_exform512(q0, q1, q2, q3, q4, q5, q6, q7);
+
+               /* Fourth row of quasigroup e-transformations */
+               LS1_512(defix, d(7), d(6), d(5), d(4), d(3), d(2), d(1), d(0));
+               LS2_512(defix, p0, p1, p2, p3, p4, p5, p6, p7);
+               quasi_exform512(p0, p1, p2, p3, p4, p5, p6, p7);
+
+               LS1_512(defix, p0, p1, p2, p3, p4, p5, p6, p7);
+               LS2_512(defix, q0, q1, q2, q3, q4, q5, q6, q7);
+               quasi_exform512(q0, q1, q2, q3, q4, q5, q6, q7);
+
+               /* Edon-R tweak on the original SHA-3 Edon-R submission. */
+               /*
+                * Feed-forward: XOR the block's words (halves swapped) and
+                * the final p/q values into the chaining state.
+                */
+               p[0] ^= d(8) ^ p0;
+               p[1] ^= d(9) ^ p1;
+               p[2] ^= d(10) ^ p2;
+               p[3] ^= d(11) ^ p3;
+               p[4] ^= d(12) ^ p4;
+               p[5] ^= d(13) ^ p5;
+               p[6] ^= d(14) ^ p6;
+               p[7] ^= d(15) ^ p7;
+               p[8] ^= d(0) ^ q0;
+               p[9] ^= d(1) ^ q1;
+               p[10] ^= d(2) ^ q2;
+               p[11] ^= d(3) ^ q3;
+               p[12] ^= d(4) ^ q4;
+               p[13] ^= d(5) ^ q5;
+               p[14] ^= d(6) ^ q6;
+               p[15] ^= d(7) ^ q7;
+       }
+
+#undef d
+       /* bl now holds the leftover bits, fewer than one block. */
+       return (bitlen - bl);
+}
+
+/*
+ * EdonRInit - prepare `state' for a new hash computation.
+ *
+ *   state      - hash context to initialise
+ *   hashbitlen - digest size in bits; must be 224, 256, 384 or 512
+ *
+ * Records the digest size, clears the processed/unprocessed bit counters
+ * and loads the matching initial value into the double pipe: sixteen
+ * 32-bit words for 224/256, sixteen 64-bit words for 384/512.
+ *
+ * An unsupported hashbitlen trips the ASSERT; where ASSERT compiles to
+ * nothing, `state' is left untouched.
+ */
+void
+EdonRInit(EdonRState *state, size_t hashbitlen)
+{
+       ASSERT(EDONR_VALID_HASHBITLEN(hashbitlen));
+       switch (hashbitlen) {
+       case 224:
+               state->hashbitlen = 224;
+               state->bits_processed = 0;
+               state->unprocessed_bits = 0;
+               bcopy(i224p2, hashState224(state)->DoublePipe,
+                   16 * sizeof (uint32_t));
+               break;
+
+       case 256:
+               state->hashbitlen = 256;
+               state->bits_processed = 0;
+               state->unprocessed_bits = 0;
+               bcopy(i256p2, hashState256(state)->DoublePipe,
+                   16 * sizeof (uint32_t));
+               break;
+
+       case 384:
+               state->hashbitlen = 384;
+               state->bits_processed = 0;
+               state->unprocessed_bits = 0;
+               bcopy(i384p2, hashState384(state)->DoublePipe,
+                   16 * sizeof (uint64_t));
+               break;
+
+       case 512:
+               state->hashbitlen = 512;
+               state->bits_processed = 0;
+               state->unprocessed_bits = 0;
+               /*
+                * The 64-bit IV must go through the 512-bit accessor.  The
+                * 224-bit accessor exposes 32-bit pipe words, so copying
+                * sixteen uint64_t through it overruns that member's
+                * declared size.
+                */
+               bcopy(i512p2, hashState512(state)->DoublePipe,
+                   16 * sizeof (uint64_t));
+               break;
+       }
+}
+
+
+/*
+ * EdonRUpdate - feed `databitlen' bits starting at `data' into `state'.
+ *
+ * Complete blocks are compressed straight away by Q256() (224/256-bit
+ * digests) or Q512() (384/512-bit digests).  Any trailing partial block
+ * is copied into the state's LastPart buffer and its length is recorded
+ * in state->unprocessed_bits.
+ *
+ * When a partial block is already buffered, the new data is appended to
+ * it (at byte granularity) and the buffer is processed instead of `data'.
+ * In that case the buffered bits plus `databitlen' must not exceed one
+ * block; this precondition is only enforced by ASSERT.
+ */
+void
+EdonRUpdate(EdonRState *state, const uint8_t *data, size_t databitlen)
+{
+       uint32_t *data32;
+       uint64_t *data64;
+
+       size_t bits_processed;
+
+       ASSERT(EDONR_VALID_HASHBITLEN(state->hashbitlen));
+       switch (state->hashbitlen) {
+       case 224:
+       case 256:
+               if (state->unprocessed_bits > 0) {
+                       /* LastBytes = databitlen / 8 */
+                       int LastBytes = (int)databitlen >> 3;
+
+                       ASSERT(state->unprocessed_bits + databitlen <=
+                           EdonR256_BLOCK_SIZE * 8);
+
+                       /* Append the new bytes after the buffered ones. */
+                       bcopy(data, hashState256(state)->LastPart
+                           + (state->unprocessed_bits >> 3), LastBytes);
+                       state->unprocessed_bits += (int)databitlen;
+                       databitlen = state->unprocessed_bits;
+                       /* LINTED E_BAD_PTR_CAST_ALIGN */
+                       data32 = (uint32_t *)hashState256(state)->LastPart;
+               } else
+                       /* LINTED E_BAD_PTR_CAST_ALIGN */
+                       data32 = (uint32_t *)data;
+
+               bits_processed = Q256(databitlen, data32,
+                   hashState256(state)->DoublePipe);
+               state->bits_processed += bits_processed;
+               databitlen -= bits_processed;
+               state->unprocessed_bits = (int)databitlen;
+               if (databitlen > 0) {
+                       /*
+                        * LastBytes = Ceil(databitlen / 8).  Fewer than one
+                        * block of bits is left here, so the addition cannot
+                        * wrap and no right shift of a negative value is
+                        * needed.
+                        */
+                       int LastBytes = (int)((databitlen + 7) >> 3);
+
+                       /* Skip the 32-bit words Q256() just consumed. */
+                       data32 += bits_processed >> 5;
+                       bcopy(data32, hashState256(state)->LastPart, LastBytes);
+               }
+               break;
+
+       case 384:
+       case 512:
+               if (state->unprocessed_bits > 0) {
+                       /* LastBytes = databitlen / 8 */
+                       int LastBytes = (int)databitlen >> 3;
+
+                       ASSERT(state->unprocessed_bits + databitlen <=
+                           EdonR512_BLOCK_SIZE * 8);
+
+                       /* Append the new bytes after the buffered ones. */
+                       bcopy(data, hashState512(state)->LastPart
+                           + (state->unprocessed_bits >> 3), LastBytes);
+                       state->unprocessed_bits += (int)databitlen;
+                       databitlen = state->unprocessed_bits;
+                       /* LINTED E_BAD_PTR_CAST_ALIGN */
+                       data64 = (uint64_t *)hashState512(state)->LastPart;
+               } else
+                       /* LINTED E_BAD_PTR_CAST_ALIGN */
+                       data64 = (uint64_t *)data;
+
+               bits_processed = Q512(databitlen, data64,
+                   hashState512(state)->DoublePipe);
+               state->bits_processed += bits_processed;
+               databitlen -= bits_processed;
+               state->unprocessed_bits = (int)databitlen;
+               if (databitlen > 0) {
+                       /*
+                        * LastBytes = Ceil(databitlen / 8).  Fewer than one
+                        * block of bits is left here, so the addition cannot
+                        * wrap and no right shift of a negative value is
+                        * needed.
+                        */
+                       int LastBytes = (int)((databitlen + 7) >> 3);
+
+                       /* Skip the 64-bit words Q512() just consumed. */
+                       data64 += bits_processed >> 6;
+                       bcopy(data64, hashState512(state)->LastPart, LastBytes);
+               }
+               break;
+       }
+}
+
+/*
+ * EdonRFinal - pad the buffered tail, run the last compression(s) and
+ * write the digest to `hashval'.
+ *
+ *   state   - context previously set up by EdonRInit() and fed by
+ *             EdonRUpdate()
+ *   hashval - output buffer; receives EdonR{224,256,384,512}_DIGEST_SIZE
+ *             bytes depending on state->hashbitlen
+ *
+ * Padding consists of a single '1' bit right after the message, zero
+ * bytes, and the total message length in bits stored in the last 64-bit
+ * word of the final block.  One or two blocks are processed depending on
+ * how much room is left after the buffered data.
+ */
+void
+EdonRFinal(EdonRState *state, uint8_t *hashval)
+{
+       uint32_t *data32;
+       uint64_t *data64, num_bits;
+
+       size_t databitlen;
+       int LastByte, PadOnePosition;
+
+       /* Total message length in bits, appended to the padding below. */
+       num_bits = state->bits_processed + state->unprocessed_bits;
+       ASSERT(EDONR_VALID_HASHBITLEN(state->hashbitlen));
+       switch (state->hashbitlen) {
+       case 224:
+       case 256:
+               /*
+                * LastByte is the buffer byte holding the first unused bit;
+                * PadOnePosition is that bit's position in the byte
+                * (7 = most significant).
+                */
+               LastByte = (int)state->unprocessed_bits >> 3;
+               PadOnePosition = 7 - (state->unprocessed_bits & 0x07);
+               /*
+                * Keep the message bits above the pad position, clear the
+                * rest of the byte and set the single '1' pad bit.
+                */
+               hashState256(state)->LastPart[LastByte] =
+                   (hashState256(state)->LastPart[LastByte]
+                   & (0xff << (PadOnePosition + 1))) ^
+                   (0x01 << PadOnePosition);
+               /* LINTED E_BAD_PTR_CAST_ALIGN */
+               data64 = (uint64_t *)hashState256(state)->LastPart;
+
+               if (state->unprocessed_bits < 448) {
+                       /*
+                        * Pad byte and 64-bit length fit in one block: zero
+                        * the gap between them ("- 9" = 1 pad byte + 8 length
+                        * bytes) and store the length in the last word.
+                        */
+                       (void) memset((hashState256(state)->LastPart) +
+                           LastByte + 1, 0x00,
+                           EdonR256_BLOCK_SIZE - LastByte - 9);
+                       databitlen = EdonR256_BLOCK_SIZE * 8;
+#if defined(MACHINE_IS_BIG_ENDIAN)
+                       st_swap64(num_bits, data64 + 7);
+#else
+                       data64[7] = num_bits;
+#endif
+               } else {
+                       /* Not enough room: pad out to two blocks instead. */
+                       (void) memset((hashState256(state)->LastPart) +
+                           LastByte + 1, 0x00,
+                           EdonR256_BLOCK_SIZE * 2 - LastByte - 9);
+                       databitlen = EdonR256_BLOCK_SIZE * 16;
+#if defined(MACHINE_IS_BIG_ENDIAN)
+                       st_swap64(num_bits, data64 + 15);
+#else
+                       data64[15] = num_bits;
+#endif
+               }
+
+               /* LINTED E_BAD_PTR_CAST_ALIGN */
+               data32 = (uint32_t *)hashState256(state)->LastPart;
+               state->bits_processed += Q256(databitlen, data32,
+                   hashState256(state)->DoublePipe);
+               break;
+
+       case 384:
+       case 512:
+               /* Same padding scheme as above, on the larger block size. */
+               LastByte = (int)state->unprocessed_bits >> 3;
+               PadOnePosition = 7 - (state->unprocessed_bits & 0x07);
+               hashState512(state)->LastPart[LastByte] =
+                   (hashState512(state)->LastPart[LastByte]
+                   & (0xff << (PadOnePosition + 1))) ^
+                   (0x01 << PadOnePosition);
+               /* LINTED E_BAD_PTR_CAST_ALIGN */
+               data64 = (uint64_t *)hashState512(state)->LastPart;
+
+               if (state->unprocessed_bits < 960) {
+                       /* One block suffices; length goes in its last word. */
+                       (void) memset((hashState512(state)->LastPart) +
+                           LastByte + 1, 0x00,
+                           EdonR512_BLOCK_SIZE - LastByte - 9);
+                       databitlen = EdonR512_BLOCK_SIZE * 8;
+#if defined(MACHINE_IS_BIG_ENDIAN)
+                       st_swap64(num_bits, data64 + 15);
+#else
+                       data64[15] = num_bits;
+#endif
+               } else {
+                       /* Not enough room: pad out to two blocks instead. */
+                       (void) memset((hashState512(state)->LastPart) +
+                           LastByte + 1, 0x00,
+                           EdonR512_BLOCK_SIZE * 2 - LastByte - 9);
+                       databitlen = EdonR512_BLOCK_SIZE * 16;
+#if defined(MACHINE_IS_BIG_ENDIAN)
+                       st_swap64(num_bits, data64 + 31);
+#else
+                       data64[31] = num_bits;
+#endif
+               }
+
+               state->bits_processed += Q512(databitlen, data64,
+                   hashState512(state)->DoublePipe);
+               break;
+       }
+
+       /*
+        * Emit the digest from the tail of the double pipe.  The starting
+        * word differs per digest size; big-endian hosts byte-swap each
+        * word as it is stored, other hosts copy the bytes directly.
+        */
+       switch (state->hashbitlen) {
+       case 224: {
+#if defined(MACHINE_IS_BIG_ENDIAN)
+               uint32_t *d32 = (uint32_t *)hashval;
+               uint32_t *s32 = hashState224(state)->DoublePipe + 9;
+               int j;
+
+               for (j = 0; j < EdonR224_DIGEST_SIZE >> 2; j++)
+                       st_swap32(s32[j], d32 + j);
+#else
+               bcopy(hashState256(state)->DoublePipe + 9, hashval,
+                   EdonR224_DIGEST_SIZE);
+#endif
+               break;
+       }
+       case 256: {
+#if defined(MACHINE_IS_BIG_ENDIAN)
+               uint32_t *d32 = (uint32_t *)hashval;
+               uint32_t *s32 = hashState224(state)->DoublePipe + 8;
+               int j;
+
+               for (j = 0; j < EdonR256_DIGEST_SIZE >> 2; j++)
+                       st_swap32(s32[j], d32 + j);
+#else
+               bcopy(hashState256(state)->DoublePipe + 8, hashval,
+                   EdonR256_DIGEST_SIZE);
+#endif
+               break;
+       }
+       case 384: {
+#if defined(MACHINE_IS_BIG_ENDIAN)
+               uint64_t *d64 = (uint64_t *)hashval;
+               uint64_t *s64 = hashState384(state)->DoublePipe + 10;
+               int j;
+
+               for (j = 0; j < EdonR384_DIGEST_SIZE >> 3; j++)
+                       st_swap64(s64[j], d64 + j);
+#else
+               bcopy(hashState384(state)->DoublePipe + 10, hashval,
+                   EdonR384_DIGEST_SIZE);
+#endif
+               break;
+       }
+       case 512: {
+#if defined(MACHINE_IS_BIG_ENDIAN)
+               uint64_t *d64 = (uint64_t *)hashval;
+               uint64_t *s64 = hashState512(state)->DoublePipe + 8;
+               int j;
+
+               for (j = 0; j < EdonR512_DIGEST_SIZE >> 3; j++)
+                       st_swap64(s64[j], d64 + j);
+#else
+               bcopy(hashState512(state)->DoublePipe + 8, hashval,
+                   EdonR512_DIGEST_SIZE);
+#endif
+               break;
+       }
+       }
+}
+
+
+/*
+ * EdonRHash - one-shot convenience wrapper.
+ *
+ * Hashes `databitlen' bits at `data' with a `hashbitlen'-bit Edon-R
+ * digest and writes the result to `hashval', using a context that lives
+ * on the caller's stack for the duration of the call.
+ */
+void
+EdonRHash(size_t hashbitlen, const uint8_t *data, size_t databitlen,
+    uint8_t *hashval)
+{
+       EdonRState ctx;
+
+       /* init -> single update -> finalise */
+       EdonRInit(&ctx, hashbitlen);
+       EdonRUpdate(&ctx, data, databitlen);
+       EdonRFinal(&ctx, hashval);
+}
+
+/* Kernel builds only: export the four public Edon-R entry points. */
+#ifdef _KERNEL
+EXPORT_SYMBOL(EdonRInit);
+EXPORT_SYMBOL(EdonRUpdate);
+EXPORT_SYMBOL(EdonRHash);
+EXPORT_SYMBOL(EdonRFinal);
+#endif
diff --git a/module/icp/algs/edonr/edonr_byteorder.h b/module/icp/algs/edonr/edonr_byteorder.h
new file mode 100644 (file)
index 0000000..d17e8f1
--- /dev/null
@@ -0,0 +1,216 @@
+/*
+ * IDI,NTNU
+ *
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ *
+ * Copyright (C) 2009, 2010, Jorn Amundsen <jorn.amundsen@ntnu.no>
+ *
+ * C header file to determine compile machine byte order. Take care when cross
+ * compiling.
+ *
+ * $Id: byteorder.h 517 2013-02-17 20:34:39Z joern $
+ */
+/*
+ * Portions copyright (c) 2013, Saso Kiselkov, All rights reserved
+ */
+
+#ifndef _CRYPTO_EDONR_BYTEORDER_H
+#define        _CRYPTO_EDONR_BYTEORDER_H
+
+
+#include <sys/param.h>
+
+/*
+ * Determine the host byte order and define MACHINE_IS_BIG_ENDIAN or
+ * MACHINE_IS_LITTLE_ENDIAN accordingly.  Sources are tried in this order:
+ *
+ *   1. __BYTE_ORDER compared against __BIG_ENDIAN / __LITTLE_ENDIAN
+ *   2. BYTE_ORDER compared against BIG_ENDIAN / LITTLE_ENDIAN
+ *   3. mere presence of _BIG_ENDIAN / _MIPSEB or _LITTLE_ENDIAN / _MIPSEL
+ *
+ * Step 3 only runs if steps 1-2 decided nothing, and it tests whether the
+ * macros are defined, not what they expand to.  If nothing matches, the
+ * build is stopped with #error.
+ */
+#if defined(__BYTE_ORDER)
+#if (__BYTE_ORDER == __BIG_ENDIAN)
+#define        MACHINE_IS_BIG_ENDIAN
+#elif (__BYTE_ORDER == __LITTLE_ENDIAN)
+#define        MACHINE_IS_LITTLE_ENDIAN
+#endif
+#elif defined(BYTE_ORDER)
+#if (BYTE_ORDER == BIG_ENDIAN)
+#define        MACHINE_IS_BIG_ENDIAN
+#elif (BYTE_ORDER == LITTLE_ENDIAN)
+#define        MACHINE_IS_LITTLE_ENDIAN
+#endif
+#endif /* __BYTE_ORDER || BYTE_ORDER */
+
+/* Fallback: platform/compiler predefines that exist without a value test. */
+#if !defined(MACHINE_IS_BIG_ENDIAN) && !defined(MACHINE_IS_LITTLE_ENDIAN)
+#if defined(_BIG_ENDIAN) || defined(_MIPSEB)
+#define        MACHINE_IS_BIG_ENDIAN
+#endif
+#if defined(_LITTLE_ENDIAN) || defined(_MIPSEL)
+#define        MACHINE_IS_LITTLE_ENDIAN
+#endif
+#endif /* !MACHINE_IS_BIG_ENDIAN && !MACHINE_IS_LITTLE_ENDIAN */
+
+/* Refuse to build rather than guess the byte order. */
+#if !defined(MACHINE_IS_BIG_ENDIAN) && !defined(MACHINE_IS_LITTLE_ENDIAN)
+#error unknown machine byte sex
+#endif
+
+#define        BYTEORDER_INCLUDED
+
+#if defined(MACHINE_IS_BIG_ENDIAN)
+/*
+ * Byte swapping macros for big endian architectures and compilers,
+ * add as appropriate for other architectures and/or compilers.
+ *
+ *     ld_swap64(src,dst) : uint64_t dst = *(src)
+ *     st_swap64(src,dst) : *(dst)       = uint64_t src
+ *
+ * ld_swap32/st_swap32 are the 32-bit equivalents.  Selection order is:
+ * PowerPC inline assembly, SPARC inline assembly, compiler bswap
+ * builtins, then portable shift-and-mask expressions.  A later stage only
+ * defines a macro that no earlier stage provided.
+ *
+ * Several variants expand to a brace-enclosed block rather than an
+ * expression, so the macros must be used as stand-alone statements.
+ */
+
+#if defined(__PPC__) || defined(_ARCH_PPC)
+
+/* PowerPC: byte-reversed load/store instructions do the swap. */
+#if defined(__64BIT__)
+#if defined(_ARCH_PWR7)
+#define        aix_ld_swap64(s64, d64)\
+       __asm__("ldbrx %0,0,%1" : "=r"(d64) : "r"(s64))
+#define        aix_st_swap64(s64, d64)\
+       __asm__ volatile("stdbrx %1,0,%0" : : "r"(d64), "r"(s64))
+#else
+#define        aix_ld_swap64(s64, d64)                                         \
+{                                                                      \
+       uint64_t *s4 = 0, h; /* initialize to zero for gcc warning */   \
+                                                                       \
+       __asm__("addi %0,%3,4;lwbrx %1,0,%3;lwbrx %2,0,%0;rldimi %1,%2,32,0"\
+               : "+r"(s4), "=r"(d64), "=r"(h) : "b"(s64));             \
+}
+
+#define        aix_st_swap64(s64, d64)                                         \
+{                                                                      \
+       uint64_t *s4 = 0, h; /* initialize to zero for gcc warning */   \
+       h = (s64) >> 32;                                                \
+       __asm__ volatile("addi %0,%3,4;stwbrx %1,0,%3;stwbrx %2,0,%0"   \
+               : "+r"(s4) : "r"(s64), "r"(h), "b"(d64));               \
+}
+#endif /* 64BIT && PWR7 */
+#else
+#define        aix_ld_swap64(s64, d64)                                         \
+{                                                                      \
+       uint32_t *s4 = 0, h, l; /* initialize to zero for gcc warning */\
+       __asm__("addi %0,%3,4;lwbrx %1,0,%3;lwbrx %2,0,%0"              \
+               : "+r"(s4), "=r"(l), "=r"(h) : "b"(s64));               \
+       d64 = ((uint64_t)h<<32) | l;                                    \
+}
+
+#define        aix_st_swap64(s64, d64)                                         \
+{                                                                      \
+       uint32_t *s4 = 0, h, l; /* initialize to zero for gcc warning */\
+       l = (s64) & 0xfffffffful, h = (s64) >> 32;                      \
+       __asm__ volatile("addi %0,%3,4;stwbrx %1,0,%3;stwbrx %2,0,%0"   \
+               : "+r"(s4) : "r"(l), "r"(h), "b"(d64));                 \
+}
+#endif /* __64BIT__ */
+#define        aix_ld_swap32(s32, d32)\
+       __asm__("lwbrx %0,0,%1" : "=r"(d32) : "r"(s32))
+#define        aix_st_swap32(s32, d32)\
+       __asm__ volatile("stwbrx %1,0,%0" : : "r"(d32), "r"(s32))
+#define        ld_swap32(s, d) aix_ld_swap32(s, d)
+#define        st_swap32(s, d) aix_st_swap32(s, d)
+#define        ld_swap64(s, d) aix_ld_swap64(s, d)
+#define        st_swap64(s, d) aix_st_swap64(s, d)
+#endif /* __PPC__ || _ARCH_PPC */
+
+#if defined(__sparc)
+#if !defined(__arch64__) && !defined(__sparcv8) && defined(__sparcv9)
+#define        __arch64__
+#endif
+#if defined(__GNUC__) || (defined(__SUNPRO_C) && __SUNPRO_C > 0x590)
+/* need Sun Studio C 5.10 and above for GNU inline assembly */
+/*
+ * NOTE(review): 0x88 is presumably the little-endian primary address
+ * space identifier -- confirm against the SPARC architecture manual.
+ */
+#if defined(__arch64__)
+#define        sparc_ld_swap64(s64, d64)                                       \
+       __asm__("ldxa [%1]0x88,%0" : "=r"(d64) : "r"(s64))
+#define        sparc_st_swap64(s64, d64)                                       \
+       __asm__ volatile("stxa %0,[%1]0x88" : : "r"(s64), "r"(d64))
+#else
+/*
+ * s4 is a read-write ("+r") asm operand, so give it a defined value
+ * first, exactly as the PowerPC variants above do.
+ */
+#define        sparc_ld_swap64(s64, d64)                                       \
+{                                                                      \
+       uint32_t *s4 = 0, h, l; /* initialize to zero for gcc warning */\
+       __asm__("add %3,4,%0\n\tlda [%3]0x88,%1\n\tlda [%0]0x88,%2"     \
+               : "+r"(s4), "=r"(l), "=r"(h) : "r"(s64));               \
+       d64 = ((uint64_t)h<<32) | l;                                    \
+}
+#define        sparc_st_swap64(s64, d64)                                       \
+{                                                                      \
+       uint32_t *s4 = 0, h, l; /* initialize to zero for gcc warning */\
+       l = (s64) & 0xfffffffful, h = (s64) >> 32;                      \
+       __asm__ volatile("add %3,4,%0\n\tsta %1,[%3]0x88\n\tsta %2,[%0]0x88"\
+               : "+r"(s4) : "r"(l), "r"(h), "r"(d64));                 \
+}
+#endif /* sparc64 */
+#define        sparc_ld_swap32(s32, d32)\
+       __asm__("lda [%1]0x88,%0" : "=r"(d32) : "r"(s32))
+#define        sparc_st_swap32(s32, d32)\
+       __asm__ volatile("sta %0,[%1]0x88" : : "r"(s32), "r"(d32))
+#define        ld_swap32(s, d) sparc_ld_swap32(s, d)
+#define        st_swap32(s, d) sparc_st_swap32(s, d)
+#define        ld_swap64(s, d) sparc_ld_swap64(s, d)
+#define        st_swap64(s, d) sparc_st_swap64(s, d)
+#endif /* GCC || Sun Studio C > 5.9 */
+#endif /* sparc */
+
+/* GCC fallback */
+#if ((__GNUC__ >= 4) || defined(__PGIC__)) && !defined(ld_swap32)
+#define        ld_swap32(s, d) (d = __builtin_bswap32(*(s)))
+#define        st_swap32(s, d) (*(d) = __builtin_bswap32(s))
+#endif /* GCC4/PGIC && !swap32 */
+#if ((__GNUC__ >= 4) || defined(__PGIC__)) && !defined(ld_swap64)
+#define        ld_swap64(s, d) (d = __builtin_bswap64(*(s)))
+#define        st_swap64(s, d) (*(d) = __builtin_bswap64(s))
+#endif /* GCC4/PGIC && !swap64 */
+
+/* generic fallback */
+#if !defined(ld_swap32)
+#define        ld_swap32(s, d)                                                 \
+       (d = (*(s) >> 24) | (*(s) >> 8 & 0xff00) |                      \
+       (*(s) << 8 & 0xff0000) | (*(s) << 24))
+#define        st_swap32(s, d)                                                 \
+       (*(d) = ((s) >> 24) | ((s) >> 8 & 0xff00) |                     \
+       ((s) << 8 & 0xff0000) | ((s) << 24))
+#endif
+#if !defined(ld_swap64)
+#define        ld_swap64(s, d)                                                 \
+       (d = (*(s) >> 56) | (*(s) >> 40 & 0xff00) |                     \
+       (*(s) >> 24 & 0xff0000) | (*(s) >> 8 & 0xff000000) |            \
+       (*(s) & 0xff000000) << 8 | (*(s) & 0xff0000) << 24 |            \
+       (*(s) & 0xff00) << 40 | *(s) << 56)
+#define        st_swap64(s, d)                                                 \
+       (*(d) = ((s) >> 56) | ((s) >> 40 & 0xff00) |                    \
+       ((s) >> 24 & 0xff0000) | ((s) >> 8 & 0xff000000) |              \
+       ((s) & 0xff000000) << 8 | ((s) & 0xff0000) << 24 |              \
+       ((s) & 0xff00) << 40 | (s) << 56)
+#endif
+
+#endif /* MACHINE_IS_BIG_ENDIAN */
+
+
+#if defined(MACHINE_IS_LITTLE_ENDIAN)
+/*
+ * replace swaps with simple assignments on little endian systems
+ *
+ *     ld_swapNN(src,dst) : dst    = *(src)
+ *     st_swapNN(src,dst) : *(dst) = src
+ *
+ * Arguments are parenthesised so that any expression can be passed
+ * without changing how the assignment groups.
+ */
+#undef ld_swap32
+#undef st_swap32
+#define        ld_swap32(s, d) ((d) = *(s))
+#define        st_swap32(s, d) (*(d) = (s))
+#undef ld_swap64
+#undef st_swap64
+#define        ld_swap64(s, d) ((d) = *(s))
+#define        st_swap64(s, d) (*(d) = (s))
+#endif /* MACHINE_IS_LITTLE_ENDIAN */
+
+#endif /* _CRYPTO_EDONR_BYTEORDER_H */
index 792ca8825cbeb7d9e5bc3beac6bd8ab955e7caad..dbe008190688f4966b70945a593906f9e2d4321b 100644 (file)
@@ -38,7 +38,7 @@
 
 #include <sys/zfs_context.h>
 #define        _SHA2_IMPL
-#include <sha2/sha2.h>
+#include <sys/sha2.h>
 #include <sha2/sha2_consts.h>
 
 #define        _RESTRICT_KYWD
 #include <sys/byteorder.h>
 #define        HAVE_HTONL
 #endif
+#include <sys/isa_defs.h>      /* for _ILP32 */
 
 static void Encode(uint8_t *, uint32_t *, size_t);
+static void Encode64(uint8_t *, uint64_t *, size_t);
 
 #if    defined(__amd64)
+#define        SHA512Transform(ctx, in) SHA512TransformBlocks((ctx), (in), 1)
 #define        SHA256Transform(ctx, in) SHA256TransformBlocks((ctx), (in), 1)
+
+void SHA512TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num);
 void SHA256TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num);
+
 #else
 static void SHA256Transform(SHA2_CTX *, const uint8_t *);
+static void SHA512Transform(SHA2_CTX *, const uint8_t *);
 #endif /* __amd64 */
 
 static uint8_t PADDING[128] = { 0x80, /* all zeros */ };
 
+/*
+ * The low-level checksum routines use a lot of stack space. On systems where
+ * small stacks are enforced (like 32-bit kernel builds), insert compiler memory
+ * barriers to reduce stack frame size. This can reduce the SHA512Transform()
+ * stack frame usage from 3k to <1k on ARM32, for example.
+ *
+ * The barrier is an empty asm statement with a "memory" clobber.  Note that
+ * the barrier form of the macro already ends in ';', and that the macro
+ * expands to nothing when neither _ILP32 nor __powerpc is defined.
+ */
+#if defined(_ILP32) || defined(__powerpc)      /* small stack */
+#define        SMALL_STACK_MEMORY_BARRIER      asm volatile("": : :"memory");
+#else
+#define        SMALL_STACK_MEMORY_BARRIER
+#endif
+
 /* Ch and Maj are the basic SHA2 functions. */
 #define        Ch(b, c, d)     (((b) & (c)) ^ ((~b) & (d)))
 #define        Maj(b, c, d)    (((b) & (c)) ^ ((b) & (d)) ^ ((c) & (d)))
@@ -82,6 +101,18 @@ static uint8_t PADDING[128] = { 0x80, /* all zeros */ };
        T2 = BIGSIGMA0_256(a) + Maj(a, b, c);                           \
        h = T1 + T2
 
+/* SHA384/512 Functions */
+/* Rotate/shift mixing functions, built on ROTR() and SHR(). */
+#define        BIGSIGMA0(x)    (ROTR((x), 28) ^ ROTR((x), 34) ^ ROTR((x), 39))
+#define        BIGSIGMA1(x)    (ROTR((x), 14) ^ ROTR((x), 18) ^ ROTR((x), 41))
+#define        SIGMA0(x)       (ROTR((x), 1) ^ ROTR((x), 8) ^ SHR((x), 7))
+#define        SIGMA1(x)       (ROTR((x), 19) ^ ROTR((x), 61) ^ SHR((x), 6))
+/*
+ * One SHA-384/512 round.
+ *
+ *   a..h - the eight working variables; only d and h are assigned
+ *   i    - round index, used to select the constant via SHA512_CONST(i)
+ *   w    - input word added into T1 for this round
+ *
+ * T1 and T2 are not declared here and must be in scope at the point of
+ * use.  The expansion is a sequence of statements, not an expression, and
+ * ends with SMALL_STACK_MEMORY_BARRIER.
+ */
+#define        SHA512ROUND(a, b, c, d, e, f, g, h, i, w)                       \
+       T1 = h + BIGSIGMA1(e) + Ch(e, f, g) + SHA512_CONST(i) + w;      \
+       d += T1;                                                        \
+       T2 = BIGSIGMA0(a) + Maj(a, b, c);                               \
+       h = T1 + T2;                                                    \
+       SMALL_STACK_MEMORY_BARRIER;
+
 /*
  * sparc optimization:
  *
@@ -130,6 +161,33 @@ SHA256Transform(SHA2_CTX *ctx, const uint8_t *blk)
        uint32_t w8, w9, w10, w11, w12, w13, w14, w15;
        uint32_t T1, T2;
 
+#if    defined(__sparc)
+       static const uint32_t sha256_consts[] = {
+               SHA256_CONST_0, SHA256_CONST_1, SHA256_CONST_2,
+               SHA256_CONST_3, SHA256_CONST_4, SHA256_CONST_5,
+               SHA256_CONST_6, SHA256_CONST_7, SHA256_CONST_8,
+               SHA256_CONST_9, SHA256_CONST_10, SHA256_CONST_11,
+               SHA256_CONST_12, SHA256_CONST_13, SHA256_CONST_14,
+               SHA256_CONST_15, SHA256_CONST_16, SHA256_CONST_17,
+               SHA256_CONST_18, SHA256_CONST_19, SHA256_CONST_20,
+               SHA256_CONST_21, SHA256_CONST_22, SHA256_CONST_23,
+               SHA256_CONST_24, SHA256_CONST_25, SHA256_CONST_26,
+               SHA256_CONST_27, SHA256_CONST_28, SHA256_CONST_29,
+               SHA256_CONST_30, SHA256_CONST_31, SHA256_CONST_32,
+               SHA256_CONST_33, SHA256_CONST_34, SHA256_CONST_35,
+               SHA256_CONST_36, SHA256_CONST_37, SHA256_CONST_38,
+               SHA256_CONST_39, SHA256_CONST_40, SHA256_CONST_41,
+               SHA256_CONST_42, SHA256_CONST_43, SHA256_CONST_44,
+               SHA256_CONST_45, SHA256_CONST_46, SHA256_CONST_47,
+               SHA256_CONST_48, SHA256_CONST_49, SHA256_CONST_50,
+               SHA256_CONST_51, SHA256_CONST_52, SHA256_CONST_53,
+               SHA256_CONST_54, SHA256_CONST_55, SHA256_CONST_56,
+               SHA256_CONST_57, SHA256_CONST_58, SHA256_CONST_59,
+               SHA256_CONST_60, SHA256_CONST_61, SHA256_CONST_62,
+               SHA256_CONST_63
+       };
+#endif /* __sparc */
+
        if ((uintptr_t)blk & 0x3) {             /* not 4-byte aligned? */
                bcopy(blk, ctx->buf_un.buf32,  sizeof (ctx->buf_un.buf32));
                blk = (uint8_t *)ctx->buf_un.buf32;
@@ -292,6 +350,256 @@ SHA256Transform(SHA2_CTX *ctx, const uint8_t *blk)
        ctx->state.s32[6] += g;
        ctx->state.s32[7] += h;
 }
+
+
+/*
+ * SHA384 and SHA512 Transform
+ *
+ * Runs rounds 0..79 over the single block at blk (sixteen 64-bit words,
+ * i.e. 128 bytes, are read from it) and adds the result into the eight
+ * 64-bit chaining words in ctx->state.s64[].
+ *
+ * If blk is not 8-byte aligned, the block is first copied into
+ * ctx->buf_un.buf64 and processed from there.
+ */
+
+static void
+SHA512Transform(SHA2_CTX *ctx, const uint8_t *blk)
+{
+       /* working variables a..h start from the current chaining state */
+       uint64_t a = ctx->state.s64[0];
+       uint64_t b = ctx->state.s64[1];
+       uint64_t c = ctx->state.s64[2];
+       uint64_t d = ctx->state.s64[3];
+       uint64_t e = ctx->state.s64[4];
+       uint64_t f = ctx->state.s64[5];
+       uint64_t g = ctx->state.s64[6];
+       uint64_t h = ctx->state.s64[7];
+
+       uint64_t w0, w1, w2, w3, w4, w5, w6, w7;
+       uint64_t w8, w9, w10, w11, w12, w13, w14, w15;
+       uint64_t T1, T2;
+       /*
+        * Table of the 80 round constants, built only on sparc.
+        * NOTE(review): presumably consumed by the sparc variant of
+        * SHA512ROUND, which is defined outside this function -- confirm.
+        */
+#if    defined(__sparc)
+       static const uint64_t sha512_consts[] = {
+               SHA512_CONST_0, SHA512_CONST_1, SHA512_CONST_2,
+               SHA512_CONST_3, SHA512_CONST_4, SHA512_CONST_5,
+               SHA512_CONST_6, SHA512_CONST_7, SHA512_CONST_8,
+               SHA512_CONST_9, SHA512_CONST_10, SHA512_CONST_11,
+               SHA512_CONST_12, SHA512_CONST_13, SHA512_CONST_14,
+               SHA512_CONST_15, SHA512_CONST_16, SHA512_CONST_17,
+               SHA512_CONST_18, SHA512_CONST_19, SHA512_CONST_20,
+               SHA512_CONST_21, SHA512_CONST_22, SHA512_CONST_23,
+               SHA512_CONST_24, SHA512_CONST_25, SHA512_CONST_26,
+               SHA512_CONST_27, SHA512_CONST_28, SHA512_CONST_29,
+               SHA512_CONST_30, SHA512_CONST_31, SHA512_CONST_32,
+               SHA512_CONST_33, SHA512_CONST_34, SHA512_CONST_35,
+               SHA512_CONST_36, SHA512_CONST_37, SHA512_CONST_38,
+               SHA512_CONST_39, SHA512_CONST_40, SHA512_CONST_41,
+               SHA512_CONST_42, SHA512_CONST_43, SHA512_CONST_44,
+               SHA512_CONST_45, SHA512_CONST_46, SHA512_CONST_47,
+               SHA512_CONST_48, SHA512_CONST_49, SHA512_CONST_50,
+               SHA512_CONST_51, SHA512_CONST_52, SHA512_CONST_53,
+               SHA512_CONST_54, SHA512_CONST_55, SHA512_CONST_56,
+               SHA512_CONST_57, SHA512_CONST_58, SHA512_CONST_59,
+               SHA512_CONST_60, SHA512_CONST_61, SHA512_CONST_62,
+               SHA512_CONST_63, SHA512_CONST_64, SHA512_CONST_65,
+               SHA512_CONST_66, SHA512_CONST_67, SHA512_CONST_68,
+               SHA512_CONST_69, SHA512_CONST_70, SHA512_CONST_71,
+               SHA512_CONST_72, SHA512_CONST_73, SHA512_CONST_74,
+               SHA512_CONST_75, SHA512_CONST_76, SHA512_CONST_77,
+               SHA512_CONST_78, SHA512_CONST_79
+       };
+#endif /* __sparc */
+
+       /* bounce a misaligned block through the context's 64-bit buffer */
+       if ((uintptr_t)blk & 0x7) {             /* not 8-byte aligned? */
+               bcopy(blk, ctx->buf_un.buf64,  sizeof (ctx->buf_un.buf64));
+               blk = (uint8_t *)ctx->buf_un.buf64;
+       }
+       /*
+        * Rounds 0-15: load message words w0..w15 from the block with
+        * LOAD_BIG_64 and feed each one to a round.  The a..h arguments are
+        * rotated by one position per round rather than moving the values.
+        */
+       /* LINTED E_BAD_PTR_CAST_ALIGN */
+       w0 =  LOAD_BIG_64(blk + 8 * 0);
+       SHA512ROUND(a, b, c, d, e, f, g, h, 0, w0);
+       /* LINTED E_BAD_PTR_CAST_ALIGN */
+       w1 =  LOAD_BIG_64(blk + 8 * 1);
+       SHA512ROUND(h, a, b, c, d, e, f, g, 1, w1);
+       /* LINTED E_BAD_PTR_CAST_ALIGN */
+       w2 =  LOAD_BIG_64(blk + 8 * 2);
+       SHA512ROUND(g, h, a, b, c, d, e, f, 2, w2);
+       /* LINTED E_BAD_PTR_CAST_ALIGN */
+       w3 =  LOAD_BIG_64(blk + 8 * 3);
+       SHA512ROUND(f, g, h, a, b, c, d, e, 3, w3);
+       /* LINTED E_BAD_PTR_CAST_ALIGN */
+       w4 =  LOAD_BIG_64(blk + 8 * 4);
+       SHA512ROUND(e, f, g, h, a, b, c, d, 4, w4);
+       /* LINTED E_BAD_PTR_CAST_ALIGN */
+       w5 =  LOAD_BIG_64(blk + 8 * 5);
+       SHA512ROUND(d, e, f, g, h, a, b, c, 5, w5);
+       /* LINTED E_BAD_PTR_CAST_ALIGN */
+       w6 =  LOAD_BIG_64(blk + 8 * 6);
+       SHA512ROUND(c, d, e, f, g, h, a, b, 6, w6);
+       /* LINTED E_BAD_PTR_CAST_ALIGN */
+       w7 =  LOAD_BIG_64(blk + 8 * 7);
+       SHA512ROUND(b, c, d, e, f, g, h, a, 7, w7);
+       /* LINTED E_BAD_PTR_CAST_ALIGN */
+       w8 =  LOAD_BIG_64(blk + 8 * 8);
+       SHA512ROUND(a, b, c, d, e, f, g, h, 8, w8);
+       /* LINTED E_BAD_PTR_CAST_ALIGN */
+       w9 =  LOAD_BIG_64(blk + 8 * 9);
+       SHA512ROUND(h, a, b, c, d, e, f, g, 9, w9);
+       /* LINTED E_BAD_PTR_CAST_ALIGN */
+       w10 =  LOAD_BIG_64(blk + 8 * 10);
+       SHA512ROUND(g, h, a, b, c, d, e, f, 10, w10);
+       /* LINTED E_BAD_PTR_CAST_ALIGN */
+       w11 =  LOAD_BIG_64(blk + 8 * 11);
+       SHA512ROUND(f, g, h, a, b, c, d, e, 11, w11);
+       /* LINTED E_BAD_PTR_CAST_ALIGN */
+       w12 =  LOAD_BIG_64(blk + 8 * 12);
+       SHA512ROUND(e, f, g, h, a, b, c, d, 12, w12);
+       /* LINTED E_BAD_PTR_CAST_ALIGN */
+       w13 =  LOAD_BIG_64(blk + 8 * 13);
+       SHA512ROUND(d, e, f, g, h, a, b, c, 13, w13);
+       /* LINTED E_BAD_PTR_CAST_ALIGN */
+       w14 =  LOAD_BIG_64(blk + 8 * 14);
+       SHA512ROUND(c, d, e, f, g, h, a, b, 14, w14);
+       /* LINTED E_BAD_PTR_CAST_ALIGN */
+       w15 =  LOAD_BIG_64(blk + 8 * 15);
+       SHA512ROUND(b, c, d, e, f, g, h, a, 15, w15);
+       /*
+        * Rounds 16-79: each word is recomputed in place from the rolling
+        * 16-word window before it is used:
+        *   w[t] = SIGMA1(w[t-2]) + w[t-7] + SIGMA0(w[t-15]) + w[t-16]
+        * with all indices taken modulo 16.
+        */
+       w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
+       SHA512ROUND(a, b, c, d, e, f, g, h, 16, w0);
+       w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
+       SHA512ROUND(h, a, b, c, d, e, f, g, 17, w1);
+       w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
+       SHA512ROUND(g, h, a, b, c, d, e, f, 18, w2);
+       w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
+       SHA512ROUND(f, g, h, a, b, c, d, e, 19, w3);
+       w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
+       SHA512ROUND(e, f, g, h, a, b, c, d, 20, w4);
+       w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
+       SHA512ROUND(d, e, f, g, h, a, b, c, 21, w5);
+       w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
+       SHA512ROUND(c, d, e, f, g, h, a, b, 22, w6);
+       w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
+       SHA512ROUND(b, c, d, e, f, g, h, a, 23, w7);
+       w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
+       SHA512ROUND(a, b, c, d, e, f, g, h, 24, w8);
+       w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
+       SHA512ROUND(h, a, b, c, d, e, f, g, 25, w9);
+       w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
+       SHA512ROUND(g, h, a, b, c, d, e, f, 26, w10);
+       w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
+       SHA512ROUND(f, g, h, a, b, c, d, e, 27, w11);
+       w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
+       SHA512ROUND(e, f, g, h, a, b, c, d, 28, w12);
+       w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
+       SHA512ROUND(d, e, f, g, h, a, b, c, 29, w13);
+       w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
+       SHA512ROUND(c, d, e, f, g, h, a, b, 30, w14);
+       w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
+       SHA512ROUND(b, c, d, e, f, g, h, a, 31, w15);
+
+       w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
+       SHA512ROUND(a, b, c, d, e, f, g, h, 32, w0);
+       w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
+       SHA512ROUND(h, a, b, c, d, e, f, g, 33, w1);
+       w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
+       SHA512ROUND(g, h, a, b, c, d, e, f, 34, w2);
+       w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
+       SHA512ROUND(f, g, h, a, b, c, d, e, 35, w3);
+       w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
+       SHA512ROUND(e, f, g, h, a, b, c, d, 36, w4);
+       w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
+       SHA512ROUND(d, e, f, g, h, a, b, c, 37, w5);
+       w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
+       SHA512ROUND(c, d, e, f, g, h, a, b, 38, w6);
+       w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
+       SHA512ROUND(b, c, d, e, f, g, h, a, 39, w7);
+       w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
+       SHA512ROUND(a, b, c, d, e, f, g, h, 40, w8);
+       w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
+       SHA512ROUND(h, a, b, c, d, e, f, g, 41, w9);
+       w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
+       SHA512ROUND(g, h, a, b, c, d, e, f, 42, w10);
+       w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
+       SHA512ROUND(f, g, h, a, b, c, d, e, 43, w11);
+       w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
+       SHA512ROUND(e, f, g, h, a, b, c, d, 44, w12);
+       w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
+       SHA512ROUND(d, e, f, g, h, a, b, c, 45, w13);
+       w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
+       SHA512ROUND(c, d, e, f, g, h, a, b, 46, w14);
+       w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
+       SHA512ROUND(b, c, d, e, f, g, h, a, 47, w15);
+
+       w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
+       SHA512ROUND(a, b, c, d, e, f, g, h, 48, w0);
+       w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
+       SHA512ROUND(h, a, b, c, d, e, f, g, 49, w1);
+       w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
+       SHA512ROUND(g, h, a, b, c, d, e, f, 50, w2);
+       w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
+       SHA512ROUND(f, g, h, a, b, c, d, e, 51, w3);
+       w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
+       SHA512ROUND(e, f, g, h, a, b, c, d, 52, w4);
+       w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
+       SHA512ROUND(d, e, f, g, h, a, b, c, 53, w5);
+       w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
+       SHA512ROUND(c, d, e, f, g, h, a, b, 54, w6);
+       w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
+       SHA512ROUND(b, c, d, e, f, g, h, a, 55, w7);
+       w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
+       SHA512ROUND(a, b, c, d, e, f, g, h, 56, w8);
+       w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
+       SHA512ROUND(h, a, b, c, d, e, f, g, 57, w9);
+       w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
+       SHA512ROUND(g, h, a, b, c, d, e, f, 58, w10);
+       w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
+       SHA512ROUND(f, g, h, a, b, c, d, e, 59, w11);
+       w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
+       SHA512ROUND(e, f, g, h, a, b, c, d, 60, w12);
+       w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
+       SHA512ROUND(d, e, f, g, h, a, b, c, 61, w13);
+       w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
+       SHA512ROUND(c, d, e, f, g, h, a, b, 62, w14);
+       w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
+       SHA512ROUND(b, c, d, e, f, g, h, a, 63, w15);
+
+       w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
+       SHA512ROUND(a, b, c, d, e, f, g, h, 64, w0);
+       w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
+       SHA512ROUND(h, a, b, c, d, e, f, g, 65, w1);
+       w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
+       SHA512ROUND(g, h, a, b, c, d, e, f, 66, w2);
+       w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
+       SHA512ROUND(f, g, h, a, b, c, d, e, 67, w3);
+       w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
+       SHA512ROUND(e, f, g, h, a, b, c, d, 68, w4);
+       w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
+       SHA512ROUND(d, e, f, g, h, a, b, c, 69, w5);
+       w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
+       SHA512ROUND(c, d, e, f, g, h, a, b, 70, w6);
+       w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
+       SHA512ROUND(b, c, d, e, f, g, h, a, 71, w7);
+       w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
+       SHA512ROUND(a, b, c, d, e, f, g, h, 72, w8);
+       w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
+       SHA512ROUND(h, a, b, c, d, e, f, g, 73, w9);
+       w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
+       SHA512ROUND(g, h, a, b, c, d, e, f, 74, w10);
+       w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
+       SHA512ROUND(f, g, h, a, b, c, d, e, 75, w11);
+       w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
+       SHA512ROUND(e, f, g, h, a, b, c, d, 76, w12);
+       w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
+       SHA512ROUND(d, e, f, g, h, a, b, c, 77, w13);
+       w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
+       SHA512ROUND(c, d, e, f, g, h, a, b, 78, w14);
+       w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
+       SHA512ROUND(b, c, d, e, f, g, h, a, 79, w15);
+       /* fold the working variables back into the chaining state */
+       ctx->state.s64[0] += a;
+       ctx->state.s64[1] += b;
+       ctx->state.s64[2] += c;
+       ctx->state.s64[3] += d;
+       ctx->state.s64[4] += e;
+       ctx->state.s64[5] += f;
+       ctx->state.s64[6] += g;
+       ctx->state.s64[7] += h;
+
+}
 #endif /* !__amd64 */
 
 
@@ -311,14 +619,56 @@ Encode(uint8_t *_RESTRICT_KYWD output, uint32_t *_RESTRICT_KYWD input,
 {
        size_t          i, j;
 
-       for (i = 0, j = 0; j < len; i++, j += 4) {
-               output[j]       = (input[i] >> 24) & 0xff;
-               output[j + 1]   = (input[i] >> 16) & 0xff;
-               output[j + 2]   = (input[i] >>  8) & 0xff;
-               output[j + 3]   = input[i] & 0xff;
+#if    defined(__sparc)
+       if (IS_P2ALIGNED(output, sizeof (uint32_t))) {
+               for (i = 0, j = 0; j < len; i++, j += 4) {
+                       /* LINTED E_BAD_PTR_CAST_ALIGN */
+                       *((uint32_t *)(output + j)) = input[i];
+               }
+       } else {
+#endif /* little endian -- will work on big endian, but slowly */
+               for (i = 0, j = 0; j < len; i++, j += 4) {
+                       output[j]       = (input[i] >> 24) & 0xff;
+                       output[j + 1]   = (input[i] >> 16) & 0xff;
+                       output[j + 2]   = (input[i] >>  8) & 0xff;
+                       output[j + 3]   = input[i] & 0xff;
+               }
+#if    defined(__sparc)
        }
+#endif
 }
 
+/*
+ * Serialize 64-bit words from input into output, most-significant byte
+ * first.  len is a count of output bytes; one input word is consumed for
+ * every 8 bytes written.
+ */
+static void
+Encode64(uint8_t *_RESTRICT_KYWD output, uint64_t *_RESTRICT_KYWD input,
+    size_t len)
+{
+       size_t          word, off;
+
+#if    defined(__sparc)
+       /* sparc: a suitably aligned output can take whole-word stores */
+       if (IS_P2ALIGNED(output, sizeof (uint64_t))) {
+               for (word = 0, off = 0; off < len; word++, off += 8) {
+                       /* LINTED E_BAD_PTR_CAST_ALIGN */
+                       *((uint64_t *)(output + off)) = input[word];
+               }
+               return;
+       }
+#endif /* __sparc */
+       /* byte-at-a-time path: works for any alignment and byte order */
+       for (word = 0, off = 0; off < len; word++, off += 8) {
+               uint64_t        value = input[word];
+               uint8_t         *dst = output + off;
+               int             shift;
+
+               for (shift = 56; shift >= 0; shift -= 8)
+                       *dst++ = (value >> shift) & 0xff;
+       }
+}
+
+
 void
 SHA2Init(uint64_t mech, SHA2_CTX *ctx)
 {
@@ -336,22 +686,86 @@ SHA2Init(uint64_t mech, SHA2_CTX *ctx)
                ctx->state.s32[6] = 0x1f83d9abU;
                ctx->state.s32[7] = 0x5be0cd19U;
                break;
+       case SHA384_MECH_INFO_TYPE:
+       case SHA384_HMAC_MECH_INFO_TYPE:
+       case SHA384_HMAC_GEN_MECH_INFO_TYPE:
+               ctx->state.s64[0] = 0xcbbb9d5dc1059ed8ULL;
+               ctx->state.s64[1] = 0x629a292a367cd507ULL;
+               ctx->state.s64[2] = 0x9159015a3070dd17ULL;
+               ctx->state.s64[3] = 0x152fecd8f70e5939ULL;
+               ctx->state.s64[4] = 0x67332667ffc00b31ULL;
+               ctx->state.s64[5] = 0x8eb44a8768581511ULL;
+               ctx->state.s64[6] = 0xdb0c2e0d64f98fa7ULL;
+               ctx->state.s64[7] = 0x47b5481dbefa4fa4ULL;
+               break;
+       case SHA512_MECH_INFO_TYPE:
+       case SHA512_HMAC_MECH_INFO_TYPE:
+       case SHA512_HMAC_GEN_MECH_INFO_TYPE:
+               ctx->state.s64[0] = 0x6a09e667f3bcc908ULL;
+               ctx->state.s64[1] = 0xbb67ae8584caa73bULL;
+               ctx->state.s64[2] = 0x3c6ef372fe94f82bULL;
+               ctx->state.s64[3] = 0xa54ff53a5f1d36f1ULL;
+               ctx->state.s64[4] = 0x510e527fade682d1ULL;
+               ctx->state.s64[5] = 0x9b05688c2b3e6c1fULL;
+               ctx->state.s64[6] = 0x1f83d9abfb41bd6bULL;
+               ctx->state.s64[7] = 0x5be0cd19137e2179ULL;
+               break;
+       case SHA512_224_MECH_INFO_TYPE:
+               ctx->state.s64[0] = 0x8C3D37C819544DA2ULL;
+               ctx->state.s64[1] = 0x73E1996689DCD4D6ULL;
+               ctx->state.s64[2] = 0x1DFAB7AE32FF9C82ULL;
+               ctx->state.s64[3] = 0x679DD514582F9FCFULL;
+               ctx->state.s64[4] = 0x0F6D2B697BD44DA8ULL;
+               ctx->state.s64[5] = 0x77E36F7304C48942ULL;
+               ctx->state.s64[6] = 0x3F9D85A86A1D36C8ULL;
+               ctx->state.s64[7] = 0x1112E6AD91D692A1ULL;
+               break;
+       case SHA512_256_MECH_INFO_TYPE:
+               ctx->state.s64[0] = 0x22312194FC2BF72CULL;
+               ctx->state.s64[1] = 0x9F555FA3C84C64C2ULL;
+               ctx->state.s64[2] = 0x2393B86B6F53B151ULL;
+               ctx->state.s64[3] = 0x963877195940EABDULL;
+               ctx->state.s64[4] = 0x96283EE2A88EFFE3ULL;
+               ctx->state.s64[5] = 0xBE5E1E2553863992ULL;
+               ctx->state.s64[6] = 0x2B0199FC2C85B8AAULL;
+               ctx->state.s64[7] = 0x0EB72DDC81C52CA2ULL;
+               break;
+#ifdef _KERNEL
        default:
                cmn_err(CE_PANIC,
                    "sha2_init: failed to find a supported algorithm: 0x%x",
                    (uint32_t)mech);
+
+#endif /* _KERNEL */
        }
 
        ctx->algotype = (uint32_t)mech;
        ctx->count.c64[0] = ctx->count.c64[1] = 0;
 }
 
+#ifndef _KERNEL
+/*
+ * Convenience initializers, compiled only when _KERNEL is not defined.
+ * Each one forwards to SHA2Init() with the matching mechanism constant.
+ */
+// #pragma inline(SHA256Init, SHA384Init, SHA512Init)
 void
 SHA256Init(SHA256_CTX *ctx)
 {
        SHA2Init(SHA256, ctx);
 }
 
+void
+SHA384Init(SHA384_CTX *ctx)
+{
+       SHA2Init(SHA384, ctx);
+}
+
+void
+SHA512Init(SHA512_CTX *ctx)
+{
+       SHA2Init(SHA512, ctx);
+}
+
+#endif /* !_KERNEL */
+
 /*
  * SHA2Update()
  *
@@ -422,6 +836,8 @@ SHA2Update(SHA2_CTX *ctx, const void *inptr, size_t input_len)
                        bcopy(input, &ctx->buf_un.buf8[buf_index], buf_len);
                        if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE)
                                SHA256Transform(ctx, ctx->buf_un.buf8);
+                       else
+                               SHA512Transform(ctx, ctx->buf_un.buf8);
 
                        i = buf_len;
                }
@@ -431,6 +847,10 @@ SHA2Update(SHA2_CTX *ctx, const void *inptr, size_t input_len)
                        for (; i + buf_limit - 1 < input_len; i += buf_limit) {
                                SHA256Transform(ctx, &input[i]);
                        }
+               } else {
+                       for (; i + buf_limit - 1 < input_len; i += buf_limit) {
+                               SHA512Transform(ctx, &input[i]);
+                       }
                }
 
 #else
@@ -441,6 +861,13 @@ SHA2Update(SHA2_CTX *ctx, const void *inptr, size_t input_len)
                                    block_count);
                                i += block_count << 6;
                        }
+               } else {
+                       block_count = (input_len - i) >> 7;
+                       if (block_count > 0) {
+                               SHA512TransformBlocks(ctx, &input[i],
+                                   block_count);
+                               i += block_count << 7;
+                       }
                }
 #endif /* !__amd64 */
 
@@ -479,6 +906,7 @@ void
 SHA2Final(void *digest, SHA2_CTX *ctx)
 {
        uint8_t         bitcount_be[sizeof (ctx->count.c32)];
+       uint8_t         bitcount_be64[sizeof (ctx->count.c64)];
        uint32_t        index;
        uint32_t        algotype = ctx->algotype;
 
@@ -488,8 +916,45 @@ SHA2Final(void *digest, SHA2_CTX *ctx)
                SHA2Update(ctx, PADDING, ((index < 56) ? 56 : 120) - index);
                SHA2Update(ctx, bitcount_be, sizeof (bitcount_be));
                Encode(digest, ctx->state.s32, sizeof (ctx->state.s32));
+       } else {
+               index  = (ctx->count.c64[1] >> 3) & 0x7f;
+               Encode64(bitcount_be64, ctx->count.c64,
+                   sizeof (bitcount_be64));
+               SHA2Update(ctx, PADDING, ((index < 112) ? 112 : 240) - index);
+               SHA2Update(ctx, bitcount_be64, sizeof (bitcount_be64));
+               if (algotype <= SHA384_HMAC_GEN_MECH_INFO_TYPE) {
+                       ctx->state.s64[6] = ctx->state.s64[7] = 0;
+                       Encode64(digest, ctx->state.s64,
+                           sizeof (uint64_t) * 6);
+               } else if (algotype == SHA512_224_MECH_INFO_TYPE) {
+                       uint8_t last[sizeof (uint64_t)];
+                       /*
+                        * Since SHA-512/224 doesn't align well to 64-bit
+                        * boundaries, we must do the encoding in three steps:
+                        * 1) encode the three 64-bit words that fit neatly
+                        * 2) encode the last 64-bit word to a temp buffer
+                        * 3) take the first four bytes of the temp buffer (the
+                        *    upper 32 bits of that word, since the encoding is
+                        *    most-significant byte first) and append them to
+                        *    the digest
+                        */
+                       Encode64(digest, ctx->state.s64, sizeof (uint64_t) * 3);
+                       Encode64(last, &ctx->state.s64[3], sizeof (uint64_t));
+                       bcopy(last, (uint8_t *)digest + 24, 4);
+               } else if (algotype == SHA512_256_MECH_INFO_TYPE) {
+                       Encode64(digest, ctx->state.s64, sizeof (uint64_t) * 4);
+               } else {
+                       Encode64(digest, ctx->state.s64,
+                           sizeof (ctx->state.s64));
+               }
        }
 
        /* zeroize sensitive information */
        bzero(ctx, sizeof (*ctx));
 }
+
+
+/* In kernel builds, export the generic SHA2 entry points. */
+#ifdef _KERNEL
+EXPORT_SYMBOL(SHA2Init);
+EXPORT_SYMBOL(SHA2Update);
+EXPORT_SYMBOL(SHA2Final);
+#endif
diff --git a/module/icp/algs/skein/THIRDPARTYLICENSE b/module/icp/algs/skein/THIRDPARTYLICENSE
new file mode 100644 (file)
index 0000000..b7434fd
--- /dev/null
@@ -0,0 +1,3 @@
+Implementation of the Skein hash function.
+Source code author: Doug Whiting, 2008.
+This algorithm and source code is released to the public domain.
diff --git a/module/icp/algs/skein/THIRDPARTYLICENSE.descrip b/module/icp/algs/skein/THIRDPARTYLICENSE.descrip
new file mode 100644 (file)
index 0000000..0ae89cf
--- /dev/null
@@ -0,0 +1 @@
+LICENSE TERMS OF SKEIN HASH ALGORITHM IMPLEMENTATION
diff --git a/module/icp/algs/skein/skein.c b/module/icp/algs/skein/skein.c
new file mode 100644 (file)
index 0000000..0981eee
--- /dev/null
@@ -0,0 +1,921 @@
+/*
+ * Implementation of the Skein hash function.
+ * Source code author: Doug Whiting, 2008.
+ * This algorithm and source code is released to the public domain.
+ */
+/* Copyright 2013 Doug Whiting. This code is released to the public domain. */
+
+#define        SKEIN_PORT_CODE         /* instantiate any code in skein_port.h */
+
+#include <sys/types.h>
+#include <sys/note.h>
+#include <sys/skein.h>         /* get the Skein API definitions   */
+#include "skein_impl.h"                /* get internal definitions */
+
+/*
+ * External functions to process blkCnt (nonzero) full block(s) of data
+ * starting at blkPtr, one per Skein state size.  They are declared here for
+ * the callers below.
+ * NOTE(review): byteCntAdd looks like the number of bytes each block adds to
+ * the running byte count -- confirm against the block-function code.
+ */
+void Skein_256_Process_Block(Skein_256_Ctxt_t *ctx, const uint8_t *blkPtr,
+    size_t blkCnt, size_t byteCntAdd);
+void Skein_512_Process_Block(Skein_512_Ctxt_t *ctx, const uint8_t *blkPtr,
+    size_t blkCnt, size_t byteCntAdd);
+void Skein1024_Process_Block(Skein1024_Ctxt_t *ctx, const uint8_t *blkPtr,
+    size_t blkCnt, size_t byteCntAdd);
+
+/* 256-bit Skein */
+/*
+ * Init the context for a straight hashing operation.
+ *
+ * ctx         context to initialize
+ * hashBitLen  requested digest length in bits; asserted to be nonzero
+ *
+ * Records hashBitLen in the context and loads ctx->X with the initial
+ * chaining values: a precomputed IV for 256, 224, 160 or 128 bits (unless
+ * SKEIN_NO_PRECOMP is defined), otherwise values derived by processing a
+ * config block.  The context is then set up for message (MSG) data.
+ * Returns SKEIN_SUCCESS.
+ */
+int
+Skein_256_Init(Skein_256_Ctxt_t *ctx, size_t hashBitLen)
+{
+       union {
+               uint8_t b[SKEIN_256_STATE_BYTES];
+               uint64_t w[SKEIN_256_STATE_WORDS];
+       } cfg;                  /* config block */
+       /*
+        * NOTE(review): Skein_Assert is a macro defined outside this file;
+        * presumably it makes the function return SKEIN_BAD_HASHLEN when the
+        * condition fails -- confirm.
+        */
+       Skein_Assert(hashBitLen > 0, SKEIN_BAD_HASHLEN);
+       ctx->h.hashBitLen = hashBitLen; /* output hash bit count */
+
+       switch (hashBitLen) {   /* use pre-computed values, where available */
+#ifndef        SKEIN_NO_PRECOMP
+       case 256:
+               bcopy(SKEIN_256_IV_256, ctx->X, sizeof (ctx->X));
+               break;
+       case 224:
+               bcopy(SKEIN_256_IV_224, ctx->X, sizeof (ctx->X));
+               break;
+       case 160:
+               bcopy(SKEIN_256_IV_160, ctx->X, sizeof (ctx->X));
+               break;
+       case 128:
+               bcopy(SKEIN_256_IV_128, ctx->X, sizeof (ctx->X));
+               break;
+#endif
+       default:
+               /* here if there is no precomputed IV value available */
+               /*
+                * build/process the config block, type == CONFIG (could be
+                * precomputed)
+                */
+               /* set tweaks: T0=0; T1=CFG | FINAL */
+               Skein_Start_New_Type(ctx, CFG_FINAL);
+
+               /* set the schema, version */
+               cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER);
+               /* hash result length in bits */
+               cfg.w[1] = Skein_Swap64(hashBitLen);
+               cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL);
+               /* zero pad config block */
+               bzero(&cfg.w[3], sizeof (cfg) - 3 * sizeof (cfg.w[0]));
+
+               /* compute the initial chaining values from config block */
+               /* zero the chaining variables */
+               bzero(ctx->X, sizeof (ctx->X));
+               Skein_256_Process_Block(ctx, cfg.b, 1, SKEIN_CFG_STR_LEN);
+               break;
+       }
+       /*
+        * The chaining vars ctx->X are now initialized for the given
+        * hashBitLen.
+        * Set up to process the data message portion of the hash (default)
+        */
+       Skein_Start_New_Type(ctx, MSG); /* T0=0, T1= MSG type */
+
+       return (SKEIN_SUCCESS);
+}
+
+/* init the context for a MAC and/or tree hash operation */
+/*
+ * [identical to Skein_256_Init() when keyBytes == 0 &&
+ * treeInfo == SKEIN_CFG_TREE_INFO_SEQUENTIAL]
+ */
+int
+Skein_256_InitExt(Skein_256_Ctxt_t *ctx, size_t hashBitLen, uint64_t treeInfo,
+    const uint8_t *key, size_t keyBytes)
+{
+       union {
+               uint8_t b[SKEIN_256_STATE_BYTES];
+               uint64_t w[SKEIN_256_STATE_WORDS];
+       } cfg;                  /* config block */
+
+       Skein_Assert(hashBitLen > 0, SKEIN_BAD_HASHLEN);
+       Skein_Assert(keyBytes == 0 || key != NULL, SKEIN_FAIL);
+
+       /* compute the initial chaining values ctx->X[], based on key */
+       if (keyBytes == 0) {    /* is there a key? */
+               /* no key: use all zeroes as key for config block */
+               bzero(ctx->X, sizeof (ctx->X));
+       } else {                /* here to pre-process a key */
+
+               Skein_assert(sizeof (cfg.b) >= sizeof (ctx->X));
+               /* do a mini-Init right here */
+               /* set output hash bit count = state size */
+               ctx->h.hashBitLen = 8 * sizeof (ctx->X);
+               /* set tweaks: T0 = 0; T1 = KEY type */
+               Skein_Start_New_Type(ctx, KEY);
+               /* zero the initial chaining variables */
+               bzero(ctx->X, sizeof (ctx->X));
+               /* hash the key */
+               (void) Skein_256_Update(ctx, key, keyBytes);
+               /* put result into cfg.b[] */
+               (void) Skein_256_Final_Pad(ctx, cfg.b);
+               /* copy over into ctx->X[] */
+               bcopy(cfg.b, ctx->X, sizeof (cfg.b));
+#if    SKEIN_NEED_SWAP
+               {
+                       uint_t i;
+                       /* convert key bytes to context words */
+                       for (i = 0; i < SKEIN_256_STATE_WORDS; i++)
+                               ctx->X[i] = Skein_Swap64(ctx->X[i]);
+               }
+#endif
+       }
+       /*
+        * build/process the config block, type == CONFIG (could be
+        * precomputed for each key)
+        */
+       ctx->h.hashBitLen = hashBitLen; /* output hash bit count */
+       Skein_Start_New_Type(ctx, CFG_FINAL);
+
+       bzero(&cfg.w, sizeof (cfg.w));  /* pre-pad cfg.w[] with zeroes */
+       cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER);
+       cfg.w[1] = Skein_Swap64(hashBitLen);    /* hash result length in bits */
+       /* tree hash config info (or SKEIN_CFG_TREE_INFO_SEQUENTIAL) */
+       cfg.w[2] = Skein_Swap64(treeInfo);
+
+       Skein_Show_Key(256, &ctx->h, key, keyBytes);
+
+       /* compute the initial chaining values from config block */
+       Skein_256_Process_Block(ctx, cfg.b, 1, SKEIN_CFG_STR_LEN);
+
+       /* The chaining vars ctx->X are now initialized */
+       /* Set up to process the data message portion of the hash (default) */
+       ctx->h.bCnt = 0;        /* buffer b[] starts out empty */
+       Skein_Start_New_Type(ctx, MSG);
+
+       return (SKEIN_SUCCESS);
+}
+
+/*
+ * Process the input bytes.
+ *
+ * Absorbs msgByteCnt bytes from msg.  Data is staged in ctx->b, and a block
+ * is handed to Skein_256_Process_Block() only when more input follows it:
+ * after any non-empty call between 1 and SKEIN_256_BLOCK_BYTES bytes remain
+ * buffered, leaving the last block for the finalization step.
+ * Returns SKEIN_SUCCESS.
+ */
+int
+Skein_256_Update(Skein_256_Ctxt_t *ctx, const uint8_t *msg, size_t msgByteCnt)
+{
+       size_t n;
+
+       /* catch uninitialized context */
+       Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES, SKEIN_FAIL);
+
+       /* process full blocks, if any */
+       if (msgByteCnt + ctx->h.bCnt > SKEIN_256_BLOCK_BYTES) {
+               /* finish up any buffered message data */
+               if (ctx->h.bCnt) {
+                       /* # bytes free in buffer b[] */
+                       n = SKEIN_256_BLOCK_BYTES - ctx->h.bCnt;
+                       if (n) {
+                               /* check on our logic here */
+                               Skein_assert(n < msgByteCnt);
+                               bcopy(msg, &ctx->b[ctx->h.bCnt], n);
+                               msgByteCnt -= n;
+                               msg += n;
+                               ctx->h.bCnt += n;
+                       }
+                       Skein_assert(ctx->h.bCnt == SKEIN_256_BLOCK_BYTES);
+                       Skein_256_Process_Block(ctx, ctx->b, 1,
+                           SKEIN_256_BLOCK_BYTES);
+                       ctx->h.bCnt = 0;
+               }
+               /*
+                * now process any remaining full blocks, directly from input
+                * message data
+                */
+               if (msgByteCnt > SKEIN_256_BLOCK_BYTES) {
+                       /*
+                        * number of full blocks to process; the "- 1" keeps
+                        * at least one byte back for the buffer
+                        */
+                       n = (msgByteCnt - 1) / SKEIN_256_BLOCK_BYTES;
+                       Skein_256_Process_Block(ctx, msg, n,
+                           SKEIN_256_BLOCK_BYTES);
+                       msgByteCnt -= n * SKEIN_256_BLOCK_BYTES;
+                       msg += n * SKEIN_256_BLOCK_BYTES;
+               }
+               Skein_assert(ctx->h.bCnt == 0);
+       }
+
+       /* copy any remaining source message data bytes into b[] */
+       if (msgByteCnt) {
+               Skein_assert(msgByteCnt + ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES);
+               bcopy(msg, &ctx->b[ctx->h.bCnt], msgByteCnt);
+               ctx->h.bCnt += msgByteCnt;
+       }
+
+       return (SKEIN_SUCCESS);
+}
+
+/*
+ * Finalize the hash computation and output the result.
+ *
+ * Flags the buffered data as the final block, zero-pads it and processes
+ * it.  Output is then generated for (ctx->h.hashBitLen + 7) / 8 bytes, one
+ * block at a time: a 64-bit block counter is processed under the OUT_FINAL
+ * type, the resulting ctx->X words are written to hashVal, and the saved
+ * chaining values are restored before the next counter value.
+ * Returns SKEIN_SUCCESS.
+ */
+int
+Skein_256_Final(Skein_256_Ctxt_t *ctx, uint8_t *hashVal)
+{
+       size_t i, n, byteCnt;
+       uint64_t X[SKEIN_256_STATE_WORDS];
+
+       /* catch uninitialized context */
+       Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES, SKEIN_FAIL);
+
+       ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL;     /* tag as the final block */
+       /* zero pad b[] if necessary */
+       if (ctx->h.bCnt < SKEIN_256_BLOCK_BYTES)
+               bzero(&ctx->b[ctx->h.bCnt],
+                   SKEIN_256_BLOCK_BYTES - ctx->h.bCnt);
+
+       /* process the final block */
+       Skein_256_Process_Block(ctx, ctx->b, 1, ctx->h.bCnt);
+
+       /* now output the result */
+       /* total number of output bytes */
+       byteCnt = (ctx->h.hashBitLen + 7) >> 3;
+
+       /* run Threefish in "counter mode" to generate output */
+       /* zero out b[], so it can hold the counter */
+       bzero(ctx->b, sizeof (ctx->b));
+       /* keep a local copy of counter mode "key" */
+       bcopy(ctx->X, X, sizeof (X));
+       for (i = 0; i * SKEIN_256_BLOCK_BYTES < byteCnt; i++) {
+               /* build the counter block */
+               uint64_t tmp = Skein_Swap64((uint64_t)i);
+               bcopy(&tmp, ctx->b, sizeof (tmp));
+               Skein_Start_New_Type(ctx, OUT_FINAL);
+               /* run "counter mode" */
+               Skein_256_Process_Block(ctx, ctx->b, 1, sizeof (uint64_t));
+               /* number of output bytes left to go */
+               n = byteCnt - i * SKEIN_256_BLOCK_BYTES;
+               if (n >= SKEIN_256_BLOCK_BYTES)
+                       n = SKEIN_256_BLOCK_BYTES;
+               Skein_Put64_LSB_First(hashVal + i * SKEIN_256_BLOCK_BYTES,
+                   ctx->X, n); /* "output" the ctr mode bytes */
+               Skein_Show_Final(256, &ctx->h, n,
+                   hashVal + i * SKEIN_256_BLOCK_BYTES);
+               /* restore the counter mode key for next time */
+               bcopy(X, ctx->X, sizeof (X));
+       }
+       return (SKEIN_SUCCESS);
+}
+
+/* 512-bit Skein */
+
+/*
+ * Skein_512_Init: set up ctx for a straight hashing operation producing a
+ * hashBitLen-bit result.  Returns SKEIN_BAD_HASHLEN when hashBitLen is 0
+ * and SKEIN_SUCCESS otherwise.
+ */
+int
+Skein_512_Init(Skein_512_Ctxt_t *ctx, size_t hashBitLen)
+{
+       /* config block, addressable as bytes or as 64-bit words */
+       union {
+               uint8_t b[SKEIN_512_STATE_BYTES];
+               uint64_t w[SKEIN_512_STATE_WORDS];
+       } cfg;
+
+       Skein_Assert(hashBitLen > 0, SKEIN_BAD_HASHLEN);
+       /* remember the requested output size in bits */
+       ctx->h.hashBitLen = hashBitLen;
+
+       switch (hashBitLen) {
+#ifndef        SKEIN_NO_PRECOMP
+       /* these output sizes have precomputed initial chaining values */
+       case 224:
+               bcopy(SKEIN_512_IV_224, ctx->X, sizeof (ctx->X));
+               break;
+       case 256:
+               bcopy(SKEIN_512_IV_256, ctx->X, sizeof (ctx->X));
+               break;
+       case 384:
+               bcopy(SKEIN_512_IV_384, ctx->X, sizeof (ctx->X));
+               break;
+       case 512:
+               bcopy(SKEIN_512_IV_512, ctx->X, sizeof (ctx->X));
+               break;
+#endif
+       default:
+               /*
+                * No precomputed IV for this size: build a config block
+                * (type == CONFIG) and process it to derive the initial
+                * chaining values.
+                */
+               /* tweaks for the config block: T0=0; T1=CFG | FINAL */
+               Skein_Start_New_Type(ctx, CFG_FINAL);
+
+               /* clear the whole block so words 3 and up are zero padding */
+               bzero(&cfg, sizeof (cfg));
+               /* schema and version */
+               cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER);
+               /* hash result length in bits */
+               cfg.w[1] = Skein_Swap64(hashBitLen);
+               cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL);
+
+               /* the chaining variables start out as all zeroes */
+               bzero(ctx->X, sizeof (ctx->X));
+               Skein_512_Process_Block(ctx, cfg.b, 1, SKEIN_CFG_STR_LEN);
+               break;
+       }
+
+       /*
+        * ctx->X now holds the chaining values for this hashBitLen.
+        * Switch to the message type (T0=0, T1=MSG) so that the data
+        * portion of the hash can be processed next.
+        */
+       Skein_Start_New_Type(ctx, MSG);
+
+       return (SKEIN_SUCCESS);
+}
+
+/*
+ * Skein_512_InitExt: init the context for a MAC and/or tree hash operation
+ * [identical to Skein_512_Init() when keyBytes == 0 &&
+ * treeInfo == SKEIN_CFG_TREE_INFO_SEQUENTIAL]
+ *
+ *     ctx             context to set up
+ *     hashBitLen      digest size in bits; 0 returns SKEIN_BAD_HASHLEN
+ *     treeInfo        tree hash word placed in the config block
+ *     key, keyBytes   optional key; a NULL key with keyBytes != 0
+ *                     returns SKEIN_FAIL
+ *
+ * Returns SKEIN_SUCCESS with ctx ready to accept message data.
+ */
+int
+Skein_512_InitExt(Skein_512_Ctxt_t *ctx, size_t hashBitLen, uint64_t treeInfo,
+    const uint8_t *key, size_t keyBytes)
+{
+       union {
+               uint8_t b[SKEIN_512_STATE_BYTES];
+               uint64_t w[SKEIN_512_STATE_WORDS];
+       } cfg;                  /* config block */
+
+       Skein_Assert(hashBitLen > 0, SKEIN_BAD_HASHLEN);
+       Skein_Assert(keyBytes == 0 || key != NULL, SKEIN_FAIL);
+
+       /* compute the initial chaining values ctx->X[], based on key */
+       if (keyBytes == 0) {    /* is there a key? */
+               /* no key: use all zeroes as key for config block */
+               bzero(ctx->X, sizeof (ctx->X));
+       } else {                /* here to pre-process a key */
+
+               Skein_assert(sizeof (cfg.b) >= sizeof (ctx->X));
+               /* do a mini-Init right here */
+               /* set output hash bit count = state size */
+               ctx->h.hashBitLen = 8 * sizeof (ctx->X);
+               /* set tweaks: T0 = 0; T1 = KEY type */
+               Skein_Start_New_Type(ctx, KEY);
+               /* zero the initial chaining variables */
+               bzero(ctx->X, sizeof (ctx->X));
+               (void) Skein_512_Update(ctx, key, keyBytes); /* hash the key */
+               /* put result into cfg.b[] */
+               (void) Skein_512_Final_Pad(ctx, cfg.b);
+               /*
+                * Copy over into ctx->X[].  The length is bounded by the
+                * destination: the assertion above only guarantees that
+                * cfg.b is at least as large as ctx->X, not the same size.
+                */
+               bcopy(cfg.b, ctx->X, sizeof (ctx->X));
+#if    SKEIN_NEED_SWAP
+               {
+                       uint_t i;
+                       /* convert key bytes to context words */
+                       for (i = 0; i < SKEIN_512_STATE_WORDS; i++)
+                               ctx->X[i] = Skein_Swap64(ctx->X[i]);
+               }
+#endif
+       }
+       /*
+        * build/process the config block, type == CONFIG (could be
+        * precomputed for each key)
+        */
+       ctx->h.hashBitLen = hashBitLen; /* output hash bit count */
+       Skein_Start_New_Type(ctx, CFG_FINAL);
+
+       bzero(&cfg.w, sizeof (cfg.w));  /* pre-pad cfg.w[] with zeroes */
+       cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER);
+       cfg.w[1] = Skein_Swap64(hashBitLen);    /* hash result length in bits */
+       /* tree hash config info (or SKEIN_CFG_TREE_INFO_SEQUENTIAL) */
+       cfg.w[2] = Skein_Swap64(treeInfo);
+
+       Skein_Show_Key(512, &ctx->h, key, keyBytes);
+
+       /* compute the initial chaining values from config block */
+       Skein_512_Process_Block(ctx, cfg.b, 1, SKEIN_CFG_STR_LEN);
+
+       /* The chaining vars ctx->X are now initialized */
+       /* Set up to process the data message portion of the hash (default) */
+       ctx->h.bCnt = 0;        /* buffer b[] starts out empty */
+       Skein_Start_New_Type(ctx, MSG);
+
+       return (SKEIN_SUCCESS);
+}
+
+/*
+ * Skein_512_Update: absorb msgByteCnt bytes of msg into the hash state.
+ * Full blocks are processed immediately; the trailing 1 to
+ * SKEIN_512_BLOCK_BYTES bytes are always kept in ctx->b[] for a later call.
+ * Returns SKEIN_FAIL when ctx->h.bCnt exceeds the block size.
+ */
+int
+Skein_512_Update(Skein_512_Ctxt_t *ctx, const uint8_t *msg, size_t msgByteCnt)
+{
+       size_t room, nBlks;
+
+       /* an oversized buffer count indicates an uninitialized context */
+       Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES, SKEIN_FAIL);
+
+       /* more than one block's worth pending: some blocks can be run now */
+       if (msgByteCnt + ctx->h.bCnt > SKEIN_512_BLOCK_BYTES) {
+               /* first complete and flush whatever is already buffered */
+               if (ctx->h.bCnt != 0) {
+                       /* free space remaining in b[] */
+                       room = SKEIN_512_BLOCK_BYTES - ctx->h.bCnt;
+                       if (room != 0) {
+                               /* sanity check on the arithmetic above */
+                               Skein_assert(room < msgByteCnt);
+                               bcopy(msg, &ctx->b[ctx->h.bCnt], room);
+                               ctx->h.bCnt += room;
+                               msg += room;
+                               msgByteCnt -= room;
+                       }
+                       Skein_assert(ctx->h.bCnt == SKEIN_512_BLOCK_BYTES);
+                       Skein_512_Process_Block(ctx, ctx->b, 1,
+                           SKEIN_512_BLOCK_BYTES);
+                       ctx->h.bCnt = 0;
+               }
+               /*
+                * Run whole blocks straight out of the caller's buffer.
+                * Dividing (msgByteCnt - 1) leaves at least one byte
+                * unprocessed even when msgByteCnt is a block multiple.
+                */
+               if (msgByteCnt > SKEIN_512_BLOCK_BYTES) {
+                       nBlks = (msgByteCnt - 1) / SKEIN_512_BLOCK_BYTES;
+                       Skein_512_Process_Block(ctx, msg, nBlks,
+                           SKEIN_512_BLOCK_BYTES);
+                       msg += nBlks * SKEIN_512_BLOCK_BYTES;
+                       msgByteCnt -= nBlks * SKEIN_512_BLOCK_BYTES;
+               }
+               Skein_assert(ctx->h.bCnt == 0);
+       }
+
+       /* nothing left over: b[] is already up to date */
+       if (msgByteCnt == 0)
+               return (SKEIN_SUCCESS);
+
+       /* stash the remaining message bytes in b[] */
+       Skein_assert(msgByteCnt + ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES);
+       bcopy(msg, &ctx->b[ctx->h.bCnt], msgByteCnt);
+       ctx->h.bCnt += msgByteCnt;
+
+       return (SKEIN_SUCCESS);
+}
+
+/*
+ * Skein_512_Final: process the last buffered block and write the digest.
+ * The buffered bytes are zero padded to a full block, processed with the
+ * FINAL flag set, and then the output stage produces
+ * (ctx->h.hashBitLen + 7) / 8 bytes into hashVal.
+ * Returns SKEIN_FAIL when ctx->h.bCnt exceeds the block size.
+ */
+int
+Skein_512_Final(Skein_512_Ctxt_t *ctx, uint8_t *hashVal)
+{
+       uint64_t savedX[SKEIN_512_STATE_WORDS];
+       size_t blkNum, offset, outBytes, chunk;
+
+       /* an oversized buffer count indicates an uninitialized context */
+       Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES, SKEIN_FAIL);
+
+       /* the buffered data is the last block of the message */
+       ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL;
+       /* fill the unused tail of b[] with zeroes */
+       if (ctx->h.bCnt < SKEIN_512_BLOCK_BYTES) {
+               bzero(&ctx->b[ctx->h.bCnt],
+                   SKEIN_512_BLOCK_BYTES - ctx->h.bCnt);
+       }
+       Skein_512_Process_Block(ctx, ctx->b, 1, ctx->h.bCnt);
+
+       /* digest length in bytes, rounding a partial byte up */
+       outBytes = (ctx->h.hashBitLen + 7) >> 3;
+
+       /*
+        * Output stage: Threefish in "counter mode".  b[] is cleared so it
+        * can carry the counter, and the chaining variables are saved
+        * because they serve as the key for every output block.
+        */
+       bzero(ctx->b, sizeof (ctx->b));
+       bcopy(ctx->X, savedX, sizeof (savedX));
+       for (blkNum = 0, offset = 0; offset < outBytes;
+           blkNum++, offset += SKEIN_512_BLOCK_BYTES) {
+               /* place the block counter at the start of b[] */
+               uint64_t ctr = Skein_Swap64((uint64_t)blkNum);
+               bcopy(&ctr, ctx->b, sizeof (ctr));
+               Skein_Start_New_Type(ctx, OUT_FINAL);
+               Skein_512_Process_Block(ctx, ctx->b, 1, sizeof (uint64_t));
+               /* bytes still owed, capped at one block */
+               chunk = outBytes - offset;
+               if (chunk > SKEIN_512_BLOCK_BYTES)
+                       chunk = SKEIN_512_BLOCK_BYTES;
+               /* "output" the ctr mode bytes */
+               Skein_Put64_LSB_First(hashVal + offset, ctx->X, chunk);
+               Skein_Show_Final(512, &ctx->h, chunk, hashVal + offset);
+               /* reinstate the saved key for the next counter value */
+               bcopy(savedX, ctx->X, sizeof (savedX));
+       }
+       return (SKEIN_SUCCESS);
+}
+
+/* 1024-bit Skein */
+
+/*
+ * Skein1024_Init: set up ctx for a straight hashing operation producing a
+ * hashBitLen-bit result.  Returns SKEIN_BAD_HASHLEN when hashBitLen is 0
+ * and SKEIN_SUCCESS otherwise.
+ */
+int
+Skein1024_Init(Skein1024_Ctxt_t *ctx, size_t hashBitLen)
+{
+       /* config block, addressable as bytes or as 64-bit words */
+       union {
+               uint8_t b[SKEIN1024_STATE_BYTES];
+               uint64_t w[SKEIN1024_STATE_WORDS];
+       } cfg;
+
+       Skein_Assert(hashBitLen > 0, SKEIN_BAD_HASHLEN);
+       /* remember the requested output size in bits */
+       ctx->h.hashBitLen = hashBitLen;
+
+       switch (hashBitLen) {
+#ifndef        SKEIN_NO_PRECOMP
+       /* these output sizes have precomputed initial chaining values */
+       case 384:
+               bcopy(SKEIN1024_IV_384, ctx->X, sizeof (ctx->X));
+               break;
+       case 512:
+               bcopy(SKEIN1024_IV_512, ctx->X, sizeof (ctx->X));
+               break;
+       case 1024:
+               bcopy(SKEIN1024_IV_1024, ctx->X, sizeof (ctx->X));
+               break;
+#endif
+       default:
+               /*
+                * No precomputed IV for this size: build a config block
+                * (type == CONFIG) and process it to derive the initial
+                * chaining values.
+                */
+               /* tweaks for the config block: T0=0; T1=CFG | FINAL */
+               Skein_Start_New_Type(ctx, CFG_FINAL);
+
+               /* clear the whole block so words 3 and up are zero padding */
+               bzero(&cfg, sizeof (cfg));
+               /* schema and version */
+               cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER);
+               /* hash result length in bits */
+               cfg.w[1] = Skein_Swap64(hashBitLen);
+               cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL);
+
+               /* the chaining variables start out as all zeroes */
+               bzero(ctx->X, sizeof (ctx->X));
+               Skein1024_Process_Block(ctx, cfg.b, 1, SKEIN_CFG_STR_LEN);
+               break;
+       }
+
+       /*
+        * ctx->X now holds the chaining values for this hashBitLen.
+        * Switch to the message type (T0=0, T1=MSG) so that the data
+        * portion of the hash can be processed next.
+        */
+       Skein_Start_New_Type(ctx, MSG);
+
+       return (SKEIN_SUCCESS);
+}
+
+/*
+ * Skein1024_InitExt: init the context for a MAC and/or tree hash operation
+ * [identical to Skein1024_Init() when keyBytes == 0 &&
+ * treeInfo == SKEIN_CFG_TREE_INFO_SEQUENTIAL]
+ *
+ *     ctx             context to set up
+ *     hashBitLen      digest size in bits; 0 returns SKEIN_BAD_HASHLEN
+ *     treeInfo        tree hash word placed in the config block
+ *     key, keyBytes   optional key; a NULL key with keyBytes != 0
+ *                     returns SKEIN_FAIL
+ *
+ * Returns SKEIN_SUCCESS with ctx ready to accept message data.
+ */
+int
+Skein1024_InitExt(Skein1024_Ctxt_t *ctx, size_t hashBitLen, uint64_t treeInfo,
+    const uint8_t *key, size_t keyBytes)
+{
+       union {
+               uint8_t b[SKEIN1024_STATE_BYTES];
+               uint64_t w[SKEIN1024_STATE_WORDS];
+       } cfg;                  /* config block */
+
+       Skein_Assert(hashBitLen > 0, SKEIN_BAD_HASHLEN);
+       Skein_Assert(keyBytes == 0 || key != NULL, SKEIN_FAIL);
+
+       /* compute the initial chaining values ctx->X[], based on key */
+       if (keyBytes == 0) {    /* is there a key? */
+               /* no key: use all zeroes as key for config block */
+               bzero(ctx->X, sizeof (ctx->X));
+       } else {                /* here to pre-process a key */
+               Skein_assert(sizeof (cfg.b) >= sizeof (ctx->X));
+               /* do a mini-Init right here */
+               /* set output hash bit count = state size */
+               ctx->h.hashBitLen = 8 * sizeof (ctx->X);
+               /* set tweaks: T0 = 0; T1 = KEY type */
+               Skein_Start_New_Type(ctx, KEY);
+               /* zero the initial chaining variables */
+               bzero(ctx->X, sizeof (ctx->X));
+               (void) Skein1024_Update(ctx, key, keyBytes); /* hash the key */
+               /* put result into cfg.b[] */
+               (void) Skein1024_Final_Pad(ctx, cfg.b);
+               /*
+                * Copy over into ctx->X[].  The length is bounded by the
+                * destination: the assertion above only guarantees that
+                * cfg.b is at least as large as ctx->X, not the same size.
+                */
+               bcopy(cfg.b, ctx->X, sizeof (ctx->X));
+#if    SKEIN_NEED_SWAP
+               {
+                       uint_t i;
+                       /* convert key bytes to context words */
+                       for (i = 0; i < SKEIN1024_STATE_WORDS; i++)
+                               ctx->X[i] = Skein_Swap64(ctx->X[i]);
+               }
+#endif
+       }
+       /*
+        * build/process the config block, type == CONFIG (could be
+        * precomputed for each key)
+        */
+       ctx->h.hashBitLen = hashBitLen; /* output hash bit count */
+       Skein_Start_New_Type(ctx, CFG_FINAL);
+
+       bzero(&cfg.w, sizeof (cfg.w));  /* pre-pad cfg.w[] with zeroes */
+       cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER);
+       /* hash result length in bits */
+       cfg.w[1] = Skein_Swap64(hashBitLen);
+       /* tree hash config info (or SKEIN_CFG_TREE_INFO_SEQUENTIAL) */
+       cfg.w[2] = Skein_Swap64(treeInfo);
+
+       Skein_Show_Key(1024, &ctx->h, key, keyBytes);
+
+       /* compute the initial chaining values from config block */
+       Skein1024_Process_Block(ctx, cfg.b, 1, SKEIN_CFG_STR_LEN);
+
+       /* The chaining vars ctx->X are now initialized */
+       /* Set up to process the data message portion of the hash (default) */
+       ctx->h.bCnt = 0;        /* buffer b[] starts out empty */
+       Skein_Start_New_Type(ctx, MSG);
+
+       return (SKEIN_SUCCESS);
+}
+
+/*
+ * Skein1024_Update: absorb msgByteCnt bytes of msg into the hash state.
+ * Full blocks are processed immediately; the trailing 1 to
+ * SKEIN1024_BLOCK_BYTES bytes are always kept in ctx->b[] for a later call.
+ * Returns SKEIN_FAIL when ctx->h.bCnt exceeds the block size.
+ */
+int
+Skein1024_Update(Skein1024_Ctxt_t *ctx, const uint8_t *msg, size_t msgByteCnt)
+{
+       size_t room, nBlks;
+
+       /* an oversized buffer count indicates an uninitialized context */
+       Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES, SKEIN_FAIL);
+
+       /* more than one block's worth pending: some blocks can be run now */
+       if (msgByteCnt + ctx->h.bCnt > SKEIN1024_BLOCK_BYTES) {
+               /* first complete and flush whatever is already buffered */
+               if (ctx->h.bCnt != 0) {
+                       /* free space remaining in b[] */
+                       room = SKEIN1024_BLOCK_BYTES - ctx->h.bCnt;
+                       if (room != 0) {
+                               /* sanity check on the arithmetic above */
+                               Skein_assert(room < msgByteCnt);
+                               bcopy(msg, &ctx->b[ctx->h.bCnt], room);
+                               ctx->h.bCnt += room;
+                               msg += room;
+                               msgByteCnt -= room;
+                       }
+                       Skein_assert(ctx->h.bCnt == SKEIN1024_BLOCK_BYTES);
+                       Skein1024_Process_Block(ctx, ctx->b, 1,
+                           SKEIN1024_BLOCK_BYTES);
+                       ctx->h.bCnt = 0;
+               }
+               /*
+                * Run whole blocks straight out of the caller's buffer.
+                * Dividing (msgByteCnt - 1) leaves at least one byte
+                * unprocessed even when msgByteCnt is a block multiple.
+                */
+               if (msgByteCnt > SKEIN1024_BLOCK_BYTES) {
+                       nBlks = (msgByteCnt - 1) / SKEIN1024_BLOCK_BYTES;
+                       Skein1024_Process_Block(ctx, msg, nBlks,
+                           SKEIN1024_BLOCK_BYTES);
+                       msg += nBlks * SKEIN1024_BLOCK_BYTES;
+                       msgByteCnt -= nBlks * SKEIN1024_BLOCK_BYTES;
+               }
+               Skein_assert(ctx->h.bCnt == 0);
+       }
+
+       /* nothing left over: b[] is already up to date */
+       if (msgByteCnt == 0)
+               return (SKEIN_SUCCESS);
+
+       /* stash the remaining message bytes in b[] */
+       Skein_assert(msgByteCnt + ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES);
+       bcopy(msg, &ctx->b[ctx->h.bCnt], msgByteCnt);
+       ctx->h.bCnt += msgByteCnt;
+
+       return (SKEIN_SUCCESS);
+}
+
+/*
+ * Skein1024_Final: process the last buffered block and write the digest.
+ * The buffered bytes are zero padded to a full block, processed with the
+ * FINAL flag set, and then the output stage produces
+ * (ctx->h.hashBitLen + 7) / 8 bytes into hashVal.
+ * Returns SKEIN_FAIL when ctx->h.bCnt exceeds the block size.
+ */
+int
+Skein1024_Final(Skein1024_Ctxt_t *ctx, uint8_t *hashVal)
+{
+       uint64_t savedX[SKEIN1024_STATE_WORDS];
+       size_t blkNum, offset, outBytes, chunk;
+
+       /* an oversized buffer count indicates an uninitialized context */
+       Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES, SKEIN_FAIL);
+
+       /* the buffered data is the last block of the message */
+       ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL;
+       /* fill the unused tail of b[] with zeroes */
+       if (ctx->h.bCnt < SKEIN1024_BLOCK_BYTES) {
+               bzero(&ctx->b[ctx->h.bCnt],
+                   SKEIN1024_BLOCK_BYTES - ctx->h.bCnt);
+       }
+       Skein1024_Process_Block(ctx, ctx->b, 1, ctx->h.bCnt);
+
+       /* digest length in bytes, rounding a partial byte up */
+       outBytes = (ctx->h.hashBitLen + 7) >> 3;
+
+       /*
+        * Output stage: Threefish in "counter mode".  b[] is cleared so it
+        * can carry the counter, and the chaining variables are saved
+        * because they serve as the key for every output block.
+        */
+       bzero(ctx->b, sizeof (ctx->b));
+       bcopy(ctx->X, savedX, sizeof (savedX));
+       for (blkNum = 0, offset = 0; offset < outBytes;
+           blkNum++, offset += SKEIN1024_BLOCK_BYTES) {
+               /* place the block counter at the start of b[] */
+               uint64_t ctr = Skein_Swap64((uint64_t)blkNum);
+               bcopy(&ctr, ctx->b, sizeof (ctr));
+               Skein_Start_New_Type(ctx, OUT_FINAL);
+               Skein1024_Process_Block(ctx, ctx->b, 1, sizeof (uint64_t));
+               /* bytes still owed, capped at one block */
+               chunk = outBytes - offset;
+               if (chunk > SKEIN1024_BLOCK_BYTES)
+                       chunk = SKEIN1024_BLOCK_BYTES;
+               /* "output" the ctr mode bytes */
+               Skein_Put64_LSB_First(hashVal + offset, ctx->X, chunk);
+               Skein_Show_Final(1024, &ctx->h, chunk, hashVal + offset);
+               /* reinstate the saved key for the next counter value */
+               bcopy(savedX, ctx->X, sizeof (savedX));
+       }
+       return (SKEIN_SUCCESS);
+}
+
+/* Functions to support MAC/tree hashing */
+/* (this code is identical for Optimized and Reference versions) */
+
+/*
+ * Skein_256_Final_Pad: process the last buffered block and write
+ * SKEIN_256_BLOCK_BYTES bytes of ctx->X to hashVal through
+ * Skein_Put64_LSB_First(); the OUTPUT stage is not run.
+ * Returns SKEIN_FAIL when ctx->h.bCnt exceeds the block size.
+ */
+int
+Skein_256_Final_Pad(Skein_256_Ctxt_t *ctx, uint8_t *hashVal)
+{
+       /* an oversized buffer count indicates an uninitialized context */
+       Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES, SKEIN_FAIL);
+
+       /* fill the unused tail of b[] with zeroes */
+       if (ctx->h.bCnt < SKEIN_256_BLOCK_BYTES) {
+               bzero(&ctx->b[ctx->h.bCnt],
+                   SKEIN_256_BLOCK_BYTES - ctx->h.bCnt);
+       }
+       /* the buffered data is the last block of the message */
+       ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL;
+       Skein_256_Process_Block(ctx, ctx->b, 1, ctx->h.bCnt);
+
+       /* hand the state bytes back to the caller */
+       Skein_Put64_LSB_First(hashVal, ctx->X, SKEIN_256_BLOCK_BYTES);
+
+       return (SKEIN_SUCCESS);
+}
+
+/*
+ * Skein_512_Final_Pad: process the last buffered block and write
+ * SKEIN_512_BLOCK_BYTES bytes of ctx->X to hashVal through
+ * Skein_Put64_LSB_First(); the OUTPUT stage is not run.
+ * Returns SKEIN_FAIL when ctx->h.bCnt exceeds the block size.
+ */
+int
+Skein_512_Final_Pad(Skein_512_Ctxt_t *ctx, uint8_t *hashVal)
+{
+       /* an oversized buffer count indicates an uninitialized context */
+       Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES, SKEIN_FAIL);
+
+       /* fill the unused tail of b[] with zeroes */
+       if (ctx->h.bCnt < SKEIN_512_BLOCK_BYTES) {
+               bzero(&ctx->b[ctx->h.bCnt],
+                   SKEIN_512_BLOCK_BYTES - ctx->h.bCnt);
+       }
+       /* the buffered data is the last block of the message */
+       ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL;
+       Skein_512_Process_Block(ctx, ctx->b, 1, ctx->h.bCnt);
+
+       /* hand the state bytes back to the caller */
+       Skein_Put64_LSB_First(hashVal, ctx->X, SKEIN_512_BLOCK_BYTES);
+
+       return (SKEIN_SUCCESS);
+}
+
+/*
+ * Skein1024_Final_Pad: process the last buffered block and write
+ * SKEIN1024_BLOCK_BYTES bytes of ctx->X to hashVal through
+ * Skein_Put64_LSB_First(); the OUTPUT stage is not run.
+ * Returns SKEIN_FAIL when ctx->h.bCnt exceeds the block size.
+ */
+int
+Skein1024_Final_Pad(Skein1024_Ctxt_t *ctx, uint8_t *hashVal)
+{
+       /* an oversized buffer count indicates an uninitialized context */
+       Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES, SKEIN_FAIL);
+
+       /* fill the unused tail of b[] with zeroes */
+       if (ctx->h.bCnt < SKEIN1024_BLOCK_BYTES) {
+               bzero(&ctx->b[ctx->h.bCnt],
+                   SKEIN1024_BLOCK_BYTES - ctx->h.bCnt);
+       }
+       /* the buffered data is the last block of the message */
+       ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL;
+       Skein1024_Process_Block(ctx, ctx->b, 1, ctx->h.bCnt);
+
+       /* hand the state bytes back to the caller */
+       Skein_Put64_LSB_First(hashVal, ctx->X, SKEIN1024_BLOCK_BYTES);
+
+       return (SKEIN_SUCCESS);
+}
+
+#if    SKEIN_TREE_HASH
+/*
+ * Skein_256_Output: run only the OUTPUT stage, writing
+ * (ctx->h.hashBitLen + 7) / 8 bytes into hashVal.  ctx->X is left as it
+ * was on entry; ctx->b[] is overwritten with the counter block.
+ * Returns SKEIN_FAIL when ctx->h.bCnt exceeds the block size.
+ */
+int
+Skein_256_Output(Skein_256_Ctxt_t *ctx, uint8_t *hashVal)
+{
+       uint64_t savedX[SKEIN_256_STATE_WORDS];
+       size_t blkNum, offset, outBytes, chunk;
+
+       /* an oversized buffer count indicates an uninitialized context */
+       Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES, SKEIN_FAIL);
+
+       /* digest length in bytes, rounding a partial byte up */
+       outBytes = (ctx->h.hashBitLen + 7) >> 3;
+
+       /*
+        * Threefish in "counter mode": b[] is cleared so it can carry the
+        * counter, and the chaining variables are saved because they serve
+        * as the key for every output block.
+        */
+       bzero(ctx->b, sizeof (ctx->b));
+       bcopy(ctx->X, savedX, sizeof (savedX));
+       for (blkNum = 0, offset = 0; offset < outBytes;
+           blkNum++, offset += SKEIN_256_BLOCK_BYTES) {
+               /* place the block counter at the start of b[] */
+               uint64_t ctr = Skein_Swap64((uint64_t)blkNum);
+               bcopy(&ctr, ctx->b, sizeof (ctr));
+               Skein_Start_New_Type(ctx, OUT_FINAL);
+               Skein_256_Process_Block(ctx, ctx->b, 1, sizeof (uint64_t));
+               /* bytes still owed, capped at one block */
+               chunk = outBytes - offset;
+               if (chunk > SKEIN_256_BLOCK_BYTES)
+                       chunk = SKEIN_256_BLOCK_BYTES;
+               /* "output" the ctr mode bytes */
+               Skein_Put64_LSB_First(hashVal + offset, ctx->X, chunk);
+               Skein_Show_Final(256, &ctx->h, chunk, hashVal + offset);
+               /* reinstate the saved key for the next counter value */
+               bcopy(savedX, ctx->X, sizeof (savedX));
+       }
+       return (SKEIN_SUCCESS);
+}
+
+/*
+ * Skein_512_Output: just do the OUTPUT stage, writing
+ * (ctx->h.hashBitLen + 7) / 8 bytes into hashVal.  ctx->X is restored
+ * after each output block; ctx->b[] is overwritten with the counter block.
+ * Returns SKEIN_FAIL when ctx->h.bCnt exceeds the block size.
+ */
+int
+Skein_512_Output(Skein_512_Ctxt_t *ctx, uint8_t *hashVal)
+{
+       size_t i, n, byteCnt;
+       uint64_t X[SKEIN_512_STATE_WORDS];
+
+       /* catch uninitialized context */
+       Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES, SKEIN_FAIL);
+
+       /* now output the result */
+       /* total number of output bytes */
+       byteCnt = (ctx->h.hashBitLen + 7) >> 3;
+
+       /* run Threefish in "counter mode" to generate output */
+       /* zero out b[], so it can hold the counter */
+       bzero(ctx->b, sizeof (ctx->b));
+       /* keep a local copy of counter mode "key" */
+       bcopy(ctx->X, X, sizeof (X));
+       for (i = 0; i * SKEIN_512_BLOCK_BYTES < byteCnt; i++) {
+               /* build the counter block */
+               uint64_t tmp = Skein_Swap64((uint64_t)i);
+               bcopy(&tmp, ctx->b, sizeof (tmp));
+               Skein_Start_New_Type(ctx, OUT_FINAL);
+               /* run "counter mode" */
+               Skein_512_Process_Block(ctx, ctx->b, 1, sizeof (uint64_t));
+               /* number of output bytes left to go */
+               n = byteCnt - i * SKEIN_512_BLOCK_BYTES;
+               if (n >= SKEIN_512_BLOCK_BYTES)
+                       n = SKEIN_512_BLOCK_BYTES;
+               Skein_Put64_LSB_First(hashVal + i * SKEIN_512_BLOCK_BYTES,
+                   ctx->X, n); /* "output" the ctr mode bytes */
+               /* report the 512-bit variant, matching Skein_512_Final() */
+               Skein_Show_Final(512, &ctx->h, n,
+                   hashVal + i * SKEIN_512_BLOCK_BYTES);
+               /* restore the counter mode key for next time */
+               bcopy(X, ctx->X, sizeof (X));
+       }
+       return (SKEIN_SUCCESS);
+}
+
+/*
+ * Skein1024_Output: just do the OUTPUT stage, writing
+ * (ctx->h.hashBitLen + 7) / 8 bytes into hashVal.  ctx->X is restored
+ * after each output block; ctx->b[] is overwritten with the counter block.
+ * Returns SKEIN_FAIL when ctx->h.bCnt exceeds the block size.
+ */
+int
+Skein1024_Output(Skein1024_Ctxt_t *ctx, uint8_t *hashVal)
+{
+       size_t i, n, byteCnt;
+       uint64_t X[SKEIN1024_STATE_WORDS];
+
+       /* catch uninitialized context */
+       Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES, SKEIN_FAIL);
+
+       /* now output the result */
+       /* total number of output bytes */
+       byteCnt = (ctx->h.hashBitLen + 7) >> 3;
+
+       /* run Threefish in "counter mode" to generate output */
+       /* zero out b[], so it can hold the counter */
+       bzero(ctx->b, sizeof (ctx->b));
+       /* keep a local copy of counter mode "key" */
+       bcopy(ctx->X, X, sizeof (X));
+       for (i = 0; i * SKEIN1024_BLOCK_BYTES < byteCnt; i++) {
+               /* build the counter block */
+               uint64_t tmp = Skein_Swap64((uint64_t)i);
+               bcopy(&tmp, ctx->b, sizeof (tmp));
+               Skein_Start_New_Type(ctx, OUT_FINAL);
+               /* run "counter mode" */
+               Skein1024_Process_Block(ctx, ctx->b, 1, sizeof (uint64_t));
+               /* number of output bytes left to go */
+               n = byteCnt - i * SKEIN1024_BLOCK_BYTES;
+               if (n >= SKEIN1024_BLOCK_BYTES)
+                       n = SKEIN1024_BLOCK_BYTES;
+               Skein_Put64_LSB_First(hashVal + i * SKEIN1024_BLOCK_BYTES,
+                   ctx->X, n); /* "output" the ctr mode bytes */
+               /* report the 1024-bit variant, matching Skein1024_Final() */
+               Skein_Show_Final(1024, &ctx->h, n,
+                   hashVal + i * SKEIN1024_BLOCK_BYTES);
+               /* restore the counter mode key for next time */
+               bcopy(X, ctx->X, sizeof (X));
+       }
+       return (SKEIN_SUCCESS);
+}
+#endif
+
+#ifdef _KERNEL
+/*
+ * Kernel builds export only the Skein-512 Init/InitExt/Update/Final entry
+ * points; no 256-bit or 1024-bit function is exported here.
+ */
+EXPORT_SYMBOL(Skein_512_Init);
+EXPORT_SYMBOL(Skein_512_InitExt);
+EXPORT_SYMBOL(Skein_512_Update);
+EXPORT_SYMBOL(Skein_512_Final);
+#endif
diff --git a/module/icp/algs/skein/skein_block.c b/module/icp/algs/skein/skein_block.c
new file mode 100644 (file)
index 0000000..d2e8119
--- /dev/null
@@ -0,0 +1,793 @@
+/*
+ * Implementation of the Skein block functions.
+ * Source code author: Doug Whiting, 2008.
+ * This algorithm and source code is released to the public domain.
+ * Compile-time switches:
+ *  SKEIN_USE_ASM  -- set bits (256/512/1024) to select which
+ *                    versions use ASM code for block processing
+ *                    [default: use C for all block sizes]
+ */
+/* Copyright 2013 Doug Whiting. This code is released to the public domain. */
+
+#include <sys/skein.h>
+#include "skein_impl.h"
+#include <sys/isa_defs.h>      /* for _ILP32 */
+
+#ifndef        SKEIN_USE_ASM
+#define        SKEIN_USE_ASM   (0)     /* default is all C code (no ASM) */
+#endif
+
+#ifndef        SKEIN_LOOP
+/*
+ * The low-level checksum routines use a lot of stack space. On systems where
+ * small stack frames are enforced (like 32-bit kernel builds), do not unroll
+ * checksum calculations, in order to save stack space.
+ *
+ * Even with no loops unrolled, we can still exceed the 1k stack frame limit
+ * in Skein1024_Process_Block() (it hits 1272 bytes on ARM32).  We can
+ * safely ignore that though, since the checksum functions will be called
+ * from a worker thread that won't be using much stack.  That's why we have
+ * the #pragma here to ignore the warning.
+ */
+#if defined(_ILP32) || defined(__powerpc)      /* Assume small stack */
+#pragma GCC diagnostic ignored "-Wframe-larger-than="
+/*
+ * Small-stack build (_ILP32 or __powerpc): don't unroll, to save stack space
+ *
+ * Due to the way the calculations on SKEIN_LOOP are done in
+ * Skein_*_Process_Block(), a value of 111 disables unrolling loops
+ * in any of those functions.
+ */
+#define        SKEIN_LOOP 111
+#else
+/* We're compiling with large stacks */
+#define        SKEIN_LOOP 001          /* default: unroll 256 and 512, but not 1024 */
+#endif
+#endif
+
+/* helper macros; they expect WCNT and kw[] from the enclosing function */
+#define        BLK_BITS        (WCNT*64)
+#define        KW_TWK_BASE     (0)     /* tweak words start at kw[0] */
+#define        KW_KEY_BASE     (3)     /* key schedule words start at kw[3] */
+#define        ks              (kw + KW_KEY_BASE)      /* key schedule view of kw[] */
+#define        ts              (kw + KW_TWK_BASE)      /* tweak view of kw[] */
+
+/* no debugging in Illumos version */
+#define        DebugSaveTweak(ctx)
+
+/* Skein_256 */
+#if    !(SKEIN_USE_ASM & 256)
+
+void
+Skein_256_Process_Block(Skein_256_Ctxt_t *ctx, const uint8_t *blkPtr,
+    size_t blkCnt, size_t byteCntAdd)
+{      /* C version: absorb blkCnt blocks read from blkPtr into ctx->X */
+       enum {
+               WCNT = SKEIN_256_STATE_WORDS
+       };
+#undef  RCNT   /* each Process_Block variant defines its own RCNT */
+#define        RCNT  (SKEIN_256_ROUNDS_TOTAL / 8)      /* count of 8-round groups */
+
+#ifdef SKEIN_LOOP              /* configure how much to unroll the loop */
+#define        SKEIN_UNROLL_256 (((SKEIN_LOOP) / 100) % 10)    /* hundreds digit */
+#else
+#define        SKEIN_UNROLL_256 (0)    /* 0 selects the fully unrolled code */
+#endif
+
+#if    SKEIN_UNROLL_256
+#if    (RCNT % SKEIN_UNROLL_256)
+#error "Invalid SKEIN_UNROLL_256"      /* sanity check on unroll count */
+#endif
+       size_t r;       /* loop counter of the looping version */
+       /* key schedule words : chaining vars + tweak + "rotation" */
+       uint64_t kw[WCNT + 4 + RCNT * 2];
+#else
+       uint64_t kw[WCNT + 4];  /* key schedule words : chaining vars + tweak */
+#endif
+       /* local copy of context vars, for speed */
+       uint64_t X0, X1, X2, X3;
+       uint64_t w[WCNT];               /* local copy of input block */
+#ifdef SKEIN_DEBUG
+       /* use for debugging (help compiler put Xn in registers) */
+       const uint64_t *Xptr[4];
+       Xptr[0] = &X0;
+       Xptr[1] = &X1;
+       Xptr[2] = &X2;
+       Xptr[3] = &X3;
+#endif
+       Skein_assert(blkCnt != 0);      /* never call with blkCnt == 0! */
+       ts[0] = ctx->h.T[0];    /* work on a local copy of the tweak */
+       ts[1] = ctx->h.T[1];
+       do {
+               /*
+                * this implementation only supports 2**64 input bytes
+                * (no carry out here)
+                */
+               ts[0] += byteCntAdd;    /* update processed length */
+
+               /* precompute the key schedule for this block */
+               ks[0] = ctx->X[0];
+               ks[1] = ctx->X[1];
+               ks[2] = ctx->X[2];
+               ks[3] = ctx->X[3];
+               ks[4] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^ SKEIN_KS_PARITY;
+
+               ts[2] = ts[0] ^ ts[1];
+
+               /* get input block in little-endian format */
+               Skein_Get64_LSB_First(w, blkPtr, WCNT);
+               DebugSaveTweak(ctx);
+               Skein_Show_Block(BLK_BITS, &ctx->h, ctx->X, blkPtr, w, ks, ts);
+
+               X0 = w[0] + ks[0];      /* do the first full key injection */
+               X1 = w[1] + ks[1] + ts[0];
+               X2 = w[2] + ks[2] + ts[1];
+               X3 = w[3] + ks[3];
+
+               Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INITIAL,
+                   Xptr);      /* show starting state values */
+
+               blkPtr += SKEIN_256_BLOCK_BYTES;        /* w[] already holds this block */
+
+               /* run the rounds */
+
+#define        Round256(p0, p1, p2, p3, ROT, rNum)                          \
+    X##p0 += X##p1; X##p1 = RotL_64(X##p1, ROT##_0); X##p1 ^= X##p0; \
+    X##p2 += X##p3; X##p3 = RotL_64(X##p3, ROT##_1); X##p3 ^= X##p2; \
+
+#if    SKEIN_UNROLL_256 == 0
+#define        R256(p0, p1, p2, p3, ROT, rNum)         /* fully unrolled */    \
+    Round256(p0, p1, p2, p3, ROT, rNum)                                        \
+    Skein_Show_R_Ptr(BLK_BITS, &ctx->h, rNum, Xptr);
+
+#define        I256(R)                                                         \
+    X0 += ks[((R) + 1) % 5];   /* inject the key schedule value */     \
+    X1 += ks[((R) + 2) % 5] + ts[((R) + 1) % 3];                       \
+    X2 += ks[((R) + 3) % 5] + ts[((R) + 2) % 3];                       \
+    X3 += ks[((R) + 4) % 5] + (R) + 1;                                 \
+    Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr);
+#else                          /* looping version */
+#define        R256(p0, p1, p2, p3, ROT, rNum)                             \
+    Round256(p0, p1, p2, p3, ROT, rNum)                             \
+    Skein_Show_R_Ptr(BLK_BITS, &ctx->h, 4 * (r - 1) + rNum, Xptr);
+
+#define        I256(R)                                                         \
+       X0 += ks[r + (R) + 0];  /* inject the key schedule value */     \
+       X1 += ks[r + (R) + 1] + ts[r + (R) + 0];                        \
+       X2 += ks[r + (R) + 2] + ts[r + (R) + 1];                        \
+       X3 += ks[r + (R) + 3] + r + (R);                                \
+       ks[r + (R) + 4] = ks[r + (R) - 1];   /* rotate key schedule */  \
+    ts[r + (R) + 2] = ts[r + (R) - 1]; /* reuses slot of ks[r+(R)-1] */ \
+    Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr);
+
+               /* loop thru it */
+               for (r = 1; r < 2 * RCNT; r += 2 * SKEIN_UNROLL_256)
+#endif
+               {
+#define        R256_8_rounds(R)                         \
+       R256(0, 1, 2, 3, R_256_0, 8 * (R) + 1);  \
+       R256(0, 3, 2, 1, R_256_1, 8 * (R) + 2);  \
+       R256(0, 1, 2, 3, R_256_2, 8 * (R) + 3);  \
+       R256(0, 3, 2, 1, R_256_3, 8 * (R) + 4);  \
+       I256(2 * (R));                           \
+       R256(0, 1, 2, 3, R_256_4, 8 * (R) + 5);  \
+       R256(0, 3, 2, 1, R_256_5, 8 * (R) + 6);  \
+       R256(0, 1, 2, 3, R_256_6, 8 * (R) + 7);  \
+       R256(0, 3, 2, 1, R_256_7, 8 * (R) + 8);  \
+       I256(2 * (R) + 1);
+
+                       R256_8_rounds(0);
+
+#define        R256_Unroll_R(NN) \
+       ((SKEIN_UNROLL_256 == 0 && SKEIN_256_ROUNDS_TOTAL / 8 > (NN)) || \
+       (SKEIN_UNROLL_256 > (NN)))
+
+#if    R256_Unroll_R(1)
+                       R256_8_rounds(1);
+#endif
+#if    R256_Unroll_R(2)
+                       R256_8_rounds(2);
+#endif
+#if    R256_Unroll_R(3)
+                       R256_8_rounds(3);
+#endif
+#if    R256_Unroll_R(4)
+                       R256_8_rounds(4);
+#endif
+#if    R256_Unroll_R(5)
+                       R256_8_rounds(5);
+#endif
+#if    R256_Unroll_R(6)
+                       R256_8_rounds(6);
+#endif
+#if    R256_Unroll_R(7)
+                       R256_8_rounds(7);
+#endif
+#if    R256_Unroll_R(8)
+                       R256_8_rounds(8);
+#endif
+#if    R256_Unroll_R(9)
+                       R256_8_rounds(9);
+#endif
+#if    R256_Unroll_R(10)
+                       R256_8_rounds(10);
+#endif
+#if    R256_Unroll_R(11)
+                       R256_8_rounds(11);
+#endif
+#if    R256_Unroll_R(12)
+                       R256_8_rounds(12);
+#endif
+#if    R256_Unroll_R(13)
+                       R256_8_rounds(13);
+#endif
+#if    R256_Unroll_R(14)
+                       R256_8_rounds(14);
+#endif
+#if    (SKEIN_UNROLL_256 > 14)
+#error  "need more unrolling in Skein_256_Process_Block"
+#endif
+               }
+               /*
+                * do the final "feedforward" xor, update context chaining vars
+                */
+               ctx->X[0] = X0 ^ w[0];
+               ctx->X[1] = X1 ^ w[1];
+               ctx->X[2] = X2 ^ w[2];
+               ctx->X[3] = X3 ^ w[3];
+
+               Skein_Show_Round(BLK_BITS, &ctx->h, SKEIN_RND_FEED_FWD, ctx->X);
+
+               ts[1] &= ~SKEIN_T1_FLAG_FIRST;  /* clear FIRST for later blocks */
+       }
+       while (--blkCnt);
+       ctx->h.T[0] = ts[0];    /* store the updated tweak back in the context */
+       ctx->h.T[1] = ts[1];
+}
+
+#if    defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
+size_t
+Skein_256_Process_Block_CodeSize(void)
+{
+       /*
+        * Byte distance from Skein_256_Process_Block to this function.
+        * NOTE(review): only a code-size estimate if the toolchain emits
+        * this function directly after the block function -- confirm.
+        */
+       uint8_t *blk_fn = (uint8_t *)Skein_256_Process_Block;
+       uint8_t *this_fn = (uint8_t *)Skein_256_Process_Block_CodeSize;
+
+       return (this_fn - blk_fn);
+}
+
+uint_t
+Skein_256_Unroll_Cnt(void)
+{
+       /* report the compile-time unroll setting of the 256-bit variant */
+       const uint_t unroll_cnt = SKEIN_UNROLL_256;
+
+       return (unroll_cnt);
+}
+#endif
+#endif
+
+/* Skein_512 */
+#if    !(SKEIN_USE_ASM & 512)
+void
+Skein_512_Process_Block(Skein_512_Ctxt_t *ctx, const uint8_t *blkPtr,
+    size_t blkCnt, size_t byteCntAdd)
+{      /* C version: absorb blkCnt blocks read from blkPtr into ctx->X */
+       enum {
+               WCNT = SKEIN_512_STATE_WORDS
+       };
+#undef  RCNT   /* each Process_Block variant defines its own RCNT */
+#define        RCNT  (SKEIN_512_ROUNDS_TOTAL / 8)      /* count of 8-round groups */
+
+#ifdef SKEIN_LOOP              /* configure how much to unroll the loop */
+#define        SKEIN_UNROLL_512 (((SKEIN_LOOP) / 10) % 10)     /* tens digit */
+#else
+#define        SKEIN_UNROLL_512 (0)    /* 0 selects the fully unrolled code */
+#endif
+
+#if    SKEIN_UNROLL_512
+#if    (RCNT % SKEIN_UNROLL_512)
+#error "Invalid SKEIN_UNROLL_512"      /* sanity check on unroll count */
+#endif
+       size_t r;       /* loop counter of the looping version */
+       /* key schedule words : chaining vars + tweak + "rotation" */
+       uint64_t kw[WCNT + 4 + RCNT * 2];
+#else
+       uint64_t kw[WCNT + 4];  /* key schedule words : chaining vars + tweak */
+#endif
+       /* local copy of vars, for speed */
+       uint64_t X0, X1, X2, X3, X4, X5, X6, X7;
+       uint64_t w[WCNT];               /* local copy of input block */
+#ifdef SKEIN_DEBUG
+       /* use for debugging (help compiler put Xn in registers) */
+       const uint64_t *Xptr[8];
+       Xptr[0] = &X0;
+       Xptr[1] = &X1;
+       Xptr[2] = &X2;
+       Xptr[3] = &X3;
+       Xptr[4] = &X4;
+       Xptr[5] = &X5;
+       Xptr[6] = &X6;
+       Xptr[7] = &X7;
+#endif
+
+       Skein_assert(blkCnt != 0);      /* never call with blkCnt == 0! */
+       ts[0] = ctx->h.T[0];    /* work on a local copy of the tweak */
+       ts[1] = ctx->h.T[1];
+       do {
+               /*
+                * this implementation only supports 2**64 input bytes
+                * (no carry out here)
+                */
+               ts[0] += byteCntAdd;    /* update processed length */
+
+               /* precompute the key schedule for this block */
+               ks[0] = ctx->X[0];
+               ks[1] = ctx->X[1];
+               ks[2] = ctx->X[2];
+               ks[3] = ctx->X[3];
+               ks[4] = ctx->X[4];
+               ks[5] = ctx->X[5];
+               ks[6] = ctx->X[6];
+               ks[7] = ctx->X[7];
+               ks[8] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^
+                   ks[4] ^ ks[5] ^ ks[6] ^ ks[7] ^ SKEIN_KS_PARITY;
+
+               ts[2] = ts[0] ^ ts[1];
+
+               /* get input block in little-endian format */
+               Skein_Get64_LSB_First(w, blkPtr, WCNT);
+               DebugSaveTweak(ctx);
+               Skein_Show_Block(BLK_BITS, &ctx->h, ctx->X, blkPtr, w, ks, ts);
+
+               X0 = w[0] + ks[0];      /* do the first full key injection */
+               X1 = w[1] + ks[1];
+               X2 = w[2] + ks[2];
+               X3 = w[3] + ks[3];
+               X4 = w[4] + ks[4];
+               X5 = w[5] + ks[5] + ts[0];
+               X6 = w[6] + ks[6] + ts[1];
+               X7 = w[7] + ks[7];
+
+               blkPtr += SKEIN_512_BLOCK_BYTES;        /* w[] already holds this block */
+
+               Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INITIAL,
+                   Xptr);
+               /* run the rounds */
+#define        Round512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, rNum)             \
+       X##p0 += X##p1; X##p1 = RotL_64(X##p1, ROT##_0); X##p1 ^= X##p0;\
+       X##p2 += X##p3; X##p3 = RotL_64(X##p3, ROT##_1); X##p3 ^= X##p2;\
+       X##p4 += X##p5; X##p5 = RotL_64(X##p5, ROT##_2); X##p5 ^= X##p4;\
+       X##p6 += X##p7; X##p7 = RotL_64(X##p7, ROT##_3); X##p7 ^= X##p6;
+
+#if    SKEIN_UNROLL_512 == 0
+#define        R512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, rNum) /* unrolled */  \
+       Round512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, rNum)             \
+       Skein_Show_R_Ptr(BLK_BITS, &ctx->h, rNum, Xptr);
+
+#define        I512(R)                                                         \
+       X0 += ks[((R) + 1) % 9];        /* inject the key schedule value */\
+       X1 += ks[((R) + 2) % 9];                                        \
+       X2 += ks[((R) + 3) % 9];                                        \
+       X3 += ks[((R) + 4) % 9];                                        \
+       X4 += ks[((R) + 5) % 9];                                        \
+       X5 += ks[((R) + 6) % 9] + ts[((R) + 1) % 3];                    \
+       X6 += ks[((R) + 7) % 9] + ts[((R) + 2) % 3];                    \
+       X7 += ks[((R) + 8) % 9] + (R) + 1;                              \
+       Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr);
+#else                          /* looping version */
+#define        R512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, rNum)                 \
+       Round512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, rNum)             \
+       Skein_Show_R_Ptr(BLK_BITS, &ctx->h, 4 * (r - 1) + rNum, Xptr);
+
+#define        I512(R)                                                         \
+       X0 += ks[r + (R) + 0];  /* inject the key schedule value */     \
+       X1 += ks[r + (R) + 1];                                          \
+       X2 += ks[r + (R) + 2];                                          \
+       X3 += ks[r + (R) + 3];                                          \
+       X4 += ks[r + (R) + 4];                                          \
+       X5 += ks[r + (R) + 5] + ts[r + (R) + 0];                        \
+       X6 += ks[r + (R) + 6] + ts[r + (R) + 1];                        \
+       X7 += ks[r + (R) + 7] + r + (R);                                \
+       ks[r + (R)+8] = ks[r + (R) - 1];        /* rotate key schedule */\
+       ts[r + (R)+2] = ts[r + (R) - 1]; /* reuses slot of ks[r+(R)-1] */\
+       Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr);
+
+               /* loop thru it */
+               for (r = 1; r < 2 * RCNT; r += 2 * SKEIN_UNROLL_512)
+#endif                         /* end of looped code definitions */
+               {
+#define        R512_8_rounds(R)        /* do 8 full rounds */                  \
+       R512(0, 1, 2, 3, 4, 5, 6, 7, R_512_0, 8 * (R) + 1);             \
+       R512(2, 1, 4, 7, 6, 5, 0, 3, R_512_1, 8 * (R) + 2);             \
+       R512(4, 1, 6, 3, 0, 5, 2, 7, R_512_2, 8 * (R) + 3);             \
+       R512(6, 1, 0, 7, 2, 5, 4, 3, R_512_3, 8 * (R) + 4);             \
+       I512(2 * (R));                                                  \
+       R512(0, 1, 2, 3, 4, 5, 6, 7, R_512_4, 8 * (R) + 5);             \
+       R512(2, 1, 4, 7, 6, 5, 0, 3, R_512_5, 8 * (R) + 6);             \
+       R512(4, 1, 6, 3, 0, 5, 2, 7, R_512_6, 8 * (R) + 7);             \
+       R512(6, 1, 0, 7, 2, 5, 4, 3, R_512_7, 8 * (R) + 8);             \
+       I512(2*(R) + 1);                /* and key injection */
+
+                       R512_8_rounds(0);
+
+#define        R512_Unroll_R(NN) \
+       ((SKEIN_UNROLL_512 == 0 && SKEIN_512_ROUNDS_TOTAL / 8 > (NN)) || \
+       (SKEIN_UNROLL_512 > (NN)))
+
+#if    R512_Unroll_R(1)
+                       R512_8_rounds(1);
+#endif
+#if    R512_Unroll_R(2)
+                       R512_8_rounds(2);
+#endif
+#if    R512_Unroll_R(3)
+                       R512_8_rounds(3);
+#endif
+#if    R512_Unroll_R(4)
+                       R512_8_rounds(4);
+#endif
+#if    R512_Unroll_R(5)
+                       R512_8_rounds(5);
+#endif
+#if    R512_Unroll_R(6)
+                       R512_8_rounds(6);
+#endif
+#if    R512_Unroll_R(7)
+                       R512_8_rounds(7);
+#endif
+#if    R512_Unroll_R(8)
+                       R512_8_rounds(8);
+#endif
+#if    R512_Unroll_R(9)
+                       R512_8_rounds(9);
+#endif
+#if    R512_Unroll_R(10)
+                       R512_8_rounds(10);
+#endif
+#if    R512_Unroll_R(11)
+                       R512_8_rounds(11);
+#endif
+#if    R512_Unroll_R(12)
+                       R512_8_rounds(12);
+#endif
+#if    R512_Unroll_R(13)
+                       R512_8_rounds(13);
+#endif
+#if    R512_Unroll_R(14)
+                       R512_8_rounds(14);
+#endif
+#if    (SKEIN_UNROLL_512 > 14)
+#error "need more unrolling in Skein_512_Process_Block"
+#endif
+               }
+
+               /*
+                * do the final "feedforward" xor, update context chaining vars
+                */
+               ctx->X[0] = X0 ^ w[0];
+               ctx->X[1] = X1 ^ w[1];
+               ctx->X[2] = X2 ^ w[2];
+               ctx->X[3] = X3 ^ w[3];
+               ctx->X[4] = X4 ^ w[4];
+               ctx->X[5] = X5 ^ w[5];
+               ctx->X[6] = X6 ^ w[6];
+               ctx->X[7] = X7 ^ w[7];
+               Skein_Show_Round(BLK_BITS, &ctx->h, SKEIN_RND_FEED_FWD, ctx->X);
+
+               ts[1] &= ~SKEIN_T1_FLAG_FIRST;  /* clear FIRST for later blocks */
+       }
+       while (--blkCnt);
+       ctx->h.T[0] = ts[0];    /* store the updated tweak back in the context */
+       ctx->h.T[1] = ts[1];
+}
+
+#if    defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
+size_t
+Skein_512_Process_Block_CodeSize(void)
+{
+       /*
+        * Byte distance from Skein_512_Process_Block to this function.
+        * NOTE(review): only a code-size estimate if the toolchain emits
+        * this function directly after the block function -- confirm.
+        */
+       uint8_t *blk_fn = (uint8_t *)Skein_512_Process_Block;
+       uint8_t *this_fn = (uint8_t *)Skein_512_Process_Block_CodeSize;
+
+       return (this_fn - blk_fn);
+}
+
+uint_t
+Skein_512_Unroll_Cnt(void)
+{
+       /* report the compile-time unroll setting of the 512-bit variant */
+       const uint_t unroll_cnt = SKEIN_UNROLL_512;
+
+       return (unroll_cnt);
+}
+#endif
+#endif
+
+/*  Skein1024 */
+#if    !(SKEIN_USE_ASM & 1024)
+void
+Skein1024_Process_Block(Skein1024_Ctxt_t *ctx, const uint8_t *blkPtr,
+    size_t blkCnt, size_t byteCntAdd)
+{
+       /* do it in C; default SKEIN_LOOP loops (unrolled is bigger AND slower!) */
+       enum {
+               WCNT = SKEIN1024_STATE_WORDS
+       };
+#undef  RCNT   /* each Process_Block variant defines its own RCNT */
+#define        RCNT  (SKEIN1024_ROUNDS_TOTAL/8)        /* count of 8-round groups */
+
+#ifdef SKEIN_LOOP              /* configure how much to unroll the loop */
+#define        SKEIN_UNROLL_1024 ((SKEIN_LOOP)%10)     /* ones digit */
+#else
+#define        SKEIN_UNROLL_1024 (0)   /* 0 selects the fully unrolled code */
+#endif
+
+#if    (SKEIN_UNROLL_1024 != 0)
+#if    (RCNT % SKEIN_UNROLL_1024)
+#error "Invalid SKEIN_UNROLL_1024"     /* sanity check on unroll count */
+#endif
+       size_t r;       /* loop counter of the looping version */
+       /* key schedule words : chaining vars + tweak + "rotation" */
+       uint64_t kw[WCNT + 4 + RCNT * 2];
+#else
+       uint64_t kw[WCNT + 4];  /* key schedule words : chaining vars + tweak */
+#endif
+
+       /* local copy of vars, for speed */
+       uint64_t X00, X01, X02, X03, X04, X05, X06, X07, X08, X09, X10, X11,
+           X12, X13, X14, X15;
+       uint64_t w[WCNT];               /* local copy of input block */
+#ifdef SKEIN_DEBUG
+       /* use for debugging (help compiler put Xn in registers) */
+       const uint64_t *Xptr[16];
+       Xptr[0] = &X00;
+       Xptr[1] = &X01;
+       Xptr[2] = &X02;
+       Xptr[3] = &X03;
+       Xptr[4] = &X04;
+       Xptr[5] = &X05;
+       Xptr[6] = &X06;
+       Xptr[7] = &X07;
+       Xptr[8] = &X08;
+       Xptr[9] = &X09;
+       Xptr[10] = &X10;
+       Xptr[11] = &X11;
+       Xptr[12] = &X12;
+       Xptr[13] = &X13;
+       Xptr[14] = &X14;
+       Xptr[15] = &X15;
+#endif
+
+       Skein_assert(blkCnt != 0);      /* never call with blkCnt == 0! */
+       ts[0] = ctx->h.T[0];    /* work on a local copy of the tweak */
+       ts[1] = ctx->h.T[1];
+       do {
+               /*
+                * this implementation only supports 2**64 input bytes
+                * (no carry out here)
+                */
+               ts[0] += byteCntAdd;    /* update processed length */
+
+               /* precompute the key schedule for this block */
+               ks[0] = ctx->X[0];
+               ks[1] = ctx->X[1];
+               ks[2] = ctx->X[2];
+               ks[3] = ctx->X[3];
+               ks[4] = ctx->X[4];
+               ks[5] = ctx->X[5];
+               ks[6] = ctx->X[6];
+               ks[7] = ctx->X[7];
+               ks[8] = ctx->X[8];
+               ks[9] = ctx->X[9];
+               ks[10] = ctx->X[10];
+               ks[11] = ctx->X[11];
+               ks[12] = ctx->X[12];
+               ks[13] = ctx->X[13];
+               ks[14] = ctx->X[14];
+               ks[15] = ctx->X[15];
+               ks[16] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^
+                   ks[4] ^ ks[5] ^ ks[6] ^ ks[7] ^
+                   ks[8] ^ ks[9] ^ ks[10] ^ ks[11] ^
+                   ks[12] ^ ks[13] ^ ks[14] ^ ks[15] ^ SKEIN_KS_PARITY;
+
+               ts[2] = ts[0] ^ ts[1];
+
+               /* get input block in little-endian format */
+               Skein_Get64_LSB_First(w, blkPtr, WCNT);
+               DebugSaveTweak(ctx);
+               Skein_Show_Block(BLK_BITS, &ctx->h, ctx->X, blkPtr, w, ks, ts);
+
+               X00 = w[0] + ks[0];     /* do the first full key injection */
+               X01 = w[1] + ks[1];
+               X02 = w[2] + ks[2];
+               X03 = w[3] + ks[3];
+               X04 = w[4] + ks[4];
+               X05 = w[5] + ks[5];
+               X06 = w[6] + ks[6];
+               X07 = w[7] + ks[7];
+               X08 = w[8] + ks[8];
+               X09 = w[9] + ks[9];
+               X10 = w[10] + ks[10];
+               X11 = w[11] + ks[11];
+               X12 = w[12] + ks[12];
+               X13 = w[13] + ks[13] + ts[0];
+               X14 = w[14] + ks[14] + ts[1];
+               X15 = w[15] + ks[15];
+
+               Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INITIAL,
+                   Xptr);
+
+#define        Round1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC,   \
+       pD, pE, pF, ROT, rNum)                                          \
+       X##p0 += X##p1; X##p1 = RotL_64(X##p1, ROT##_0); X##p1 ^= X##p0;\
+       X##p2 += X##p3; X##p3 = RotL_64(X##p3, ROT##_1); X##p3 ^= X##p2;\
+       X##p4 += X##p5; X##p5 = RotL_64(X##p5, ROT##_2); X##p5 ^= X##p4;\
+       X##p6 += X##p7; X##p7 = RotL_64(X##p7, ROT##_3); X##p7 ^= X##p6;\
+       X##p8 += X##p9; X##p9 = RotL_64(X##p9, ROT##_4); X##p9 ^= X##p8;\
+       X##pA += X##pB; X##pB = RotL_64(X##pB, ROT##_5); X##pB ^= X##pA;\
+       X##pC += X##pD; X##pD = RotL_64(X##pD, ROT##_6); X##pD ^= X##pC;\
+       X##pE += X##pF; X##pF = RotL_64(X##pF, ROT##_7); X##pF ^= X##pE;
+
+#if    SKEIN_UNROLL_1024 == 0
+#define        R1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD,   \
+       pE, pF, ROT, rn)                                                \
+       Round1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC,   \
+       pD, pE, pF, ROT, rn)                                            \
+       Skein_Show_R_Ptr(BLK_BITS, &ctx->h, rn, Xptr);
+
+#define        I1024(R)                                                        \
+       X00 += ks[((R) + 1) % 17];      /* inject the key schedule value */\
+       X01 += ks[((R) + 2) % 17];                                      \
+       X02 += ks[((R) + 3) % 17];                                      \
+       X03 += ks[((R) + 4) % 17];                                      \
+       X04 += ks[((R) + 5) % 17];                                      \
+       X05 += ks[((R) + 6) % 17];                                      \
+       X06 += ks[((R) + 7) % 17];                                      \
+       X07 += ks[((R) + 8) % 17];                                      \
+       X08 += ks[((R) + 9) % 17];                                      \
+       X09 += ks[((R) + 10) % 17];                                     \
+       X10 += ks[((R) + 11) % 17];                                     \
+       X11 += ks[((R) + 12) % 17];                                     \
+       X12 += ks[((R) + 13) % 17];                                     \
+       X13 += ks[((R) + 14) % 17] + ts[((R) + 1) % 3];                 \
+       X14 += ks[((R) + 15) % 17] + ts[((R) + 2) % 3];                 \
+       X15 += ks[((R) + 16) % 17] + (R) +1;                            \
+       Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr);
+#else                          /* looping version */
+#define        R1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD,   \
+       pE, pF, ROT, rn)                                                \
+       Round1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC,   \
+       pD, pE, pF, ROT, rn)                                            \
+       Skein_Show_R_Ptr(BLK_BITS, &ctx->h, 4 * (r - 1) + rn, Xptr);
+
+#define        I1024(R)                                                        \
+       X00 += ks[r + (R) + 0]; /* inject the key schedule value */     \
+       X01 += ks[r + (R) + 1];                                         \
+       X02 += ks[r + (R) + 2];                                         \
+       X03 += ks[r + (R) + 3];                                         \
+       X04 += ks[r + (R) + 4];                                         \
+       X05 += ks[r + (R) + 5];                                         \
+       X06 += ks[r + (R) + 6];                                         \
+       X07 += ks[r + (R) + 7];                                         \
+       X08 += ks[r + (R) + 8];                                         \
+       X09 += ks[r + (R) + 9];                                         \
+       X10 += ks[r + (R) + 10];                                        \
+       X11 += ks[r + (R) + 11];                                        \
+       X12 += ks[r + (R) + 12];                                        \
+       X13 += ks[r + (R) + 13] + ts[r + (R) + 0];                      \
+       X14 += ks[r + (R) + 14] + ts[r + (R) + 1];                      \
+       X15 += ks[r + (R) + 15] +  r + (R);                             \
+       ks[r + (R) + 16] = ks[r + (R) - 1];     /* rotate key schedule */\
+       ts[r + (R) + 2] = ts[r + (R) - 1]; /* reuses slot of ks[r+(R)-1] */\
+       Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr);
+
+               /* loop thru it; r stays odd, so "<=" acts like "<" here */
+               for (r = 1; r <= 2 * RCNT; r += 2 * SKEIN_UNROLL_1024)
+#endif
+               {
+#define        R1024_8_rounds(R)       /* do 8 full rounds */                  \
+       R1024(00, 01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12, 13,   \
+           14, 15, R1024_0, 8 * (R) + 1);                              \
+       R1024(00, 09, 02, 13, 06, 11, 04, 15, 10, 07, 12, 03, 14, 05,   \
+           08, 01, R1024_1, 8 * (R) + 2);                              \
+       R1024(00, 07, 02, 05, 04, 03, 06, 01, 12, 15, 14, 13, 08, 11,   \
+           10, 09, R1024_2, 8 * (R) + 3);                              \
+       R1024(00, 15, 02, 11, 06, 13, 04, 09, 14, 01, 08, 05, 10, 03,   \
+           12, 07, R1024_3, 8 * (R) + 4);                              \
+       I1024(2 * (R));                                                 \
+       R1024(00, 01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12, 13,   \
+           14, 15, R1024_4, 8 * (R) + 5);                              \
+       R1024(00, 09, 02, 13, 06, 11, 04, 15, 10, 07, 12, 03, 14, 05,   \
+           08, 01, R1024_5, 8 * (R) + 6);                              \
+       R1024(00, 07, 02, 05, 04, 03, 06, 01, 12, 15, 14, 13, 08, 11,   \
+           10, 09, R1024_6, 8 * (R) + 7);                              \
+       R1024(00, 15, 02, 11, 06, 13, 04, 09, 14, 01, 08, 05, 10, 03,   \
+           12, 07, R1024_7, 8 * (R) + 8);                              \
+       I1024(2 * (R) + 1);
+
+                       R1024_8_rounds(0);
+
+#define        R1024_Unroll_R(NN)                                              \
+       ((SKEIN_UNROLL_1024 == 0 && SKEIN1024_ROUNDS_TOTAL/8 > (NN)) || \
+       (SKEIN_UNROLL_1024 > (NN)))
+
+#if    R1024_Unroll_R(1)
+                       R1024_8_rounds(1);
+#endif
+#if    R1024_Unroll_R(2)
+                       R1024_8_rounds(2);
+#endif
+#if    R1024_Unroll_R(3)
+                       R1024_8_rounds(3);
+#endif
+#if    R1024_Unroll_R(4)
+                       R1024_8_rounds(4);
+#endif
+#if    R1024_Unroll_R(5)
+                       R1024_8_rounds(5);
+#endif
+#if    R1024_Unroll_R(6)
+                       R1024_8_rounds(6);
+#endif
+#if    R1024_Unroll_R(7)
+                       R1024_8_rounds(7);
+#endif
+#if    R1024_Unroll_R(8)
+                       R1024_8_rounds(8);
+#endif
+#if    R1024_Unroll_R(9)
+                       R1024_8_rounds(9);
+#endif
+#if    R1024_Unroll_R(10)
+                       R1024_8_rounds(10);
+#endif
+#if    R1024_Unroll_R(11)
+                       R1024_8_rounds(11);
+#endif
+#if    R1024_Unroll_R(12)
+                       R1024_8_rounds(12);
+#endif
+#if    R1024_Unroll_R(13)
+                       R1024_8_rounds(13);
+#endif
+#if    R1024_Unroll_R(14)
+                       R1024_8_rounds(14);
+#endif
+#if    (SKEIN_UNROLL_1024 > 14)
+#error  "need more unrolling in Skein_1024_Process_Block"
+#endif
+               }
+               /*
+                * do the final "feedforward" xor, update context chaining vars
+                */
+
+               ctx->X[0] = X00 ^ w[0];
+               ctx->X[1] = X01 ^ w[1];
+               ctx->X[2] = X02 ^ w[2];
+               ctx->X[3] = X03 ^ w[3];
+               ctx->X[4] = X04 ^ w[4];
+               ctx->X[5] = X05 ^ w[5];
+               ctx->X[6] = X06 ^ w[6];
+               ctx->X[7] = X07 ^ w[7];
+               ctx->X[8] = X08 ^ w[8];
+               ctx->X[9] = X09 ^ w[9];
+               ctx->X[10] = X10 ^ w[10];
+               ctx->X[11] = X11 ^ w[11];
+               ctx->X[12] = X12 ^ w[12];
+               ctx->X[13] = X13 ^ w[13];
+               ctx->X[14] = X14 ^ w[14];
+               ctx->X[15] = X15 ^ w[15];
+
+               Skein_Show_Round(BLK_BITS, &ctx->h, SKEIN_RND_FEED_FWD, ctx->X);
+
+               ts[1] &= ~SKEIN_T1_FLAG_FIRST;  /* clear FIRST for later blocks */
+               blkPtr += SKEIN1024_BLOCK_BYTES;        /* advance to the next block */
+       } while (--blkCnt);
+       ctx->h.T[0] = ts[0];    /* store the updated tweak back in the context */
+       ctx->h.T[1] = ts[1];
+}
+
+#if    defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
+size_t
+Skein1024_Process_Block_CodeSize(void)
+{
+       /*
+        * Byte distance from Skein1024_Process_Block to this function.
+        * NOTE(review): only a code-size estimate if the toolchain emits
+        * this function directly after the block function -- confirm.
+        */
+       uint8_t *blk_fn = (uint8_t *)Skein1024_Process_Block;
+       uint8_t *this_fn = (uint8_t *)Skein1024_Process_Block_CodeSize;
+
+       return (this_fn - blk_fn);
+}
+
+uint_t
+Skein1024_Unroll_Cnt(void)
+{
+       /* report the compile-time unroll setting of the 1024-bit variant */
+       const uint_t unroll_cnt = SKEIN_UNROLL_1024;
+
+       return (unroll_cnt);
+}
+#endif
+#endif
diff --git a/module/icp/algs/skein/skein_impl.h b/module/icp/algs/skein/skein_impl.h
new file mode 100644 (file)
index 0000000..e83a069
--- /dev/null
@@ -0,0 +1,289 @@
+/*
+ * Internal definitions for Skein hashing.
+ * Source code author: Doug Whiting, 2008.
+ * This algorithm and source code is released to the public domain.
+ *
+ * The following compile-time switches may be defined to control some
+ * tradeoffs between speed, code size, error checking, and security.
+ *
+ * The "default" note explains what happens when the switch is not defined.
+ *
+ *  SKEIN_DEBUG            -- make callouts from inside Skein code
+ *                            to examine/display intermediate values.
+ *                            [default: no callouts (no overhead)]
+ *
+ *  SKEIN_ERR_CHECK        -- how error checking is handled inside Skein
+ *                            code. If not defined, most error checking
+ *                            is disabled (for performance). Otherwise:
+ *                                SKEIN_ASSERT defined: ASSERT() flags errors
+ *                                SKEIN_ASSERT not defined: caller errors
+ *                                    return the supplied error code
+ */
+/* Copyright 2013 Doug Whiting. This code is released to the public domain. */
+
+#ifndef        _SKEIN_IMPL_H_
+#define        _SKEIN_IMPL_H_
+
+#include <sys/skein.h>
+#include "skein_impl.h"
+#include "skein_port.h"
+
+/* determine where we can get bcopy/bzero declarations */
+#ifdef _KERNEL
+#include <sys/systm.h>
+#else
+#include <strings.h>
+#endif
+
+/*
+ * "Internal" Skein definitions
+ *    -- not needed for sequential hashing API, but will be
+ *           helpful for other uses of Skein (e.g., tree hash mode).
+ *    -- included here so that they can be shared between
+ *           reference and optimized code.
+ */
+
+/* tweak word T[1]: bit field starting positions */
+/* offset 64 because it's the second word  */
+#define        SKEIN_T1_BIT(BIT)       ((BIT) - 64)
+
+/* bits 112..118: level in hash tree */
+#define        SKEIN_T1_POS_TREE_LVL   SKEIN_T1_BIT(112)
+/* bit  119: partial final input byte */
+#define        SKEIN_T1_POS_BIT_PAD    SKEIN_T1_BIT(119)
+/* bits 120..125: type field */
+#define        SKEIN_T1_POS_BLK_TYPE   SKEIN_T1_BIT(120)
+/* bit  126: first block flag */
+#define        SKEIN_T1_POS_FIRST      SKEIN_T1_BIT(126)
+/* bit  127: final block flag */
+#define        SKEIN_T1_POS_FINAL      SKEIN_T1_BIT(127)
+
+/* tweak word T[1]: flag bit definition(s) */
+#define        SKEIN_T1_FLAG_FIRST     (((uint64_t)1) << SKEIN_T1_POS_FIRST)
+#define        SKEIN_T1_FLAG_FINAL     (((uint64_t)1) << SKEIN_T1_POS_FINAL)
+#define        SKEIN_T1_FLAG_BIT_PAD   (((uint64_t)1) << SKEIN_T1_POS_BIT_PAD)
+
+/* tweak word T[1]: tree level bit field mask */
+#define        SKEIN_T1_TREE_LVL_MASK  (((uint64_t)0x7F) << SKEIN_T1_POS_TREE_LVL)
+#define        SKEIN_T1_TREE_LEVEL(n)  (((uint64_t)(n)) << SKEIN_T1_POS_TREE_LVL)
+
+/* tweak word T[1]: block type field */
+#define        SKEIN_BLK_TYPE_KEY      (0)     /* key, for MAC and KDF */
+#define        SKEIN_BLK_TYPE_CFG      (4)     /* configuration block */
+#define        SKEIN_BLK_TYPE_PERS     (8)     /* personalization string */
+#define        SKEIN_BLK_TYPE_PK       (12)    /* public key (for signature hashing) */
+#define        SKEIN_BLK_TYPE_KDF      (16)    /* key identifier for KDF */
+#define        SKEIN_BLK_TYPE_NONCE    (20)    /* nonce for PRNG */
+#define        SKEIN_BLK_TYPE_MSG      (48)    /* message processing */
+#define        SKEIN_BLK_TYPE_OUT      (63)    /* output stage */
+#define        SKEIN_BLK_TYPE_MASK     (63)    /* bit field mask */
+
+#define        SKEIN_T1_BLK_TYPE(T)    \
+       (((uint64_t)(SKEIN_BLK_TYPE_##T)) << SKEIN_T1_POS_BLK_TYPE)
+/* key, for MAC and KDF */
+#define        SKEIN_T1_BLK_TYPE_KEY   SKEIN_T1_BLK_TYPE(KEY)
+/* configuration block */
+#define        SKEIN_T1_BLK_TYPE_CFG   SKEIN_T1_BLK_TYPE(CFG)
+/* personalization string */
+#define        SKEIN_T1_BLK_TYPE_PERS  SKEIN_T1_BLK_TYPE(PERS)
+/* public key (for digital signature hashing) */
+#define        SKEIN_T1_BLK_TYPE_PK    SKEIN_T1_BLK_TYPE(PK)
+/* key identifier for KDF */
+#define        SKEIN_T1_BLK_TYPE_KDF   SKEIN_T1_BLK_TYPE(KDF)
+/* nonce for PRNG */
+#define        SKEIN_T1_BLK_TYPE_NONCE SKEIN_T1_BLK_TYPE(NONCE)
+/* message processing */
+#define        SKEIN_T1_BLK_TYPE_MSG   SKEIN_T1_BLK_TYPE(MSG)
+/* output stage */
+#define        SKEIN_T1_BLK_TYPE_OUT   SKEIN_T1_BLK_TYPE(OUT)
+/* field bit mask */
+#define        SKEIN_T1_BLK_TYPE_MASK  SKEIN_T1_BLK_TYPE(MASK)
+
+#define        SKEIN_T1_BLK_TYPE_CFG_FINAL     \
+       (SKEIN_T1_BLK_TYPE_CFG | SKEIN_T1_FLAG_FINAL)
+#define        SKEIN_T1_BLK_TYPE_OUT_FINAL     \
+       (SKEIN_T1_BLK_TYPE_OUT | SKEIN_T1_FLAG_FINAL)
+
+#define        SKEIN_VERSION           (1)
+
+#ifndef        SKEIN_ID_STRING_LE      /* allow compile-time personalization */
+#define        SKEIN_ID_STRING_LE      (0x33414853)    /* "SHA3" (little-endian) */
+#endif
+
+#define        SKEIN_MK_64(hi32, lo32) ((lo32) + (((uint64_t)(hi32)) << 32))
+#define        SKEIN_SCHEMA_VER        SKEIN_MK_64(SKEIN_VERSION, SKEIN_ID_STRING_LE)
+#define        SKEIN_KS_PARITY         SKEIN_MK_64(0x1BD11BDA, 0xA9FC1A22)
+
+#define        SKEIN_CFG_STR_LEN       (4*8)
+
+/* bit field definitions in config block treeInfo word */
+#define        SKEIN_CFG_TREE_LEAF_SIZE_POS    (0)
+#define        SKEIN_CFG_TREE_NODE_SIZE_POS    (8)
+#define        SKEIN_CFG_TREE_MAX_LEVEL_POS    (16)
+
+#define        SKEIN_CFG_TREE_LEAF_SIZE_MSK    \
+       (((uint64_t)0xFF) << SKEIN_CFG_TREE_LEAF_SIZE_POS)
+#define        SKEIN_CFG_TREE_NODE_SIZE_MSK    \
+       (((uint64_t)0xFF) << SKEIN_CFG_TREE_NODE_SIZE_POS)
+#define        SKEIN_CFG_TREE_MAX_LEVEL_MSK    \
+       (((uint64_t)0xFF) << SKEIN_CFG_TREE_MAX_LEVEL_POS)
+
+#define        SKEIN_CFG_TREE_INFO(leaf, node, maxLvl)                 \
+       ((((uint64_t)(leaf)) << SKEIN_CFG_TREE_LEAF_SIZE_POS) | \
+       (((uint64_t)(node)) << SKEIN_CFG_TREE_NODE_SIZE_POS) |  \
+       (((uint64_t)(maxLvl)) << SKEIN_CFG_TREE_MAX_LEVEL_POS))
+
+/* use as treeInfo in InitExt() call for sequential processing */
+#define        SKEIN_CFG_TREE_INFO_SEQUENTIAL  SKEIN_CFG_TREE_INFO(0, 0, 0)
+
+/*
+ * Skein macros for getting/setting tweak words, etc.
+ * These are useful for partial input bytes, hash tree init/update, etc.
+ */
+#define        Skein_Get_Tweak(ctxPtr, TWK_NUM)        ((ctxPtr)->h.T[TWK_NUM])
+#define        Skein_Set_Tweak(ctxPtr, TWK_NUM, tVal)          \
+       do {                                            \
+               (ctxPtr)->h.T[TWK_NUM] = (tVal);        \
+               _NOTE(CONSTCOND)                        \
+       } while (0)
+
+#define        Skein_Get_T0(ctxPtr)            Skein_Get_Tweak(ctxPtr, 0)
+#define        Skein_Get_T1(ctxPtr)            Skein_Get_Tweak(ctxPtr, 1)
+#define        Skein_Set_T0(ctxPtr, T0)        Skein_Set_Tweak(ctxPtr, 0, T0)
+#define        Skein_Set_T1(ctxPtr, T1)        Skein_Set_Tweak(ctxPtr, 1, T1)
+
+/* set both tweak words at once */
+#define        Skein_Set_T0_T1(ctxPtr, T0, T1)         \
+       do {                                    \
+               Skein_Set_T0(ctxPtr, (T0));     \
+               Skein_Set_T1(ctxPtr, (T1));     \
+               _NOTE(CONSTCOND)                \
+       } while (0)
+
+#define        Skein_Set_Type(ctxPtr, BLK_TYPE)        \
+       Skein_Set_T1(ctxPtr, SKEIN_T1_BLK_TYPE_##BLK_TYPE)
+
+/*
+ * set up for starting with a new type: h.T[0]=0; h.T[1] = NEW_TYPE; h.bCnt=0;
+ */
+#define        Skein_Start_New_Type(ctxPtr, BLK_TYPE)                          \
+       do {                                                            \
+               Skein_Set_T0_T1(ctxPtr, 0, SKEIN_T1_FLAG_FIRST |        \
+                   SKEIN_T1_BLK_TYPE_ ## BLK_TYPE);                    \
+               (ctxPtr)->h.bCnt = 0;   \
+               _NOTE(CONSTCOND)                                        \
+       } while (0)
+
+#define        Skein_Clear_First_Flag(hdr)                                     \
+       do {                                                            \
+               (hdr).T[1] &= ~SKEIN_T1_FLAG_FIRST;                     \
+               _NOTE(CONSTCOND)                                        \
+       } while (0)
+#define        Skein_Set_Bit_Pad_Flag(hdr)                                     \
+       do {                                                            \
+               (hdr).T[1] |=  SKEIN_T1_FLAG_BIT_PAD;                   \
+               _NOTE(CONSTCOND)                                        \
+       } while (0)
+
+#define        Skein_Set_Tree_Level(hdr, height)                               \
+       do {                                                            \
+               (hdr).T[1] |= SKEIN_T1_TREE_LEVEL(height);              \
+               _NOTE(CONSTCOND)                                        \
+       } while (0)
+
+/*
+ * "Internal" Skein definitions for debugging and error checking
+ * Note: in Illumos we always disable debugging features.
+ */
+#define        Skein_Show_Block(bits, ctx, X, blkPtr, wPtr, ksEvenPtr, ksOddPtr)
+#define        Skein_Show_Round(bits, ctx, r, X)
+#define        Skein_Show_R_Ptr(bits, ctx, r, X_ptr)
+#define        Skein_Show_Final(bits, ctx, cnt, outPtr)
+#define        Skein_Show_Key(bits, ctx, key, keyBytes)
+
+/*
+ * Run-time checks (e.g., bad params, uninitialized context).
+ * Skein_Assert(x, retCode) is used for caller errors and Skein_assert(x)
+ * for internal errors; what each expands to depends on SKEIN_ERR_CHECK
+ * and SKEIN_ASSERT as laid out below.
+ */
+#ifndef        SKEIN_ERR_CHECK
+/* default: ignore all Asserts, for performance */
+#define        Skein_Assert(x, retCode)
+#define        Skein_assert(x)
+#elif  defined(SKEIN_ASSERT)
+/* SKEIN_ASSERT defined: both kinds of check go through ASSERT() */
+#include <sys/debug.h>
+#define        Skein_Assert(x, retCode)        ASSERT(x)
+#define        Skein_assert(x)                 ASSERT(x)
+#else
+#include <sys/debug.h>
+/*
+ * caller error: the macro makes the *enclosing function* return retCode
+ * when x is false, so it may only be used where that return is valid
+ */
+#define        Skein_Assert(x, retCode)                \
+       do {                                    \
+               if (!(x))                       \
+                       return (retCode);       \
+               _NOTE(CONSTCOND)                \
+       } while (0)
+/* internal error */
+#define        Skein_assert(x) ASSERT(x)
+#endif
+
+/*
+ * Skein block function constants (shared across Ref and Opt code)
+ */
+enum {
+       /* Skein_256: two rotation constants per round, rounds 0..7 */
+       R_256_0_0 = 14, R_256_0_1 = 16,
+       R_256_1_0 = 52, R_256_1_1 = 57,
+       R_256_2_0 = 23, R_256_2_1 = 40,
+       R_256_3_0 = 5,  R_256_3_1 = 37,
+       R_256_4_0 = 25, R_256_4_1 = 33,
+       R_256_5_0 = 46, R_256_5_1 = 12,
+       R_256_6_0 = 58, R_256_6_1 = 22,
+       R_256_7_0 = 32, R_256_7_1 = 32,
+
+       /* Skein_512: four rotation constants per round, rounds 0..7 */
+       R_512_0_0 = 46, R_512_0_1 = 36, R_512_0_2 = 19, R_512_0_3 = 37,
+       R_512_1_0 = 33, R_512_1_1 = 27, R_512_1_2 = 14, R_512_1_3 = 42,
+       R_512_2_0 = 17, R_512_2_1 = 49, R_512_2_2 = 36, R_512_2_3 = 39,
+       R_512_3_0 = 44, R_512_3_1 = 9,  R_512_3_2 = 54, R_512_3_3 = 56,
+       R_512_4_0 = 39, R_512_4_1 = 30, R_512_4_2 = 34, R_512_4_3 = 24,
+       R_512_5_0 = 13, R_512_5_1 = 50, R_512_5_2 = 10, R_512_5_3 = 17,
+       R_512_6_0 = 25, R_512_6_1 = 29, R_512_6_2 = 39, R_512_6_3 = 43,
+       R_512_7_0 = 8,  R_512_7_1 = 35, R_512_7_2 = 56, R_512_7_3 = 22,
+
+       /* Skein1024: eight rotation constants per round, rounds 0..7 */
+       R1024_0_0 = 24, R1024_0_1 = 13, R1024_0_2 = 8,  R1024_0_3 = 47,
+       R1024_0_4 = 8,  R1024_0_5 = 17, R1024_0_6 = 22, R1024_0_7 = 37,
+       R1024_1_0 = 38, R1024_1_1 = 19, R1024_1_2 = 10, R1024_1_3 = 55,
+       R1024_1_4 = 49, R1024_1_5 = 18, R1024_1_6 = 23, R1024_1_7 = 52,
+       R1024_2_0 = 33, R1024_2_1 = 4,  R1024_2_2 = 51, R1024_2_3 = 13,
+       R1024_2_4 = 34, R1024_2_5 = 41, R1024_2_6 = 59, R1024_2_7 = 17,
+       R1024_3_0 = 5,  R1024_3_1 = 20, R1024_3_2 = 48, R1024_3_3 = 41,
+       R1024_3_4 = 47, R1024_3_5 = 28, R1024_3_6 = 16, R1024_3_7 = 25,
+       R1024_4_0 = 41, R1024_4_1 = 9,  R1024_4_2 = 37, R1024_4_3 = 31,
+       R1024_4_4 = 12, R1024_4_5 = 47, R1024_4_6 = 44, R1024_4_7 = 30,
+       R1024_5_0 = 16, R1024_5_1 = 34, R1024_5_2 = 56, R1024_5_3 = 51,
+       R1024_5_4 = 4,  R1024_5_5 = 53, R1024_5_6 = 42, R1024_5_7 = 41,
+       R1024_6_0 = 31, R1024_6_1 = 44, R1024_6_2 = 47, R1024_6_3 = 46,
+       R1024_6_4 = 19, R1024_6_5 = 42, R1024_6_6 = 44, R1024_6_7 = 25,
+       R1024_7_0 = 9,  R1024_7_1 = 48, R1024_7_2 = 35, R1024_7_3 = 52,
+       R1024_7_4 = 23, R1024_7_5 = 31, R1024_7_6 = 37, R1024_7_7 = 20
+};
+
+/* number of rounds for the different block sizes */
+#define        SKEIN_256_ROUNDS_TOTAL  (72)
+#define        SKEIN_512_ROUNDS_TOTAL  (72)
+#define        SKEIN1024_ROUNDS_TOTAL  (80)
+
+
+extern const uint64_t SKEIN_256_IV_128[];
+extern const uint64_t SKEIN_256_IV_160[];
+extern const uint64_t SKEIN_256_IV_224[];
+extern const uint64_t SKEIN_256_IV_256[];
+extern const uint64_t SKEIN_512_IV_128[];
+extern const uint64_t SKEIN_512_IV_160[];
+extern const uint64_t SKEIN_512_IV_224[];
+extern const uint64_t SKEIN_512_IV_256[];
+extern const uint64_t SKEIN_512_IV_384[];
+extern const uint64_t SKEIN_512_IV_512[];
+extern const uint64_t SKEIN1024_IV_384[];
+extern const uint64_t SKEIN1024_IV_512[];
+extern const uint64_t SKEIN1024_IV_1024[];
+
+#endif /* _SKEIN_IMPL_H_ */
diff --git a/module/icp/algs/skein/skein_iv.c b/module/icp/algs/skein/skein_iv.c
new file mode 100644 (file)
index 0000000..140d38f
--- /dev/null
@@ -0,0 +1,185 @@
+/*
+ * Pre-computed Skein IVs
+ *
+ * NOTE: these values are not "magic" constants, but
+ * are generated using the Threefish block function.
+ * They are pre-computed here only for speed; i.e., to
+ * avoid the need for a Threefish call during Init().
+ *
+ * The IV for any fixed hash length may be pre-computed.
+ * Only the most common values are included here.
+ */
+/* Copyright 2013 Doug Whiting. This code is released to the public domain. */
+/*
+ * Illumos implementation note: these constants are for Skein v1.3 as per:
+ * http://www.skein-hash.info/sites/default/files/skein1.3.pdf
+ */
+
+#include <sys/skein.h>         /* get Skein macros and types */
+#include "skein_impl.h"                /* get internal definitions */
+
+#define        MK_64 SKEIN_MK_64
+
+/* blkSize =  256 bits. hashSize =  128 bits */
+const uint64_t SKEIN_256_IV_128[] = {
+       MK_64(0xE1111906, 0x964D7260),
+       MK_64(0x883DAAA7, 0x7C8D811C),
+       MK_64(0x10080DF4, 0x91960F7A),
+       MK_64(0xCCF7DDE5, 0xB45BC1C2)
+};
+
+/* blkSize =  256 bits. hashSize =  160 bits */
+const uint64_t SKEIN_256_IV_160[] = {
+       MK_64(0x14202314, 0x72825E98),
+       MK_64(0x2AC4E9A2, 0x5A77E590),
+       MK_64(0xD47A5856, 0x8838D63E),
+       MK_64(0x2DD2E496, 0x8586AB7D)
+};
+
+/* blkSize =  256 bits. hashSize =  224 bits */
+const uint64_t SKEIN_256_IV_224[] = {
+       MK_64(0xC6098A8C, 0x9AE5EA0B),
+       MK_64(0x876D5686, 0x08C5191C),
+       MK_64(0x99CB88D7, 0xD7F53884),
+       MK_64(0x384BDDB1, 0xAEDDB5DE)
+};
+
+/* blkSize =  256 bits. hashSize =  256 bits */
+const uint64_t SKEIN_256_IV_256[] = {
+       MK_64(0xFC9DA860, 0xD048B449),
+       MK_64(0x2FCA6647, 0x9FA7D833),
+       MK_64(0xB33BC389, 0x6656840F),
+       MK_64(0x6A54E920, 0xFDE8DA69)
+};
+
+/* blkSize =  512 bits. hashSize =  128 bits */
+const uint64_t SKEIN_512_IV_128[] = {
+       MK_64(0xA8BC7BF3, 0x6FBF9F52),
+       MK_64(0x1E9872CE, 0xBD1AF0AA),
+       MK_64(0x309B1790, 0xB32190D3),
+       MK_64(0xBCFBB854, 0x3F94805C),
+       MK_64(0x0DA61BCD, 0x6E31B11B),
+       MK_64(0x1A18EBEA, 0xD46A32E3),
+       MK_64(0xA2CC5B18, 0xCE84AA82),
+       MK_64(0x6982AB28, 0x9D46982D)
+};
+
+/* blkSize =  512 bits. hashSize =  160 bits */
+const uint64_t SKEIN_512_IV_160[] = {
+       MK_64(0x28B81A2A, 0xE013BD91),
+       MK_64(0xC2F11668, 0xB5BDF78F),
+       MK_64(0x1760D8F3, 0xF6A56F12),
+       MK_64(0x4FB74758, 0x8239904F),
+       MK_64(0x21EDE07F, 0x7EAF5056),
+       MK_64(0xD908922E, 0x63ED70B8),
+       MK_64(0xB8EC76FF, 0xECCB52FA),
+       MK_64(0x01A47BB8, 0xA3F27A6E)
+};
+
+/* blkSize =  512 bits. hashSize =  224 bits */
+const uint64_t SKEIN_512_IV_224[] = {
+       MK_64(0xCCD06162, 0x48677224),
+       MK_64(0xCBA65CF3, 0xA92339EF),
+       MK_64(0x8CCD69D6, 0x52FF4B64),
+       MK_64(0x398AED7B, 0x3AB890B4),
+       MK_64(0x0F59D1B1, 0x457D2BD0),
+       MK_64(0x6776FE65, 0x75D4EB3D),
+       MK_64(0x99FBC70E, 0x997413E9),
+       MK_64(0x9E2CFCCF, 0xE1C41EF7)
+};
+
+/* blkSize =  512 bits. hashSize =  256 bits */
+const uint64_t SKEIN_512_IV_256[] = {
+       MK_64(0xCCD044A1, 0x2FDB3E13),
+       MK_64(0xE8359030, 0x1A79A9EB),
+       MK_64(0x55AEA061, 0x4F816E6F),
+       MK_64(0x2A2767A4, 0xAE9B94DB),
+       MK_64(0xEC06025E, 0x74DD7683),
+       MK_64(0xE7A436CD, 0xC4746251),
+       MK_64(0xC36FBAF9, 0x393AD185),
+       MK_64(0x3EEDBA18, 0x33EDFC13)
+};
+
+/* blkSize =  512 bits. hashSize =  384 bits */
+const uint64_t SKEIN_512_IV_384[] = {
+       MK_64(0xA3F6C6BF, 0x3A75EF5F),
+       MK_64(0xB0FEF9CC, 0xFD84FAA4),
+       MK_64(0x9D77DD66, 0x3D770CFE),
+       MK_64(0xD798CBF3, 0xB468FDDA),
+       MK_64(0x1BC4A666, 0x8A0E4465),
+       MK_64(0x7ED7D434, 0xE5807407),
+       MK_64(0x548FC1AC, 0xD4EC44D6),
+       MK_64(0x266E1754, 0x6AA18FF8)
+};
+
+/* blkSize =  512 bits. hashSize =  512 bits */
+const uint64_t SKEIN_512_IV_512[] = {
+       MK_64(0x4903ADFF, 0x749C51CE),
+       MK_64(0x0D95DE39, 0x9746DF03),
+       MK_64(0x8FD19341, 0x27C79BCE),
+       MK_64(0x9A255629, 0xFF352CB1),
+       MK_64(0x5DB62599, 0xDF6CA7B0),
+       MK_64(0xEABE394C, 0xA9D5C3F4),
+       MK_64(0x991112C7, 0x1A75B523),
+       MK_64(0xAE18A40B, 0x660FCC33)
+};
+
+/* blkSize = 1024 bits. hashSize =  384 bits */
+const uint64_t SKEIN1024_IV_384[] = {
+       MK_64(0x5102B6B8, 0xC1894A35),
+       MK_64(0xFEEBC9E3, 0xFE8AF11A),
+       MK_64(0x0C807F06, 0xE32BED71),
+       MK_64(0x60C13A52, 0xB41A91F6),
+       MK_64(0x9716D35D, 0xD4917C38),
+       MK_64(0xE780DF12, 0x6FD31D3A),
+       MK_64(0x797846B6, 0xC898303A),
+       MK_64(0xB172C2A8, 0xB3572A3B),
+       MK_64(0xC9BC8203, 0xA6104A6C),
+       MK_64(0x65909338, 0xD75624F4),
+       MK_64(0x94BCC568, 0x4B3F81A0),
+       MK_64(0x3EBBF51E, 0x10ECFD46),
+       MK_64(0x2DF50F0B, 0xEEB08542),
+       MK_64(0x3B5A6530, 0x0DBC6516),
+       MK_64(0x484B9CD2, 0x167BBCE1),
+       MK_64(0x2D136947, 0xD4CBAFEA)
+};
+
+/* blkSize = 1024 bits. hashSize =  512 bits */
+const uint64_t SKEIN1024_IV_512[] = {
+       MK_64(0xCAEC0E5D, 0x7C1B1B18),
+       MK_64(0xA01B0E04, 0x5F03E802),
+       MK_64(0x33840451, 0xED912885),
+       MK_64(0x374AFB04, 0xEAEC2E1C),
+       MK_64(0xDF25A0E2, 0x813581F7),
+       MK_64(0xE4004093, 0x8B12F9D2),
+       MK_64(0xA662D539, 0xC2ED39B6),
+       MK_64(0xFA8B85CF, 0x45D8C75A),
+       MK_64(0x8316ED8E, 0x29EDE796),
+       MK_64(0x053289C0, 0x2E9F91B8),
+       MK_64(0xC3F8EF1D, 0x6D518B73),
+       MK_64(0xBDCEC3C4, 0xD5EF332E),
+       MK_64(0x549A7E52, 0x22974487),
+       MK_64(0x67070872, 0x5B749816),
+       MK_64(0xB9CD28FB, 0xF0581BD1),
+       MK_64(0x0E2940B8, 0x15804974)
+};
+
+/* blkSize = 1024 bits. hashSize = 1024 bits */
+const uint64_t SKEIN1024_IV_1024[] = {
+       MK_64(0xD593DA07, 0x41E72355),
+       MK_64(0x15B5E511, 0xAC73E00C),
+       MK_64(0x5180E5AE, 0xBAF2C4F0),
+       MK_64(0x03BD41D3, 0xFCBCAFAF),
+       MK_64(0x1CAEC6FD, 0x1983A898),
+       MK_64(0x6E510B8B, 0xCDD0589F),
+       MK_64(0x77E2BDFD, 0xC6394ADA),
+       MK_64(0xC11E1DB5, 0x24DCB0A3),
+       MK_64(0xD6D14AF9, 0xC6329AB5),
+       MK_64(0x6A9B0BFC, 0x6EB67E0D),
+       MK_64(0x9243C60D, 0xCCFF1332),
+       MK_64(0x1A1F1DDE, 0x743F02D4),
+       MK_64(0x0996753C, 0x10ED0BB8),
+       MK_64(0x6572DD22, 0xF2B4969A),
+       MK_64(0x61FD3062, 0xD00A579A),
+       MK_64(0x1DE0536E, 0x8682E539)
+};
diff --git a/module/icp/algs/skein/skein_port.h b/module/icp/algs/skein/skein_port.h
new file mode 100644 (file)
index 0000000..1b02252
--- /dev/null
@@ -0,0 +1,128 @@
+/*
+ * Platform-specific definitions for Skein hash function.
+ *
+ * Source code author: Doug Whiting, 2008.
+ *
+ * This algorithm and source code is released to the public domain.
+ *
+ * Many thanks to Brian Gladman for his portable header files.
+ *
+ * To port Skein to an "unsupported" platform, change the definitions
+ * in this file appropriately.
+ */
+/* Copyright 2013 Doug Whiting. This code is released to the public domain. */
+
+#ifndef        _SKEIN_PORT_H_
+#define        _SKEIN_PORT_H_
+
+#include <sys/types.h> /* get integer type definitions */
+#include <sys/systm.h> /* for bcopy() */
+
+/*
+ * RotL_64(x, N): rotate x left by N bits; x is expected to be a uint64_t.
+ * The right-shift count is masked to 0..63 so that N == 0 yields x instead
+ * of shifting by 64, which is undefined behavior in C. Results for N in
+ * 1..63 are unchanged. A definition supplied earlier takes precedence.
+ */
+#ifndef        RotL_64
+#define        RotL_64(x, N)   (((x) << (N)) | ((x) >> ((64 - (N)) & 63)))
+#endif
+
+/*
+ * Skein is "natively" little-endian (unlike SHA-xxx), for optimal
+ * performance on x86 CPUs. The Skein code requires the following
+ * definitions for dealing with endianness:
+ *
+ *    SKEIN_NEED_SWAP:  0 for little-endian, 1 for big-endian
+ *    Skein_Put64_LSB_First
+ *    Skein_Get64_LSB_First
+ *    Skein_Swap64
+ *
+ * If SKEIN_NEED_SWAP is defined at compile time, it is used here
+ * along with the portable versions of Put64/Get64/Swap64, which
+ * are slow in general.
+ *
+ * Otherwise, an "auto-detect" of endianness is attempted below.
+ * If the default handling doesn't work well, the user may insert
+ * platform-specific code instead (e.g., for big-endian CPUs).
+ *
+ */
+#ifndef        SKEIN_NEED_SWAP         /* compile-time "override" for endianness? */
+
+#include <sys/isa_defs.h>      /* get endianness selection */
+
+#define        PLATFORM_MUST_ALIGN     _ALIGNMENT_REQUIRED
+#if    defined(_BIG_ENDIAN)
+/* here for big-endian CPUs */
+#define        SKEIN_NEED_SWAP   (1)
+#else
+/* here for x86 and x86-64 CPUs (and other detected little-endian CPUs) */
+#define        SKEIN_NEED_SWAP   (0)
+#if    PLATFORM_MUST_ALIGN == 0        /* ok to use "fast" versions? */
+#define        Skein_Put64_LSB_First(dst08, src64, bCnt) bcopy(src64, dst08, bCnt)
+#define        Skein_Get64_LSB_First(dst64, src08, wCnt) \
+       bcopy(src08, dst64, 8 * (wCnt))
+#endif
+#endif
+
+#endif                         /* ifndef SKEIN_NEED_SWAP */
+
+/*
+ * Provide any definitions still needed.
+ */
+/*
+ * Skein_Swap64(w64): when SKEIN_NEED_SWAP is nonzero, yields w64 with its
+ * eight bytes in reverse order; otherwise expands to w64 unchanged.
+ * The swapping form evaluates its argument eight times, so do not pass
+ * an expression with side effects. A definition supplied earlier takes
+ * precedence.
+ */
+#ifndef        Skein_Swap64    /* swap for big-endian, nop for little-endian */
+#if    SKEIN_NEED_SWAP
+#define        Skein_Swap64(w64)                               \
+       (((((uint64_t)(w64)) & 0xFF) << 56) |           \
+       (((((uint64_t)(w64)) >> 8) & 0xFF) << 48) |     \
+       (((((uint64_t)(w64)) >> 16) & 0xFF) << 40) |    \
+       (((((uint64_t)(w64)) >> 24) & 0xFF) << 32) |    \
+       (((((uint64_t)(w64)) >> 32) & 0xFF) << 24) |    \
+       (((((uint64_t)(w64)) >> 40) & 0xFF) << 16) |    \
+       (((((uint64_t)(w64)) >> 48) & 0xFF) << 8) |     \
+       (((((uint64_t)(w64)) >> 56) & 0xFF)))
+#else
+#define        Skein_Swap64(w64)  (w64)
+#endif
+#endif                         /* ifndef Skein_Swap64 */
+
+#ifndef        Skein_Put64_LSB_First
+/*
+ * Write the first bCnt bytes of the 64-bit words at src to dst, emitting
+ * the least significant byte of each word first. Only shifts are used,
+ * so the output does not depend on host byte order (portable but slow).
+ * The body is emitted only where SKEIN_PORT_CODE is defined; elsewhere
+ * this is just the prototype.
+ */
+void
+Skein_Put64_LSB_First(uint8_t *dst, const uint64_t *src, size_t bCnt)
+#ifdef SKEIN_PORT_CODE         /* emit the function body in this unit? */
+{
+       size_t byte_idx = 0;
+
+       while (byte_idx < bCnt) {
+               const uint64_t word = src[byte_idx / 8];
+               const size_t shift = 8 * (byte_idx % 8);
+
+               dst[byte_idx] = (uint8_t)(word >> shift);
+               byte_idx++;
+       }
+}
+#else
+;                              /* prototype only */
+#endif
+#endif                         /* ifndef Skein_Put64_LSB_First */
+
+#ifndef        Skein_Get64_LSB_First
+/*
+ * Build wCnt 64-bit words in dst from the byte stream at src, taking the
+ * first byte of every group of eight as the least significant. Only
+ * shifts are used, so the result does not depend on host byte order
+ * (portable but slow). The body is emitted only where SKEIN_PORT_CODE is
+ * defined; elsewhere this is just the prototype.
+ */
+void
+Skein_Get64_LSB_First(uint64_t *dst, const uint8_t *src, size_t wCnt)
+#ifdef SKEIN_PORT_CODE         /* emit the function body in this unit? */
+{
+       size_t offset, k;
+
+       for (offset = 0; offset < 8 * wCnt; offset += 8) {
+               const uint8_t *group = &src[offset];
+               uint64_t word = 0;
+
+               /* fold the group in from its last byte down to its first */
+               for (k = 8; k-- > 0; )
+                       word = (word << 8) | (uint64_t)group[k];
+               dst[offset / 8] = word;
+       }
+}
+#else
+;                              /* prototype only */
+#endif
+#endif                         /* ifndef Skein_Get64_LSB_First */
+
+#endif /* _SKEIN_PORT_H_ */
index b689c9022566fc5cbf70120df31fcd2dd0a7edc1..d55c5eb483f1a043dd41e3c023b02f6cbb468936 100644 (file)
  */
 
 /*
- * This file was generated by a perl script (sha512-x86_64.pl) that could
- * be used to generate sha256 and sha512 variants from the same code base.
- * For our purposes, we only need sha256 and so getting the perl script to
- * run as part of the build process seemed superfluous. The comments from
- * the original file have been pasted above.
+ * This file was generated by a perl script (sha512-x86_64.pl) that was
+ * used to generate sha256 and sha512 variants from the same code base.
+ * The comments from the original file have been pasted above.
  */
 
 #if defined(lint) || defined(__lint)
diff --git a/module/icp/asm-x86_64/sha2/sha512_impl.S b/module/icp/asm-x86_64/sha2/sha512_impl.S
new file mode 100644 (file)
index 0000000..24a4174
--- /dev/null
@@ -0,0 +1,2083 @@
+/*
+ * ====================================================================
+ * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+ * project. Rights for redistribution and usage in source and binary
+ * forms are granted according to the OpenSSL license.
+ * ====================================================================
+ *
+ * sha256/512_block procedure for x86_64.
+ *
+ * 40% improvement over compiler-generated code on Opteron. On EM64T
+ * sha256 was observed to run >80% faster and sha512 - >40%. No magical
+ * tricks, just straight implementation... I really wonder why gcc
+ * [being armed with inline assembler] fails to generate as fast code.
+ * The only thing which is cool about this module is that it's very
+ * same instruction sequence used for both SHA-256 and SHA-512. In
+ * former case the instructions operate on 32-bit operands, while in
+ * latter - on 64-bit ones. All I had to do is to get one flavor right,
+ * the other one passed the test right away:-)
+ *
+ * sha256_block runs in ~1005 cycles on Opteron, which gives you
+ * asymptotic performance of 64*1000/1005=63.7MBps times CPU clock
+ * frequency in GHz. sha512_block runs in ~1275 cycles, which results
+ * in 128*1000/1275=100MBps per GHz. Is there room for improvement?
+ * Well, if you compare it to IA-64 implementation, which maintains
+ * X[16] in register bank[!], tends to 4 instructions per CPU clock
+ * cycle and runs in 1003 cycles, 1275 is very good result for 3-way
+ * issue Opteron pipeline and X[16] maintained in memory. So that *if*
+ * there is a way to improve it, *then* the only way would be to try to
+ * offload X[16] updates to SSE unit, but that would require "deeper"
+ * loop unroll, which in turn would naturally cause size blow-up, not
+ * to mention increased complexity! And once again, only *if* it's
+ * actually possible to noticeably improve overall ILP, instruction
+ * level parallelism, on a given CPU implementation in this case.
+ *
+ * Special note on Intel EM64T. While Opteron CPU exhibits perfect
+ * performance ratio of 1.5 between 64- and 32-bit flavors [see above],
+ * [currently available] EM64T CPUs apparently are far from it. On the
+ * contrary, 64-bit version, sha512_block, is ~30% *slower* than 32-bit
+ * sha256_block:-( This is presumably because 64-bit shifts/rotates
+ * apparently are not atomic instructions, but implemented in microcode.
+ */
+
+/*
+ * OpenSolaris OS modifications
+ *
+ * Sun elects to use this software under the BSD license.
+ *
+ * This source originates from OpenSSL file sha512-x86_64.pl at
+ * ftp://ftp.openssl.org/snapshot/openssl-0.9.8-stable-SNAP-20080131.tar.gz
+ * (presumably for future OpenSSL release 0.9.8h), with these changes:
+ *
+ * 1. Added perl "use strict" and declared variables.
+ *
+ * 2. Added OpenSolaris ENTRY_NP/SET_SIZE macros from
+ * /usr/include/sys/asm_linkage.h, .ident keywords, and lint(1B) guards.
+ *
+ * 3. Removed x86_64-xlate.pl script (not needed for as(1) or gas(1)
+ * assemblers).  Replaced the .picmeup macro with assembler code.
+ *
+ * 4. Added 8 to $ctx, as OpenSolaris OS has an extra 4-byte field, "algotype",
+ * at the beginning of SHA2_CTX (the next field is 8-byte aligned).
+ */
+
+/*
+ * This file was generated by a perl script (sha512-x86_64.pl) that was
+ * used to generate sha256 and sha512 variants from the same code base.
+ * The comments from the original file have been pasted above.
+ */
+
+
+#if defined(lint) || defined(__lint)
+#include <sys/stdint.h>
+#include <sha2/sha2.h>
+
+/*
+ * Empty stand-in compiled only when lint or __lint is defined, so that
+ * lint sees a C definition with this signature. The working routine is
+ * the assembly ENTRY_NP(SHA512TransformBlocks) in the #else branch.
+ */
+/* ARGSUSED */
+void
+SHA512TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num)
+{
+}
+
+
+#else
+#define _ASM
+#include <sys/asm_linkage.h>
+
+ENTRY_NP(SHA512TransformBlocks)
+       push    %rbx
+       push    %rbp
+       push    %r12
+       push    %r13
+       push    %r14
+       push    %r15
+       mov     %rsp,%rbp               # copy %rsp
+       shl     $4,%rdx         # num*16
+       sub     $16*8+4*8,%rsp
+       lea     (%rsi,%rdx,8),%rdx      # inp+num*16*8
+       and     $-64,%rsp               # align stack frame
+       add     $8,%rdi         # Skip OpenSolaris field, "algotype"
+       mov     %rdi,16*8+0*8(%rsp)             # save ctx, 1st arg
+       mov     %rsi,16*8+1*8(%rsp)             # save inp, 2nd arg
+       mov     %rdx,16*8+2*8(%rsp)             # save end pointer, "3rd" arg
+       mov     %rbp,16*8+3*8(%rsp)             # save copy of %rsp
+
+       /.picmeup %rbp
+       / The .picmeup pseudo-directive, from perlasm/x86_64_xlate.pl, puts
+       / the address of the "next" instruction into the target register
+       / (%rbp).  This generates these 2 instructions:
+       lea     .Llea(%rip),%rbp
+       /nop    / .picmeup generates a nop for mod 8 alignment--not needed here
+
+.Llea:
+       lea     K512-.(%rbp),%rbp
+
+       mov     8*0(%rdi),%rax
+       mov     8*1(%rdi),%rbx
+       mov     8*2(%rdi),%rcx
+       mov     8*3(%rdi),%rdx
+       mov     8*4(%rdi),%r8
+       mov     8*5(%rdi),%r9
+       mov     8*6(%rdi),%r10
+       mov     8*7(%rdi),%r11
+       jmp     .Lloop
+
+.align 16
+.Lloop:
+       xor     %rdi,%rdi
+       mov     8*0(%rsi),%r12
+       bswap   %r12
+       mov     %r8,%r13
+       mov     %r8,%r14
+       mov     %r9,%r15
+
+       ror     $14,%r13
+       ror     $18,%r14
+       xor     %r10,%r15                       # f^g
+
+       xor     %r14,%r13
+       ror     $23,%r14
+       and     %r8,%r15                        # (f^g)&e
+       mov     %r12,0(%rsp)
+
+       xor     %r14,%r13                       # Sigma1(e)
+       xor     %r10,%r15                       # Ch(e,f,g)=((f^g)&e)^g
+       add     %r11,%r12                       # T1+=h
+
+       mov     %rax,%r11
+       add     %r13,%r12                       # T1+=Sigma1(e)
+
+       add     %r15,%r12                       # T1+=Ch(e,f,g)
+       mov     %rax,%r13
+       mov     %rax,%r14
+
+       ror     $28,%r11
+       ror     $34,%r13
+       mov     %rax,%r15
+       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
+
+       xor     %r13,%r11
+       ror     $5,%r13
+       or      %rcx,%r14                       # a|c
+
+       xor     %r13,%r11                       # h=Sigma0(a)
+       and     %rcx,%r15                       # a&c
+       add     %r12,%rdx                       # d+=T1
+
+       and     %rbx,%r14                       # (a|c)&b
+       add     %r12,%r11                       # h+=T1
+
+       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
+       lea     1(%rdi),%rdi    # round++
+
+       add     %r14,%r11                       # h+=Maj(a,b,c)
+       mov     8*1(%rsi),%r12
+       bswap   %r12
+       mov     %rdx,%r13
+       mov     %rdx,%r14
+       mov     %r8,%r15
+
+       ror     $14,%r13
+       ror     $18,%r14
+       xor     %r9,%r15                        # f^g
+
+       xor     %r14,%r13
+       ror     $23,%r14
+       and     %rdx,%r15                       # (f^g)&e
+       mov     %r12,8(%rsp)
+
+       xor     %r14,%r13                       # Sigma1(e)
+       xor     %r9,%r15                        # Ch(e,f,g)=((f^g)&e)^g
+       add     %r10,%r12                       # T1+=h
+
+       mov     %r11,%r10
+       add     %r13,%r12                       # T1+=Sigma1(e)
+
+       add     %r15,%r12                       # T1+=Ch(e,f,g)
+       mov     %r11,%r13
+       mov     %r11,%r14
+
+       ror     $28,%r10
+       ror     $34,%r13
+       mov     %r11,%r15
+       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
+
+       xor     %r13,%r10
+       ror     $5,%r13
+       or      %rbx,%r14                       # a|c
+
+       xor     %r13,%r10                       # h=Sigma0(a)
+       and     %rbx,%r15                       # a&c
+       add     %r12,%rcx                       # d+=T1
+
+       and     %rax,%r14                       # (a|c)&b
+       add     %r12,%r10                       # h+=T1
+
+       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
+       lea     1(%rdi),%rdi    # round++
+
+       add     %r14,%r10                       # h+=Maj(a,b,c)
+       mov     8*2(%rsi),%r12
+       bswap   %r12
+       mov     %rcx,%r13
+       mov     %rcx,%r14
+       mov     %rdx,%r15
+
+       ror     $14,%r13
+       ror     $18,%r14
+       xor     %r8,%r15                        # f^g
+
+       xor     %r14,%r13
+       ror     $23,%r14
+       and     %rcx,%r15                       # (f^g)&e
+       mov     %r12,16(%rsp)
+
+       xor     %r14,%r13                       # Sigma1(e)
+       xor     %r8,%r15                        # Ch(e,f,g)=((f^g)&e)^g
+       add     %r9,%r12                        # T1+=h
+
+       mov     %r10,%r9
+       add     %r13,%r12                       # T1+=Sigma1(e)
+
+       add     %r15,%r12                       # T1+=Ch(e,f,g)
+       mov     %r10,%r13
+       mov     %r10,%r14
+
+       ror     $28,%r9
+       ror     $34,%r13
+       mov     %r10,%r15
+       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
+
+       xor     %r13,%r9
+       ror     $5,%r13
+       or      %rax,%r14                       # a|c
+
+       xor     %r13,%r9                        # h=Sigma0(a)
+       and     %rax,%r15                       # a&c
+       add     %r12,%rbx                       # d+=T1
+
+       and     %r11,%r14                       # (a|c)&b
+       add     %r12,%r9                        # h+=T1
+
+       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
+       lea     1(%rdi),%rdi    # round++
+
+       add     %r14,%r9                        # h+=Maj(a,b,c)
+       mov     8*3(%rsi),%r12
+       bswap   %r12
+       mov     %rbx,%r13
+       mov     %rbx,%r14
+       mov     %rcx,%r15
+
+       ror     $14,%r13
+       ror     $18,%r14
+       xor     %rdx,%r15                       # f^g
+
+       xor     %r14,%r13
+       ror     $23,%r14
+       and     %rbx,%r15                       # (f^g)&e
+       mov     %r12,24(%rsp)
+
+       xor     %r14,%r13                       # Sigma1(e)
+       xor     %rdx,%r15                       # Ch(e,f,g)=((f^g)&e)^g
+       add     %r8,%r12                        # T1+=h
+
+       mov     %r9,%r8
+       add     %r13,%r12                       # T1+=Sigma1(e)
+
+       add     %r15,%r12                       # T1+=Ch(e,f,g)
+       mov     %r9,%r13
+       mov     %r9,%r14
+
+       ror     $28,%r8
+       ror     $34,%r13
+       mov     %r9,%r15
+       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
+
+       xor     %r13,%r8
+       ror     $5,%r13
+       or      %r11,%r14                       # a|c
+
+       xor     %r13,%r8                        # h=Sigma0(a)
+       and     %r11,%r15                       # a&c
+       add     %r12,%rax                       # d+=T1
+
+       and     %r10,%r14                       # (a|c)&b
+       add     %r12,%r8                        # h+=T1
+
+       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
+       lea     1(%rdi),%rdi    # round++
+
+       add     %r14,%r8                        # h+=Maj(a,b,c)
+       mov     8*4(%rsi),%r12
+       bswap   %r12
+       mov     %rax,%r13
+       mov     %rax,%r14
+       mov     %rbx,%r15
+
+       ror     $14,%r13
+       ror     $18,%r14
+       xor     %rcx,%r15                       # f^g
+
+       xor     %r14,%r13
+       ror     $23,%r14
+       and     %rax,%r15                       # (f^g)&e
+       mov     %r12,32(%rsp)
+
+       xor     %r14,%r13                       # Sigma1(e)
+       xor     %rcx,%r15                       # Ch(e,f,g)=((f^g)&e)^g
+       add     %rdx,%r12                       # T1+=h
+
+       mov     %r8,%rdx
+       add     %r13,%r12                       # T1+=Sigma1(e)
+
+       add     %r15,%r12                       # T1+=Ch(e,f,g)
+       mov     %r8,%r13
+       mov     %r8,%r14
+
+       ror     $28,%rdx
+       ror     $34,%r13
+       mov     %r8,%r15
+       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
+
+       xor     %r13,%rdx
+       ror     $5,%r13
+       or      %r10,%r14                       # a|c
+
+       xor     %r13,%rdx                       # h=Sigma0(a)
+       and     %r10,%r15                       # a&c
+       add     %r12,%r11                       # d+=T1
+
+       and     %r9,%r14                        # (a|c)&b
+       add     %r12,%rdx                       # h+=T1
+
+       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
+       lea     1(%rdi),%rdi    # round++
+
+       add     %r14,%rdx                       # h+=Maj(a,b,c)
+       mov     8*5(%rsi),%r12
+       bswap   %r12
+       mov     %r11,%r13
+       mov     %r11,%r14
+       mov     %rax,%r15
+
+       ror     $14,%r13
+       ror     $18,%r14
+       xor     %rbx,%r15                       # f^g
+
+       xor     %r14,%r13
+       ror     $23,%r14
+       and     %r11,%r15                       # (f^g)&e
+       mov     %r12,40(%rsp)
+
+       xor     %r14,%r13                       # Sigma1(e)
+       xor     %rbx,%r15                       # Ch(e,f,g)=((f^g)&e)^g
+       add     %rcx,%r12                       # T1+=h
+
+       mov     %rdx,%rcx
+       add     %r13,%r12                       # T1+=Sigma1(e)
+
+       add     %r15,%r12                       # T1+=Ch(e,f,g)
+       mov     %rdx,%r13
+       mov     %rdx,%r14
+
+       ror     $28,%rcx
+       ror     $34,%r13
+       mov     %rdx,%r15
+       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
+
+       xor     %r13,%rcx
+       ror     $5,%r13
+       or      %r9,%r14                        # a|c
+
+       xor     %r13,%rcx                       # h=Sigma0(a)
+       and     %r9,%r15                        # a&c
+       add     %r12,%r10                       # d+=T1
+
+       and     %r8,%r14                        # (a|c)&b
+       add     %r12,%rcx                       # h+=T1
+
+       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
+       lea     1(%rdi),%rdi    # round++
+
+       add     %r14,%rcx                       # h+=Maj(a,b,c)
+       mov     8*6(%rsi),%r12
+       bswap   %r12
+       mov     %r10,%r13
+       mov     %r10,%r14
+       mov     %r11,%r15
+
+       ror     $14,%r13
+       ror     $18,%r14
+       xor     %rax,%r15                       # f^g
+
+       xor     %r14,%r13
+       ror     $23,%r14
+       and     %r10,%r15                       # (f^g)&e
+       mov     %r12,48(%rsp)
+
+       xor     %r14,%r13                       # Sigma1(e)
+       xor     %rax,%r15                       # Ch(e,f,g)=((f^g)&e)^g
+       add     %rbx,%r12                       # T1+=h
+
+       mov     %rcx,%rbx
+       add     %r13,%r12                       # T1+=Sigma1(e)
+
+       add     %r15,%r12                       # T1+=Ch(e,f,g)
+       mov     %rcx,%r13
+       mov     %rcx,%r14
+
+       ror     $28,%rbx
+       ror     $34,%r13
+       mov     %rcx,%r15
+       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
+
+       xor     %r13,%rbx
+       ror     $5,%r13
+       or      %r8,%r14                        # a|c
+
+       xor     %r13,%rbx                       # h=Sigma0(a)
+       and     %r8,%r15                        # a&c
+       add     %r12,%r9                        # d+=T1
+
+       and     %rdx,%r14                       # (a|c)&b
+       add     %r12,%rbx                       # h+=T1
+
+       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
+       lea     1(%rdi),%rdi    # round++
+
+       add     %r14,%rbx                       # h+=Maj(a,b,c)
+       mov     8*7(%rsi),%r12
+       bswap   %r12
+       mov     %r9,%r13
+       mov     %r9,%r14
+       mov     %r10,%r15
+
+       ror     $14,%r13
+       ror     $18,%r14
+       xor     %r11,%r15                       # f^g
+
+       xor     %r14,%r13
+       ror     $23,%r14
+       and     %r9,%r15                        # (f^g)&e
+       mov     %r12,56(%rsp)
+
+       xor     %r14,%r13                       # Sigma1(e)
+       xor     %r11,%r15                       # Ch(e,f,g)=((f^g)&e)^g
+       add     %rax,%r12                       # T1+=h
+
+       mov     %rbx,%rax
+       add     %r13,%r12                       # T1+=Sigma1(e)
+
+       add     %r15,%r12                       # T1+=Ch(e,f,g)
+       mov     %rbx,%r13
+       mov     %rbx,%r14
+
+       ror     $28,%rax
+       ror     $34,%r13
+       mov     %rbx,%r15
+       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
+
+       xor     %r13,%rax
+       ror     $5,%r13
+       or      %rdx,%r14                       # a|c
+
+       xor     %r13,%rax                       # h=Sigma0(a)
+       and     %rdx,%r15                       # a&c
+       add     %r12,%r8                        # d+=T1
+
+       and     %rcx,%r14                       # (a|c)&b
+       add     %r12,%rax                       # h+=T1
+
+       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
+       lea     1(%rdi),%rdi    # round++
+
+       add     %r14,%rax                       # h+=Maj(a,b,c)
+       mov     8*8(%rsi),%r12
+       bswap   %r12
+       mov     %r8,%r13
+       mov     %r8,%r14
+       mov     %r9,%r15
+
+       ror     $14,%r13
+       ror     $18,%r14
+       xor     %r10,%r15                       # f^g
+
+       xor     %r14,%r13
+       ror     $23,%r14
+       and     %r8,%r15                        # (f^g)&e
+       mov     %r12,64(%rsp)
+
+       xor     %r14,%r13                       # Sigma1(e)
+       xor     %r10,%r15                       # Ch(e,f,g)=((f^g)&e)^g
+       add     %r11,%r12                       # T1+=h
+
+       mov     %rax,%r11
+       add     %r13,%r12                       # T1+=Sigma1(e)
+
+       add     %r15,%r12                       # T1+=Ch(e,f,g)
+       mov     %rax,%r13
+       mov     %rax,%r14
+
+       ror     $28,%r11
+       ror     $34,%r13
+       mov     %rax,%r15
+       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
+
+       xor     %r13,%r11
+       ror     $5,%r13
+       or      %rcx,%r14                       # a|c
+
+       xor     %r13,%r11                       # h=Sigma0(a)
+       and     %rcx,%r15                       # a&c
+       add     %r12,%rdx                       # d+=T1
+
+       and     %rbx,%r14                       # (a|c)&b
+       add     %r12,%r11                       # h+=T1
+
+       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
+       lea     1(%rdi),%rdi    # round++
+
+       add     %r14,%r11                       # h+=Maj(a,b,c)
+       mov     8*9(%rsi),%r12
+       bswap   %r12
+       mov     %rdx,%r13
+       mov     %rdx,%r14
+       mov     %r8,%r15
+
+       ror     $14,%r13
+       ror     $18,%r14
+       xor     %r9,%r15                        # f^g
+
+       xor     %r14,%r13
+       ror     $23,%r14
+       and     %rdx,%r15                       # (f^g)&e
+       mov     %r12,72(%rsp)
+
+       xor     %r14,%r13                       # Sigma1(e)
+       xor     %r9,%r15                        # Ch(e,f,g)=((f^g)&e)^g
+       add     %r10,%r12                       # T1+=h
+
+       mov     %r11,%r10
+       add     %r13,%r12                       # T1+=Sigma1(e)
+
+       add     %r15,%r12                       # T1+=Ch(e,f,g)
+       mov     %r11,%r13
+       mov     %r11,%r14
+
+       ror     $28,%r10
+       ror     $34,%r13
+       mov     %r11,%r15
+       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
+
+       xor     %r13,%r10
+       ror     $5,%r13
+       or      %rbx,%r14                       # a|c
+
+       xor     %r13,%r10                       # h=Sigma0(a)
+       and     %rbx,%r15                       # a&c
+       add     %r12,%rcx                       # d+=T1
+
+       and     %rax,%r14                       # (a|c)&b
+       add     %r12,%r10                       # h+=T1
+
+       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
+       lea     1(%rdi),%rdi    # round++
+
+       add     %r14,%r10                       # h+=Maj(a,b,c)
+       mov     8*10(%rsi),%r12
+       bswap   %r12
+       mov     %rcx,%r13
+       mov     %rcx,%r14
+       mov     %rdx,%r15
+
+       ror     $14,%r13
+       ror     $18,%r14
+       xor     %r8,%r15                        # f^g
+
+       xor     %r14,%r13
+       ror     $23,%r14
+       and     %rcx,%r15                       # (f^g)&e
+       mov     %r12,80(%rsp)
+
+       xor     %r14,%r13                       # Sigma1(e)
+       xor     %r8,%r15                        # Ch(e,f,g)=((f^g)&e)^g
+       add     %r9,%r12                        # T1+=h
+
+       mov     %r10,%r9
+       add     %r13,%r12                       # T1+=Sigma1(e)
+
+       add     %r15,%r12                       # T1+=Ch(e,f,g)
+       mov     %r10,%r13
+       mov     %r10,%r14
+
+       ror     $28,%r9
+       ror     $34,%r13
+       mov     %r10,%r15
+       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
+
+       xor     %r13,%r9
+       ror     $5,%r13
+       or      %rax,%r14                       # a|c
+
+       xor     %r13,%r9                        # h=Sigma0(a)
+       and     %rax,%r15                       # a&c
+       add     %r12,%rbx                       # d+=T1
+
+       and     %r11,%r14                       # (a|c)&b
+       add     %r12,%r9                        # h+=T1
+
+       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
+       lea     1(%rdi),%rdi    # round++
+
+       add     %r14,%r9                        # h+=Maj(a,b,c)
+       mov     8*11(%rsi),%r12
+       bswap   %r12
+       mov     %rbx,%r13
+       mov     %rbx,%r14
+       mov     %rcx,%r15
+
+       ror     $14,%r13
+       ror     $18,%r14
+       xor     %rdx,%r15                       # f^g
+
+       xor     %r14,%r13
+       ror     $23,%r14
+       and     %rbx,%r15                       # (f^g)&e
+       mov     %r12,88(%rsp)
+
+       xor     %r14,%r13                       # Sigma1(e)
+       xor     %rdx,%r15                       # Ch(e,f,g)=((f^g)&e)^g
+       add     %r8,%r12                        # T1+=h
+
+       mov     %r9,%r8
+       add     %r13,%r12                       # T1+=Sigma1(e)
+
+       add     %r15,%r12                       # T1+=Ch(e,f,g)
+       mov     %r9,%r13
+       mov     %r9,%r14
+
+       ror     $28,%r8
+       ror     $34,%r13
+       mov     %r9,%r15
+       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
+
+       xor     %r13,%r8
+       ror     $5,%r13
+       or      %r11,%r14                       # a|c
+
+       xor     %r13,%r8                        # h=Sigma0(a)
+       and     %r11,%r15                       # a&c
+       add     %r12,%rax                       # d+=T1
+
+       and     %r10,%r14                       # (a|c)&b
+       add     %r12,%r8                        # h+=T1
+
+       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
+       lea     1(%rdi),%rdi    # round++
+
+       add     %r14,%r8                        # h+=Maj(a,b,c)
+       mov     8*12(%rsi),%r12
+       bswap   %r12
+       mov     %rax,%r13
+       mov     %rax,%r14
+       mov     %rbx,%r15
+
+       ror     $14,%r13
+       ror     $18,%r14
+       xor     %rcx,%r15                       # f^g
+
+       xor     %r14,%r13
+       ror     $23,%r14
+       and     %rax,%r15                       # (f^g)&e
+       mov     %r12,96(%rsp)
+
+       xor     %r14,%r13                       # Sigma1(e)
+       xor     %rcx,%r15                       # Ch(e,f,g)=((f^g)&e)^g
+       add     %rdx,%r12                       # T1+=h
+
+       mov     %r8,%rdx
+       add     %r13,%r12                       # T1+=Sigma1(e)
+
+       add     %r15,%r12                       # T1+=Ch(e,f,g)
+       mov     %r8,%r13
+       mov     %r8,%r14
+
+       ror     $28,%rdx
+       ror     $34,%r13
+       mov     %r8,%r15
+       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
+
+       xor     %r13,%rdx
+       ror     $5,%r13
+       or      %r10,%r14                       # a|c
+
+       xor     %r13,%rdx                       # h=Sigma0(a)
+       and     %r10,%r15                       # a&c
+       add     %r12,%r11                       # d+=T1
+
+       and     %r9,%r14                        # (a|c)&b
+       add     %r12,%rdx                       # h+=T1
+
+       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
+       lea     1(%rdi),%rdi    # round++
+
+       add     %r14,%rdx                       # h+=Maj(a,b,c)
+       mov     8*13(%rsi),%r12
+       bswap   %r12
+       mov     %r11,%r13
+       mov     %r11,%r14
+       mov     %rax,%r15
+
+       ror     $14,%r13
+       ror     $18,%r14
+       xor     %rbx,%r15                       # f^g
+
+       xor     %r14,%r13
+       ror     $23,%r14
+       and     %r11,%r15                       # (f^g)&e
+       mov     %r12,104(%rsp)
+
+       xor     %r14,%r13                       # Sigma1(e)
+       xor     %rbx,%r15                       # Ch(e,f,g)=((f^g)&e)^g
+       add     %rcx,%r12                       # T1+=h
+
+       mov     %rdx,%rcx
+       add     %r13,%r12                       # T1+=Sigma1(e)
+
+       add     %r15,%r12                       # T1+=Ch(e,f,g)
+       mov     %rdx,%r13
+       mov     %rdx,%r14
+
+       ror     $28,%rcx
+       ror     $34,%r13
+       mov     %rdx,%r15
+       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
+
+       xor     %r13,%rcx
+       ror     $5,%r13
+       or      %r9,%r14                        # a|c
+
+       xor     %r13,%rcx                       # h=Sigma0(a)
+       and     %r9,%r15                        # a&c
+       add     %r12,%r10                       # d+=T1
+
+       and     %r8,%r14                        # (a|c)&b
+       add     %r12,%rcx                       # h+=T1
+
+       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
+       lea     1(%rdi),%rdi    # round++
+
+       add     %r14,%rcx                       # h+=Maj(a,b,c)
+       mov     8*14(%rsi),%r12
+       bswap   %r12
+       mov     %r10,%r13
+       mov     %r10,%r14
+       mov     %r11,%r15
+
+       ror     $14,%r13
+       ror     $18,%r14
+       xor     %rax,%r15                       # f^g
+
+       xor     %r14,%r13
+       ror     $23,%r14
+       and     %r10,%r15                       # (f^g)&e
+       mov     %r12,112(%rsp)
+
+       xor     %r14,%r13                       # Sigma1(e)
+       xor     %rax,%r15                       # Ch(e,f,g)=((f^g)&e)^g
+       add     %rbx,%r12                       # T1+=h
+
+       mov     %rcx,%rbx
+       add     %r13,%r12                       # T1+=Sigma1(e)
+
+       add     %r15,%r12                       # T1+=Ch(e,f,g)
+       mov     %rcx,%r13
+       mov     %rcx,%r14
+
+       ror     $28,%rbx
+       ror     $34,%r13
+       mov     %rcx,%r15
+       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
+
+       xor     %r13,%rbx
+       ror     $5,%r13
+       or      %r8,%r14                        # a|c
+
+       xor     %r13,%rbx                       # h=Sigma0(a)
+       and     %r8,%r15                        # a&c
+       add     %r12,%r9                        # d+=T1
+
+       and     %rdx,%r14                       # (a|c)&b
+       add     %r12,%rbx                       # h+=T1
+
+       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
+       lea     1(%rdi),%rdi    # round++
+
+       add     %r14,%rbx                       # h+=Maj(a,b,c)
+       mov     8*15(%rsi),%r12
+       bswap   %r12
+       mov     %r9,%r13
+       mov     %r9,%r14
+       mov     %r10,%r15
+
+       ror     $14,%r13
+       ror     $18,%r14
+       xor     %r11,%r15                       # f^g
+
+       xor     %r14,%r13
+       ror     $23,%r14
+       and     %r9,%r15                        # (f^g)&e
+       mov     %r12,120(%rsp)
+
+       xor     %r14,%r13                       # Sigma1(e)
+       xor     %r11,%r15                       # Ch(e,f,g)=((f^g)&e)^g
+       add     %rax,%r12                       # T1+=h
+
+       mov     %rbx,%rax
+       add     %r13,%r12                       # T1+=Sigma1(e)
+
+       add     %r15,%r12                       # T1+=Ch(e,f,g)
+       mov     %rbx,%r13
+       mov     %rbx,%r14
+
+       ror     $28,%rax
+       ror     $34,%r13
+       mov     %rbx,%r15
+       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
+
+       xor     %r13,%rax
+       ror     $5,%r13
+       or      %rdx,%r14                       # a|c
+
+       xor     %r13,%rax                       # h=Sigma0(a)
+       and     %rdx,%r15                       # a&c
+       add     %r12,%r8                        # d+=T1
+
+       and     %rcx,%r14                       # (a|c)&b
+       add     %r12,%rax                       # h+=T1
+
+       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
+       lea     1(%rdi),%rdi    # round++
+
+       add     %r14,%rax                       # h+=Maj(a,b,c)
+       jmp     .Lrounds_16_xx
+.align 16
+.Lrounds_16_xx:
+       mov     8(%rsp),%r13
+       mov     112(%rsp),%r12
+
+       mov     %r13,%r15
+
+       shr     $7,%r13
+       ror     $1,%r15
+
+       xor     %r15,%r13
+       ror     $7,%r15
+
+       xor     %r15,%r13                       # sigma0(X[(i+1)&0xf])
+       mov     %r12,%r14
+
+       shr     $6,%r12
+       ror     $19,%r14
+
+       xor     %r14,%r12
+       ror     $42,%r14
+
+       xor     %r14,%r12                       # sigma1(X[(i+14)&0xf])
+
+       add     %r13,%r12
+
+       add     72(%rsp),%r12
+
+       add     0(%rsp),%r12
+       mov     %r8,%r13
+       mov     %r8,%r14
+       mov     %r9,%r15
+
+       ror     $14,%r13
+       ror     $18,%r14
+       xor     %r10,%r15                       # f^g
+
+       xor     %r14,%r13
+       ror     $23,%r14
+       and     %r8,%r15                        # (f^g)&e
+       mov     %r12,0(%rsp)
+
+       xor     %r14,%r13                       # Sigma1(e)
+       xor     %r10,%r15                       # Ch(e,f,g)=((f^g)&e)^g
+       add     %r11,%r12                       # T1+=h
+
+       mov     %rax,%r11
+       add     %r13,%r12                       # T1+=Sigma1(e)
+
+       add     %r15,%r12                       # T1+=Ch(e,f,g)
+       mov     %rax,%r13
+       mov     %rax,%r14
+
+       ror     $28,%r11
+       ror     $34,%r13
+       mov     %rax,%r15
+       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
+
+       xor     %r13,%r11
+       ror     $5,%r13
+       or      %rcx,%r14                       # a|c
+
+       xor     %r13,%r11                       # h=Sigma0(a)
+       and     %rcx,%r15                       # a&c
+       add     %r12,%rdx                       # d+=T1
+
+       and     %rbx,%r14                       # (a|c)&b
+       add     %r12,%r11                       # h+=T1
+
+       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
+       lea     1(%rdi),%rdi    # round++
+
+       add     %r14,%r11                       # h+=Maj(a,b,c)
+       mov     16(%rsp),%r13
+       mov     120(%rsp),%r12
+
+       mov     %r13,%r15
+
+       shr     $7,%r13
+       ror     $1,%r15
+
+       xor     %r15,%r13
+       ror     $7,%r15
+
+       xor     %r15,%r13                       # sigma0(X[(i+1)&0xf])
+       mov     %r12,%r14
+
+       shr     $6,%r12
+       ror     $19,%r14
+
+       xor     %r14,%r12
+       ror     $42,%r14
+
+       xor     %r14,%r12                       # sigma1(X[(i+14)&0xf])
+
+       add     %r13,%r12
+
+       add     80(%rsp),%r12
+
+       add     8(%rsp),%r12
+       mov     %rdx,%r13
+       mov     %rdx,%r14
+       mov     %r8,%r15
+
+       ror     $14,%r13
+       ror     $18,%r14
+       xor     %r9,%r15                        # f^g
+
+       xor     %r14,%r13
+       ror     $23,%r14
+       and     %rdx,%r15                       # (f^g)&e
+       mov     %r12,8(%rsp)
+
+       xor     %r14,%r13                       # Sigma1(e)
+       xor     %r9,%r15                        # Ch(e,f,g)=((f^g)&e)^g
+       add     %r10,%r12                       # T1+=h
+
+       mov     %r11,%r10
+       add     %r13,%r12                       # T1+=Sigma1(e)
+
+       add     %r15,%r12                       # T1+=Ch(e,f,g)
+       mov     %r11,%r13
+       mov     %r11,%r14
+
+       ror     $28,%r10
+       ror     $34,%r13
+       mov     %r11,%r15
+       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
+
+       xor     %r13,%r10
+       ror     $5,%r13
+       or      %rbx,%r14                       # a|c
+
+       xor     %r13,%r10                       # h=Sigma0(a)
+       and     %rbx,%r15                       # a&c
+       add     %r12,%rcx                       # d+=T1
+
+       and     %rax,%r14                       # (a|c)&b
+       add     %r12,%r10                       # h+=T1
+
+       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
+       lea     1(%rdi),%rdi    # round++
+
+       add     %r14,%r10                       # h+=Maj(a,b,c)
+       mov     24(%rsp),%r13
+       mov     0(%rsp),%r12
+
+       mov     %r13,%r15
+
+       shr     $7,%r13
+       ror     $1,%r15
+
+       xor     %r15,%r13
+       ror     $7,%r15
+
+       xor     %r15,%r13                       # sigma0(X[(i+1)&0xf])
+       mov     %r12,%r14
+
+       shr     $6,%r12
+       ror     $19,%r14
+
+       xor     %r14,%r12
+       ror     $42,%r14
+
+       xor     %r14,%r12                       # sigma1(X[(i+14)&0xf])
+
+       add     %r13,%r12
+
+       add     88(%rsp),%r12
+
+       add     16(%rsp),%r12
+       mov     %rcx,%r13
+       mov     %rcx,%r14
+       mov     %rdx,%r15
+
+       ror     $14,%r13
+       ror     $18,%r14
+       xor     %r8,%r15                        # f^g
+
+       xor     %r14,%r13
+       ror     $23,%r14
+       and     %rcx,%r15                       # (f^g)&e
+       mov     %r12,16(%rsp)
+
+       xor     %r14,%r13                       # Sigma1(e)
+       xor     %r8,%r15                        # Ch(e,f,g)=((f^g)&e)^g
+       add     %r9,%r12                        # T1+=h
+
+       mov     %r10,%r9
+       add     %r13,%r12                       # T1+=Sigma1(e)
+
+       add     %r15,%r12                       # T1+=Ch(e,f,g)
+       mov     %r10,%r13
+       mov     %r10,%r14
+
+       ror     $28,%r9
+       ror     $34,%r13
+       mov     %r10,%r15
+       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
+
+       xor     %r13,%r9
+       ror     $5,%r13
+       or      %rax,%r14                       # a|c
+
+       xor     %r13,%r9                        # h=Sigma0(a)
+       and     %rax,%r15                       # a&c
+       add     %r12,%rbx                       # d+=T1
+
+       and     %r11,%r14                       # (a|c)&b
+       add     %r12,%r9                        # h+=T1
+
+       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
+       lea     1(%rdi),%rdi    # round++
+
+       add     %r14,%r9                        # h+=Maj(a,b,c)
+       mov     32(%rsp),%r13
+       mov     8(%rsp),%r12
+
+       mov     %r13,%r15
+
+       shr     $7,%r13
+       ror     $1,%r15
+
+       xor     %r15,%r13
+       ror     $7,%r15
+
+       xor     %r15,%r13                       # sigma0(X[(i+1)&0xf])
+       mov     %r12,%r14
+
+       shr     $6,%r12
+       ror     $19,%r14
+
+       xor     %r14,%r12
+       ror     $42,%r14
+
+       xor     %r14,%r12                       # sigma1(X[(i+14)&0xf])
+
+       add     %r13,%r12
+
+       add     96(%rsp),%r12
+
+       add     24(%rsp),%r12
+       mov     %rbx,%r13
+       mov     %rbx,%r14
+       mov     %rcx,%r15
+
+       ror     $14,%r13
+       ror     $18,%r14
+       xor     %rdx,%r15                       # f^g
+
+       xor     %r14,%r13
+       ror     $23,%r14
+       and     %rbx,%r15                       # (f^g)&e
+       mov     %r12,24(%rsp)
+
+       xor     %r14,%r13                       # Sigma1(e)
+       xor     %rdx,%r15                       # Ch(e,f,g)=((f^g)&e)^g
+       add     %r8,%r12                        # T1+=h
+
+       mov     %r9,%r8
+       add     %r13,%r12                       # T1+=Sigma1(e)
+
+       add     %r15,%r12                       # T1+=Ch(e,f,g)
+       mov     %r9,%r13
+       mov     %r9,%r14
+
+       ror     $28,%r8
+       ror     $34,%r13
+       mov     %r9,%r15
+       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
+
+       xor     %r13,%r8
+       ror     $5,%r13
+       or      %r11,%r14                       # a|c
+
+       xor     %r13,%r8                        # h=Sigma0(a)
+       and     %r11,%r15                       # a&c
+       add     %r12,%rax                       # d+=T1
+
+       and     %r10,%r14                       # (a|c)&b
+       add     %r12,%r8                        # h+=T1
+
+       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
+       lea     1(%rdi),%rdi    # round++
+
+       add     %r14,%r8                        # h+=Maj(a,b,c)
+       mov     40(%rsp),%r13
+       mov     16(%rsp),%r12
+
+       mov     %r13,%r15
+
+       shr     $7,%r13
+       ror     $1,%r15
+
+       xor     %r15,%r13
+       ror     $7,%r15
+
+       xor     %r15,%r13                       # sigma0(X[(i+1)&0xf])
+       mov     %r12,%r14
+
+       shr     $6,%r12
+       ror     $19,%r14
+
+       xor     %r14,%r12
+       ror     $42,%r14
+
+       xor     %r14,%r12                       # sigma1(X[(i+14)&0xf])
+
+       add     %r13,%r12
+
+       add     104(%rsp),%r12
+
+       add     32(%rsp),%r12
+       mov     %rax,%r13
+       mov     %rax,%r14
+       mov     %rbx,%r15
+
+       ror     $14,%r13
+       ror     $18,%r14
+       xor     %rcx,%r15                       # f^g
+
+       xor     %r14,%r13
+       ror     $23,%r14
+       and     %rax,%r15                       # (f^g)&e
+       mov     %r12,32(%rsp)
+
+       xor     %r14,%r13                       # Sigma1(e)
+       xor     %rcx,%r15                       # Ch(e,f,g)=((f^g)&e)^g
+       add     %rdx,%r12                       # T1+=h
+
+       mov     %r8,%rdx
+       add     %r13,%r12                       # T1+=Sigma1(e)
+
+       add     %r15,%r12                       # T1+=Ch(e,f,g)
+       mov     %r8,%r13
+       mov     %r8,%r14
+
+       ror     $28,%rdx
+       ror     $34,%r13
+       mov     %r8,%r15
+       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
+
+       xor     %r13,%rdx
+       ror     $5,%r13
+       or      %r10,%r14                       # a|c
+
+       xor     %r13,%rdx                       # h=Sigma0(a)
+       and     %r10,%r15                       # a&c
+       add     %r12,%r11                       # d+=T1
+
+       and     %r9,%r14                        # (a|c)&b
+       add     %r12,%rdx                       # h+=T1
+
+       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
+       lea     1(%rdi),%rdi    # round++
+
+       add     %r14,%rdx                       # h+=Maj(a,b,c)
+       mov     48(%rsp),%r13
+       mov     24(%rsp),%r12
+
+       mov     %r13,%r15
+
+       shr     $7,%r13
+       ror     $1,%r15
+
+       xor     %r15,%r13
+       ror     $7,%r15
+
+       xor     %r15,%r13                       # sigma0(X[(i+1)&0xf])
+       mov     %r12,%r14
+
+       shr     $6,%r12
+       ror     $19,%r14
+
+       xor     %r14,%r12
+       ror     $42,%r14
+
+       xor     %r14,%r12                       # sigma1(X[(i+14)&0xf])
+
+       add     %r13,%r12
+
+       add     112(%rsp),%r12
+
+       add     40(%rsp),%r12
+       mov     %r11,%r13
+       mov     %r11,%r14
+       mov     %rax,%r15
+
+       ror     $14,%r13
+       ror     $18,%r14
+       xor     %rbx,%r15                       # f^g
+
+       xor     %r14,%r13
+       ror     $23,%r14
+       and     %r11,%r15                       # (f^g)&e
+       mov     %r12,40(%rsp)
+
+       xor     %r14,%r13                       # Sigma1(e)
+       xor     %rbx,%r15                       # Ch(e,f,g)=((f^g)&e)^g
+       add     %rcx,%r12                       # T1+=h
+
+       mov     %rdx,%rcx
+       add     %r13,%r12                       # T1+=Sigma1(e)
+
+       add     %r15,%r12                       # T1+=Ch(e,f,g)
+       mov     %rdx,%r13
+       mov     %rdx,%r14
+
+       ror     $28,%rcx
+       ror     $34,%r13
+       mov     %rdx,%r15
+       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
+
+       xor     %r13,%rcx
+       ror     $5,%r13
+       or      %r9,%r14                        # a|c
+
+       xor     %r13,%rcx                       # h=Sigma0(a)
+       and     %r9,%r15                        # a&c
+       add     %r12,%r10                       # d+=T1
+
+       and     %r8,%r14                        # (a|c)&b
+       add     %r12,%rcx                       # h+=T1
+
+       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
+       lea     1(%rdi),%rdi    # round++
+
+       add     %r14,%rcx                       # h+=Maj(a,b,c)
+       mov     56(%rsp),%r13
+       mov     32(%rsp),%r12
+
+       mov     %r13,%r15
+
+       shr     $7,%r13
+       ror     $1,%r15
+
+       xor     %r15,%r13
+       ror     $7,%r15
+
+       xor     %r15,%r13                       # sigma0(X[(i+1)&0xf])
+       mov     %r12,%r14
+
+       shr     $6,%r12
+       ror     $19,%r14
+
+       xor     %r14,%r12
+       ror     $42,%r14
+
+       xor     %r14,%r12                       # sigma1(X[(i+14)&0xf])
+
+       add     %r13,%r12
+
+       add     120(%rsp),%r12
+
+       add     48(%rsp),%r12
+       mov     %r10,%r13
+       mov     %r10,%r14
+       mov     %r11,%r15
+
+       ror     $14,%r13
+       ror     $18,%r14
+       xor     %rax,%r15                       # f^g
+
+       xor     %r14,%r13
+       ror     $23,%r14
+       and     %r10,%r15                       # (f^g)&e
+       mov     %r12,48(%rsp)
+
+       xor     %r14,%r13                       # Sigma1(e)
+       xor     %rax,%r15                       # Ch(e,f,g)=((f^g)&e)^g
+       add     %rbx,%r12                       # T1+=h
+
+       mov     %rcx,%rbx
+       add     %r13,%r12                       # T1+=Sigma1(e)
+
+       add     %r15,%r12                       # T1+=Ch(e,f,g)
+       mov     %rcx,%r13
+       mov     %rcx,%r14
+
+       ror     $28,%rbx
+       ror     $34,%r13
+       mov     %rcx,%r15
+       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
+
+       xor     %r13,%rbx
+       ror     $5,%r13
+       or      %r8,%r14                        # a|c
+
+       xor     %r13,%rbx                       # h=Sigma0(a)
+       and     %r8,%r15                        # a&c
+       add     %r12,%r9                        # d+=T1
+
+       and     %rdx,%r14                       # (a|c)&b
+       add     %r12,%rbx                       # h+=T1
+
+       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
+       lea     1(%rdi),%rdi    # round++
+
+       add     %r14,%rbx                       # h+=Maj(a,b,c)
+       mov     64(%rsp),%r13
+       mov     40(%rsp),%r12
+
+       mov     %r13,%r15
+
+       shr     $7,%r13
+       ror     $1,%r15
+
+       xor     %r15,%r13
+       ror     $7,%r15
+
+       xor     %r15,%r13                       # sigma0(X[(i+1)&0xf])
+       mov     %r12,%r14
+
+       shr     $6,%r12
+       ror     $19,%r14
+
+       xor     %r14,%r12
+       ror     $42,%r14
+
+       xor     %r14,%r12                       # sigma1(X[(i+14)&0xf])
+
+       add     %r13,%r12
+
+       add     0(%rsp),%r12
+
+       add     56(%rsp),%r12
+       mov     %r9,%r13
+       mov     %r9,%r14
+       mov     %r10,%r15
+
+       ror     $14,%r13
+       ror     $18,%r14
+       xor     %r11,%r15                       # f^g
+
+       xor     %r14,%r13
+       ror     $23,%r14
+       and     %r9,%r15                        # (f^g)&e
+       mov     %r12,56(%rsp)
+
+       xor     %r14,%r13                       # Sigma1(e)
+       xor     %r11,%r15                       # Ch(e,f,g)=((f^g)&e)^g
+       add     %rax,%r12                       # T1+=h
+
+       mov     %rbx,%rax
+       add     %r13,%r12                       # T1+=Sigma1(e)
+
+       add     %r15,%r12                       # T1+=Ch(e,f,g)
+       mov     %rbx,%r13
+       mov     %rbx,%r14
+
+       ror     $28,%rax
+       ror     $34,%r13
+       mov     %rbx,%r15
+       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
+
+       xor     %r13,%rax
+       ror     $5,%r13
+       or      %rdx,%r14                       # a|c
+
+       xor     %r13,%rax                       # h=Sigma0(a)
+       and     %rdx,%r15                       # a&c
+       add     %r12,%r8                        # d+=T1
+
+       and     %rcx,%r14                       # (a|c)&b
+       add     %r12,%rax                       # h+=T1
+
+       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
+       lea     1(%rdi),%rdi    # round++
+
+       add     %r14,%rax                       # h+=Maj(a,b,c)
+       mov     72(%rsp),%r13
+       mov     48(%rsp),%r12
+
+       mov     %r13,%r15
+
+       shr     $7,%r13
+       ror     $1,%r15
+
+       xor     %r15,%r13
+       ror     $7,%r15
+
+       xor     %r15,%r13                       # sigma0(X[(i+1)&0xf])
+       mov     %r12,%r14
+
+       shr     $6,%r12
+       ror     $19,%r14
+
+       xor     %r14,%r12
+       ror     $42,%r14
+
+       xor     %r14,%r12                       # sigma1(X[(i+14)&0xf])
+
+       add     %r13,%r12
+
+       add     8(%rsp),%r12
+
+       add     64(%rsp),%r12
+       mov     %r8,%r13
+       mov     %r8,%r14
+       mov     %r9,%r15
+
+       ror     $14,%r13
+       ror     $18,%r14
+       xor     %r10,%r15                       # f^g
+
+       xor     %r14,%r13
+       ror     $23,%r14
+       and     %r8,%r15                        # (f^g)&e
+       mov     %r12,64(%rsp)
+
+       xor     %r14,%r13                       # Sigma1(e)
+       xor     %r10,%r15                       # Ch(e,f,g)=((f^g)&e)^g
+       add     %r11,%r12                       # T1+=h
+
+       mov     %rax,%r11
+       add     %r13,%r12                       # T1+=Sigma1(e)
+
+       add     %r15,%r12                       # T1+=Ch(e,f,g)
+       mov     %rax,%r13
+       mov     %rax,%r14
+
+       ror     $28,%r11
+       ror     $34,%r13
+       mov     %rax,%r15
+       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
+
+       xor     %r13,%r11
+       ror     $5,%r13
+       or      %rcx,%r14                       # a|c
+
+       xor     %r13,%r11                       # h=Sigma0(a)
+       and     %rcx,%r15                       # a&c
+       add     %r12,%rdx                       # d+=T1
+
+       and     %rbx,%r14                       # (a|c)&b
+       add     %r12,%r11                       # h+=T1
+
+       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
+       lea     1(%rdi),%rdi    # round++
+
+       add     %r14,%r11                       # h+=Maj(a,b,c)
+       mov     80(%rsp),%r13
+       mov     56(%rsp),%r12
+
+       mov     %r13,%r15
+
+       shr     $7,%r13
+       ror     $1,%r15
+
+       xor     %r15,%r13
+       ror     $7,%r15
+
+       xor     %r15,%r13                       # sigma0(X[(i+1)&0xf])
+       mov     %r12,%r14
+
+       shr     $6,%r12
+       ror     $19,%r14
+
+       xor     %r14,%r12
+       ror     $42,%r14
+
+       xor     %r14,%r12                       # sigma1(X[(i+14)&0xf])
+
+       add     %r13,%r12
+
+       add     16(%rsp),%r12
+
+       add     72(%rsp),%r12
+       mov     %rdx,%r13
+       mov     %rdx,%r14
+       mov     %r8,%r15
+
+       ror     $14,%r13
+       ror     $18,%r14
+       xor     %r9,%r15                        # f^g
+
+       xor     %r14,%r13
+       ror     $23,%r14
+       and     %rdx,%r15                       # (f^g)&e
+       mov     %r12,72(%rsp)
+
+       xor     %r14,%r13                       # Sigma1(e)
+       xor     %r9,%r15                        # Ch(e,f,g)=((f^g)&e)^g
+       add     %r10,%r12                       # T1+=h
+
+       mov     %r11,%r10
+       add     %r13,%r12                       # T1+=Sigma1(e)
+
+       add     %r15,%r12                       # T1+=Ch(e,f,g)
+       mov     %r11,%r13
+       mov     %r11,%r14
+
+       ror     $28,%r10
+       ror     $34,%r13
+       mov     %r11,%r15
+       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
+
+       xor     %r13,%r10
+       ror     $5,%r13
+       or      %rbx,%r14                       # a|c
+
+       xor     %r13,%r10                       # h=Sigma0(a)
+       and     %rbx,%r15                       # a&c
+       add     %r12,%rcx                       # d+=T1
+
+       and     %rax,%r14                       # (a|c)&b
+       add     %r12,%r10                       # h+=T1
+
+       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
+       lea     1(%rdi),%rdi    # round++
+
+       add     %r14,%r10                       # h+=Maj(a,b,c)
+       mov     88(%rsp),%r13
+       mov     64(%rsp),%r12
+
+       mov     %r13,%r15
+
+       shr     $7,%r13
+       ror     $1,%r15
+
+       xor     %r15,%r13
+       ror     $7,%r15
+
+       xor     %r15,%r13                       # sigma0(X[(i+1)&0xf])
+       mov     %r12,%r14
+
+       shr     $6,%r12
+       ror     $19,%r14
+
+       xor     %r14,%r12
+       ror     $42,%r14
+
+       xor     %r14,%r12                       # sigma1(X[(i+14)&0xf])
+
+       add     %r13,%r12
+
+       add     24(%rsp),%r12
+
+       add     80(%rsp),%r12
+       mov     %rcx,%r13
+       mov     %rcx,%r14
+       mov     %rdx,%r15
+
+       ror     $14,%r13
+       ror     $18,%r14
+       xor     %r8,%r15                        # f^g
+
+       xor     %r14,%r13
+       ror     $23,%r14
+       and     %rcx,%r15                       # (f^g)&e
+       mov     %r12,80(%rsp)
+
+       xor     %r14,%r13                       # Sigma1(e)
+       xor     %r8,%r15                        # Ch(e,f,g)=((f^g)&e)^g
+       add     %r9,%r12                        # T1+=h
+
+       mov     %r10,%r9
+       add     %r13,%r12                       # T1+=Sigma1(e)
+
+       add     %r15,%r12                       # T1+=Ch(e,f,g)
+       mov     %r10,%r13
+       mov     %r10,%r14
+
+       ror     $28,%r9
+       ror     $34,%r13
+       mov     %r10,%r15
+       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
+
+       xor     %r13,%r9
+       ror     $5,%r13
+       or      %rax,%r14                       # a|c
+
+       xor     %r13,%r9                        # h=Sigma0(a)
+       and     %rax,%r15                       # a&c
+       add     %r12,%rbx                       # d+=T1
+
+       and     %r11,%r14                       # (a|c)&b
+       add     %r12,%r9                        # h+=T1
+
+       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
+       lea     1(%rdi),%rdi    # round++
+
+       add     %r14,%r9                        # h+=Maj(a,b,c)
+       mov     96(%rsp),%r13
+       mov     72(%rsp),%r12
+
+       mov     %r13,%r15
+
+       shr     $7,%r13
+       ror     $1,%r15
+
+       xor     %r15,%r13
+       ror     $7,%r15
+
+       xor     %r15,%r13                       # sigma0(X[(i+1)&0xf])
+       mov     %r12,%r14
+
+       shr     $6,%r12
+       ror     $19,%r14
+
+       xor     %r14,%r12
+       ror     $42,%r14
+
+       xor     %r14,%r12                       # sigma1(X[(i+14)&0xf])
+
+       add     %r13,%r12
+
+       add     32(%rsp),%r12
+
+       add     88(%rsp),%r12
+       mov     %rbx,%r13
+       mov     %rbx,%r14
+       mov     %rcx,%r15
+
+       ror     $14,%r13
+       ror     $18,%r14
+       xor     %rdx,%r15                       # f^g
+
+       xor     %r14,%r13
+       ror     $23,%r14
+       and     %rbx,%r15                       # (f^g)&e
+       mov     %r12,88(%rsp)
+
+       xor     %r14,%r13                       # Sigma1(e)
+       xor     %rdx,%r15                       # Ch(e,f,g)=((f^g)&e)^g
+       add     %r8,%r12                        # T1+=h
+
+       mov     %r9,%r8
+       add     %r13,%r12                       # T1+=Sigma1(e)
+
+       add     %r15,%r12                       # T1+=Ch(e,f,g)
+       mov     %r9,%r13
+       mov     %r9,%r14
+
+       ror     $28,%r8
+       ror     $34,%r13
+       mov     %r9,%r15
+       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
+
+       xor     %r13,%r8
+       ror     $5,%r13
+       or      %r11,%r14                       # a|c
+
+       xor     %r13,%r8                        # h=Sigma0(a)
+       and     %r11,%r15                       # a&c
+       add     %r12,%rax                       # d+=T1
+
+       and     %r10,%r14                       # (a|c)&b
+       add     %r12,%r8                        # h+=T1
+
+       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
+       lea     1(%rdi),%rdi    # round++
+
+       add     %r14,%r8                        # h+=Maj(a,b,c)
+       mov     104(%rsp),%r13
+       mov     80(%rsp),%r12
+
+       mov     %r13,%r15
+
+       shr     $7,%r13
+       ror     $1,%r15
+
+       xor     %r15,%r13
+       ror     $7,%r15
+
+       xor     %r15,%r13                       # sigma0(X[(i+1)&0xf])
+       mov     %r12,%r14
+
+       shr     $6,%r12
+       ror     $19,%r14
+
+       xor     %r14,%r12
+       ror     $42,%r14
+
+       xor     %r14,%r12                       # sigma1(X[(i+14)&0xf])
+
+       add     %r13,%r12
+
+       add     40(%rsp),%r12
+
+       add     96(%rsp),%r12
+       mov     %rax,%r13
+       mov     %rax,%r14
+       mov     %rbx,%r15
+
+       ror     $14,%r13
+       ror     $18,%r14
+       xor     %rcx,%r15                       # f^g
+
+       xor     %r14,%r13
+       ror     $23,%r14
+       and     %rax,%r15                       # (f^g)&e
+       mov     %r12,96(%rsp)
+
+       xor     %r14,%r13                       # Sigma1(e)
+       xor     %rcx,%r15                       # Ch(e,f,g)=((f^g)&e)^g
+       add     %rdx,%r12                       # T1+=h
+
+       mov     %r8,%rdx
+       add     %r13,%r12                       # T1+=Sigma1(e)
+
+       add     %r15,%r12                       # T1+=Ch(e,f,g)
+       mov     %r8,%r13
+       mov     %r8,%r14
+
+       ror     $28,%rdx
+       ror     $34,%r13
+       mov     %r8,%r15
+       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
+
+       xor     %r13,%rdx
+       ror     $5,%r13
+       or      %r10,%r14                       # a|c
+
+       xor     %r13,%rdx                       # h=Sigma0(a)
+       and     %r10,%r15                       # a&c
+       add     %r12,%r11                       # d+=T1
+
+       and     %r9,%r14                        # (a|c)&b
+       add     %r12,%rdx                       # h+=T1
+
+       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
+       lea     1(%rdi),%rdi    # round++
+
+       add     %r14,%rdx                       # h+=Maj(a,b,c)
+       mov     112(%rsp),%r13
+       mov     88(%rsp),%r12
+
+       mov     %r13,%r15
+
+       shr     $7,%r13
+       ror     $1,%r15
+
+       xor     %r15,%r13
+       ror     $7,%r15
+
+       xor     %r15,%r13                       # sigma0(X[(i+1)&0xf])
+       mov     %r12,%r14
+
+       shr     $6,%r12
+       ror     $19,%r14
+
+       xor     %r14,%r12
+       ror     $42,%r14
+
+       xor     %r14,%r12                       # sigma1(X[(i+14)&0xf])
+
+       add     %r13,%r12
+
+       add     48(%rsp),%r12
+
+       add     104(%rsp),%r12
+       mov     %r11,%r13
+       mov     %r11,%r14
+       mov     %rax,%r15
+
+       ror     $14,%r13
+       ror     $18,%r14
+       xor     %rbx,%r15                       # f^g
+
+       xor     %r14,%r13
+       ror     $23,%r14
+       and     %r11,%r15                       # (f^g)&e
+       mov     %r12,104(%rsp)
+
+       xor     %r14,%r13                       # Sigma1(e)
+       xor     %rbx,%r15                       # Ch(e,f,g)=((f^g)&e)^g
+       add     %rcx,%r12                       # T1+=h
+
+       mov     %rdx,%rcx
+       add     %r13,%r12                       # T1+=Sigma1(e)
+
+       add     %r15,%r12                       # T1+=Ch(e,f,g)
+       mov     %rdx,%r13
+       mov     %rdx,%r14
+
+       ror     $28,%rcx
+       ror     $34,%r13
+       mov     %rdx,%r15
+       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
+
+       xor     %r13,%rcx
+       ror     $5,%r13
+       or      %r9,%r14                        # a|c
+
+       xor     %r13,%rcx                       # h=Sigma0(a)
+       and     %r9,%r15                        # a&c
+       add     %r12,%r10                       # d+=T1
+
+       and     %r8,%r14                        # (a|c)&b
+       add     %r12,%rcx                       # h+=T1
+
+       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
+       lea     1(%rdi),%rdi    # round++
+
+       add     %r14,%rcx                       # h+=Maj(a,b,c)
+       mov     120(%rsp),%r13
+       mov     96(%rsp),%r12
+
+       mov     %r13,%r15
+
+       shr     $7,%r13
+       ror     $1,%r15
+
+       xor     %r15,%r13
+       ror     $7,%r15
+
+       xor     %r15,%r13                       # sigma0(X[(i+1)&0xf])
+       mov     %r12,%r14
+
+       shr     $6,%r12
+       ror     $19,%r14
+
+       xor     %r14,%r12
+       ror     $42,%r14
+
+       xor     %r14,%r12                       # sigma1(X[(i+14)&0xf])
+
+       add     %r13,%r12
+
+       add     56(%rsp),%r12
+
+       add     112(%rsp),%r12
+       mov     %r10,%r13
+       mov     %r10,%r14
+       mov     %r11,%r15
+
+       ror     $14,%r13
+       ror     $18,%r14
+       xor     %rax,%r15                       # f^g
+
+       xor     %r14,%r13
+       ror     $23,%r14
+       and     %r10,%r15                       # (f^g)&e
+       mov     %r12,112(%rsp)
+
+       xor     %r14,%r13                       # Sigma1(e)
+       xor     %rax,%r15                       # Ch(e,f,g)=((f^g)&e)^g
+       add     %rbx,%r12                       # T1+=h
+
+       mov     %rcx,%rbx
+       add     %r13,%r12                       # T1+=Sigma1(e)
+
+       add     %r15,%r12                       # T1+=Ch(e,f,g)
+       mov     %rcx,%r13
+       mov     %rcx,%r14
+
+       ror     $28,%rbx
+       ror     $34,%r13
+       mov     %rcx,%r15
+       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
+
+       xor     %r13,%rbx
+       ror     $5,%r13
+       or      %r8,%r14                        # a|c
+
+       xor     %r13,%rbx                       # h=Sigma0(a)
+       and     %r8,%r15                        # a&c
+       add     %r12,%r9                        # d+=T1
+
+       and     %rdx,%r14                       # (a|c)&b
+       add     %r12,%rbx                       # h+=T1
+
+       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
+       lea     1(%rdi),%rdi    # round++
+
+       add     %r14,%rbx                       # h+=Maj(a,b,c)
+       mov     0(%rsp),%r13
+       mov     104(%rsp),%r12
+
+       mov     %r13,%r15
+
+       shr     $7,%r13
+       ror     $1,%r15
+
+       xor     %r15,%r13
+       ror     $7,%r15
+
+       xor     %r15,%r13                       # sigma0(X[(i+1)&0xf])
+       mov     %r12,%r14
+
+       shr     $6,%r12
+       ror     $19,%r14
+
+       xor     %r14,%r12
+       ror     $42,%r14
+
+       xor     %r14,%r12                       # sigma1(X[(i+14)&0xf])
+
+       add     %r13,%r12
+
+       add     64(%rsp),%r12
+
+       add     120(%rsp),%r12
+       mov     %r9,%r13
+       mov     %r9,%r14
+       mov     %r10,%r15
+
+       ror     $14,%r13
+       ror     $18,%r14
+       xor     %r11,%r15                       # f^g
+
+       xor     %r14,%r13
+       ror     $23,%r14
+       and     %r9,%r15                        # (f^g)&e
+       mov     %r12,120(%rsp)
+
+       xor     %r14,%r13                       # Sigma1(e)
+       xor     %r11,%r15                       # Ch(e,f,g)=((f^g)&e)^g
+       add     %rax,%r12                       # T1+=h
+
+       mov     %rbx,%rax
+       add     %r13,%r12                       # T1+=Sigma1(e)
+
+       add     %r15,%r12                       # T1+=Ch(e,f,g)
+       mov     %rbx,%r13
+       mov     %rbx,%r14
+
+       ror     $28,%rax
+       ror     $34,%r13
+       mov     %rbx,%r15
+       add     (%rbp,%rdi,8),%r12      # T1+=K[round]
+
+       xor     %r13,%rax
+       ror     $5,%r13
+       or      %rdx,%r14                       # a|c
+
+       xor     %r13,%rax                       # h=Sigma0(a)
+       and     %rdx,%r15                       # a&c
+       add     %r12,%r8                        # d+=T1
+
+       and     %rcx,%r14                       # (a|c)&b
+       add     %r12,%rax                       # h+=T1
+
+       or      %r15,%r14                       # Maj(a,b,c)=((a|c)&b)|(a&c)
+       lea     1(%rdi),%rdi    # round++
+
+       add     %r14,%rax                       # h+=Maj(a,b,c)
+       cmp     $80,%rdi
+       jb      .Lrounds_16_xx
+
+       mov     16*8+0*8(%rsp),%rdi
+       lea     16*8(%rsi),%rsi
+
+       add     8*0(%rdi),%rax
+       add     8*1(%rdi),%rbx
+       add     8*2(%rdi),%rcx
+       add     8*3(%rdi),%rdx
+       add     8*4(%rdi),%r8
+       add     8*5(%rdi),%r9
+       add     8*6(%rdi),%r10
+       add     8*7(%rdi),%r11
+
+       cmp     16*8+2*8(%rsp),%rsi
+
+       mov     %rax,8*0(%rdi)
+       mov     %rbx,8*1(%rdi)
+       mov     %rcx,8*2(%rdi)
+       mov     %rdx,8*3(%rdi)
+       mov     %r8,8*4(%rdi)
+       mov     %r9,8*5(%rdi)
+       mov     %r10,8*6(%rdi)
+       mov     %r11,8*7(%rdi)
+       jb      .Lloop
+
+       mov     16*8+3*8(%rsp),%rsp
+       pop     %r15
+       pop     %r14
+       pop     %r13
+       pop     %r12
+       pop     %rbp
+       pop     %rbx
+
+       ret
+SET_SIZE(SHA512TransformBlocks)
+
+.align 64
+.type  K512,@object
+K512:  /* 80 quadword constants; presumably the K[round] table the rounds above read via (%rbp,%rdi,8) -- TODO confirm %rbp setup */
+       .quad   0x428a2f98d728ae22,0x7137449123ef65cd
+       .quad   0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
+       .quad   0x3956c25bf348b538,0x59f111f1b605d019
+       .quad   0x923f82a4af194f9b,0xab1c5ed5da6d8118
+       .quad   0xd807aa98a3030242,0x12835b0145706fbe
+       .quad   0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
+       .quad   0x72be5d74f27b896f,0x80deb1fe3b1696b1
+       .quad   0x9bdc06a725c71235,0xc19bf174cf692694
+       .quad   0xe49b69c19ef14ad2,0xefbe4786384f25e3
+       .quad   0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
+       .quad   0x2de92c6f592b0275,0x4a7484aa6ea6e483
+       .quad   0x5cb0a9dcbd41fbd4,0x76f988da831153b5
+       .quad   0x983e5152ee66dfab,0xa831c66d2db43210
+       .quad   0xb00327c898fb213f,0xbf597fc7beef0ee4
+       .quad   0xc6e00bf33da88fc2,0xd5a79147930aa725
+       .quad   0x06ca6351e003826f,0x142929670a0e6e70
+       .quad   0x27b70a8546d22ffc,0x2e1b21385c26c926
+       .quad   0x4d2c6dfc5ac42aed,0x53380d139d95b3df
+       .quad   0x650a73548baf63de,0x766a0abb3c77b2a8
+       .quad   0x81c2c92e47edaee6,0x92722c851482353b
+       .quad   0xa2bfe8a14cf10364,0xa81a664bbc423001
+       .quad   0xc24b8b70d0f89791,0xc76c51a30654be30
+       .quad   0xd192e819d6ef5218,0xd69906245565a910
+       .quad   0xf40e35855771202a,0x106aa07032bbd1b8
+       .quad   0x19a4c116b8d2d0c8,0x1e376c085141ab53
+       .quad   0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
+       .quad   0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
+       .quad   0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
+       .quad   0x748f82ee5defb2fc,0x78a5636f43172f60
+       .quad   0x84c87814a1f0ab72,0x8cc702081a6439ec
+       .quad   0x90befffa23631e28,0xa4506cebde82bde9
+       .quad   0xbef9a3f7b2c67915,0xc67178f2e372532b
+       .quad   0xca273eceea26619c,0xd186b8c721c0c207
+       .quad   0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
+       .quad   0x06f067aa72176fba,0x0a637dc5a2c898a6
+       .quad   0x113f9804bef90dae,0x1b710b35131c471b
+       .quad   0x28db77f523047d84,0x32caab7b40c72493
+       .quad   0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
+       .quad   0x4cc5d4becb3e42b6,0x597f299cfc657e2a
+       .quad   0x5fcb6fab3ad6faec,0x6c44198c4a475817
+#endif /* !lint && !__lint */
index 7dd5dbf42fc4ab134c398dbcb000b56c930d727b..aa63e431f94b6d9327a2fd47f4fd856804090dbe 100644 (file)
 void __exit
 icp_fini(void)
 {
+       skein_mod_fini();
        sha2_mod_fini();
        sha1_mod_fini();
+       edonr_mod_fini();
        aes_mod_fini();
        kcf_sched_destroy();
        kcf_prov_tab_destroy();
@@ -139,8 +141,10 @@ icp_init(void)
 
        /* initialize algorithms */
        aes_mod_init();
+       edonr_mod_init();
        sha1_mod_init();
        sha2_mod_init();
+       skein_mod_init();
 
        return (0);
 }
diff --git a/module/icp/include/sha2/sha2.h b/module/icp/include/sha2/sha2.h
deleted file mode 100644 (file)
index 8e53987..0000000
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
- * Use is subject to license terms.
- */
-/* Copyright 2013 Saso Kiselkov.  All rights reserved. */
-
-#ifndef _SYS_SHA2_H
-#define        _SYS_SHA2_H
-
-#include <sys/types.h>         /* for uint_* */
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define        SHA2_HMAC_MIN_KEY_LEN   1       /* SHA2-HMAC min key length in bytes */
-#define        SHA2_HMAC_MAX_KEY_LEN   INT_MAX /* SHA2-HMAC max key length in bytes */
-
-#define        SHA256_DIGEST_LENGTH    32      /* SHA256 digest length in bytes */
-
-#define        SHA256_HMAC_BLOCK_SIZE  64      /* SHA256-HMAC block size */
-
-#define        SHA256                  0
-#define        SHA256_HMAC             1
-#define        SHA256_HMAC_GEN         2
-
-/*
- * SHA2 context.
- * The contents of this structure are a private interface between the
- * Init/Update/Final calls of the functions defined below.
- * Callers must never attempt to read or write any of the fields
- * in this structure directly.
- */
-typedef struct         {
-       uint32_t algotype;              /* Algorithm Type */
-
-       /* state (ABCDEFGH) */
-       union {
-               uint32_t s32[8];        /* for SHA256 */
-               uint64_t s64[8];        /* for SHA384/512 */
-       } state;
-       /* number of bits */
-       union {
-               uint32_t c32[2];        /* for SHA256 , modulo 2^64 */
-               uint64_t c64[2];        /* for SHA384/512, modulo 2^128 */
-       } count;
-       union {
-               uint8_t         buf8[128];      /* undigested input */
-               uint32_t        buf32[32];      /* realigned input */
-               uint64_t        buf64[16];      /* realigned input */
-       } buf_un;
-} SHA2_CTX;
-
-typedef SHA2_CTX SHA256_CTX;
-typedef SHA2_CTX SHA384_CTX;
-typedef SHA2_CTX SHA512_CTX;
-
-extern void SHA2Init(uint64_t mech, SHA2_CTX *);
-
-extern void SHA2Update(SHA2_CTX *, const void *, size_t);
-
-extern void SHA2Final(void *, SHA2_CTX *);
-
-extern void SHA256Init(SHA256_CTX *);
-
-extern void SHA256Update(SHA256_CTX *, const void *, size_t);
-
-extern void SHA256Final(void *, SHA256_CTX *);
-
-#ifdef _SHA2_IMPL
-/*
- * The following types/functions are all private to the implementation
- * of the SHA2 functions and must not be used by consumers of the interface
- */
-
-/*
- * List of support mechanisms in this module.
- *
- * It is important to note that in the module, division or modulus calculations
- * are used on the enumerated type to determine which mechanism is being used;
- * therefore, changing the order or additional mechanisms should be done
- * carefully
- */
-typedef enum sha2_mech_type {
-       SHA256_MECH_INFO_TYPE,          /* SUN_CKM_SHA256 */
-       SHA256_HMAC_MECH_INFO_TYPE,     /* SUN_CKM_SHA256_HMAC */
-       SHA256_HMAC_GEN_MECH_INFO_TYPE, /* SUN_CKM_SHA256_HMAC_GENERAL */
-} sha2_mech_type_t;
-
-#endif /* _SHA2_IMPL */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_SHA2_H */
index bb42c3cd4bc3bd1326e37a346e1e39e120e22218..b9768d344e95a12eed74a6fa3aba2379a2e6eca2 100644 (file)
@@ -26,6 +26,8 @@
 #ifndef        _SHA2_IMPL_H
 #define        _SHA2_IMPL_H
 
+#include <sys/sha2.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
diff --git a/module/icp/io/edonr_mod.c b/module/icp/io/edonr_mod.c
new file mode 100644 (file)
index 0000000..19b5c96
--- /dev/null
@@ -0,0 +1,62 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2013 Saso Kiselkov. All rights reserved.
+ */
+
+#include <sys/modctl.h>
+#include <sys/crypto/common.h>
+#include <sys/crypto/spi.h>
+#include <sys/sysmacros.h>
+#include <sys/systm.h>
+#include <sys/edonr.h>
+
+/*
+ * Unlike sha2 or skein, we won't expose edonr via the Kernel Cryptographic
+ * Framework (KCF), because Edon-R is *NOT* suitable for general-purpose
+ * cryptographic use. Users of Edon-R must interface directly to this module.
+ */
+
+static struct modlmisc modlmisc = {    /* the only entry listed in modlinkage below */
+       &mod_cryptoops,
+       "Edon-R Message-Digest Algorithm"
+};
+
+static struct modlinkage modlinkage = {        /* passed to mod_install()/mod_remove() below */
+       MODREV_1, {&modlmisc, NULL}     /* NULL-terminated list; misc entry only, no modlcrypto */
+};
+
+/*
+ * Install the Edon-R module linkage.
+ *
+ * Returns 0 when mod_install() reports success; otherwise returns the
+ * non-zero value mod_install() produced.
+ */
+int
+edonr_mod_init(void)
+{
+       /* The status needs no translation, so hand it straight back. */
+       return (mod_install(&modlinkage));
+}
+
+/*
+ * Remove the Edon-R module linkage that edonr_mod_init() installed.
+ *
+ * Returns the value produced by mod_remove().
+ */
+int
+edonr_mod_fini(void)
+{
+       int status = mod_remove(&modlinkage);
+
+       return (status);
+}
index be0f7a42cb1840a3337f7a83e2cd9998ea9accb6..3913d7618227cca84e9f74cd45fb0dd2f9ac6620 100644 (file)
@@ -30,7 +30,7 @@
 #include <sys/crypto/spi.h>
 #include <sys/crypto/icp.h>
 #define        _SHA2_IMPL
-#include <sha2/sha2.h>
+#include <sys/sha2.h>
 #include <sha2/sha2_impl.h>
 
 /*
diff --git a/module/icp/io/skein_mod.c b/module/icp/io/skein_mod.c
new file mode 100644 (file)
index 0000000..e909a7e
--- /dev/null
@@ -0,0 +1,721 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2013 Saso Kiselkov. All rights reserved.
+ */
+
+#include <sys/modctl.h>
+#include <sys/crypto/common.h>
+#include <sys/crypto/spi.h>
+#include <sys/sysmacros.h>
+#include <sys/systm.h>
+#define        SKEIN_MODULE_IMPL
+#include <sys/skein.h>
+
+/*
+ * Like the sha2 module, we create the skein module with two modlinkages:
+ * - modlmisc to allow direct calls to Skein_* API functions.
+ * - modlcrypto to integrate well into the Kernel Crypto Framework (KCF).
+ */
+/* Linkage element backing direct calls into the Skein_* API. */
+static struct modlmisc modlmisc = {
+       &mod_cryptoops,
+       "Skein Message-Digest Algorithm"
+};
+
+/* Linkage element for the KCF software provider side of the module. */
+static struct modlcrypto modlcrypto = {
+       &mod_cryptoops,
+       "Skein Kernel SW Provider"
+};
+
+/*
+ * Module linkage passed to mod_install()/mod_remove() by skein_mod_init()
+ * and skein_mod_fini(); the element list is NULL-terminated.
+ */
+static struct modlinkage modlinkage = {
+       MODREV_1, {&modlmisc, &modlcrypto, NULL}
+};
+
+/*
+ * Mechanism table referenced from skein_prov_info: one digest and one MAC
+ * entry for each of Skein-256, Skein-512 and Skein-1024.
+ *
+ * Digest entries advertise CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC and
+ * carry 0, 0 in the two numeric slots; MAC entries advertise
+ * CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC and carry 1, INT_MAX with a byte unit
+ * (presumably the minimum/maximum key size -- confirm against the
+ * crypto_mech_info_t definition).
+ */
+static crypto_mech_info_t skein_mech_info_tab[] = {
+       {CKM_SKEIN_256, SKEIN_256_MECH_INFO_TYPE,
+           CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC,
+           0, 0, CRYPTO_KEYSIZE_UNIT_IN_BITS},
+       {CKM_SKEIN_256_MAC, SKEIN_256_MAC_MECH_INFO_TYPE,
+           CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC, 1, INT_MAX,
+           CRYPTO_KEYSIZE_UNIT_IN_BYTES},
+       {CKM_SKEIN_512, SKEIN_512_MECH_INFO_TYPE,
+           CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC,
+           0, 0, CRYPTO_KEYSIZE_UNIT_IN_BITS},
+       {CKM_SKEIN_512_MAC, SKEIN_512_MAC_MECH_INFO_TYPE,
+           CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC, 1, INT_MAX,
+           CRYPTO_KEYSIZE_UNIT_IN_BYTES},
+       {CKM_SKEIN1024, SKEIN1024_MECH_INFO_TYPE,
+           CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC,
+           0, 0, CRYPTO_KEYSIZE_UNIT_IN_BITS},
+       {CKM_SKEIN1024_MAC, SKEIN1024_MAC_MECH_INFO_TYPE,
+           CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC, 1, INT_MAX,
+           CRYPTO_KEYSIZE_UNIT_IN_BYTES}
+};
+
+static void skein_provider_status(crypto_provider_handle_t, uint_t *);
+
+/* Control entry points: only the provider status callback is supplied. */
+static crypto_control_ops_t skein_control_ops = {
+       skein_provider_status
+};
+
+/* Forward declarations of the digest entry points defined further below. */
+static int skein_digest_init(crypto_ctx_t *, crypto_mechanism_t *,
+    crypto_req_handle_t);
+static int skein_digest(crypto_ctx_t *, crypto_data_t *, crypto_data_t *,
+    crypto_req_handle_t);
+static int skein_update(crypto_ctx_t *, crypto_data_t *, crypto_req_handle_t);
+static int skein_final(crypto_ctx_t *, crypto_data_t *, crypto_req_handle_t);
+static int skein_digest_atomic(crypto_provider_handle_t, crypto_session_id_t,
+    crypto_mechanism_t *, crypto_data_t *, crypto_data_t *,
+    crypto_req_handle_t);
+
+/*
+ * Digest operations vector, initialized positionally. The NULL slot is an
+ * entry point this provider does not implement (presumably the keyed-digest
+ * hook -- confirm against crypto_digest_ops_t).
+ */
+static crypto_digest_ops_t skein_digest_ops = {
+       skein_digest_init,
+       skein_digest,
+       skein_update,
+       NULL,
+       skein_final,
+       skein_digest_atomic
+};
+
+/* Forward declarations of the MAC-specific entry points defined below. */
+static int skein_mac_init(crypto_ctx_t *, crypto_mechanism_t *, crypto_key_t *,
+    crypto_spi_ctx_template_t, crypto_req_handle_t);
+static int skein_mac_atomic(crypto_provider_handle_t, crypto_session_id_t,
+    crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, crypto_data_t *,
+    crypto_spi_ctx_template_t, crypto_req_handle_t);
+
+/*
+ * MAC operations vector, initialized positionally. Update and final are
+ * shared with the plain digest code; the two NULL slots are entry points
+ * this provider leaves unimplemented (presumably single-part mac and
+ * mac_verify_atomic -- confirm against crypto_mac_ops_t).
+ */
+static crypto_mac_ops_t skein_mac_ops = {
+       skein_mac_init,
+       NULL,
+       skein_update,   /* using regular digest update is OK here */
+       skein_final,    /* using regular digest final is OK here */
+       skein_mac_atomic,
+       NULL
+};
+
+/* Forward declarations of the context-management entry points. */
+static int skein_create_ctx_template(crypto_provider_handle_t,
+    crypto_mechanism_t *, crypto_key_t *, crypto_spi_ctx_template_t *,
+    size_t *, crypto_req_handle_t);
+static int skein_free_context(crypto_ctx_t *);
+
+/* Context operations vector: template creation and context teardown. */
+static crypto_ctx_ops_t skein_ctx_ops = {
+       skein_create_ctx_template,
+       skein_free_context
+};
+
+/*
+ * Top-level operations table referenced from skein_prov_info. It is
+ * initialized positionally: only the control, digest, MAC and context
+ * vectors are populated, every other slot is NULL.
+ * NOTE(review): slot meaning depends on the field order of crypto_ops_t,
+ * which is not visible here -- keep the order in sync with that definition.
+ */
+static crypto_ops_t skein_crypto_ops = {{{{{
+       &skein_control_ops,
+       &skein_digest_ops,
+       NULL,
+       &skein_mac_ops,
+       NULL,
+       NULL,
+       NULL,
+       NULL,
+       NULL,
+       NULL,
+       NULL,
+       NULL,
+       NULL,
+       &skein_ctx_ops,
+}}}}};
+
+/*
+ * Provider description handed to crypto_register_provider() by
+ * skein_mod_init(); it ties together the operations table and the mechanism
+ * table (whose element count is computed with sizeof).
+ */
+static crypto_provider_info_t skein_prov_info = {{{{
+       CRYPTO_SPI_VERSION_1,
+       "Skein Software Provider",
+       CRYPTO_SW_PROVIDER,
+       NULL,
+       &skein_crypto_ops,
+       sizeof (skein_mech_info_tab) / sizeof (crypto_mech_info_t),
+       skein_mech_info_tab
+}}}};
+
+/* Handle filled in by crypto_register_provider(); starts out as 0. */
+static crypto_kcf_provider_handle_t skein_prov_handle = 0;
+
+/*
+ * Provider-private Skein state, stored in a crypto_ctx_t's
+ * cc_provider_private field (see SKEIN_CTX below).
+ */
+typedef struct skein_ctx {
+       /* mechanism this context was set up for; selects the union member */
+       skein_mech_type_t               sc_mech_type;
+       /* requested output length in bits */
+       size_t                          sc_digest_bitlen;
+       /* algorithm state; exactly one member is in use per context */
+       /*LINTED(E_ANONYMOUS_UNION_DECL)*/
+       union {
+               Skein_256_Ctxt_t        sc_256;
+               Skein_512_Ctxt_t        sc_512;
+               Skein1024_Ctxt_t        sc_1024;
+       };
+} skein_ctx_t;
+/* Reads the provider-private pointer of a crypto_ctx_t as a skein_ctx_t *. */
+#define        SKEIN_CTX(_ctx_)        ((skein_ctx_t *)((_ctx_)->cc_provider_private))
+/* Same field, usable as an assignment target. */
+#define        SKEIN_CTX_LVALUE(_ctx_) (_ctx_)->cc_provider_private
+/*
+ * Invokes Skein_256_<_op>, Skein_512_<_op> or Skein1024_<_op> on the union
+ * member matching the context's sc_mech_type (digest and MAC types of the
+ * same width share a member), forwarding the remaining arguments.
+ * The call's return value is discarded, and because the switch has no
+ * default case an unrecognized mechanism type results in no call at all.
+ * At least one argument must follow _op, since __VA_ARGS__ is preceded by
+ * a comma in the expansion.
+ */
+#define        SKEIN_OP(_skein_ctx, _op, ...)                                  \
+       do {                                                            \
+               skein_ctx_t     *sc = (_skein_ctx);                     \
+               switch (sc->sc_mech_type) {                             \
+               case SKEIN_256_MECH_INFO_TYPE:                          \
+               case SKEIN_256_MAC_MECH_INFO_TYPE:                      \
+                       (void) Skein_256_ ## _op(&sc->sc_256, __VA_ARGS__);\
+                       break;                                          \
+               case SKEIN_512_MECH_INFO_TYPE:                          \
+               case SKEIN_512_MAC_MECH_INFO_TYPE:                      \
+                       (void) Skein_512_ ## _op(&sc->sc_512, __VA_ARGS__);\
+                       break;                                          \
+               case SKEIN1024_MECH_INFO_TYPE:                          \
+               case SKEIN1024_MAC_MECH_INFO_TYPE:                      \
+                       (void) Skein1024_ ## _op(&sc->sc_1024, __VA_ARGS__);\
+                       break;                                          \
+               }                                                       \
+               _NOTE(CONSTCOND)                                        \
+       } while (0)
+
+/*
+ * Works out the digest length, in bits, requested by `mechanism' and stores
+ * it in `*result'.
+ *
+ * When cm_param is set it must be a skein_param_t of exactly matching size
+ * with a non-zero sp_digest_bitlen, otherwise CRYPTO_MECHANISM_PARAM_INVALID
+ * is returned. Without a parameter the natural length of the digest
+ * mechanism is used (256, 512 or 1024); any other mechanism type, including
+ * the MAC types, yields CRYPTO_MECHANISM_INVALID.
+ */
+static int
+skein_get_digest_bitlen(const crypto_mechanism_t *mechanism, size_t *result)
+{
+       /*LINTED(E_BAD_PTR_CAST_ALIGN)*/
+       skein_param_t   *param = (skein_param_t *)mechanism->cm_param;
+
+       if (param != NULL) {
+               /* Explicit length: validate the parameter, then use it. */
+               if (mechanism->cm_param_len != sizeof (*param) ||
+                   param->sp_digest_bitlen == 0)
+                       return (CRYPTO_MECHANISM_PARAM_INVALID);
+
+               *result = param->sp_digest_bitlen;
+               return (CRYPTO_SUCCESS);
+       }
+
+       /* No parameter: fall back to the mechanism's default length. */
+       switch (mechanism->cm_type) {
+       case SKEIN_256_MECH_INFO_TYPE:
+               *result = 256;
+               break;
+       case SKEIN_512_MECH_INFO_TYPE:
+               *result = 512;
+               break;
+       case SKEIN1024_MECH_INFO_TYPE:
+               *result = 1024;
+               break;
+       default:
+               return (CRYPTO_MECHANISM_INVALID);
+       }
+
+       return (CRYPTO_SUCCESS);
+}
+
+/*
+ * Installs the Skein module linkage and then attempts to register the
+ * software provider with KCF.
+ *
+ * Returns the error from mod_install() if installation fails, 0 otherwise.
+ */
+int
+skein_mod_init(void)
+{
+       int error = mod_install(&modlinkage);
+
+       if (error == 0) {
+               /*
+                * KCF registration is best effort: a failure here must not
+                * unload us, since misc/skein functionality is still useful
+                * on its own.
+                */
+               (void) crypto_register_provider(&skein_prov_info,
+                   &skein_prov_handle);
+       }
+
+       return (error);
+}
+
+/*
+ * Tears the module down: unregisters the KCF provider that skein_mod_init()
+ * registered (if registration succeeded) and then removes the module
+ * linkage.
+ *
+ * Without the unregister step KCF would keep referring to this provider's
+ * operation tables after the module is gone.
+ *
+ * Returns EBUSY if KCF refuses to unregister the provider, otherwise the
+ * result of mod_remove().
+ */
+int
+skein_mod_fini(void)
+{
+       int ret;
+
+       /* A handle of 0 means registration never succeeded. */
+       if (skein_prov_handle != 0) {
+               ret = crypto_unregister_provider(skein_prov_handle);
+               if (ret != CRYPTO_SUCCESS) {
+                       cmn_err(CE_WARN,
+                           "skein _fini: crypto_unregister_provider() "
+                           "failed (0x%x)", ret);
+                       return (EBUSY);
+               }
+               skein_prov_handle = 0;
+       }
+
+       return (mod_remove(&modlinkage));
+}
+
+/*
+ * KCF software provider control entry points.
+ */
+/*
+ * Status callback installed in skein_control_ops: unconditionally reports
+ * CRYPTO_PROVIDER_READY through `status'. `provider' is not used.
+ */
+/* ARGSUSED */
+static void
+skein_provider_status(crypto_provider_handle_t provider, uint_t *status)
+{
+       *status = CRYPTO_PROVIDER_READY;
+}
+
+/*
+ * General Skein hashing helper functions.
+ */
+
+/*
+ * Runs an Update on `ctx' over the cd_length bytes that begin cd_offset
+ * bytes into the uio referenced by `data'.
+ *
+ * Returns CRYPTO_ARGUMENTS_BAD if the uio is not a kernel-space uio,
+ * CRYPTO_DATA_LEN_RANGE if the offset or the length runs past the iovecs
+ * supplied, and CRYPTO_SUCCESS otherwise. Data consumed before a length
+ * error is detected has already been fed into the context.
+ */
+static int
+skein_digest_update_uio(skein_ctx_t *ctx, const crypto_data_t *data)
+{
+       const uio_t     *uio = data->cd_uio;
+       off_t           skip = data->cd_offset;
+       size_t          remaining = data->cd_length;
+       uint_t          i = 0;
+
+       /* we support only kernel buffer */
+       if (uio->uio_segflg != UIO_SYSSPACE)
+               return (CRYPTO_ARGUMENTS_BAD);
+
+       /* Step over every iovec that lies wholly before the start offset. */
+       while (i < uio->uio_iovcnt && skip >= uio->uio_iov[i].iov_len) {
+               skip -= uio->uio_iov[i].iov_len;
+               i++;
+       }
+       if (i == uio->uio_iovcnt) {
+               /* The offset is beyond the total size of the buffers. */
+               return (CRYPTO_DATA_LEN_RANGE);
+       }
+
+       /*
+        * Digest iovec by iovec; only the first one is entered at a
+        * non-zero offset.
+        */
+       for (; i < uio->uio_iovcnt && remaining > 0; i++) {
+               size_t chunk = MIN(uio->uio_iov[i].iov_len - skip, remaining);
+
+               SKEIN_OP(ctx, Update,
+                   (uint8_t *)uio->uio_iov[i].iov_base + skip, chunk);
+               remaining -= chunk;
+               skip = 0;
+       }
+
+       /* Ran out of iovecs before consuming the requested length. */
+       if (i == uio->uio_iovcnt && remaining > 0)
+               return (CRYPTO_DATA_LEN_RANGE);
+
+       return (CRYPTO_SUCCESS);
+}
+
+/*
+ * Performs a Final on a context and writes to a uio digest output.
+ *
+ * The digest is stored starting cd_offset bytes into the uio referenced by
+ * `digest'. If it fits into a single iovec it is written in place;
+ * otherwise it is produced in a temporary buffer (allocated with the
+ * kmem flag derived from `req') and scattered over the following iovecs.
+ *
+ * Returns CRYPTO_ARGUMENTS_BAD for a non-kernel uio, CRYPTO_DATA_LEN_RANGE
+ * if the offset or the digest runs past the supplied iovecs,
+ * CRYPTO_HOST_MEMORY if the temporary buffer cannot be allocated, and
+ * CRYPTO_SUCCESS otherwise.
+ */
+static int
+skein_digest_final_uio(skein_ctx_t *ctx, crypto_data_t *digest,
+    crypto_req_handle_t req)
+{
+       off_t   offset = digest->cd_offset;
+       size_t  digest_len = CRYPTO_BITS2BYTES(ctx->sc_digest_bitlen);
+       uint_t  vec_idx;
+       uio_t   *uio = digest->cd_uio;
+
+       /* we support only kernel buffer */
+       if (uio->uio_segflg != UIO_SYSSPACE)
+               return (CRYPTO_ARGUMENTS_BAD);
+
+       /*
+        * Jump to the first iovec containing ptr to the digest to be returned.
+        * The index has to be range-checked BEFORE the iovec is inspected,
+        * otherwise uio_iov[uio_iovcnt] is read when the offset lies past
+        * the end of the buffers.
+        */
+       for (vec_idx = 0; vec_idx < uio->uio_iovcnt &&
+           offset >= uio->uio_iov[vec_idx].iov_len;
+           offset -= uio->uio_iov[vec_idx++].iov_len)
+               ;
+       if (vec_idx == uio->uio_iovcnt) {
+               /*
+                * The caller specified an offset that is larger than the
+                * total size of the buffers it provided.
+                */
+               return (CRYPTO_DATA_LEN_RANGE);
+       }
+       if (offset + digest_len <= uio->uio_iov[vec_idx].iov_len) {
+               /* The computed digest will fit in the current iovec. */
+               SKEIN_OP(ctx, Final,
+                   (uchar_t *)uio->uio_iov[vec_idx].iov_base + offset);
+       } else {
+               uint8_t *digest_tmp;
+               off_t scratch_offset = 0;
+               size_t length = digest_len;
+               size_t cur_len;
+
+               digest_tmp = kmem_alloc(digest_len, crypto_kmflag(req));
+               if (digest_tmp == NULL)
+                       return (CRYPTO_HOST_MEMORY);
+               SKEIN_OP(ctx, Final, digest_tmp);
+               while (vec_idx < uio->uio_iovcnt && length > 0) {
+                       cur_len = MIN(uio->uio_iov[vec_idx].iov_len - offset,
+                           length);
+                       /* cast: no arithmetic on the void * iov_base */
+                       bcopy(digest_tmp + scratch_offset,
+                           (uint8_t *)uio->uio_iov[vec_idx].iov_base + offset,
+                           cur_len);
+
+                       length -= cur_len;
+                       vec_idx++;
+                       scratch_offset += cur_len;
+                       offset = 0;
+               }
+               kmem_free(digest_tmp, digest_len);
+
+               if (vec_idx == uio->uio_iovcnt && length > 0) {
+                       /*
+                        * The end of the specified iovec's was reached
+                        * before the whole digest could be stored, i.e.
+                        * the caller provided less output space than the
+                        * digest needs.
+                        */
+                       return (CRYPTO_DATA_LEN_RANGE);
+               }
+       }
+
+       return (CRYPTO_SUCCESS);
+}
+
+/*
+ * KCF software provider digest entry points.
+ */
+
+/*
+ * Allocates a skein digest context for `ctx' and configures it as described
+ * by `mechanism'.
+ * The mechanism cm_type must be one of the SKEIN_*_MECH_INFO_TYPE digest
+ * types. cm_param may carry a skein_param_t selecting the digest length;
+ * without it the default output lengths apply (32 bytes for Skein-256,
+ * 64 bytes for Skein-512 and 128 bytes for Skein-1024).
+ *
+ * Returns CRYPTO_MECHANISM_INVALID for a non-digest mechanism,
+ * CRYPTO_HOST_MEMORY if the context cannot be allocated, or the error from
+ * skein_get_digest_bitlen(); in the latter case the context is wiped,
+ * freed and detached from `ctx' again.
+ */
+static int
+skein_digest_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism,
+    crypto_req_handle_t req)
+{
+       skein_ctx_t     *sctx;
+       int             error;
+
+       if (!VALID_SKEIN_DIGEST_MECH(mechanism->cm_type))
+               return (CRYPTO_MECHANISM_INVALID);
+
+       sctx = kmem_alloc(sizeof (*sctx), crypto_kmflag(req));
+       SKEIN_CTX_LVALUE(ctx) = sctx;
+       if (sctx == NULL)
+               return (CRYPTO_HOST_MEMORY);
+
+       sctx->sc_mech_type = mechanism->cm_type;
+       error = skein_get_digest_bitlen(mechanism, &sctx->sc_digest_bitlen);
+       if (error != CRYPTO_SUCCESS) {
+               /* Undo the allocation; never leave a stale pointer behind. */
+               bzero(sctx, sizeof (*sctx));
+               kmem_free(sctx, sizeof (*sctx));
+               SKEIN_CTX_LVALUE(ctx) = NULL;
+               return (error);
+       }
+       SKEIN_OP(sctx, Init, sctx->sc_digest_bitlen);
+
+       return (CRYPTO_SUCCESS);
+}
+
+/*
+ * Executes a skein_update and a skein_final on a pre-initialized crypto
+ * context in a single step. See the documentation of those functions for
+ * what to pass here.
+ *
+ * If `digest' is too small, its cd_length is set to the required size and
+ * CRYPTO_BUFFER_TOO_SMALL is returned with the context left intact. If the
+ * update fails, the context is wiped and freed and cd_length is set to 0.
+ */
+static int
+skein_digest(crypto_ctx_t *ctx, crypto_data_t *data, crypto_data_t *digest,
+    crypto_req_handle_t req)
+{
+       skein_ctx_t     *sctx = SKEIN_CTX(ctx);
+       size_t          digest_len;
+       int             error;
+
+       ASSERT(sctx != NULL);
+
+       digest_len = CRYPTO_BITS2BYTES(sctx->sc_digest_bitlen);
+       if (digest->cd_length < digest_len) {
+               digest->cd_length = digest_len;
+               return (CRYPTO_BUFFER_TOO_SMALL);
+       }
+
+       error = skein_update(ctx, data, req);
+       if (error == CRYPTO_SUCCESS)
+               return (skein_final(ctx, digest, req));
+
+       /* Update failed: dispose of the context, report no output. */
+       bzero(sctx, sizeof (*sctx));
+       kmem_free(sctx, sizeof (*sctx));
+       SKEIN_CTX_LVALUE(ctx) = NULL;
+       digest->cd_length = 0;
+
+       return (error);
+}
+
+/*
+ * Performs a skein Update with the input message in `data' (successive calls
+ * can push more data). This is used both for digest and MAC operation.
+ * Supported input data formats are raw and uio; any other format is
+ * rejected with CRYPTO_ARGUMENTS_BAD.
+ */
+/*ARGSUSED*/
+static int
+skein_update(crypto_ctx_t *ctx, crypto_data_t *data, crypto_req_handle_t req)
+{
+       ASSERT(SKEIN_CTX(ctx) != NULL);
+
+       switch (data->cd_format) {
+       case CRYPTO_DATA_RAW:
+               SKEIN_OP(SKEIN_CTX(ctx), Update,
+                   (uint8_t *)data->cd_raw.iov_base + data->cd_offset,
+                   data->cd_length);
+               return (CRYPTO_SUCCESS);
+       case CRYPTO_DATA_UIO:
+               return (skein_digest_update_uio(SKEIN_CTX(ctx), data));
+       default:
+               return (CRYPTO_ARGUMENTS_BAD);
+       }
+}
+
+/*
+ * Performs a skein Final, writing the output to `digest'. This is used both
+ * for digest and MAC operation.
+ * Supported output formats are raw and uio; any other format yields
+ * CRYPTO_ARGUMENTS_BAD.
+ *
+ * If `digest' is too small, its cd_length is set to the required size and
+ * CRYPTO_BUFFER_TOO_SMALL is returned with the context left untouched.
+ * In every other case the context is wiped, released with kmem_free() and
+ * detached from `ctx', so it must have been obtained from kmem_alloc().
+ * cd_length is set to the digest size on success and to 0 on failure.
+ */
+/*ARGSUSED*/
+static int
+skein_final(crypto_ctx_t *ctx, crypto_data_t *digest, crypto_req_handle_t req)
+{
+       skein_ctx_t     *sctx = SKEIN_CTX(ctx);
+       size_t          digest_len;
+       int             error = CRYPTO_SUCCESS;
+
+       ASSERT(sctx != NULL);
+
+       digest_len = CRYPTO_BITS2BYTES(sctx->sc_digest_bitlen);
+       if (digest->cd_length < digest_len) {
+               digest->cd_length = digest_len;
+               return (CRYPTO_BUFFER_TOO_SMALL);
+       }
+
+       if (digest->cd_format == CRYPTO_DATA_RAW) {
+               SKEIN_OP(sctx, Final,
+                   (uint8_t *)digest->cd_raw.iov_base + digest->cd_offset);
+       } else if (digest->cd_format == CRYPTO_DATA_UIO) {
+               error = skein_digest_final_uio(sctx, digest, req);
+       } else {
+               error = CRYPTO_ARGUMENTS_BAD;
+       }
+
+       digest->cd_length = (error == CRYPTO_SUCCESS) ? digest_len : 0;
+
+       /* The operation is over either way: scrub and drop the context. */
+       bzero(sctx, sizeof (*sctx));
+       kmem_free(sctx, sizeof (*sctx));
+       SKEIN_CTX_LVALUE(ctx) = NULL;
+
+       return (error);
+}
+
+/*
+ * Performs a full skein digest computation in a single call, configuring the
+ * algorithm according to `mechanism', reading the input to be digested from
+ * `data' and writing the output to `digest'.
+ * Supported input/output formats are raw and uio.
+ *
+ * The Skein state lives on this function's stack. skein_final() must
+ * therefore NOT be used here, because it releases the context with
+ * kmem_free(); the final step is carried out inline instead.
+ *
+ * If `digest' is too small, its cd_length is set to the required size and
+ * CRYPTO_BUFFER_TOO_SMALL is returned. Otherwise cd_length is set to the
+ * digest size on success and to 0 on failure. `provider' and `session_id'
+ * are not used.
+ */
+/*ARGSUSED*/
+static int
+skein_digest_atomic(crypto_provider_handle_t provider,
+    crypto_session_id_t session_id, crypto_mechanism_t *mechanism,
+    crypto_data_t *data, crypto_data_t *digest, crypto_req_handle_t req)
+{
+       int             error;
+       size_t          digest_len = 0;
+       skein_ctx_t     skein_ctx;
+       /* faux crypto context, needed only to call skein_update() */
+       crypto_ctx_t    ctx;
+       SKEIN_CTX_LVALUE(&ctx) = &skein_ctx;
+
+       /* Init */
+       if (!VALID_SKEIN_DIGEST_MECH(mechanism->cm_type))
+               return (CRYPTO_MECHANISM_INVALID);
+       skein_ctx.sc_mech_type = mechanism->cm_type;
+       error = skein_get_digest_bitlen(mechanism, &skein_ctx.sc_digest_bitlen);
+       if (error != CRYPTO_SUCCESS)
+               goto out;
+
+       /* Remember the length now; the context is wiped before we return. */
+       digest_len = CRYPTO_BITS2BYTES(skein_ctx.sc_digest_bitlen);
+       if (digest->cd_length < digest_len) {
+               /* Tell the caller how much room is needed. */
+               digest->cd_length = digest_len;
+               bzero(&skein_ctx, sizeof (skein_ctx));
+               return (CRYPTO_BUFFER_TOO_SMALL);
+       }
+       SKEIN_OP(&skein_ctx, Init, skein_ctx.sc_digest_bitlen);
+
+       /* Update */
+       if ((error = skein_update(&ctx, data, req)) != CRYPTO_SUCCESS)
+               goto out;
+
+       /* Final, written to `digest' without freeing the stack context */
+       switch (digest->cd_format) {
+       case CRYPTO_DATA_RAW:
+               SKEIN_OP(&skein_ctx, Final,
+                   (uint8_t *)digest->cd_raw.iov_base + digest->cd_offset);
+               break;
+       case CRYPTO_DATA_UIO:
+               error = skein_digest_final_uio(&skein_ctx, digest, req);
+               break;
+       default:
+               error = CRYPTO_ARGUMENTS_BAD;
+       }
+
+out:
+       if (error == CRYPTO_SUCCESS)
+               digest->cd_length = digest_len;
+       else
+               digest->cd_length = 0;
+       bzero(&skein_ctx, sizeof (skein_ctx));
+
+       return (error);
+}
+
+/*
+ * Helper that fills in the Skein MAC context `ctx' from the provided
+ * mechanism and key.
+ *
+ * Returns CRYPTO_MECHANISM_INVALID unless cm_type is one of the Skein MAC
+ * mechanisms, CRYPTO_ARGUMENTS_BAD unless the key is in raw format, or the
+ * error from skein_get_digest_bitlen(). On success the context has been
+ * keyed through the InitExt operation, with the key length converted from
+ * bits to bytes.
+ */
+static int
+skein_mac_ctx_build(skein_ctx_t *ctx, crypto_mechanism_t *mechanism,
+    crypto_key_t *key)
+{
+       int rc;
+
+       if (!VALID_SKEIN_MAC_MECH(mechanism->cm_type))
+               return (CRYPTO_MECHANISM_INVALID);
+       if (key->ck_format != CRYPTO_KEY_RAW)
+               return (CRYPTO_ARGUMENTS_BAD);
+
+       ctx->sc_mech_type = mechanism->cm_type;
+       rc = skein_get_digest_bitlen(mechanism, &ctx->sc_digest_bitlen);
+       if (rc == CRYPTO_SUCCESS) {
+               SKEIN_OP(ctx, InitExt, ctx->sc_digest_bitlen, 0, key->ck_data,
+                   CRYPTO_BITS2BYTES(key->ck_length));
+       }
+
+       return (rc);
+}
+
+/*
+ * KCF software provider mac entry points.
+ */
+/*
+ * Initializes a skein MAC context. You may pass a ctx_template, in which
+ * case the template will be reused to make initialization more efficient.
+ * Otherwise a new context will be constructed. The mechanism cm_type must
+ * be one of SKEIN_*_MAC_MECH_INFO_TYPE. Same as in skein_digest_init, you
+ * may pass a skein_param_t in cm_param to configure the length of the
+ * digest. The key must be in raw format.
+ *
+ * Returns CRYPTO_HOST_MEMORY if the context cannot be allocated, or the
+ * error from skein_mac_ctx_build(). On failure the context is wiped and
+ * freed, and the provider-private pointer in `ctx' is reset to NULL so
+ * that a later skein_free_context() cannot free it a second time.
+ */
+static int
+skein_mac_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism,
+    crypto_key_t *key, crypto_spi_ctx_template_t ctx_template,
+    crypto_req_handle_t req)
+{
+       int     error = CRYPTO_SUCCESS;
+
+       SKEIN_CTX_LVALUE(ctx) = kmem_alloc(sizeof (*SKEIN_CTX(ctx)),
+           crypto_kmflag(req));
+       if (SKEIN_CTX(ctx) == NULL)
+               return (CRYPTO_HOST_MEMORY);
+
+       if (ctx_template != NULL) {
+               /* The template is a fully keyed skein_ctx_t; just copy it. */
+               bcopy(ctx_template, SKEIN_CTX(ctx),
+                   sizeof (*SKEIN_CTX(ctx)));
+       } else {
+               error = skein_mac_ctx_build(SKEIN_CTX(ctx), mechanism, key);
+               if (error != CRYPTO_SUCCESS)
+                       goto errout;
+       }
+
+       return (CRYPTO_SUCCESS);
+errout:
+       bzero(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx)));
+       kmem_free(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx)));
+       /* Do not leave a dangling pointer to the freed context behind. */
+       SKEIN_CTX_LVALUE(ctx) = NULL;
+       return (error);
+}
+
+/*
+ * The MAC update and final calls are reused from the regular digest code.
+ */
+
+/*
+ * Same as skein_digest_atomic, performs an atomic Skein MAC operation in
+ * one step. All the same properties apply to the arguments of this
+ * function as to those of the partial operations above.
+ *
+ * The Skein state lives on this function's stack. skein_final() must
+ * therefore NOT be used here, because it releases the context with
+ * kmem_free(); the final step is carried out inline instead.
+ *
+ * If `mac' is too small, its cd_length is set to the required size and
+ * CRYPTO_BUFFER_TOO_SMALL is returned. Once the final step has been
+ * attempted, cd_length is set to the MAC size on success and to 0 on
+ * failure. `provider' and `session_id' are not used.
+ */
+/*ARGSUSED*/
+static int
+skein_mac_atomic(crypto_provider_handle_t provider,
+    crypto_session_id_t session_id, crypto_mechanism_t *mechanism,
+    crypto_key_t *key, crypto_data_t *data, crypto_data_t *mac,
+    crypto_spi_ctx_template_t ctx_template, crypto_req_handle_t req)
+{
+       int             error;
+       size_t          mac_len;
+       /* faux crypto context, needed only to call skein_update() */
+       crypto_ctx_t    ctx;
+       skein_ctx_t     skein_ctx;
+       SKEIN_CTX_LVALUE(&ctx) = &skein_ctx;
+
+       if (ctx_template != NULL) {
+               bcopy(ctx_template, &skein_ctx, sizeof (skein_ctx));
+       } else {
+               error = skein_mac_ctx_build(&skein_ctx, mechanism, key);
+               if (error != CRYPTO_SUCCESS)
+                       goto out;
+       }
+
+       /* Remember the length now; the context is wiped before we return. */
+       mac_len = CRYPTO_BITS2BYTES(skein_ctx.sc_digest_bitlen);
+       if (mac->cd_length < mac_len) {
+               /* Tell the caller how much room is needed. */
+               mac->cd_length = mac_len;
+               error = CRYPTO_BUFFER_TOO_SMALL;
+               goto out;
+       }
+
+       if ((error = skein_update(&ctx, data, req)) != CRYPTO_SUCCESS)
+               goto out;
+
+       /* Final, written to `mac' without freeing the stack context */
+       switch (mac->cd_format) {
+       case CRYPTO_DATA_RAW:
+               SKEIN_OP(&skein_ctx, Final,
+                   (uint8_t *)mac->cd_raw.iov_base + mac->cd_offset);
+               break;
+       case CRYPTO_DATA_UIO:
+               error = skein_digest_final_uio(&skein_ctx, mac, req);
+               break;
+       default:
+               error = CRYPTO_ARGUMENTS_BAD;
+       }
+       mac->cd_length = (error == CRYPTO_SUCCESS) ? mac_len : 0;
+
+out:
+       /* The keyed state is sensitive: scrub it on every exit path. */
+       bzero(&skein_ctx, sizeof (skein_ctx));
+       return (error);
+}
+
+/*
+ * KCF software provider context management entry points.
+ */
+
+/*
+ * Builds a reusable context template for the Skein MAC algorithm. The
+ * arguments follow the same rules as those of skein_mac_init.
+ *
+ * On success the freshly allocated template and its size are returned
+ * through `ctx_template' and `ctx_template_size'. Returns
+ * CRYPTO_HOST_MEMORY if the allocation fails, or the error from
+ * skein_mac_ctx_build(), in which case the template is wiped and freed.
+ */
+/*ARGSUSED*/
+static int
+skein_create_ctx_template(crypto_provider_handle_t provider,
+    crypto_mechanism_t *mechanism, crypto_key_t *key,
+    crypto_spi_ctx_template_t *ctx_template, size_t *ctx_template_size,
+    crypto_req_handle_t req)
+{
+       skein_ctx_t     *tmpl;
+       int             error;
+
+       tmpl = kmem_alloc(sizeof (*tmpl), crypto_kmflag(req));
+       if (tmpl == NULL)
+               return (CRYPTO_HOST_MEMORY);
+
+       error = skein_mac_ctx_build(tmpl, mechanism, key);
+       if (error != CRYPTO_SUCCESS) {
+               /* Scrub before freeing: the template may hold key state. */
+               bzero(tmpl, sizeof (*tmpl));
+               kmem_free(tmpl, sizeof (*tmpl));
+               return (error);
+       }
+
+       *ctx_template = tmpl;
+       *ctx_template_size = sizeof (*tmpl);
+
+       return (CRYPTO_SUCCESS);
+}
+
+/*
+ * Releases the skein context attached to a parent crypto context, if any:
+ * the state is wiped, freed and the provider-private pointer cleared.
+ * Always returns CRYPTO_SUCCESS.
+ */
+static int
+skein_free_context(crypto_ctx_t *ctx)
+{
+       skein_ctx_t     *sctx = SKEIN_CTX(ctx);
+
+       /* Nothing attached: nothing to do. */
+       if (sctx == NULL)
+               return (CRYPTO_SUCCESS);
+
+       bzero(sctx, sizeof (*sctx));
+       kmem_free(sctx, sizeof (*sctx));
+       SKEIN_CTX_LVALUE(ctx) = NULL;
+
+       return (CRYPTO_SUCCESS);
+}
index 5436bae9a8b92521ae642d12cc3f9815b25561c3..8a975ecb34ce698ca8f1f794f207f8af0c821345 100644 (file)
@@ -23,6 +23,9 @@
  * Use is subject to license terms.
  * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
  */
+/*
+ * Copyright 2013 Saso Kiselkov. All rights reserved.
+ */
 
 /*
  * Fletcher Checksums
@@ -206,8 +209,10 @@ static struct fletcher_4_kstat {
 /* Indicate that benchmark has been completed */
 static boolean_t fletcher_4_initialized = B_FALSE;
 
+/*ARGSUSED*/
 void
-fletcher_2_native(const void *buf, uint64_t size, zio_cksum_t *zcp)
+fletcher_2_native(const void *buf, uint64_t size,
+    const void *ctx_template, zio_cksum_t *zcp)
 {
        const uint64_t *ip = buf;
        const uint64_t *ipend = ip + (size / sizeof (uint64_t));
@@ -223,8 +228,10 @@ fletcher_2_native(const void *buf, uint64_t size, zio_cksum_t *zcp)
        ZIO_SET_CHECKSUM(zcp, a0, a1, b0, b1);
 }
 
+/*ARGSUSED*/
 void
-fletcher_2_byteswap(const void *buf, uint64_t size, zio_cksum_t *zcp)
+fletcher_2_byteswap(const void *buf, uint64_t size,
+    const void *ctx_template, zio_cksum_t *zcp)
 {
        const uint64_t *ip = buf;
        const uint64_t *ipend = ip + (size / sizeof (uint64_t));
@@ -404,8 +411,10 @@ fletcher_4_native_impl(const fletcher_4_ops_t *ops, const void *buf,
                ops->fini_native(zcp);
 }
 
+/*ARGSUSED*/
 void
-fletcher_4_native(const void *buf, uint64_t size, zio_cksum_t *zcp)
+fletcher_4_native(const void *buf, uint64_t size,
+    const void *ctx_template, zio_cksum_t *zcp)
 {
        const fletcher_4_ops_t *ops;
        uint64_t p2size = P2ALIGN(size, 64);
@@ -443,8 +452,10 @@ fletcher_4_byteswap_impl(const fletcher_4_ops_t *ops, const void *buf,
                ops->fini_byteswap(zcp);
 }
 
+/*ARGSUSED*/
 void
-fletcher_4_byteswap(const void *buf, uint64_t size, zio_cksum_t *zcp)
+fletcher_4_byteswap(const void *buf, uint64_t size,
+    const void *ctx_template, zio_cksum_t *zcp)
 {
        const fletcher_4_ops_t *ops;
        uint64_t p2size = P2ALIGN(size, 64);
@@ -551,7 +562,7 @@ fletcher_4_benchmark_impl(boolean_t native, char *data, uint64_t data_size)
                start = gethrtime();
                do {
                        for (l = 0; l < 32; l++, run_count++)
-                               fletcher_4_test(data, data_size, &zc);
+                               fletcher_4_test(data, data_size, NULL, &zc);
 
                        run_time_ns = gethrtime() - start;
                } while (run_time_ns < FLETCHER_4_BENCH_NS);
index 1d68ca29e6d6b369225c3c2e3a096ad8c3ce372b..029075ebec93caf703372bc6e82ca1765d7764ca 100644 (file)
@@ -70,6 +70,10 @@ zfs_prop_init(void)
                { "fletcher2",  ZIO_CHECKSUM_FLETCHER_2 },
                { "fletcher4",  ZIO_CHECKSUM_FLETCHER_4 },
                { "sha256",     ZIO_CHECKSUM_SHA256 },
+               { "noparity",   ZIO_CHECKSUM_NOPARITY },
+               { "sha512",     ZIO_CHECKSUM_SHA512 },
+               { "skein",      ZIO_CHECKSUM_SKEIN },
+               { "edonr",      ZIO_CHECKSUM_EDONR },
                { NULL }
        };
 
@@ -80,6 +84,14 @@ zfs_prop_init(void)
                { "sha256",     ZIO_CHECKSUM_SHA256 },
                { "sha256,verify",
                                ZIO_CHECKSUM_SHA256 | ZIO_CHECKSUM_VERIFY },
+               { "sha512",     ZIO_CHECKSUM_SHA512 },
+               { "sha512,verify",
+                               ZIO_CHECKSUM_SHA512 | ZIO_CHECKSUM_VERIFY },
+               { "skein",      ZIO_CHECKSUM_SKEIN },
+               { "skein,verify",
+                               ZIO_CHECKSUM_SKEIN | ZIO_CHECKSUM_VERIFY },
+               { "edonr,verify",
+                               ZIO_CHECKSUM_EDONR | ZIO_CHECKSUM_VERIFY },
                { NULL }
        };
 
@@ -241,12 +253,12 @@ zfs_prop_init(void)
        zprop_register_index(ZFS_PROP_CHECKSUM, "checksum",
            ZIO_CHECKSUM_DEFAULT, PROP_INHERIT, ZFS_TYPE_FILESYSTEM |
            ZFS_TYPE_VOLUME,
-           "on | off | fletcher2 | fletcher4 | sha256", "CHECKSUM",
-           checksum_table);
+           "on | off | fletcher2 | fletcher4 | sha256 | sha512 | "
+           "skein | edonr", "CHECKSUM", checksum_table);
        zprop_register_index(ZFS_PROP_DEDUP, "dedup", ZIO_CHECKSUM_OFF,
            PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
-           "on | off | verify | sha256[,verify]", "DEDUP",
-           dedup_table);
+           "on | off | verify | sha256[,verify], sha512[,verify], "
+           "skein[,verify], edonr,verify", "DEDUP", dedup_table);
        zprop_register_index(ZFS_PROP_COMPRESSION, "compression",
            ZIO_COMPRESS_DEFAULT, PROP_INHERIT,
            ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
index e1771b233a124fec0637040a49e46ff3d8bdd7ca..ce368880cbd363f37be3f3921b97df7ac5acbbbe 100644 (file)
@@ -36,6 +36,7 @@ $(MODULE)-objs += dsl_pool.o
 $(MODULE)-objs += dsl_prop.o
 $(MODULE)-objs += dsl_scan.o
 $(MODULE)-objs += dsl_synctask.o
+$(MODULE)-objs += edonr_zfs.o
 $(MODULE)-objs += fm.o
 $(MODULE)-objs += gzip.o
 $(MODULE)-objs += lzjb.o
@@ -49,6 +50,7 @@ $(MODULE)-objs += refcount.o
 $(MODULE)-objs += rrwlock.o
 $(MODULE)-objs += sa.o
 $(MODULE)-objs += sha256.o
+$(MODULE)-objs += skein_zfs.o
 $(MODULE)-objs += spa.o
 $(MODULE)-objs += spa_boot.o
 $(MODULE)-objs += spa_config.o
index bf078aa949bb9390bbf5b3ea6fa446468c2a4dc3..7bae2c42d26d10f83bb250bb668edb0e1435b33c 100755 (executable)
@@ -1382,7 +1382,7 @@ arc_cksum_verify(arc_buf_t *buf)
                return;
        }
 
-       fletcher_2_native(buf->b_data, arc_buf_size(buf), &zc);
+       fletcher_2_native(buf->b_data, arc_buf_size(buf), NULL, &zc);
        if (!ZIO_CHECKSUM_EQUAL(*hdr->b_l1hdr.b_freeze_cksum, zc))
                panic("buffer modified while frozen!");
        mutex_exit(&hdr->b_l1hdr.b_freeze_lock);
@@ -1495,7 +1495,7 @@ arc_cksum_compute(arc_buf_t *buf)
        ASSERT(!ARC_BUF_COMPRESSED(buf));
        hdr->b_l1hdr.b_freeze_cksum = kmem_alloc(sizeof (zio_cksum_t),
            KM_SLEEP);
-       fletcher_2_native(buf->b_data, arc_buf_size(buf),
+       fletcher_2_native(buf->b_data, arc_buf_size(buf), NULL,
            hdr->b_l1hdr.b_freeze_cksum);
        mutex_exit(&hdr->b_l1hdr.b_freeze_lock);
        arc_buf_watch(buf);
index e487e469f3a4534d7b5f010fcdef095e31eb8376..2ec41fb51e95f65af23f398f55f20c61c276dd62 100644 (file)
@@ -3814,7 +3814,8 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
                    dr->dt.dl.dr_copies, dr->dt.dl.dr_nopwrite);
                mutex_exit(&db->db_mtx);
        } else if (db->db_state == DB_NOFILL) {
-               ASSERT(zp.zp_checksum == ZIO_CHECKSUM_OFF);
+               ASSERT(zp.zp_checksum == ZIO_CHECKSUM_OFF ||
+                   zp.zp_checksum == ZIO_CHECKSUM_NOPARITY);
                dr->dr_zio = zio_write(zio, os->os_spa, txg,
                    &dr->dr_bp_copy, NULL, db->db.db_size, db->db.db_size, &zp,
                    dbuf_write_nofill_ready, NULL, NULL,
index 7d2383968122f80ac6ea259ff946478b290e06b4..09a3536f58ee285539f1ef445ed5eb524bdd605c 100644 (file)
@@ -21,7 +21,7 @@
 
 /*
  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -62,7 +62,8 @@ ddt_object_create(ddt_t *ddt, enum ddt_type type, enum ddt_class class,
        spa_t *spa = ddt->ddt_spa;
        objset_t *os = ddt->ddt_os;
        uint64_t *objectp = &ddt->ddt_object[type][class];
-       boolean_t prehash = zio_checksum_table[ddt->ddt_checksum].ci_dedup;
+       boolean_t prehash = zio_checksum_table[ddt->ddt_checksum].ci_flags &
+           ZCHECKSUM_FLAG_DEDUP;
        char name[DDT_NAMELEN];
 
        ddt_object_name(ddt, type, class, name);
index d2f4aac9808723e64f9422a4ae284eaae40da385..80185706cd24d37998601f0834826a40731993d1 100644 (file)
@@ -1445,7 +1445,8 @@ dmu_sync_done(zio_t *zio, arc_buf_t *buf, void *varg)
 
                        ASSERT(BP_EQUAL(bp, bp_orig));
                        ASSERT(zio->io_prop.zp_compress != ZIO_COMPRESS_OFF);
-                       ASSERT(zio_checksum_table[chksum].ci_dedup);
+                       ASSERT(zio_checksum_table[chksum].ci_flags &
+                           ZCHECKSUM_FLAG_NOPWRITE);
                }
                dr->dt.dl.dr_overridden_by = *zio->io_bp;
                dr->dt.dl.dr_override_state = DR_OVERRIDDEN;
@@ -1792,8 +1793,10 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp,
                 * as well.  Otherwise, the metadata checksum defaults
                 * to fletcher4.
                 */
-               if (zio_checksum_table[checksum].ci_correctable < 1 ||
-                   zio_checksum_table[checksum].ci_eck)
+               if (!(zio_checksum_table[checksum].ci_flags &
+                   ZCHECKSUM_FLAG_METADATA) ||
+                   (zio_checksum_table[checksum].ci_flags &
+                   ZCHECKSUM_FLAG_EMBEDDED))
                        checksum = ZIO_CHECKSUM_FLETCHER_4;
 
                if (os->os_redundant_metadata == ZFS_REDUNDANT_METADATA_ALL ||
@@ -1832,17 +1835,20 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp,
                 */
                if (dedup_checksum != ZIO_CHECKSUM_OFF) {
                        dedup = (wp & WP_DMU_SYNC) ? B_FALSE : B_TRUE;
-                       if (!zio_checksum_table[checksum].ci_dedup)
+                       if (!(zio_checksum_table[checksum].ci_flags &
+                           ZCHECKSUM_FLAG_DEDUP))
                                dedup_verify = B_TRUE;
                }
 
                /*
-                * Enable nopwrite if we have a cryptographically secure
-                * checksum that has no known collisions (i.e. SHA-256)
-                * and compression is enabled.  We don't enable nopwrite if
-                * dedup is enabled as the two features are mutually exclusive.
+                * Enable nopwrite if we have a secure enough checksum
+                * algorithm (see comment in zio_nop_write) and
+                * compression is enabled.  We don't enable nopwrite if
+                * dedup is enabled as the two features are mutually
+                * exclusive.
                 */
-               nopwrite = (!dedup && zio_checksum_table[checksum].ci_dedup &&
+               nopwrite = (!dedup && (zio_checksum_table[checksum].ci_flags &
+                   ZCHECKSUM_FLAG_NOPWRITE) &&
                    compress != ZIO_COMPRESS_OFF && zfs_nopwrite_enabled);
        }
 
index 5e95da52d639479e9e05f7110357f5f83aa545c3..f9414ea3ab2898e021edcbb5dfd95143df01c472 100644 (file)
@@ -346,7 +346,8 @@ dump_write(dmu_sendarg_t *dsp, dmu_object_type_t type,
                drrw->drr_checksumtype = ZIO_CHECKSUM_OFF;
        } else {
                drrw->drr_checksumtype = BP_GET_CHECKSUM(bp);
-               if (zio_checksum_table[drrw->drr_checksumtype].ci_dedup)
+               if (zio_checksum_table[drrw->drr_checksumtype].ci_flags &
+                   ZCHECKSUM_FLAG_DEDUP)
                        drrw->drr_checksumflags |= DRR_CHECKSUM_DEDUP;
                DDK_SET_LSIZE(&drrw->drr_key, BP_GET_LSIZE(bp));
                DDK_SET_PSIZE(&drrw->drr_key, BP_GET_PSIZE(bp));
index 3026d873355bd2561e185afd1dac2a22a6be3a20..9362d49bd26e78085568bf1c79206a8a11babf7f 100644 (file)
@@ -56,6 +56,7 @@
 #include <sys/dmu_send.h>
 #include <sys/zio_compress.h>
 #include <zfs_fletcher.h>
+#include <sys/zio_checksum.h>
 
 /*
  * The SPA supports block sizes up to 16MB.  However, very large blocks
@@ -108,6 +109,7 @@ dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
 {
        int used, compressed, uncompressed;
        int64_t delta;
+       spa_feature_t f;
 
        used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp);
        compressed = BP_GET_PSIZE(bp);
@@ -134,10 +136,16 @@ dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
        dsl_dataset_phys(ds)->ds_compressed_bytes += compressed;
        dsl_dataset_phys(ds)->ds_uncompressed_bytes += uncompressed;
        dsl_dataset_phys(ds)->ds_unique_bytes += used;
+
        if (BP_GET_LSIZE(bp) > SPA_OLD_MAXBLOCKSIZE) {
                ds->ds_feature_activation_needed[SPA_FEATURE_LARGE_BLOCKS] =
                    B_TRUE;
        }
+
+       f = zio_checksum_to_feature(BP_GET_CHECKSUM(bp));
+       if (f != SPA_FEATURE_NONE)
+               ds->ds_feature_activation_needed[f] = B_TRUE;
+
        mutex_exit(&ds->ds_lock);
        dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, delta,
            compressed, uncompressed, tx);
diff --git a/module/zfs/edonr_zfs.c b/module/zfs/edonr_zfs.c
new file mode 100644 (file)
index 0000000..3c7d986
--- /dev/null
@@ -0,0 +1,103 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2013 Saso Kiselkov.  All rights reserved.
+ * Use is subject to license terms.
+ */
+#include <sys/zfs_context.h>
+#include <sys/zio.h>
+#include <sys/edonr.h>
+#include <sys/zfs_context.h>   /* For CTASSERT() */
+
+#define        EDONR_MODE              512
+#define        EDONR_BLOCK_SIZE        EdonR512_BLOCK_SIZE
+
+/*
+ * Native zio_checksum interface for the Edon-R hash function.
+ *
+ * Copies the pre-keyed context in ctx_template (which must not be NULL;
+ * see zio_checksum_edonr_tmpl_init), hashes `size' bytes of `buf' on
+ * top of it, and stores the leading sizeof (zcp->zc_word) bytes of the
+ * resulting digest in zcp.
+ */
+/*ARGSUSED*/
+void
+zio_checksum_edonr_native(const void *buf, uint64_t size,
+    const void *ctx_template, zio_cksum_t *zcp)
+{
+       uint8_t         digest[EDONR_MODE / 8];
+       EdonRState      ctx;
+
+       ASSERT(ctx_template != NULL);
+       bcopy(ctx_template, &ctx, sizeof (ctx));
+       /* The data length is handed to EdonRUpdate() in bits, hence "* 8". */
+       EdonRUpdate(&ctx, buf, size * 8);
+       EdonRFinal(&ctx, digest);
+       bcopy(digest, zcp->zc_word, sizeof (zcp->zc_word));
+       /*
+        * The working context was seeded from the salted template; wipe
+        * it before returning, as zio_checksum_skein_native() does.
+        */
+       bzero(&ctx, sizeof (ctx));
+}
+
+/*
+ * Byteswapped zio_checksum interface for the Edon-R hash function.
+ *
+ * Computes the native checksum into a temporary and stores each of its
+ * four 64-bit words, byte-swapped, in zcp.
+ */
+void
+zio_checksum_edonr_byteswap(const void *buf, uint64_t size,
+    const void *ctx_template, zio_cksum_t *zcp)
+{
+       zio_cksum_t     tmp;
+
+       zio_checksum_edonr_native(buf, size, ctx_template, &tmp);
+       /* Swap the freshly computed words in tmp, not zcp's old contents. */
+       zcp->zc_word[0] = BSWAP_64(tmp.zc_word[0]);
+       zcp->zc_word[1] = BSWAP_64(tmp.zc_word[1]);
+       zcp->zc_word[2] = BSWAP_64(tmp.zc_word[2]);
+       zcp->zc_word[3] = BSWAP_64(tmp.zc_word[3]);
+}
+
+/*
+ * Builds the Edon-R context template for a checksum salt: a
+ * zero-allocated EdonRState that has been initialized and then fed one
+ * salt-derived block.  Returns the template, which is meant to be
+ * released with zio_checksum_edonr_tmpl_free().
+ */
+void *
+zio_checksum_edonr_tmpl_init(const zio_cksum_salt_t *salt)
+{
+       uint8_t         key_block[EDONR_BLOCK_SIZE];
+       uint8_t         *first_half = key_block;
+       uint8_t         *second_half = key_block + EDONR_MODE / 8;
+       EdonRState      *tmpl;
+
+       /*
+        * Every Edon-R invocation except the last must consume full-size
+        * blocks, and the salt is shorter than one block.  Instead of
+        * zero-padding it, stretch it to exactly one block by hashing
+        * twice: the block is H(salt) || H(H(salt)).
+        */
+       CTASSERT(EDONR_BLOCK_SIZE == 2 * (EDONR_MODE / 8));
+       EdonRHash(EDONR_MODE, salt->zcs_bytes, sizeof (salt->zcs_bytes) * 8,
+           first_half);
+       EdonRHash(EDONR_MODE, first_half, EDONR_MODE, second_half);
+
+       /*
+        * Absorb the expanded salt block into a fresh context; this
+        * acts as the MAC key.
+        */
+       tmpl = kmem_zalloc(sizeof (EdonRState), KM_SLEEP);
+       EdonRInit(tmpl, EDONR_MODE);
+       EdonRUpdate(tmpl, key_block, sizeof (key_block) * 8);
+       return (tmpl);
+}
+
+/*
+ * Destroys a template produced by zio_checksum_edonr_tmpl_init(): the
+ * context is zeroed first and then its memory is freed.
+ */
+void
+zio_checksum_edonr_tmpl_free(void *ctx_template)
+{
+       EdonRState      *tmpl = ctx_template;
+
+       bzero(tmpl, sizeof (EdonRState));
+       kmem_free(tmpl, sizeof (EdonRState));
+}
index 57f5b7daffea0c0bfd15a3f49eb4a73404620f1e..c8a4882f84b201d9f7fe52e6c8720fdfd3af3280 100644 (file)
  * CDDL HEADER END
  */
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
-
+/*
+ * Copyright 2013 Saso Kiselkov. All rights reserved.
+ */
 #include <sys/zfs_context.h>
 #include <sys/zio.h>
 #include <sys/zio_checksum.h>
+#include <sys/sha2.h>
 
-/*
- * SHA-256 checksum, as specified in FIPS 180-3, available at:
- * http://csrc.nist.gov/publications/PubsFIPS.html
- *
- * This is a very compact implementation of SHA-256.
- * It is designed to be simple and portable, not to be fast.
- */
-
-/*
- * The literal definitions of Ch() and Maj() according to FIPS 180-3 are:
- *
- *     Ch(x, y, z)     (x & y) ^ (~x & z)
- *     Maj(x, y, z)    (x & y) ^ (x & z) ^ (y & z)
- *
- * We use equivalent logical reductions here that require one less op.
- */
-#define        Ch(x, y, z)     ((z) ^ ((x) & ((y) ^ (z))))
-#define        Maj(x, y, z)    (((x) & (y)) ^ ((z) & ((x) ^ (y))))
-#define        Rot32(x, s)     (((x) >> s) | ((x) << (32 - s)))
-#define        SIGMA0(x)       (Rot32(x, 2) ^ Rot32(x, 13) ^ Rot32(x, 22))
-#define        SIGMA1(x)       (Rot32(x, 6) ^ Rot32(x, 11) ^ Rot32(x, 25))
-#define        sigma0(x)       (Rot32(x, 7) ^ Rot32(x, 18) ^ ((x) >> 3))
-#define        sigma1(x)       (Rot32(x, 17) ^ Rot32(x, 19) ^ ((x) >> 10))
-
-static const uint32_t SHA256_K[64] = {
-       0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
-       0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
-       0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
-       0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
-       0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
-       0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
-       0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
-       0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
-       0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
-       0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
-       0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
-       0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
-       0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
-       0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
-       0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
-       0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
-};
-
-static void
-SHA256Transform(uint32_t *H, const uint8_t *cp)
+/*ARGSUSED*/
+void
+zio_checksum_SHA256(const void *buf, uint64_t size,
+    const void *ctx_template, zio_cksum_t *zcp)
 {
-       uint32_t a, b, c, d, e, f, g, h, t, T1, T2, W[64];
-
-       for (t = 0; t < 16; t++, cp += 4)
-               W[t] = ((uint32_t)cp[0] << 24) | ((uint32_t)cp[1] << 16) |
-                   ((uint32_t)cp[2] << 8) | (uint32_t)cp[3];
-
-       for (t = 16; t < 64; t++)
-               W[t] = sigma1(W[t - 2]) + W[t - 7] +
-                   sigma0(W[t - 15]) + W[t - 16];
-
-       a = H[0]; b = H[1]; c = H[2]; d = H[3];
-       e = H[4]; f = H[5]; g = H[6]; h = H[7];
-
-       for (t = 0; t < 64; t++) {
-               T1 = h + SIGMA1(e) + Ch(e, f, g) + SHA256_K[t] + W[t];
-               T2 = SIGMA0(a) + Maj(a, b, c);
-               h = g; g = f; f = e; e = d + T1;
-               d = c; c = b; b = a; a = T1 + T2;
-       }
-
-       H[0] += a; H[1] += b; H[2] += c; H[3] += d;
-       H[4] += e; H[5] += f; H[6] += g; H[7] += h;
+       SHA2_CTX ctx;
+       zio_cksum_t tmp;
+
+       SHA2Init(SHA256, &ctx);
+       SHA2Update(&ctx, buf, size);
+       SHA2Final(&tmp, &ctx);
+
+       /*
+        * A prior implementation of this function had a private
+        * SHA256 implementation that always wrote things out in
+        * Big Endian, and there wasn't a byteswap variant of it.
+        * To preserve on-disk compatibility we need to force that
+        * behaviour.
+        */
+       zcp->zc_word[0] = BE_64(tmp.zc_word[0]);
+       zcp->zc_word[1] = BE_64(tmp.zc_word[1]);
+       zcp->zc_word[2] = BE_64(tmp.zc_word[2]);
+       zcp->zc_word[3] = BE_64(tmp.zc_word[3]);
 }
 
+/*ARGSUSED*/
 void
-zio_checksum_SHA256(const void *buf, uint64_t size, zio_cksum_t *zcp)
+zio_checksum_SHA512_native(const void *buf, uint64_t size,
+    const void *ctx_template, zio_cksum_t *zcp)
 {
-       uint32_t H[8] = { 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
-           0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19 };
-       uint8_t pad[128];
-       int i, padsize;
+       SHA2_CTX        ctx;
 
-       for (i = 0; i < (size & ~63ULL); i += 64)
-               SHA256Transform(H, (uint8_t *)buf + i);
-
-       for (padsize = 0; i < size; i++)
-               pad[padsize++] = *((uint8_t *)buf + i);
-
-       for (pad[padsize++] = 0x80; (padsize & 63) != 56; padsize++)
-               pad[padsize] = 0;
-
-       for (i = 56; i >= 0; i -= 8)
-               pad[padsize++] = (size << 3) >> i;
+       SHA2Init(SHA512_256, &ctx);
+       SHA2Update(&ctx, buf, size);
+       SHA2Final(zcp, &ctx);
+}
 
-       for (i = 0; i < padsize; i += 64)
-               SHA256Transform(H, pad + i);
+/*ARGSUSED*/
+void
+zio_checksum_SHA512_byteswap(const void *buf, uint64_t size,
+    const void *ctx_template, zio_cksum_t *zcp)
+{
+       zio_cksum_t     tmp;
 
-       ZIO_SET_CHECKSUM(zcp,
-           (uint64_t)H[0] << 32 | H[1],
-           (uint64_t)H[2] << 32 | H[3],
-           (uint64_t)H[4] << 32 | H[5],
-           (uint64_t)H[6] << 32 | H[7]);
+       zio_checksum_SHA512_native(buf, size, ctx_template, &tmp);
+       zcp->zc_word[0] = BSWAP_64(tmp.zc_word[0]);
+       zcp->zc_word[1] = BSWAP_64(tmp.zc_word[1]);
+       zcp->zc_word[2] = BSWAP_64(tmp.zc_word[2]);
+       zcp->zc_word[3] = BSWAP_64(tmp.zc_word[3]);
 }
diff --git a/module/zfs/skein_zfs.c b/module/zfs/skein_zfs.c
new file mode 100644 (file)
index 0000000..6592340
--- /dev/null
@@ -0,0 +1,91 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2013 Saso Kiselkov.  All rights reserved.
+ */
+#include <sys/zfs_context.h>
+#include <sys/zio.h>
+#include <sys/skein.h>
+
+/*
+ * Native 256-bit Skein MAC checksum.  A ctx_template created by
+ * zio_checksum_skein_tmpl_init is mandatory; it is copied into a local
+ * working context, which is what gets updated and finalized.
+ */
+/*ARGSUSED*/
+void
+zio_checksum_skein_native(const void *buf, uint64_t size,
+    const void *ctx_template, zio_cksum_t *zcp)
+{
+       Skein_512_Ctxt_t        work;
+       uint8_t                 *out = (uint8_t *)zcp;
+
+       ASSERT(ctx_template != NULL);
+       bcopy(ctx_template, &work, sizeof (Skein_512_Ctxt_t));
+       (void) Skein_512_Update(&work, buf, size);
+       (void) Skein_512_Final(&work, out);
+       /* Scrub the working state before returning. */
+       bzero(&work, sizeof (work));
+}
+
+/*
+ * Byteswapped counterpart of zio_checksum_skein_native.  The native
+ * checksum is computed into a temporary and each of its four 64-bit
+ * words is byte-swapped into zcp (skein is internally
+ * endian-insensitive, so swapping the result is sufficient).
+ */
+void
+zio_checksum_skein_byteswap(const void *buf, uint64_t size,
+    const void *ctx_template, zio_cksum_t *zcp)
+{
+       zio_cksum_t     native;
+       int             w;
+
+       zio_checksum_skein_native(buf, size, ctx_template, &native);
+       for (w = 0; w < 4; w++)
+               zcp->zc_word[w] = BSWAP_64(native.zc_word[w]);
+}
+
+/*
+ * Creates the skein MAC template for a checksum salt: a zero-allocated
+ * Skein-512 context initialized with the salt bytes as key and an
+ * output length of sizeof (zio_cksum_t) * 8 bits.  Returns a pointer
+ * to the template.
+ */
+void *
+zio_checksum_skein_tmpl_init(const zio_cksum_salt_t *salt)
+{
+       Skein_512_Ctxt_t        *tmpl;
+       const size_t            digest_bits = sizeof (zio_cksum_t) * 8;
+
+       tmpl = kmem_zalloc(sizeof (*tmpl), KM_SLEEP);
+       (void) Skein_512_InitExt(tmpl, digest_bits, 0, salt->zcs_bytes,
+           sizeof (salt->zcs_bytes));
+       return (tmpl);
+}
+
+/*
+ * Releases a skein context template obtained from
+ * zio_checksum_skein_tmpl_init.  The context is zeroed before its
+ * memory is freed.
+ */
+void
+zio_checksum_skein_tmpl_free(void *ctx_template)
+{
+       Skein_512_Ctxt_t        *tmpl = ctx_template;
+
+       bzero(tmpl, sizeof (Skein_512_Ctxt_t));
+       kmem_free(tmpl, sizeof (Skein_512_Ctxt_t));
+}
index 3264bfb10054f55e776dee985090a6832397bb92..c2f914e11c093aa5896809620ead9932646e2412 100644 (file)
@@ -25,6 +25,7 @@
  * Copyright (c) 2015, Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2013, 2014, Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
+ * Copyright 2013 Saso Kiselkov. All rights reserved.
  * Copyright (c) 2016 Actifio, Inc. All rights reserved.
  */
 
@@ -2675,6 +2676,19 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
                return (spa_load(spa, state, SPA_IMPORT_EXISTING, B_TRUE));
        }
 
+       /* Grab the checksum salt from the MOS. */
+       error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
+           DMU_POOL_CHECKSUM_SALT, 1,
+           sizeof (spa->spa_cksum_salt.zcs_bytes),
+           spa->spa_cksum_salt.zcs_bytes);
+       if (error == ENOENT) {
+               /* Generate a new salt for subsequent use */
+               (void) random_get_pseudo_bytes(spa->spa_cksum_salt.zcs_bytes,
+                   sizeof (spa->spa_cksum_salt.zcs_bytes));
+       } else if (error != 0) {
+               return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
+       }
+
        if (spa_dir_prop(spa, DMU_POOL_SYNC_BPOBJ, &obj) != 0)
                return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
        error = bpobj_open(&spa->spa_deferred_bpobj, spa->spa_meta_objset, obj);
@@ -3929,6 +3943,12 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
        if (version >= SPA_VERSION_ZPOOL_HISTORY)
                spa_history_create_obj(spa, tx);
 
+       /*
+        * Generate some random noise for salted checksums to operate on.
+        */
+       (void) random_get_pseudo_bytes(spa->spa_cksum_salt.zcs_bytes,
+           sizeof (spa->spa_cksum_salt.zcs_bytes));
+
        /*
         * Set pool properties.
         */
@@ -6406,6 +6426,20 @@ spa_sync_upgrades(spa_t *spa, dmu_tx_t *tx)
                if (lz4_en && !lz4_ac)
                        spa_feature_incr(spa, SPA_FEATURE_LZ4_COMPRESS, tx);
        }
+
+       /*
+        * If we haven't written the salt, do so now.  Note that the
+        * feature may not be activated yet, but that's fine since
+        * the presence of this ZAP entry is backwards compatible.
+        */
+       if (zap_contains(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
+           DMU_POOL_CHECKSUM_SALT) == ENOENT) {
+               VERIFY0(zap_add(spa->spa_meta_objset,
+                   DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CHECKSUM_SALT, 1,
+                   sizeof (spa->spa_cksum_salt.zcs_bytes),
+                   spa->spa_cksum_salt.zcs_bytes, tx));
+       }
+
        rrw_exit(&dp->dp_config_rwlock, FTAG);
 }
 
index 6330a6a6b2f764b86e220a784b15b3299039e63c..595e594ca97224c5e89cab97a65e5d6b8ce53adc 100644 (file)
@@ -23,6 +23,7 @@
  * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
+ * Copyright 2013 Saso Kiselkov. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -53,7 +54,7 @@
 #include <sys/ddt.h>
 #include <sys/kstat.h>
 #include "zfs_prop.h"
-#include "zfeature_common.h"
+#include <sys/zfeature.h>
 
 /*
  * SPA locking
@@ -558,6 +559,7 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
        mutex_init(&spa->spa_history_lock, NULL, MUTEX_DEFAULT, NULL);
        mutex_init(&spa->spa_proc_lock, NULL, MUTEX_DEFAULT, NULL);
        mutex_init(&spa->spa_props_lock, NULL, MUTEX_DEFAULT, NULL);
+       mutex_init(&spa->spa_cksum_tmpls_lock, NULL, MUTEX_DEFAULT, NULL);
        mutex_init(&spa->spa_scrub_lock, NULL, MUTEX_DEFAULT, NULL);
        mutex_init(&spa->spa_suspend_lock, NULL, MUTEX_DEFAULT, NULL);
        mutex_init(&spa->spa_vdev_top_lock, NULL, MUTEX_DEFAULT, NULL);
@@ -686,6 +688,8 @@ spa_remove(spa_t *spa)
        for (t = 0; t < TXG_SIZE; t++)
                bplist_destroy(&spa->spa_free_bplist[t]);
 
+       zio_checksum_templates_free(spa);
+
        cv_destroy(&spa->spa_async_cv);
        cv_destroy(&spa->spa_evicting_os_cv);
        cv_destroy(&spa->spa_proc_cv);
@@ -699,6 +703,7 @@ spa_remove(spa_t *spa)
        mutex_destroy(&spa->spa_history_lock);
        mutex_destroy(&spa->spa_proc_lock);
        mutex_destroy(&spa->spa_props_lock);
+       mutex_destroy(&spa->spa_cksum_tmpls_lock);
        mutex_destroy(&spa->spa_scrub_lock);
        mutex_destroy(&spa->spa_suspend_lock);
        mutex_destroy(&spa->spa_vdev_top_lock);
index f5df2c7d89702e5c72360f41d0d36caf1b31ded9..d1b41536780530a639cf32e8f5247e5a79fc913d 100644 (file)
@@ -1604,6 +1604,13 @@ raidz_parity_verify(zio_t *zio, raidz_map_t *rm)
        int c, ret = 0;
        raidz_col_t *rc;
 
+       blkptr_t *bp = zio->io_bp;
+       enum zio_checksum checksum = (bp == NULL ? zio->io_prop.zp_checksum :
+           (BP_IS_GANG(bp) ? ZIO_CHECKSUM_GANG_HEADER : BP_GET_CHECKSUM(bp)));
+
+       if (checksum == ZIO_CHECKSUM_NOPARITY)
+               return (ret);
+
        for (c = 0; c < rm->rm_firstdatacol; c++) {
                rc = &rm->rm_col[c];
                if (!rc->rc_tried || rc->rc_error != 0)
index 3264f62352740f12d3c1c82c80e368bcf1b06265..e8b0a16aed86f83306d88d3b1517d853d8701031 100644 (file)
@@ -253,4 +253,16 @@ zpool_feature_init(void)
            "Variable on-disk size of dnodes.",
            ZFEATURE_FLAG_PER_DATASET, large_dnode_deps);
        }
+       zfeature_register(SPA_FEATURE_SHA512,
+           "org.illumos:sha512", "sha512",
+           "SHA-512/256 hash algorithm.",
+           ZFEATURE_FLAG_PER_DATASET, NULL);
+       zfeature_register(SPA_FEATURE_SKEIN,
+           "org.illumos:skein", "skein",
+           "Skein hash algorithm.",
+           ZFEATURE_FLAG_PER_DATASET, NULL);
+       zfeature_register(SPA_FEATURE_EDONR,
+           "org.illumos:edonr", "edonr",
+           "Edon-R hash algorithm.",
+           ZFEATURE_FLAG_PER_DATASET, NULL);
 }
index 64f630108089f0324ad5aa819fb9b59902bde47a..9140c62a62a1c9ef00ab48d3cfe3ed5c9816a6a1 100644 (file)
 #include <sys/dsl_bookmark.h>
 #include <sys/dsl_userhold.h>
 #include <sys/zfeature.h>
+#include <sys/zio_checksum.h>
 
 #include <linux/miscdevice.h>
 #include <linux/slab.h>
@@ -3809,11 +3810,6 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
                        return (SET_ERROR(ENOTSUP));
                break;
 
-       case ZFS_PROP_DEDUP:
-               if (zfs_earlier_version(dsname, SPA_VERSION_DEDUP))
-                       return (SET_ERROR(ENOTSUP));
-               break;
-
        case ZFS_PROP_VOLBLOCKSIZE:
        case ZFS_PROP_RECORDSIZE:
                /* Record sizes above 128k need the feature to be enabled */
@@ -3893,6 +3889,47 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
                                return (SET_ERROR(ENOTSUP));
                }
                break;
+       case ZFS_PROP_CHECKSUM:
+       case ZFS_PROP_DEDUP:
+       {
+               spa_feature_t feature;
+               spa_t *spa;
+               uint64_t intval;
+               int err;
+
+               /* dedup feature version checks */
+               if (prop == ZFS_PROP_DEDUP &&
+                   zfs_earlier_version(dsname, SPA_VERSION_DEDUP))
+                       return (SET_ERROR(ENOTSUP));
+
+               if (nvpair_value_uint64(pair, &intval) != 0)
+                       return (SET_ERROR(EINVAL));
+
+               /* check prop value is enabled in features */
+               feature = zio_checksum_to_feature(intval);
+               if (feature == SPA_FEATURE_NONE)
+                       break;
+
+               if ((err = spa_open(dsname, &spa, FTAG)) != 0)
+                       return (err);
+               /*
+                * Salted checksums are not supported on root pools.
+                */
+               if (spa_bootfs(spa) != 0 &&
+                   intval < ZIO_CHECKSUM_FUNCTIONS &&
+                   (zio_checksum_table[intval].ci_flags &
+                   ZCHECKSUM_FLAG_SALTED)) {
+                       spa_close(spa, FTAG);
+                       return (SET_ERROR(ERANGE));
+               }
+               if (!spa_feature_is_enabled(spa, feature)) {
+                       spa_close(spa, FTAG);
+                       return (SET_ERROR(ENOTSUP));
+               }
+               spa_close(spa, FTAG);
+               break;
+       }
+
        default:
                break;
        }
index e26822e34d114dd7ce484d27e32845b22ab665a1..8a063ab7fc8c5dff1b19d525ed57ec39861a9650 100644 (file)
@@ -979,7 +979,7 @@ zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size,
 
        zio->io_prop.zp_checksum = checksum;
 
-       if (zio_checksum_table[checksum].ci_eck) {
+       if (zio_checksum_table[checksum].ci_flags & ZCHECKSUM_FLAG_EMBEDDED) {
                /*
                 * zec checksums are necessarily destructive -- they modify
                 * the end of the write buffer to hold the verifier/checksum.
@@ -1190,8 +1190,8 @@ zio_write_bp_init(zio_t *zio)
                if (BP_IS_HOLE(bp) || !zp->zp_dedup)
                        return (ZIO_PIPELINE_CONTINUE);
 
-               ASSERT(zio_checksum_table[zp->zp_checksum].ci_dedup ||
-                   zp->zp_dedup_verify);
+               ASSERT((zio_checksum_table[zp->zp_checksum].ci_flags &
+                   ZCHECKSUM_FLAG_DEDUP) || zp->zp_dedup_verify);
 
                if (BP_GET_CHECKSUM(bp) == zp->zp_checksum) {
                        BP_SET_DEDUP(bp, 1);
@@ -2198,12 +2198,22 @@ zio_write_gang_block(zio_t *pio)
 }
 
 /*
- * The zio_nop_write stage in the pipeline determines if allocating
- * a new bp is necessary.  By leveraging a cryptographically secure checksum,
- * such as SHA256, we can compare the checksums of the new data and the old
- * to determine if allocating a new block is required.  The nopwrite
- * feature can handle writes in either syncing or open context (i.e. zil
- * writes) and as a result is mutually exclusive with dedup.
+ * The zio_nop_write stage in the pipeline determines if allocating a
+ * new bp is necessary.  The nopwrite feature can handle writes in
+ * either syncing or open context (i.e. zil writes) and as a result is
+ * mutually exclusive with dedup.
+ *
+ * By leveraging a cryptographically secure checksum, such as SHA256, we
+ * can compare the checksums of the new data and the old to determine if
+ * allocating a new block is required.  Note that our requirements for
+ * cryptographic strength are fairly weak: there can't be any accidental
+ * hash collisions, but we don't need to be secure against intentional
+ * (malicious) collisions.  To trigger a nopwrite, you have to be able
+ * to write the file to begin with, and triggering an incorrect (hash
+ * collision) nopwrite is no worse than simply writing to the file.
+ * That said, there are no known attacks against the checksum algorithms
+ * used for nopwrite, assuming that the salt and the checksums
+ * themselves remain secret.
  */
 static int
 zio_nop_write(zio_t *zio)
@@ -2226,7 +2236,8 @@ zio_nop_write(zio_t *zio)
         * allocate a new bp.
         */
        if (BP_IS_HOLE(bp_orig) ||
-           !zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_dedup ||
+           !(zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_flags &
+           ZCHECKSUM_FLAG_NOPWRITE) ||
            BP_GET_CHECKSUM(bp) != BP_GET_CHECKSUM(bp_orig) ||
            BP_GET_COMPRESS(bp) != BP_GET_COMPRESS(bp_orig) ||
            BP_GET_DEDUP(bp) != BP_GET_DEDUP(bp_orig) ||
@@ -2238,7 +2249,8 @@ zio_nop_write(zio_t *zio)
         * avoid allocating a new bp and issuing any I/O.
         */
        if (ZIO_CHECKSUM_EQUAL(bp->blk_cksum, bp_orig->blk_cksum)) {
-               ASSERT(zio_checksum_table[zp->zp_checksum].ci_dedup);
+               ASSERT(zio_checksum_table[zp->zp_checksum].ci_flags &
+                   ZCHECKSUM_FLAG_NOPWRITE);
                ASSERT3U(BP_GET_PSIZE(bp), ==, BP_GET_PSIZE(bp_orig));
                ASSERT3U(BP_GET_LSIZE(bp), ==, BP_GET_LSIZE(bp_orig));
                ASSERT(zp->zp_compress != ZIO_COMPRESS_OFF);
@@ -2566,7 +2578,8 @@ zio_ddt_write(zio_t *zio)
                 * we can't resolve it, so just convert to an ordinary write.
                 * (And automatically e-mail a paper to Nature?)
                 */
-               if (!zio_checksum_table[zp->zp_checksum].ci_dedup) {
+               if (!(zio_checksum_table[zp->zp_checksum].ci_flags &
+                   ZCHECKSUM_FLAG_DEDUP)) {
                        zp->zp_checksum = spa_dedup_checksum(spa);
                        zio_pop_transforms(zio);
                        zio->io_stage = ZIO_STAGE_OPEN;
index b05e787dcaac098b2713c53e4d13eb92980cc3d1..59871c50ea2252df304f7bfd012fe940404ef9b4 100644 (file)
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright 2013 Saso Kiselkov. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
 #include <sys/spa.h>
+#include <sys/spa_impl.h>
 #include <sys/zio.h>
 #include <sys/zio_checksum.h>
 #include <sys/zil.h>
  * checksum function of the appropriate strength.  When reading a block,
  * we compare the expected checksum against the actual checksum, which we
  * compute via the checksum function specified by BP_GET_CHECKSUM(bp).
+ *
+ * SALTED CHECKSUMS
+ *
+ * To enable the use of less secure hash algorithms with dedup, we
+ * introduce the notion of salted checksums (MACs, really).  A salted
+ * checksum is fed both a random 256-bit value (the salt) and the data
+ * to be checksummed.  This salt is kept secret (stored on the pool, but
+ * never shown to the user).  Thus even if an attacker knew of collision
+ * weaknesses in the hash algorithm, they won't be able to mount a known
+ * plaintext attack on the DDT, since the actual hash value cannot be
+ * known ahead of time.  How the salt is used is algorithm-specific
+ * (some might simply prefix it to the data block, others might need to
+ * utilize a full-blown HMAC).  On disk the salt is stored in a ZAP
+ * object in the MOS (DMU_POOL_CHECKSUM_SALT).
+ *
+ * CONTEXT TEMPLATES
+ *
+ * Some hashing algorithms need to perform a substantial amount of
+ * initialization work (e.g. salted checksums above may need to pre-hash
+ * the salt) before being able to process data.  Performing this
+ * redundant work for each block would be wasteful, so we instead allow
+ * a checksum algorithm to do the work once (the first time it's used)
+ * and then keep this pre-initialized context as a template inside the
+ * spa_t (spa_cksum_tmpls).  If the zio_checksum_info_t contains
+ * non-NULL ci_tmpl_init and ci_tmpl_free callbacks, they are used to
+ * construct and destruct the pre-initialized checksum context.  The
+ * pre-initialized context is then reused during each checksum
+ * invocation and passed to the checksum function.
  */
 
 /*ARGSUSED*/
 static void
-zio_checksum_off(const void *buf, uint64_t size, zio_cksum_t *zcp)
+zio_checksum_off(const void *buf, uint64_t size,
+    const void *ctx_template, zio_cksum_t *zcp)
 {
        ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0);
 }
 
 zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = {
-       {{NULL,                 NULL},                  0, 0, 0, "inherit"},
-       {{NULL,                 NULL},                  0, 0, 0, "on"},
-       {{zio_checksum_off,     zio_checksum_off},      0, 0, 0, "off"},
-       {{zio_checksum_SHA256,  zio_checksum_SHA256},   1, 1, 0, "label"},
-       {{zio_checksum_SHA256,  zio_checksum_SHA256},   1, 1, 0, "gang_header"},
-       {{fletcher_2_native,    fletcher_2_byteswap},   0, 1, 0, "zilog"},
-       {{fletcher_2_native,    fletcher_2_byteswap},   0, 0, 0, "fletcher2"},
-       {{fletcher_4_native,    fletcher_4_byteswap},   1, 0, 0, "fletcher4"},
-       {{zio_checksum_SHA256,  zio_checksum_SHA256},   1, 0, 1, "sha256"},
-       {{fletcher_4_native,    fletcher_4_byteswap},   0, 1, 0, "zilog2"},
+       {{NULL, NULL}, NULL, NULL, 0, "inherit"},
+       {{NULL, NULL}, NULL, NULL, 0, "on"},
+       {{zio_checksum_off,             zio_checksum_off},
+           NULL, NULL, 0, "off"},
+       {{zio_checksum_SHA256,          zio_checksum_SHA256},
+           NULL, NULL, ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_EMBEDDED,
+           "label"},
+       {{zio_checksum_SHA256,          zio_checksum_SHA256},
+           NULL, NULL, ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_EMBEDDED,
+           "gang_header"},
+       {{fletcher_2_native,            fletcher_2_byteswap},
+           NULL, NULL, ZCHECKSUM_FLAG_EMBEDDED, "zilog"},
+       {{fletcher_2_native,            fletcher_2_byteswap},
+           NULL, NULL, 0, "fletcher2"},
+       {{fletcher_4_native,            fletcher_4_byteswap},
+           NULL, NULL, ZCHECKSUM_FLAG_METADATA, "fletcher4"},
+       {{zio_checksum_SHA256,          zio_checksum_SHA256},
+           NULL, NULL, ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_DEDUP |
+           ZCHECKSUM_FLAG_NOPWRITE, "sha256"},
+       {{fletcher_4_native,            fletcher_4_byteswap},
+           NULL, NULL, ZCHECKSUM_FLAG_EMBEDDED, "zilog2"},
+       {{zio_checksum_off,             zio_checksum_off},
+           NULL, NULL, 0, "noparity"},
+       {{zio_checksum_SHA512_native,   zio_checksum_SHA512_byteswap},
+           NULL, NULL, ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_DEDUP |
+           ZCHECKSUM_FLAG_NOPWRITE, "sha512"},
+       {{zio_checksum_skein_native,    zio_checksum_skein_byteswap},
+           zio_checksum_skein_tmpl_init, zio_checksum_skein_tmpl_free,
+           ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_DEDUP |
+           ZCHECKSUM_FLAG_SALTED | ZCHECKSUM_FLAG_NOPWRITE, "skein"},
+       {{zio_checksum_edonr_native,    zio_checksum_edonr_byteswap},
+           zio_checksum_edonr_tmpl_init, zio_checksum_edonr_tmpl_free,
+           ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_SALTED |
+           ZCHECKSUM_FLAG_NOPWRITE, "edonr"},
 };
 
+/*
+ * Translate a checksum algorithm into its corresponding SPA_FEATURE_*
+ * identifier.  Only SHA-512, Skein and Edon-R have one; every other
+ * checksum value yields SPA_FEATURE_NONE.
+ */
+spa_feature_t
+zio_checksum_to_feature(enum zio_checksum cksum)
+{
+       spa_feature_t feature = SPA_FEATURE_NONE;
+
+       if (cksum == ZIO_CHECKSUM_SHA512)
+               feature = SPA_FEATURE_SHA512;
+       else if (cksum == ZIO_CHECKSUM_SKEIN)
+               feature = SPA_FEATURE_SKEIN;
+       else if (cksum == ZIO_CHECKSUM_EDONR)
+               feature = SPA_FEATURE_EDONR;
+
+       return (feature);
+}
+
 enum zio_checksum
 zio_checksum_select(enum zio_checksum child, enum zio_checksum parent)
 {
@@ -113,7 +183,8 @@ zio_checksum_dedup_select(spa_t *spa, enum zio_checksum child,
        if (child == (ZIO_CHECKSUM_ON | ZIO_CHECKSUM_VERIFY))
                return (spa_dedup_checksum(spa) | ZIO_CHECKSUM_VERIFY);
 
-       ASSERT(zio_checksum_table[child & ZIO_CHECKSUM_MASK].ci_dedup ||
+       ASSERT((zio_checksum_table[child & ZIO_CHECKSUM_MASK].ci_flags &
+           ZCHECKSUM_FLAG_DEDUP) ||
            (child & ZIO_CHECKSUM_VERIFY) || child == ZIO_CHECKSUM_OFF);
 
        return (child);
@@ -145,6 +216,30 @@ zio_checksum_label_verifier(zio_cksum_t *zcp, uint64_t offset)
        ZIO_SET_CHECKSUM(zcp, offset, 0, 0, 0);
 }
 
+/*
+ * Calls the template init function of a checksum which supports context
+ * templates and installs the template into the spa_t.
+ *
+ * This is a no-op when the checksum has no ci_tmpl_init callback or when
+ * spa_cksum_tmpls[checksum] is already non-NULL.  That first NULL check is
+ * made without holding spa_cksum_tmpls_lock; it is repeated under the lock
+ * before ci_tmpl_init is called, so (assuming every writer takes the lock)
+ * two concurrent callers cannot both install a template.
+ *
+ * The template is built from the pool's salt (spa_cksum_salt).  A checksum
+ * that provides ci_tmpl_init must also provide ci_tmpl_free, and
+ * ci_tmpl_init must not return NULL; both conditions are enforced with
+ * VERIFY.
+ */
+static void
+zio_checksum_template_init(enum zio_checksum checksum, spa_t *spa)
+{
+       zio_checksum_info_t *ci = &zio_checksum_table[checksum];
+
+       if (ci->ci_tmpl_init == NULL)
+               return;
+       if (spa->spa_cksum_tmpls[checksum] != NULL)
+               return;
+
+       VERIFY(ci->ci_tmpl_free != NULL);
+       mutex_enter(&spa->spa_cksum_tmpls_lock);
+       if (spa->spa_cksum_tmpls[checksum] == NULL) {
+               spa->spa_cksum_tmpls[checksum] =
+                   ci->ci_tmpl_init(&spa->spa_cksum_salt);
+               VERIFY(spa->spa_cksum_tmpls[checksum] != NULL);
+       }
+       mutex_exit(&spa->spa_cksum_tmpls_lock);
+}
+
 /*
  * Generate the checksum.
  */
@@ -156,11 +251,14 @@ zio_checksum_compute(zio_t *zio, enum zio_checksum checksum,
        uint64_t offset = zio->io_offset;
        zio_checksum_info_t *ci = &zio_checksum_table[checksum];
        zio_cksum_t cksum;
+       spa_t *spa = zio->io_spa;
 
        ASSERT((uint_t)checksum < ZIO_CHECKSUM_FUNCTIONS);
        ASSERT(ci->ci_func[0] != NULL);
 
-       if (ci->ci_eck) {
+       zio_checksum_template_init(checksum, spa);
+
+       if (ci->ci_flags & ZCHECKSUM_FLAG_EMBEDDED) {
                zio_eck_t *eck;
 
                if (checksum == ZIO_CHECKSUM_ZILOG2) {
@@ -179,10 +277,12 @@ zio_checksum_compute(zio_t *zio, enum zio_checksum checksum,
                else
                        bp->blk_cksum = eck->zec_cksum;
                eck->zec_magic = ZEC_MAGIC;
-               ci->ci_func[0](data, size, &cksum);
+               ci->ci_func[0](data, size, spa->spa_cksum_tmpls[checksum],
+                   &cksum);
                eck->zec_cksum = cksum;
        } else {
-               ci->ci_func[0](data, size, &bp->blk_cksum);
+               ci->ci_func[0](data, size, spa->spa_cksum_tmpls[checksum],
+                   &bp->blk_cksum);
        }
 }
 
@@ -191,13 +291,15 @@ zio_checksum_error_impl(spa_t *spa, blkptr_t *bp, enum zio_checksum checksum,
     void *data, uint64_t size, uint64_t offset, zio_bad_cksum_t *info)
 {
        zio_checksum_info_t *ci = &zio_checksum_table[checksum];
-       zio_cksum_t actual_cksum, expected_cksum;
        int byteswap;
+       zio_cksum_t actual_cksum, expected_cksum;
 
        if (checksum >= ZIO_CHECKSUM_FUNCTIONS || ci->ci_func[0] == NULL)
                return (SET_ERROR(EINVAL));
 
-       if (ci->ci_eck) {
+       zio_checksum_template_init(checksum, spa);
+
+       if (ci->ci_flags & ZCHECKSUM_FLAG_EMBEDDED) {
                zio_eck_t *eck;
                zio_cksum_t verifier;
 
@@ -235,7 +337,8 @@ zio_checksum_error_impl(spa_t *spa, blkptr_t *bp, enum zio_checksum checksum,
 
                expected_cksum = eck->zec_cksum;
                eck->zec_cksum = verifier;
-               ci->ci_func[byteswap](data, size, &actual_cksum);
+               ci->ci_func[byteswap](data, size,
+                   spa->spa_cksum_tmpls[checksum], &actual_cksum);
                eck->zec_cksum = expected_cksum;
 
                if (byteswap) {
@@ -245,7 +348,8 @@ zio_checksum_error_impl(spa_t *spa, blkptr_t *bp, enum zio_checksum checksum,
        } else {
                byteswap = BP_SHOULD_BYTESWAP(bp);
                expected_cksum = bp->blk_cksum;
-               ci->ci_func[byteswap](data, size, &actual_cksum);
+               ci->ci_func[byteswap](data, size,
+                   spa->spa_cksum_tmpls[checksum], &actual_cksum);
        }
 
        if (info != NULL) {
@@ -286,3 +390,24 @@ zio_checksum_error(zio_t *zio, zio_bad_cksum_t *info)
        }
        return (error);
 }
+
+/*
+ * Called by a spa_t that's about to be deallocated.  Walks every slot of
+ * spa_cksum_tmpls and, for each one that holds a template, releases it
+ * through that checksum's ci_tmpl_free callback and resets the slot to
+ * NULL.  Slots that were never initialized are left alone.
+ */
+void
+zio_checksum_templates_free(spa_t *spa)
+{
+       enum zio_checksum idx;
+
+       for (idx = 0; idx < ZIO_CHECKSUM_FUNCTIONS; idx++) {
+               zio_checksum_info_t *ci;
+
+               /* Nothing was ever installed for this algorithm. */
+               if (spa->spa_cksum_tmpls[idx] == NULL)
+                       continue;
+
+               ci = &zio_checksum_table[idx];
+               VERIFY(ci->ci_tmpl_free != NULL);
+               ci->ci_tmpl_free(spa->spa_cksum_tmpls[idx]);
+               spa->spa_cksum_tmpls[idx] = NULL;
+       }
+}
index e6fc5c8b082f5ab4add18a89412013242af9fd3b..d4def4429a833cc043929fd08ed407a8eb59439e 100755 (executable)
@@ -19,3 +19,22 @@ s:usr/src/test/zfs-tests/runfiles:tests/runfiles:g
 s:usr/src/test/zfs-tests/tests/functional:tests/zfs-tests/tests/functional:g
 s:usr/src/test/zfs-tests/tests/perf:tests/zfs-tests/tests/perf:g
 s:usr/src/test/test-runner/cmd/run.py:tests/test-runner/cmd/test-runner.py:g
+
+#
+# The usr/src/common/zfs/ files go in a couple different dirs.
+# usr/src/common/zfs/zfeature_common.c goes in module/zfs
+#
+s:usr/src/common/zfs/zfeature_common.c:module/zfs/zfeature_common.c:g
+
+# ...but most of the rest of the C files go in module/zcommon
+s/usr\/src\/common\/zfs\/\(.*\)\.c/module\/zcommon\/\1.c/g
+
+# crypto framework
+s:usr/src/common/crypto:module/icp/algs:g
+s:usr/src/uts/common/crypto/io:module/icp/io:g
+
+# Headers
+s:usr/src/common/zfs/\(.*\)\.h:include/\1.h:g
+
+# Man pages
+s:usr/src/man:man:g
index 9a85af5d64a326cb49af11c7f4db920d5e06933d..92f867ab96e25e675ed898f839b5aec92c6b0b9a 100644 (file)
@@ -60,6 +60,9 @@ tests = ['cache_002_pos', 'cache_003_pos', 'cache_004_neg',
 [tests/functional/casenorm]
 tests = ['case_all_values', 'norm_all_values']
 
+[tests/functional/checksum]
+tests = ['run_edonr_test', 'run_sha2_test', 'run_skein_test', 'filetest_001_pos']
+
 [tests/functional/clean_mirror]
 tests = [ 'clean_mirror_001_pos', 'clean_mirror_002_pos',
     'clean_mirror_003_pos', 'clean_mirror_004_pos']
index 046794820589a35d3fc33f81e9d47829bd02fc54..81fc5de397940ccb40c363519a6e643e7aaaa28a 100644 (file)
@@ -30,6 +30,9 @@
 #include <inttypes.h>
 #include <sys/types.h>
 #include <unistd.h>
+#include <stdlib.h>
+#include <time.h>
+#include <stdint.h>
 
 typedef unsigned char  uchar_t;
 typedef long long      longlong_t;
@@ -44,6 +47,16 @@ static unsigned char bigbuffer[BIGBUFFERSIZE];
 
 static void usage(char *);
 
+/*
+ * Pseudo-randomize the buffer: XOR the first block_size bytes of
+ * bigbuffer with one byte drawn from rand().  The same byte is applied to
+ * the whole range, so a draw of zero leaves the buffer unchanged.
+ */
+void
+randomize_buffer(int block_size)
+{
+       char mask = rand() & 0xff;
+       int pos = 0;
+
+       while (pos < block_size) {
+               bigbuffer[pos] ^= mask;
+               pos++;
+       }
+}
+
 int
 main(int argc, char **argv)
 {
@@ -81,7 +94,10 @@ main(int argc, char **argv)
                                write_count = atoi(optarg);
                                break;
                        case 'd':
-                               fillchar = atoi(optarg);
+                               if (optarg[0] == 'R')
+                                       fillchar = 'R'; /* R = random data */
+                               else
+                                       fillchar = atoi(optarg);
                                break;
                        case 's':
                                offset = atoll(optarg);
@@ -138,6 +154,9 @@ main(int argc, char **argv)
        nxtfillchar = fillchar;
        k = 0;
 
+       if (fillchar == 'R')
+               srand(time(NULL));
+
        for (i = 0; i < block_size; i++) {
                bigbuffer[i] = nxtfillchar;
 
@@ -146,6 +165,8 @@ main(int argc, char **argv)
                                k = 0;
                        }
                        nxtfillchar = k++;
+               } else if (fillchar == 'R') {
+                       nxtfillchar = rand() & 0xff;
                }
        }
 
@@ -191,14 +212,21 @@ main(int argc, char **argv)
 
        if (verbose) {
                (void) printf("%s: block_size = %d, write_count = %d, "
-                   "offset = %lld, data = %s%d\n", filename, block_size,
-                   write_count, offset,
-                   (fillchar == 0) ? "0->" : "",
-                   (fillchar == 0) ? DATA_RANGE : fillchar);
+                   "offset = %lld, ", filename, block_size,
+                   write_count, offset);
+               if (fillchar == 'R') {
+                       (void) printf("data = [random]\n");
+               } else {
+                       (void) printf("data = %s%d\n",
+                           (fillchar == 0) ? "0->" : "",
+                           (fillchar == 0) ? DATA_RANGE : fillchar);
+               }
        }
 
        for (i = 0; i < write_count; i++) {
                ssize_t n;
+               if (fillchar == 'R')
+                       randomize_buffer(block_size);
 
                if ((n = write(bigfd, &bigbuffer, block_size)) == -1) {
                        (void) printf("write failed (%ld), good_writes = %"
@@ -224,9 +252,11 @@ usage(char *prog)
 {
        (void) printf("Usage: %s [-v] -o {create,overwrite,append} -f file_name"
            " [-b block_size]\n"
-           "\t[-s offset] [-c write_count] [-d data]\n"
-           "\twhere [data] equal to zero causes chars "
-           "0->%d to be repeated throughout\n", prog, DATA_RANGE);
+           "\t[-s offset] [-c write_count] [-d data]\n\n"
+           "Where [data] equal to zero causes chars "
+           "0->%d to be repeated throughout, or [data]\n"
+           "equal to 'R' for psudorandom data.\n",
+           prog, DATA_RANGE);
 
        exit(1);
 }
index 37f173e126e7c1295e6c455e413e2974b815a1f0..62ba3a9eb3b3fd89866fe3453a59a76b5400aff2 100644 (file)
@@ -147,12 +147,14 @@ function default_setup_noexit
        typeset disklist=$1
        typeset container=$2
        typeset volume=$3
+       log_note begin default_setup_noexit
 
        if is_global_zone; then
                if poolexists $TESTPOOL ; then
                        destroy_pool $TESTPOOL
                fi
                [[ -d /$TESTPOOL ]] && $RM -rf /$TESTPOOL
+               log_note creating pool $TESTPOOL $disklist
                log_must $ZPOOL create -f $TESTPOOL $disklist
        else
                reexport_pool
@@ -1539,6 +1541,18 @@ function get_disklist # pool
        $ECHO $disklist
 }
 
+#
+# Given a pool, list all disks in the pool with their full path
+# (like "/dev/sda" instead of "sda").
+#
+# $1 - pool name
+#
+function get_disklist_fullpath # pool
+{
+       # typeset keeps 'args' local so callers' variables are not clobbered.
+       typeset args="-P $1"
+
+       # Deliberately unquoted: "-P" and the pool name must be passed to
+       # get_disklist as two separate words.
+       get_disklist $args
+}
+
+
+
 # /**
 #  This function kills a given list of processes after a time period. We use
 #  this in the stress tests instead of STF_TIMEOUT so that we can have processes
index bb0b4ff58618d11be3c32829211f4095bfd10105..c495eecb477be8f473e0f0f77ffd7b730c579e23 100644 (file)
@@ -16,7 +16,8 @@
 typeset -a compress_props=('on' 'off' 'lzjb' 'gzip' 'gzip-1' 'gzip-2' 'gzip-3'
     'gzip-4' 'gzip-5' 'gzip-6' 'gzip-7' 'gzip-8' 'gzip-9' 'zle')
 
-typeset -a checksum_props=('on' 'off' 'fletcher2' 'fletcher4' 'sha256')
+typeset -a checksum_props=('on' 'off' 'fletcher2' 'fletcher4' 'sha256' 'sha512'
+    'edonr' 'skein' 'noparity')
 
 #
 # Given the property array passed in, return 'num_props' elements to the
index 79d33a14b09d37e80ad179fbea2baa97c528d00c..ed01eafb489863d9579b2c45cbfcd32d55381143 100644 (file)
@@ -5,6 +5,7 @@ SUBDIRS = \
        cache \
        cachefile \
        casenorm \
+       checksum \
        clean_mirror \
        cli_root \
        cli_user \
diff --git a/tests/zfs-tests/tests/functional/checksum/.gitignore b/tests/zfs-tests/tests/functional/checksum/.gitignore
new file mode 100644 (file)
index 0000000..0411d5a
--- /dev/null
@@ -0,0 +1,4 @@
+skein_test
+edonr_test
+sha2_test
+
diff --git a/tests/zfs-tests/tests/functional/checksum/Makefile.am b/tests/zfs-tests/tests/functional/checksum/Makefile.am
new file mode 100644 (file)
index 0000000..2d7d271
--- /dev/null
@@ -0,0 +1,26 @@
+include $(top_srcdir)/config/Rules.am
+AM_CPPFLAGS += -I$(top_srcdir)/include
+LDADD = $(top_srcdir)/lib/libicp/libicp.la
+
+AUTOMAKE_OPTIONS = subdir-objects
+
+pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/checksum
+
+dist_pkgdata_SCRIPTS = \
+       setup.ksh \
+       cleanup.ksh \
+       run_edonr_test.ksh \
+       run_sha2_test.ksh \
+       run_skein_test.ksh \
+       filetest_001_pos.ksh
+
+pkgexecdir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/checksum
+
+pkgexec_PROGRAMS = \
+       edonr_test \
+       skein_test \
+       sha2_test
+
+edonr_test_SOURCES = edonr_test.c
+skein_test_SOURCES = skein_test.c
+sha2_test_SOURCES = sha2_test.c
diff --git a/tests/zfs-tests/tests/functional/checksum/cleanup.ksh b/tests/zfs-tests/tests/functional/checksum/cleanup.ksh
new file mode 100755 (executable)
index 0000000..79cd6e9
--- /dev/null
@@ -0,0 +1,30 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+default_cleanup
diff --git a/tests/zfs-tests/tests/functional/checksum/edonr_test.c b/tests/zfs-tests/tests/functional/checksum/edonr_test.c
new file mode 100644 (file)
index 0000000..1ea8e99
--- /dev/null
@@ -0,0 +1,219 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2013 Saso Kiselkov. All rights reserved.
+ */
+
+/*
+ * This is just to keep the compiler happy about sys/time.h not declaring
+ * gettimeofday due to -D_KERNEL (we can do this since we're actually
+ * running in userspace, but we need -D_KERNEL for the remaining Edon-R code).
+ */
+#ifdef _KERNEL
+#undef _KERNEL
+#endif
+
+#include <sys/edonr.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/time.h>
+#define NOTE(x) 
+typedef enum boolean { B_FALSE, B_TRUE } boolean_t;
+typedef        unsigned long long      u_longlong_t;
+
+/*
+ * Test messages from:
+ * http://csrc.nist.gov/groups/ST/toolkit/documents/Examples/SHA_All.pdf
+ */
+const char     *test_msg0 = "abc";
+const char     *test_msg1 = "abcdbcdecdefdefgefghfghighijhijkijkljklmklmn"
+       "lmnomnopnopq";
+const char     *test_msg2 = "abcdefghbcdefghicdefghijdefghijkefghijklfghi"
+       "jklmghijklmnhijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu";
+
+/*
+ * Test digests computed by hand. There's no formal standard or spec for edonr.
+ */
+const uint8_t  edonr_224_test_digests[][28] = {
+       {
+               /* for test_msg0 */
+               0x56, 0x63, 0xc4, 0x93, 0x95, 0x20, 0xfa, 0xf6,
+               0x12, 0x31, 0x65, 0xa4, 0x66, 0xf2, 0x56, 0x01,
+               0x95, 0x2e, 0xa9, 0xe4, 0x24, 0xdd, 0xc9, 0x6b,
+               0xef, 0xd0, 0x40, 0x94
+       },
+       {
+               /* for test_msg1 */
+               0xd0, 0x13, 0xe4, 0x87, 0x4d, 0x06, 0x8d, 0xca,
+               0x4e, 0x14, 0xb9, 0x37, 0x2f, 0xce, 0x12, 0x20,
+               0x60, 0xf8, 0x5c, 0x0a, 0xfd, 0x7a, 0x7d, 0x97,
+               0x88, 0x2b, 0x05, 0x75
+       }
+       /* no test vector for test_msg2 */
+};
+
+const uint8_t  edonr_256_test_digests[][32] = {
+       {
+               /* for test_msg0 */
+               0x54, 0xd7, 0x8b, 0x13, 0xc7, 0x4e, 0xda, 0x5a,
+               0xed, 0xc2, 0x71, 0xcc, 0x88, 0x1f, 0xb2, 0x2f,
+               0x83, 0x99, 0xaf, 0xd3, 0x04, 0x0b, 0x6a, 0x39,
+               0x2d, 0x73, 0x94, 0x05, 0x50, 0x8d, 0xd8, 0x51
+       },
+       {
+               /* for test_msg1 */
+               0x49, 0x2d, 0x0b, 0x19, 0xab, 0x1e, 0xde, 0x3a,
+               0xea, 0x9b, 0xf2, 0x39, 0x3a, 0xb1, 0x21, 0xde,
+               0x21, 0xf6, 0x80, 0x1f, 0xad, 0xbe, 0x8b, 0x07,
+               0xc7, 0xfb, 0xe6, 0x99, 0x0e, 0x4d, 0x73, 0x63
+       }
+       /* no test vector for test_msg2 */
+};
+
+const uint8_t  edonr_384_test_digests[][48] = {
+       {
+               /* for test_msg0 */
+               0x0e, 0x7c, 0xd7, 0x85, 0x78, 0x77, 0xe0, 0x89,
+               0x5b, 0x1c, 0xdf, 0x49, 0xf4, 0x1d, 0x20, 0x9c,
+               0x72, 0x7d, 0x2e, 0x57, 0x9b, 0x9b, 0x9a, 0xdc,
+               0x60, 0x27, 0x97, 0x82, 0xb9, 0x90, 0x72, 0xec,
+               0x7e, 0xce, 0xd3, 0x16, 0x5f, 0x47, 0x75, 0x48,
+               0xfa, 0x60, 0x72, 0x7e, 0x01, 0xc7, 0x7c, 0xc6
+       },
+       {
+               /* no test vector for test_msg1 */
+               0
+       },
+       {
+               /* for test_msg2 */
+               0xe2, 0x34, 0xa1, 0x02, 0x83, 0x76, 0xae, 0xe6,
+               0x82, 0xd9, 0x38, 0x32, 0x0e, 0x00, 0x78, 0xd2,
+               0x34, 0xdb, 0xb9, 0xbd, 0xf0, 0x08, 0xa8, 0x0f,
+               0x63, 0x1c, 0x3d, 0x4a, 0xfd, 0x0a, 0xe9, 0x59,
+               0xdc, 0xd4, 0xce, 0xcd, 0x8d, 0x67, 0x6c, 0xea,
+               0xbb, 0x1a, 0x32, 0xed, 0x5c, 0x6b, 0xf1, 0x7f
+       }
+};
+
+const uint8_t  edonr_512_test_digests[][64] = {
+       {
+               /* for test_msg0 */
+               0x1b, 0x14, 0xdb, 0x15, 0x5f, 0x1d, 0x40, 0x65,
+               0x94, 0xb8, 0xce, 0xf7, 0x0a, 0x43, 0x62, 0xec,
+               0x6b, 0x5d, 0xe6, 0xa5, 0xda, 0xf5, 0x0e, 0xc9,
+               0x99, 0xe9, 0x87, 0xc1, 0x9d, 0x30, 0x49, 0xe2,
+               0xde, 0x59, 0x77, 0xbb, 0x05, 0xb1, 0xbb, 0x22,
+               0x00, 0x50, 0xa1, 0xea, 0x5b, 0x46, 0xa9, 0xf1,
+               0x74, 0x0a, 0xca, 0xfb, 0xf6, 0xb4, 0x50, 0x32,
+               0xad, 0xc9, 0x0c, 0x62, 0x83, 0x72, 0xc2, 0x2b
+       },
+       {
+               /* no test vector for test_msg1 */
+               0
+       },
+       {
+               /* for test_msg2 */
+               0x53, 0x51, 0x07, 0x0d, 0xc5, 0x1c, 0x3b, 0x2b,
+               0xac, 0xa5, 0xa6, 0x0d, 0x02, 0x52, 0xcc, 0xb4,
+               0xe4, 0x92, 0x1a, 0x96, 0xfe, 0x5a, 0x69, 0xe7,
+               0x6d, 0xad, 0x48, 0xfd, 0x21, 0xa0, 0x84, 0x5a,
+               0xd5, 0x7f, 0x88, 0x0b, 0x3e, 0x4a, 0x90, 0x7b,
+               0xc5, 0x03, 0x15, 0x18, 0x42, 0xbb, 0x94, 0x9e,
+               0x1c, 0xba, 0x74, 0x39, 0xa6, 0x40, 0x9a, 0x34,
+               0xb8, 0x43, 0x6c, 0xb4, 0x69, 0x21, 0x58, 0x3c
+       }
+};
+
+/*
+ * Edon-R self-test driver.
+ *
+ * First hashes the fixed test messages in each of the 224/256/384/512-bit
+ * modes and compares the result with the stored digests; any mismatch
+ * makes the program return 1 without running the performance tests.
+ * Then times hashing 8192 zero-filled 128 KiB blocks (1024 MiB) in the
+ * 256- and 512-bit modes and prints the elapsed microseconds.
+ *
+ * If exactly one argument is given it is taken as the CPU clock in MHz
+ * and used to report cycles per byte (CPB); otherwise CPB is printed
+ * as 0.
+ */
+int
+main(int argc, char *argv[])
+{
+       boolean_t       failed = B_FALSE;
+       uint64_t        cpu_mhz = 0;
+
+       /* Optional argument: CPU clock in MHz, used only for the CPB figure. */
+       if (argc == 2)
+               cpu_mhz = atoi(argv[1]);
+
+#define        EDONR_ALGO_TEST(_m, mode, testdigest)                           \
+       do {                                                            \
+               EdonRState      ctx;                                    \
+               uint8_t         digest[mode / 8];                       \
+               EdonRInit(&ctx, mode);                                  \
+               EdonRUpdate(&ctx, (const uint8_t *) _m, strlen(_m) * 8);\
+               EdonRFinal(&ctx, digest);                               \
+               (void) printf("Edon-R-%-6sMessage: " #_m                \
+                   "\tResult: ", #mode);                               \
+               if (bcmp(digest, testdigest, mode / 8) == 0) {          \
+                       (void) printf("OK\n");                          \
+               } else {                                                \
+                       (void) printf("FAILED!\n");                     \
+                       failed = B_TRUE;                                \
+               }                                                       \
+               NOTE(CONSTCOND)                                         \
+       } while (0)
+
+#define        EDONR_PERF_TEST(mode)                                           \
+       do {                                                            \
+               EdonRState      ctx;                                    \
+               uint8_t         digest[mode / 8];                       \
+               uint8_t         block[131072];                          \
+               uint64_t        delta;                                  \
+               double          cpb = 0;                                \
+               int             i;                                      \
+               struct timeval  start, end;                             \
+               bzero(block, sizeof (block));                           \
+               (void) gettimeofday(&start, NULL);                      \
+               EdonRInit(&ctx, mode);                                  \
+               for (i = 0; i < 8192; i++)                              \
+                       EdonRUpdate(&ctx, block, sizeof (block) * 8);   \
+               EdonRFinal(&ctx, digest);                               \
+               (void) gettimeofday(&end, NULL);                        \
+               delta = (end.tv_sec * 1000000llu + end.tv_usec) -       \
+                   (start.tv_sec * 1000000llu + start.tv_usec);        \
+               if (cpu_mhz != 0) {                                     \
+                       cpb = (cpu_mhz * 1e6 * ((double)delta /         \
+                           1000000)) / (8192 * 128 * 1024);            \
+               }                                                       \
+               (void) printf("Edon-R-%-6s%llu us (%.02f CPB)\n", #mode,\
+                   (u_longlong_t)delta, cpb);                          \
+               NOTE(CONSTCOND)                                         \
+       } while (0)
+
+       /* Only messages that have a stored digest for the mode are checked. */
+       (void) printf("Running algorithm correctness tests:\n");
+       EDONR_ALGO_TEST(test_msg0, 224, edonr_224_test_digests[0]);
+       EDONR_ALGO_TEST(test_msg1, 224, edonr_224_test_digests[1]);
+       EDONR_ALGO_TEST(test_msg0, 256, edonr_256_test_digests[0]);
+       EDONR_ALGO_TEST(test_msg1, 256, edonr_256_test_digests[1]);
+       EDONR_ALGO_TEST(test_msg0, 384, edonr_384_test_digests[0]);
+       EDONR_ALGO_TEST(test_msg2, 384, edonr_384_test_digests[2]);
+       EDONR_ALGO_TEST(test_msg0, 512, edonr_512_test_digests[0]);
+       EDONR_ALGO_TEST(test_msg2, 512, edonr_512_test_digests[2]);
+       /* Skip the timing runs if any digest was wrong. */
+       if (failed)
+               return (1);
+
+       (void) printf("Running performance tests (hashing 1024 MiB of "
+           "data):\n");
+       EDONR_PERF_TEST(256);
+       EDONR_PERF_TEST(512);
+
+       return (0);
+}
diff --git a/tests/zfs-tests/tests/functional/checksum/filetest_001_pos.ksh b/tests/zfs-tests/tests/functional/checksum/filetest_001_pos.ksh
new file mode 100755 (executable)
index 0000000..758b353
--- /dev/null
@@ -0,0 +1,125 @@
+#! /bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/include/properties.shlib
+
+# DESCRIPTION:
+# Sanity test to make sure checksum algorithms work.
+# For each checksum, create a file in the pool using that checksum.  Verify
+# that there are no checksum errors.  Next, for each checksum, create a single
+# file in the pool using that checksum, scramble the underlying vdev, and
+# verify that we correctly catch the checksum errors.
+#
+# STRATEGY:
+# Test 1
+# 1. Create a mirrored pool
+# 2. Create a file using each checksum
+# 3. Export/import/scrub the pool
+# 4. Verify there are no checksum errors.
+# 5. Remove the test files
+#
+# Test 2
+# 6. For each checksum:
+# 7.   Create a file using the checksum
+# 8.   Export the pool
+# 9.   Scramble the data on one of the underlying VDEVs
+# 10.  Import the pool
+# 11.  Scrub the pool
+# 12.  Verify that there are checksum errors
+
+verify_runnable "both"
+
+# Exit handler registered with log_onexit below.  Prints the word "cleanup"
+# via $ECHO and, when $TESTDIR exists, removes everything inside it; the
+# output of that removal (including log_must's own) is discarded.
+# The "[[ ... ]] &&" chain is the last command, so the function's return
+# status is non-zero when $TESTDIR does not exist.
+function cleanup
+{
+       $ECHO cleanup
+       [[ -e $TESTDIR ]] && \
+               log_must $RM -rf $TESTDIR/* > /dev/null 2>&1
+}
+
+log_assert "Create and read back files with using different checksum algorithms"
+
+log_onexit cleanup
+
+# Size the scramble pass that Test 2 runs against the first vdev:
+#   - the write starts SKIP bytes into the device (about 4MB), and
+#   - its length is the pool size in WRITESZ blocks, shortened by twice
+#     SKIP (rounded down to whole blocks) so it also stops before the end.
+# NOTE(review): presumably this keeps the vdev labels intact so the pool can
+# still be imported after scrambling -- confirm.
+FSSIZE=$($ZPOOL list -Hp -o size $TESTPOOL)
+WRITESZ=1048576
+WRITECNT=$((($FSSIZE) / $WRITESZ ))
+# Skip roughly the first 4MB and stay clear of the end of the device
+SKIP=4127518
+SKIPCNT=$((($SKIP / $WRITESZ )))
+SKIPCNT=$((($SKIPCNT * 2)))
+WRITECNT=$((($WRITECNT - $SKIPCNT)))
+
+# Get a list of vdevs in our pool
+set -A array $(get_disklist_fullpath)
+
+# Get the first vdev, since we will corrupt it later
+firstvdev=${array[0]}
+
+# Test 1: write one file per checksum algorithm, then export, import and
+# scrub the pool and confirm that the first vdev reports no checksum errors.
+for ((count = 0; count < ${#checksum_props[*]} ; count++)); do
+       i=${checksum_props[$count]}
+       $ZFS set checksum=$i $TESTPOOL
+       $FILE_WRITE -o overwrite -f $TESTDIR/test_$i -b $WRITESZ -c 5 -d R
+done
+$ZPOOL export $TESTPOOL
+$ZPOOL import $TESTPOOL
+$ZPOOL scrub $TESTPOOL
+while is_pool_scrubbing $TESTPOOL; do
+       $SLEEP 1
+done
+# The last five columns of the vdev's status line are split into
+# name/state/read/write/cksum; only cksum is examined.
+$ZPOOL status -P -v $TESTPOOL | grep $firstvdev | read -r name state rd wr cksum
+log_assert "Normal file write test saw: $cksum errors"
+log_must [ $cksum -eq 0 ]
+
+$RM -fr $TESTDIR/*
+
+# Test 2: for each checksum algorithm, write a file, overwrite most of the
+# first vdev with random data while the pool is exported, and confirm that a
+# scrub then reports checksum errors on that vdev.
+log_assert "Test scrambling the disk and seeing checksum errors"
+for ((count = 0; count < ${#checksum_props[*]} ; count++)); do
+       i=${checksum_props[$count]}
+       $ZFS set checksum=$i $TESTPOOL
+       $FILE_WRITE -o overwrite -f $TESTDIR/test_$i -b $WRITESZ -c 5 -d R
+
+       $ZPOOL export $TESTPOOL
+
+       # Scramble the data on the first vdev in our pool, starting SKIP
+       # bytes (about 4MB) in and stopping short of the end of the device.
+       $FILE_WRITE -o overwrite -f $firstvdev -s $SKIP -c $WRITECNT -b $WRITESZ -d R
+
+       $ZPOOL import $TESTPOOL
+
+       $ZPOOL scrub $TESTPOOL
+       while is_pool_scrubbing $TESTPOOL; do
+               $SLEEP 1
+       done
+
+       $ZPOOL status -P -v $TESTPOOL | grep $firstvdev | read -r name state rd wr cksum
+
+       log_assert "Checksum '$i' caught $cksum checksum errors"
+       log_must [ $cksum -ne 0 ]
+
+       # Start the next algorithm from a clean file set and error counters.
+       $RM -f $TESTDIR/test_$i
+       $ZPOOL clear $TESTPOOL
+done
+
+# End the test explicitly, as the other scripts in this directory do.
+log_pass "Checksum algorithms verified on clean and scrambled vdevs"
diff --git a/tests/zfs-tests/tests/functional/checksum/run_edonr_test.ksh b/tests/zfs-tests/tests/functional/checksum/run_edonr_test.ksh
new file mode 100755 (executable)
index 0000000..7bcb321
--- /dev/null
@@ -0,0 +1,30 @@
+#!/bin/ksh -p
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2015 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# Description:
+# Run the tests for the EdonR hash algorithm.
+#
+
+log_assert "Run the tests for the EdonR hash algorithm."
+
+# Forward the value reported by get_cpu_freq to the test binary; its
+# performance section uses a CPU MHz figure to print cycles-per-byte.
+log_must $STF_SUITE/tests/functional/checksum/edonr_test $(get_cpu_freq)
+
+log_pass "EdonR tests passed."
diff --git a/tests/zfs-tests/tests/functional/checksum/run_sha2_test.ksh b/tests/zfs-tests/tests/functional/checksum/run_sha2_test.ksh
new file mode 100755 (executable)
index 0000000..589e28a
--- /dev/null
@@ -0,0 +1,30 @@
+#!/bin/ksh -p
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2015 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# Description:
+# Run the tests for the SHA-2 hash algorithm.
+#
+
+log_assert "Run the tests for the SHA-2 hash algorithm."
+
+# Forward the value reported by get_cpu_freq to the test binary, which reads
+# it as the CPU speed in MHz and uses it to print cycles-per-byte.
+log_must $STF_SUITE/tests/functional/checksum/sha2_test $(get_cpu_freq)
+
+log_pass "SHA-2 tests passed."
diff --git a/tests/zfs-tests/tests/functional/checksum/run_skein_test.ksh b/tests/zfs-tests/tests/functional/checksum/run_skein_test.ksh
new file mode 100755 (executable)
index 0000000..4290bfc
--- /dev/null
@@ -0,0 +1,30 @@
+#!/bin/ksh -p
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2015 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# Description:
+# Run the tests for the Skein hash algorithm.
+#
+
+log_assert "Run the tests for the Skein hash algorithm."
+
+# Forward the value reported by get_cpu_freq to the test binary, which
+# stores it as the CPU speed in MHz.
+log_must $STF_SUITE/tests/functional/checksum/skein_test $(get_cpu_freq)
+
+log_pass "Skein tests passed."
diff --git a/tests/zfs-tests/tests/functional/checksum/setup.ksh b/tests/zfs-tests/tests/functional/checksum/setup.ksh
new file mode 100755 (executable)
index 0000000..27e125d
--- /dev/null
@@ -0,0 +1,31 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+# DISK is the first space-separated entry of $DISKS.
+# NOTE(review): DISK is not referenced again in this script; presumably it is
+# kept for sourced helpers or by convention -- confirm before removing.
+DISK=${DISKS%% *}
+# Hand the complete disk list to the library helper that builds the test
+# pool.  NOTE(review): judging by its name it creates a mirrored pool, which
+# is what filetest_001_pos's strategy expects -- confirm in libtest.shlib.
+default_mirror_setup $DISKS
diff --git a/tests/zfs-tests/tests/functional/checksum/sha2_test.c b/tests/zfs-tests/tests/functional/checksum/sha2_test.c
new file mode 100644 (file)
index 0000000..afd6f82
--- /dev/null
@@ -0,0 +1,265 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2013 Saso Kiselkov. All rights reserved.
+ */
+
+/*
+ * This is just to keep the compiler happy about sys/time.h not declaring
+ * gettimeofday due to -D_KERNEL (we can do this since we're actually
+ * running in userspace, but we need -D_KERNEL for the remaining SHA2 code).
+ */
+#ifdef _KERNEL
+#undef _KERNEL
+#endif
+
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/time.h>
+#define        _SHA2_IMPL
+#include <sys/sha2.h>
+#define NOTE(x)
+typedef enum boolean { B_FALSE, B_TRUE } boolean_t;
+typedef        unsigned long long      u_longlong_t;
+
+
+/*
+ * Test messages from:
+ * http://csrc.nist.gov/groups/ST/toolkit/documents/Examples/SHA_All.pdf
+ *
+ * main() hashes strlen() bytes of each message, so the terminating NUL is
+ * never part of the input.  The number in each name is the row used for
+ * that message in the digest tables of this file.
+ */
+
+const char     *test_msg0 = "abc";
+const char     *test_msg1 = "abcdbcdecdefdefgefghfghighijhijkijkljklmklmn"
+       "lmnomnopnopq";
+const char     *test_msg2 = "abcdefghbcdefghicdefghijdefghijkefghijklfghi"
+       "jklmghijklmnhijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu";
+
+/*
+ * Test digests from:
+ * http://csrc.nist.gov/groups/ST/toolkit/documents/Examples/SHA_All.pdf
+ *
+ * Row N of a digest table is the expected output for test_msgN.  The
+ * SHA-256 table has rows for test_msg0 and test_msg1 only.
+ */
+const uint8_t  sha256_test_digests[][32] = {
+       {
+               /* for test_msg0 */
+               0xBA, 0x78, 0x16, 0xBF, 0x8F, 0x01, 0xCF, 0xEA,
+               0x41, 0x41, 0x40, 0xDE, 0x5D, 0xAE, 0x22, 0x23,
+               0xB0, 0x03, 0x61, 0xA3, 0x96, 0x17, 0x7A, 0x9C,
+               0xB4, 0x10, 0xFF, 0x61, 0xF2, 0x00, 0x15, 0xAD
+       },
+       {
+               /* for test_msg1 */
+               0x24, 0x8D, 0x6A, 0x61, 0xD2, 0x06, 0x38, 0xB8,
+               0xE5, 0xC0, 0x26, 0x93, 0x0C, 0x3E, 0x60, 0x39,
+               0xA3, 0x3C, 0xE4, 0x59, 0x64, 0xFF, 0x21, 0x67,
+               0xF6, 0xEC, 0xED, 0xD4, 0x19, 0xDB, 0x06, 0xC1
+       }
+       /* no test vector for test_msg2 */
+};
+
+const uint8_t  sha384_test_digests[][48] = {
+       {
+               /* for test_msg0 */
+               0xCB, 0x00, 0x75, 0x3F, 0x45, 0xA3, 0x5E, 0x8B,
+               0xB5, 0xA0, 0x3D, 0x69, 0x9A, 0xC6, 0x50, 0x07,
+               0x27, 0x2C, 0x32, 0xAB, 0x0E, 0xDE, 0xD1, 0x63,
+               0x1A, 0x8B, 0x60, 0x5A, 0x43, 0xFF, 0x5B, 0xED,
+               0x80, 0x86, 0x07, 0x2B, 0xA1, 0xE7, 0xCC, 0x23,
+               0x58, 0xBA, 0xEC, 0xA1, 0x34, 0xC8, 0x25, 0xA7
+       },
+       {
+               /*
+                * no test vector for test_msg1; this zero-filled row only
+                * keeps test_msg2's digest at index 2
+                */
+               0
+       },
+       {
+               /* for test_msg2 */
+               0x09, 0x33, 0x0C, 0x33, 0xF7, 0x11, 0x47, 0xE8,
+               0x3D, 0x19, 0x2F, 0xC7, 0x82, 0xCD, 0x1B, 0x47,
+               0x53, 0x11, 0x1B, 0x17, 0x3B, 0x3B, 0x05, 0xD2,
+               0x2F, 0xA0, 0x80, 0x86, 0xE3, 0xB0, 0xF7, 0x12,
+               0xFC, 0xC7, 0xC7, 0x1A, 0x55, 0x7E, 0x2D, 0xB9,
+               0x66, 0xC3, 0xE9, 0xFA, 0x91, 0x74, 0x60, 0x39
+       }
+};
+
+const uint8_t  sha512_test_digests[][64] = {
+       {
+               /* for test_msg0 */
+               0xDD, 0xAF, 0x35, 0xA1, 0x93, 0x61, 0x7A, 0xBA,
+               0xCC, 0x41, 0x73, 0x49, 0xAE, 0x20, 0x41, 0x31,
+               0x12, 0xE6, 0xFA, 0x4E, 0x89, 0xA9, 0x7E, 0xA2,
+               0x0A, 0x9E, 0xEE, 0xE6, 0x4B, 0x55, 0xD3, 0x9A,
+               0x21, 0x92, 0x99, 0x2A, 0x27, 0x4F, 0xC1, 0xA8,
+               0x36, 0xBA, 0x3C, 0x23, 0xA3, 0xFE, 0xEB, 0xBD,
+               0x45, 0x4D, 0x44, 0x23, 0x64, 0x3C, 0xE8, 0x0E,
+               0x2A, 0x9A, 0xC9, 0x4F, 0xA5, 0x4C, 0xA4, 0x9F
+       },
+       {
+               /*
+                * no test vector for test_msg1; this zero-filled row only
+                * keeps test_msg2's digest at index 2
+                */
+               0
+       },
+       {
+               /* for test_msg2 */
+               0x8E, 0x95, 0x9B, 0x75, 0xDA, 0xE3, 0x13, 0xDA,
+               0x8C, 0xF4, 0xF7, 0x28, 0x14, 0xFC, 0x14, 0x3F,
+               0x8F, 0x77, 0x79, 0xC6, 0xEB, 0x9F, 0x7F, 0xA1,
+               0x72, 0x99, 0xAE, 0xAD, 0xB6, 0x88, 0x90, 0x18,
+               0x50, 0x1D, 0x28, 0x9E, 0x49, 0x00, 0xF7, 0xE4,
+               0x33, 0x1B, 0x99, 0xDE, 0xC4, 0xB5, 0x43, 0x3A,
+               0xC7, 0xD3, 0x29, 0xEE, 0xB6, 0xDD, 0x26, 0x54,
+               0x5E, 0x96, 0xE5, 0x5B, 0x87, 0x4B, 0xE9, 0x09
+       }
+};
+
+const uint8_t  sha512_224_test_digests[][28] = {
+       {
+               /* for test_msg0 */
+               0x46, 0x34, 0x27, 0x0F, 0x70, 0x7B, 0x6A, 0x54,
+               0xDA, 0xAE, 0x75, 0x30, 0x46, 0x08, 0x42, 0xE2,
+               0x0E, 0x37, 0xED, 0x26, 0x5C, 0xEE, 0xE9, 0xA4,
+               0x3E, 0x89, 0x24, 0xAA
+       },
+       {
+               /*
+                * no test vector for test_msg1; this zero-filled row only
+                * keeps test_msg2's digest at index 2
+                */
+               0
+       },
+       {
+               /* for test_msg2 */
+               0x23, 0xFE, 0xC5, 0xBB, 0x94, 0xD6, 0x0B, 0x23,
+               0x30, 0x81, 0x92, 0x64, 0x0B, 0x0C, 0x45, 0x33,
+               0x35, 0xD6, 0x64, 0x73, 0x4F, 0xE4, 0x0E, 0x72,
+               0x68, 0x67, 0x4A, 0xF9
+       }
+};
+
+const uint8_t  sha512_256_test_digests[][32] = {
+       {
+               /* for test_msg0 */
+               0x53, 0x04, 0x8E, 0x26, 0x81, 0x94, 0x1E, 0xF9,
+               0x9B, 0x2E, 0x29, 0xB7, 0x6B, 0x4C, 0x7D, 0xAB,
+               0xE4, 0xC2, 0xD0, 0xC6, 0x34, 0xFC, 0x6D, 0x46,
+               0xE0, 0xE2, 0xF1, 0x31, 0x07, 0xE7, 0xAF, 0x23
+       },
+       {
+               /*
+                * no test vector for test_msg1; this zero-filled row only
+                * keeps test_msg2's digest at index 2
+                */
+               0
+       },
+       {
+               /* for test_msg2 */
+               0x39, 0x28, 0xE1, 0x84, 0xFB, 0x86, 0x90, 0xF8,
+               0x40, 0xDA, 0x39, 0x88, 0x12, 0x1D, 0x31, 0xBE,
+               0x65, 0xCB, 0x9D, 0x3E, 0xF8, 0x3E, 0xE6, 0x14,
+               0x6F, 0xEA, 0xC8, 0x61, 0xE1, 0x9B, 0x56, 0x3A
+       }
+};
+
+/*
+ * Local stand-in for cmn_err(), which sha2.c references.  The level
+ * argument is ignored; the formatted message is written to stderr as
+ * given, with no prefix or trailing newline added.
+ */
+/*ARGSUSED*/
+void
+cmn_err(int level, char *format, ...)
+{
+       va_list args;
+
+       va_start(args, format);
+       /* LINTED: E_SEC_PRINTF_VAR_FMT */
+       (void) vfprintf(stderr, format, args);
+       va_end(args);
+}
+
+/*
+ * SHA-2 self test.  Runs the known-answer tests first and returns 1 if any
+ * of them fails; otherwise times SHA-256 and SHA-512 over 8192 passes of a
+ * 128 KiB zeroed buffer and returns 0.  An optional single argument is the
+ * CPU speed in MHz; without it the cycles-per-byte figure prints as 0.00.
+ */
+int
+main(int argc, char *argv[])
+{
+       boolean_t       failed = B_FALSE;
+       uint64_t        cpu_mhz = 0;
+
+       if (argc == 2)
+               cpu_mhz = atoi(argv[1]);
+
+/*
+ * Hash the string _m with the SHA<mode> mechanism and compare the
+ * diglen-bit result against testdigest; a mismatch sets 'failed'.
+ */
+#define        SHA2_ALGO_TEST(_m, mode, diglen, testdigest)                    \
+       do {                                                            \
+               SHA2_CTX        sha_ctx;                                \
+               uint8_t         md[diglen / 8];                         \
+               SHA2Init(SHA ## mode ## _MECH_INFO_TYPE, &sha_ctx);     \
+               SHA2Update(&sha_ctx, _m, strlen(_m));                   \
+               SHA2Final(md, &sha_ctx);                                \
+               (void) printf("SHA%-9sMessage: " #_m                    \
+                   "\tResult: ", #mode);                               \
+               if (bcmp(md, testdigest, diglen / 8) != 0) {            \
+                       (void) printf("FAILED!\n");                     \
+                       failed = B_TRUE;                                \
+               } else {                                                \
+                       (void) printf("OK\n");                          \
+               }                                                       \
+               NOTE(CONSTCOND)                                         \
+       } while (0)
+
+/*
+ * Time Init + 8192 Updates of a zeroed 128 KiB buffer + Final for the
+ * SHA<mode> mechanism and print the elapsed microseconds, plus
+ * cycles-per-byte when cpu_mhz is non-zero.
+ */
+#define        SHA2_PERF_TEST(mode, diglen)                                    \
+       do {                                                            \
+               SHA2_CTX        sha_ctx;                                \
+               uint8_t         md[diglen / 8];                         \
+               uint8_t         buf[131072];                            \
+               struct timeval  t0, t1;                                 \
+               uint64_t        elapsed_us;                             \
+               double          cpb = 0;                                \
+               int             pass;                                   \
+               bzero(buf, sizeof (buf));                               \
+               (void) gettimeofday(&t0, NULL);                         \
+               SHA2Init(SHA ## mode ## _MECH_INFO_TYPE, &sha_ctx);     \
+               for (pass = 0; pass < 8192; pass++)                     \
+                       SHA2Update(&sha_ctx, buf, sizeof (buf));        \
+               SHA2Final(md, &sha_ctx);                                \
+               (void) gettimeofday(&t1, NULL);                         \
+               elapsed_us = (t1.tv_sec * 1000000llu + t1.tv_usec) -    \
+                   (t0.tv_sec * 1000000llu + t0.tv_usec);              \
+               if (cpu_mhz != 0) {                                     \
+                       cpb = (cpu_mhz * 1e6 * ((double)elapsed_us /    \
+                           1000000)) / (8192 * 128 * 1024);            \
+               }                                                       \
+               (void) printf("SHA%-9s%llu us (%.02f CPB)\n", #mode,    \
+                   (u_longlong_t)elapsed_us, cpb);                     \
+               NOTE(CONSTCOND)                                         \
+       } while (0)
+
+       (void) printf("Running algorithm correctness tests:\n");
+       SHA2_ALGO_TEST(test_msg0, 256, 256, sha256_test_digests[0]);
+       SHA2_ALGO_TEST(test_msg1, 256, 256, sha256_test_digests[1]);
+       SHA2_ALGO_TEST(test_msg0, 384, 384, sha384_test_digests[0]);
+       SHA2_ALGO_TEST(test_msg2, 384, 384, sha384_test_digests[2]);
+       SHA2_ALGO_TEST(test_msg0, 512, 512, sha512_test_digests[0]);
+       SHA2_ALGO_TEST(test_msg2, 512, 512, sha512_test_digests[2]);
+       SHA2_ALGO_TEST(test_msg0, 512_224, 224, sha512_224_test_digests[0]);
+       SHA2_ALGO_TEST(test_msg2, 512_224, 224, sha512_224_test_digests[2]);
+       SHA2_ALGO_TEST(test_msg0, 512_256, 256, sha512_256_test_digests[0]);
+       SHA2_ALGO_TEST(test_msg2, 512_256, 256, sha512_256_test_digests[2]);
+
+       /* The timing runs are skipped when any known-answer test failed. */
+       if (failed)
+               return (1);
+
+       (void) printf("Running performance tests (hashing 1024 MiB of "
+           "data):\n");
+       SHA2_PERF_TEST(256, 256);
+       SHA2_PERF_TEST(512, 512);
+
+       return (0);
+}
diff --git a/tests/zfs-tests/tests/functional/checksum/skein_test.c b/tests/zfs-tests/tests/functional/checksum/skein_test.c
new file mode 100644 (file)
index 0000000..37548f0
--- /dev/null
@@ -0,0 +1,342 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2013 Saso Kiselkov. All rights reserved.
+ */
+
+/*
+ * This is just to keep the compiler happy about sys/time.h not declaring
+ * gettimeofday due to -D_KERNEL (we can do this since we're actually
+ * running in userspace, but we need -D_KERNEL for the remaining Skein code).
+ */
+#ifdef _KERNEL
+#undef _KERNEL
+#endif
+
+#include <sys/skein.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <stdio.h>
+#include <sys/time.h>
+#define NOTE(x)
+
+typedef        enum boolean { B_FALSE, B_TRUE } boolean_t;
+typedef        unsigned long long      u_longlong_t;
+
+/*
+ * Skein test suite using values from the Skein V1.3 specification found at:
+ * http://www.skein-hash.info/sites/default/files/skein1.3.pdf
+ */
+
+/*
+ * Test messages from the Skein spec, Appendix C.
+ *
+ * test_msg0 is the single byte 0xFF; test_msg1 through test_msg4 are 32,
+ * 64, 128 and 256 bytes long and count down by one per byte from 0xFF.
+ * main() hashes sizeof (message) bytes, so these must remain arrays.
+ */
+const uint8_t  test_msg0[] = {
+       0xFF
+};
+
+const uint8_t  test_msg1[] = {
+       0xFF, 0xFE, 0xFD, 0xFC, 0xFB, 0xFA, 0xF9, 0xF8,
+       0xF7, 0xF6, 0xF5, 0xF4, 0xF3, 0xF2, 0xF1, 0xF0,
+       0xEF, 0xEE, 0xED, 0xEC, 0xEB, 0xEA, 0xE9, 0xE8,
+       0xE7, 0xE6, 0xE5, 0xE4, 0xE3, 0xE2, 0xE1, 0xE0
+};
+
+const uint8_t  test_msg2[] = {
+       0xFF, 0xFE, 0xFD, 0xFC, 0xFB, 0xFA, 0xF9, 0xF8,
+       0xF7, 0xF6, 0xF5, 0xF4, 0xF3, 0xF2, 0xF1, 0xF0,
+       0xEF, 0xEE, 0xED, 0xEC, 0xEB, 0xEA, 0xE9, 0xE8,
+       0xE7, 0xE6, 0xE5, 0xE4, 0xE3, 0xE2, 0xE1, 0xE0,
+       0xDF, 0xDE, 0xDD, 0xDC, 0xDB, 0xDA, 0xD9, 0xD8,
+       0xD7, 0xD6, 0xD5, 0xD4, 0xD3, 0xD2, 0xD1, 0xD0,
+       0xCF, 0xCE, 0xCD, 0xCC, 0xCB, 0xCA, 0xC9, 0xC8,
+       0xC7, 0xC6, 0xC5, 0xC4, 0xC3, 0xC2, 0xC1, 0xC0
+};
+
+const uint8_t  test_msg3[] = {
+       0xFF, 0xFE, 0xFD, 0xFC, 0xFB, 0xFA, 0xF9, 0xF8,
+       0xF7, 0xF6, 0xF5, 0xF4, 0xF3, 0xF2, 0xF1, 0xF0,
+       0xEF, 0xEE, 0xED, 0xEC, 0xEB, 0xEA, 0xE9, 0xE8,
+       0xE7, 0xE6, 0xE5, 0xE4, 0xE3, 0xE2, 0xE1, 0xE0,
+       0xDF, 0xDE, 0xDD, 0xDC, 0xDB, 0xDA, 0xD9, 0xD8,
+       0xD7, 0xD6, 0xD5, 0xD4, 0xD3, 0xD2, 0xD1, 0xD0,
+       0xCF, 0xCE, 0xCD, 0xCC, 0xCB, 0xCA, 0xC9, 0xC8,
+       0xC7, 0xC6, 0xC5, 0xC4, 0xC3, 0xC2, 0xC1, 0xC0,
+       0xBF, 0xBE, 0xBD, 0xBC, 0xBB, 0xBA, 0xB9, 0xB8,
+       0xB7, 0xB6, 0xB5, 0xB4, 0xB3, 0xB2, 0xB1, 0xB0,
+       0xAF, 0xAE, 0xAD, 0xAC, 0xAB, 0xAA, 0xA9, 0xA8,
+       0xA7, 0xA6, 0xA5, 0xA4, 0xA3, 0xA2, 0xA1, 0xA0,
+       0x9F, 0x9E, 0x9D, 0x9C, 0x9B, 0x9A, 0x99, 0x98,
+       0x97, 0x96, 0x95, 0x94, 0x93, 0x92, 0x91, 0x90,
+       0x8F, 0x8E, 0x8D, 0x8C, 0x8B, 0x8A, 0x89, 0x88,
+       0x87, 0x86, 0x85, 0x84, 0x83, 0x82, 0x81, 0x80
+};
+
+const uint8_t  test_msg4[] = {
+       0xFF, 0xFE, 0xFD, 0xFC, 0xFB, 0xFA, 0xF9, 0xF8,
+       0xF7, 0xF6, 0xF5, 0xF4, 0xF3, 0xF2, 0xF1, 0xF0,
+       0xEF, 0xEE, 0xED, 0xEC, 0xEB, 0xEA, 0xE9, 0xE8,
+       0xE7, 0xE6, 0xE5, 0xE4, 0xE3, 0xE2, 0xE1, 0xE0,
+       0xDF, 0xDE, 0xDD, 0xDC, 0xDB, 0xDA, 0xD9, 0xD8,
+       0xD7, 0xD6, 0xD5, 0xD4, 0xD3, 0xD2, 0xD1, 0xD0,
+       0xCF, 0xCE, 0xCD, 0xCC, 0xCB, 0xCA, 0xC9, 0xC8,
+       0xC7, 0xC6, 0xC5, 0xC4, 0xC3, 0xC2, 0xC1, 0xC0,
+       0xBF, 0xBE, 0xBD, 0xBC, 0xBB, 0xBA, 0xB9, 0xB8,
+       0xB7, 0xB6, 0xB5, 0xB4, 0xB3, 0xB2, 0xB1, 0xB0,
+       0xAF, 0xAE, 0xAD, 0xAC, 0xAB, 0xAA, 0xA9, 0xA8,
+       0xA7, 0xA6, 0xA5, 0xA4, 0xA3, 0xA2, 0xA1, 0xA0,
+       0x9F, 0x9E, 0x9D, 0x9C, 0x9B, 0x9A, 0x99, 0x98,
+       0x97, 0x96, 0x95, 0x94, 0x93, 0x92, 0x91, 0x90,
+       0x8F, 0x8E, 0x8D, 0x8C, 0x8B, 0x8A, 0x89, 0x88,
+       0x87, 0x86, 0x85, 0x84, 0x83, 0x82, 0x81, 0x80,
+       0x7F, 0x7E, 0x7D, 0x7C, 0x7B, 0x7A, 0x79, 0x78,
+       0x77, 0x76, 0x75, 0x74, 0x73, 0x72, 0x71, 0x70,
+       0x6F, 0x6E, 0x6D, 0x6C, 0x6B, 0x6A, 0x69, 0x68,
+       0x67, 0x66, 0x65, 0x64, 0x63, 0x62, 0x61, 0x60,
+       0x5F, 0x5E, 0x5D, 0x5C, 0x5B, 0x5A, 0x59, 0x58,
+       0x57, 0x56, 0x55, 0x54, 0x53, 0x52, 0x51, 0x50,
+       0x4F, 0x4E, 0x4D, 0x4C, 0x4B, 0x4A, 0x49, 0x48,
+       0x47, 0x46, 0x45, 0x44, 0x43, 0x42, 0x41, 0x40,
+       0x3F, 0x3E, 0x3D, 0x3C, 0x3B, 0x3A, 0x39, 0x38,
+       0x37, 0x36, 0x35, 0x34, 0x33, 0x32, 0x31, 0x30,
+       0x2F, 0x2E, 0x2D, 0x2C, 0x2B, 0x2A, 0x29, 0x28,
+       0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21, 0x20,
+       0x1F, 0x1E, 0x1D, 0x1C, 0x1B, 0x1A, 0x19, 0x18,
+       0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10,
+       0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08,
+       0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00
+};
+
+/*
+ * Test digests from the Skein spec, Appendix C.
+ *
+ * Row N of a digest table is the expected output for test_msgN.  The
+ * 256-bit table has rows for test_msg0 through test_msg2 only.
+ */
+const uint8_t  skein_256_test_digests[][32] = {
+       {
+               /* for test_msg0 */
+               0x0B, 0x98, 0xDC, 0xD1, 0x98, 0xEA, 0x0E, 0x50,
+               0xA7, 0xA2, 0x44, 0xC4, 0x44, 0xE2, 0x5C, 0x23,
+               0xDA, 0x30, 0xC1, 0x0F, 0xC9, 0xA1, 0xF2, 0x70,
+               0xA6, 0x63, 0x7F, 0x1F, 0x34, 0xE6, 0x7E, 0xD2
+       },
+       {
+               /* for test_msg1 */
+               0x8D, 0x0F, 0xA4, 0xEF, 0x77, 0x7F, 0xD7, 0x59,
+               0xDF, 0xD4, 0x04, 0x4E, 0x6F, 0x6A, 0x5A, 0xC3,
+               0xC7, 0x74, 0xAE, 0xC9, 0x43, 0xDC, 0xFC, 0x07,
+               0x92, 0x7B, 0x72, 0x3B, 0x5D, 0xBF, 0x40, 0x8B
+       },
+       {
+               /* for test_msg2 */
+               0xDF, 0x28, 0xE9, 0x16, 0x63, 0x0D, 0x0B, 0x44,
+               0xC4, 0xA8, 0x49, 0xDC, 0x9A, 0x02, 0xF0, 0x7A,
+               0x07, 0xCB, 0x30, 0xF7, 0x32, 0x31, 0x82, 0x56,
+               0xB1, 0x5D, 0x86, 0x5A, 0xC4, 0xAE, 0x16, 0x2F
+       }
+       /* no test digests for test_msg3 and test_msg4 */
+};
+
+const uint8_t  skein_512_test_digests[][64] = {
+       {
+               /* for test_msg0 */
+               0x71, 0xB7, 0xBC, 0xE6, 0xFE, 0x64, 0x52, 0x22,
+               0x7B, 0x9C, 0xED, 0x60, 0x14, 0x24, 0x9E, 0x5B,
+               0xF9, 0xA9, 0x75, 0x4C, 0x3A, 0xD6, 0x18, 0xCC,
+               0xC4, 0xE0, 0xAA, 0xE1, 0x6B, 0x31, 0x6C, 0xC8,
+               0xCA, 0x69, 0x8D, 0x86, 0x43, 0x07, 0xED, 0x3E,
+               0x80, 0xB6, 0xEF, 0x15, 0x70, 0x81, 0x2A, 0xC5,
+               0x27, 0x2D, 0xC4, 0x09, 0xB5, 0xA0, 0x12, 0xDF,
+               0x2A, 0x57, 0x91, 0x02, 0xF3, 0x40, 0x61, 0x7A
+       },
+       {
+               /*
+                * no test vector for test_msg1; this zero-filled row only
+                * keeps the later digests at their message's index
+                */
+               0,
+       },
+       {
+               /* for test_msg2 */
+               0x45, 0x86, 0x3B, 0xA3, 0xBE, 0x0C, 0x4D, 0xFC,
+               0x27, 0xE7, 0x5D, 0x35, 0x84, 0x96, 0xF4, 0xAC,
+               0x9A, 0x73, 0x6A, 0x50, 0x5D, 0x93, 0x13, 0xB4,
+               0x2B, 0x2F, 0x5E, 0xAD, 0xA7, 0x9F, 0xC1, 0x7F,
+               0x63, 0x86, 0x1E, 0x94, 0x7A, 0xFB, 0x1D, 0x05,
+               0x6A, 0xA1, 0x99, 0x57, 0x5A, 0xD3, 0xF8, 0xC9,
+               0xA3, 0xCC, 0x17, 0x80, 0xB5, 0xE5, 0xFA, 0x4C,
+               0xAE, 0x05, 0x0E, 0x98, 0x98, 0x76, 0x62, 0x5B
+       },
+       {
+               /* for test_msg3 */
+               0x91, 0xCC, 0xA5, 0x10, 0xC2, 0x63, 0xC4, 0xDD,
+               0xD0, 0x10, 0x53, 0x0A, 0x33, 0x07, 0x33, 0x09,
+               0x62, 0x86, 0x31, 0xF3, 0x08, 0x74, 0x7E, 0x1B,
+               0xCB, 0xAA, 0x90, 0xE4, 0x51, 0xCA, 0xB9, 0x2E,
+               0x51, 0x88, 0x08, 0x7A, 0xF4, 0x18, 0x87, 0x73,
+               0xA3, 0x32, 0x30, 0x3E, 0x66, 0x67, 0xA7, 0xA2,
+               0x10, 0x85, 0x6F, 0x74, 0x21, 0x39, 0x00, 0x00,
+               0x71, 0xF4, 0x8E, 0x8B, 0xA2, 0xA5, 0xAD, 0xB7
+       }
+       /* no test digests for test_msg4 */
+};
+
+const uint8_t  skein_1024_test_digests[][128] = {
+       {
+               /* for test_msg0 */
+               0xE6, 0x2C, 0x05, 0x80, 0x2E, 0xA0, 0x15, 0x24,
+               0x07, 0xCD, 0xD8, 0x78, 0x7F, 0xDA, 0x9E, 0x35,
+               0x70, 0x3D, 0xE8, 0x62, 0xA4, 0xFB, 0xC1, 0x19,
+               0xCF, 0xF8, 0x59, 0x0A, 0xFE, 0x79, 0x25, 0x0B,
+               0xCC, 0xC8, 0xB3, 0xFA, 0xF1, 0xBD, 0x24, 0x22,
+               0xAB, 0x5C, 0x0D, 0x26, 0x3F, 0xB2, 0xF8, 0xAF,
+               0xB3, 0xF7, 0x96, 0xF0, 0x48, 0x00, 0x03, 0x81,
+               0x53, 0x1B, 0x6F, 0x00, 0xD8, 0x51, 0x61, 0xBC,
+               0x0F, 0xFF, 0x4B, 0xEF, 0x24, 0x86, 0xB1, 0xEB,
+               0xCD, 0x37, 0x73, 0xFA, 0xBF, 0x50, 0xAD, 0x4A,
+               0xD5, 0x63, 0x9A, 0xF9, 0x04, 0x0E, 0x3F, 0x29,
+               0xC6, 0xC9, 0x31, 0x30, 0x1B, 0xF7, 0x98, 0x32,
+               0xE9, 0xDA, 0x09, 0x85, 0x7E, 0x83, 0x1E, 0x82,
+               0xEF, 0x8B, 0x46, 0x91, 0xC2, 0x35, 0x65, 0x65,
+               0x15, 0xD4, 0x37, 0xD2, 0xBD, 0xA3, 0x3B, 0xCE,
+               0xC0, 0x01, 0xC6, 0x7F, 0xFD, 0xE1, 0x5B, 0xA8
+       },
+       {
+               /*
+                * no test vector for test_msg1; zero-filled row that only
+                * keeps the later digests at their message's index
+                */
+               0
+       },
+       {
+               /*
+                * no test vector for test_msg2; zero-filled row that only
+                * keeps the later digests at their message's index
+                */
+               0
+       },
+       {
+               /* for test_msg3 */
+               0x1F, 0x3E, 0x02, 0xC4, 0x6F, 0xB8, 0x0A, 0x3F,
+               0xCD, 0x2D, 0xFB, 0xBC, 0x7C, 0x17, 0x38, 0x00,
+               0xB4, 0x0C, 0x60, 0xC2, 0x35, 0x4A, 0xF5, 0x51,
+               0x18, 0x9E, 0xBF, 0x43, 0x3C, 0x3D, 0x85, 0xF9,
+               0xFF, 0x18, 0x03, 0xE6, 0xD9, 0x20, 0x49, 0x31,
+               0x79, 0xED, 0x7A, 0xE7, 0xFC, 0xE6, 0x9C, 0x35,
+               0x81, 0xA5, 0xA2, 0xF8, 0x2D, 0x3E, 0x0C, 0x7A,
+               0x29, 0x55, 0x74, 0xD0, 0xCD, 0x7D, 0x21, 0x7C,
+               0x48, 0x4D, 0x2F, 0x63, 0x13, 0xD5, 0x9A, 0x77,
+               0x18, 0xEA, 0xD0, 0x7D, 0x07, 0x29, 0xC2, 0x48,
+               0x51, 0xD7, 0xE7, 0xD2, 0x49, 0x1B, 0x90, 0x2D,
+               0x48, 0x91, 0x94, 0xE6, 0xB7, 0xD3, 0x69, 0xDB,
+               0x0A, 0xB7, 0xAA, 0x10, 0x6F, 0x0E, 0xE0, 0xA3,
+               0x9A, 0x42, 0xEF, 0xC5, 0x4F, 0x18, 0xD9, 0x37,
+               0x76, 0x08, 0x09, 0x85, 0xF9, 0x07, 0x57, 0x4F,
+               0x99, 0x5E, 0xC6, 0xA3, 0x71, 0x53, 0xA5, 0x78
+       },
+       {
+               /* for test_msg4 */
+               0x84, 0x2A, 0x53, 0xC9, 0x9C, 0x12, 0xB0, 0xCF,
+               0x80, 0xCF, 0x69, 0x49, 0x1B, 0xE5, 0xE2, 0xF7,
+               0x51, 0x5D, 0xE8, 0x73, 0x3B, 0x6E, 0xA9, 0x42,
+               0x2D, 0xFD, 0x67, 0x66, 0x65, 0xB5, 0xFA, 0x42,
+               0xFF, 0xB3, 0xA9, 0xC4, 0x8C, 0x21, 0x77, 0x77,
+               0x95, 0x08, 0x48, 0xCE, 0xCD, 0xB4, 0x8F, 0x64,
+               0x0F, 0x81, 0xFB, 0x92, 0xBE, 0xF6, 0xF8, 0x8F,
+               0x7A, 0x85, 0xC1, 0xF7, 0xCD, 0x14, 0x46, 0xC9,
+               0x16, 0x1C, 0x0A, 0xFE, 0x8F, 0x25, 0xAE, 0x44,
+               0x4F, 0x40, 0xD3, 0x68, 0x00, 0x81, 0xC3, 0x5A,
+               0xA4, 0x3F, 0x64, 0x0F, 0xD5, 0xFA, 0x3C, 0x3C,
+               0x03, 0x0B, 0xCC, 0x06, 0xAB, 0xAC, 0x01, 0xD0,
+               0x98, 0xBC, 0xC9, 0x84, 0xEB, 0xD8, 0x32, 0x27,
+               0x12, 0x92, 0x1E, 0x00, 0xB1, 0xBA, 0x07, 0xD6,
+               0xD0, 0x1F, 0x26, 0x90, 0x70, 0x50, 0x25, 0x5E,
+               0xF2, 0xC8, 0xE2, 0x4F, 0x71, 0x6C, 0x52, 0xA5
+       }
+};
+
+int
+main(int argc, char *argv[])
+{
+       boolean_t       failed = B_FALSE;
+       uint64_t        cpu_mhz = 0;
+
+       if (argc == 2)
+               cpu_mhz = atoi(argv[1]);
+
+#define        SKEIN_ALGO_TEST(_m, mode, diglen, testdigest)                   \
+       do {                                                            \
+               Skein ## mode ## _Ctxt_t        ctx;                    \
+               uint8_t                         digest[diglen / 8];     \
+               (void) Skein ## mode ## _Init(&ctx, diglen);            \
+               (void) Skein ## mode ## _Update(&ctx, _m, sizeof (_m)); \
+               (void) Skein ## mode ## _Final(&ctx, digest);           \
+               (void) printf("Skein" #mode "/" #diglen                 \
+                   "\tMessage: " #_m "\tResult: ");                    \
+               if (bcmp(digest, testdigest, diglen / 8) == 0) {        \
+                       (void) printf("OK\n");                          \
+               } else {                                                \
+                       (void) printf("FAILED!\n");                     \
+                       failed = B_TRUE;                                \
+               }                                                       \
+               NOTE(CONSTCOND)                                         \
+       } while (0)
+
+#define        SKEIN_PERF_TEST(mode, diglen)                                   \
+       do {                                                            \
+               Skein ## mode ## _Ctxt_t ctx;                           \
+               uint8_t         digest[diglen / 8];                     \
+               uint8_t         block[131072];                          \
+               uint64_t        delta;                                  \
+               double          cpb = 0;                                \
+               int             i;                                      \
+               struct timeval  start, end;                             \
+               bzero(block, sizeof (block));                           \
+               (void) gettimeofday(&start, NULL);                      \
+               (void) Skein ## mode ## _Init(&ctx, diglen);            \
+               for (i = 0; i < 8192; i++) {                            \
+                       (void) Skein ## mode ## _Update(&ctx, block,    \
+                           sizeof (block));                            \
+               }                                                       \
+               (void) Skein ## mode ## _Final(&ctx, digest);           \
+               (void) gettimeofday(&end, NULL);                        \
+               delta = (end.tv_sec * 1000000llu + end.tv_usec) -       \
+                   (start.tv_sec * 1000000llu + start.tv_usec);        \
+               if (cpu_mhz != 0) {                                     \
+                       cpb = (cpu_mhz * 1e6 * ((double)delta /         \
+                           1000000)) / (8192 * 128 * 1024);            \
+               }                                                       \
+               (void) printf("Skein" #mode "/" #diglen "\t%llu us "    \
+                   "(%.02f CPB)\n", (u_longlong_t)delta, cpb);         \
+               NOTE(CONSTCOND)                                         \
+       } while (0)
+
+       (void) printf("Running algorithm correctness tests:\n");
+       SKEIN_ALGO_TEST(test_msg0, _256, 256, skein_256_test_digests[0]);
+       SKEIN_ALGO_TEST(test_msg1, _256, 256, skein_256_test_digests[1]);
+       SKEIN_ALGO_TEST(test_msg2, _256, 256, skein_256_test_digests[2]);
+       SKEIN_ALGO_TEST(test_msg0, _512, 512, skein_512_test_digests[0]);
+       SKEIN_ALGO_TEST(test_msg2, _512, 512, skein_512_test_digests[2]);
+       SKEIN_ALGO_TEST(test_msg3, _512, 512, skein_512_test_digests[3]);
+       SKEIN_ALGO_TEST(test_msg0, 1024, 1024, skein_1024_test_digests[0]);
+       SKEIN_ALGO_TEST(test_msg3, 1024, 1024, skein_1024_test_digests[3]);
+       SKEIN_ALGO_TEST(test_msg4, 1024, 1024, skein_1024_test_digests[4]);
+       if (failed)
+               return (1);
+
+       (void) printf("Running performance tests (hashing 1024 MiB of "
+           "data):\n");
+       SKEIN_PERF_TEST(_256, 256);
+       SKEIN_PERF_TEST(_512, 512);
+       SKEIN_PERF_TEST(1024, 1024);
+
+       return (0);
+}
index edc7a3fb90a996de1e450381c2b594039d88a22f..27003b21b556ecc4459c4c5ad1f04346f0ab6803 100755 (executable)
@@ -46,7 +46,7 @@
 verify_runnable "both"
 
 set -A dataset "$TESTPOOL" "$TESTPOOL/$TESTFS" "$TESTPOOL/$TESTVOL"
-set -A values "on" "off" "fletcher2" "fletcher4" "sha256"
+set -A values "on" "off" "fletcher2" "fletcher4" "sha256" "sha512" "skein" "edonr" "noparity"
 
 log_assert "Setting a valid checksum on a file system, volume," \
        "it should be successful."
index f7a1d9cb13533369528bbc3c11fb1936d633c12a..3807d0af63d820b982587595fcb74a531f4366ed 100644 (file)
@@ -37,7 +37,8 @@ typeset -a properties=("size" "capacity" "altroot" "health" "guid" "version"
     "feature@async_destroy" "feature@empty_bpobj" "feature@lz4_compress"
     "feature@large_blocks" "feature@large_dnode" "feature@filesystem_limits"
     "feature@spacemap_histogram" "feature@enabled_txg" "feature@hole_birth"
-    "feature@extensible_dataset" "feature@bookmarks" "feature@embedded_data")
+    "feature@extensible_dataset" "feature@bookmarks" "feature@embedded_data"
+    "feature@sha512" "feature@skein" "feature@edonr")
 else
 typeset -a properties=("size" "capacity" "altroot" "health" "guid" "version"
     "bootfs" ""leaked" delegation" "autoreplace" "cachefile" "dedupditto" "dedupratio"
@@ -45,5 +46,6 @@ typeset -a properties=("size" "capacity" "altroot" "health" "guid" "version"
     "listsnapshots" "autoexpand" "feature@async_destroy" "feature@empty_bpobj"
     "feature@lz4_compress" "feature@multi_vdev_crash_dump"
     "feature@spacemap_histogram" "feature@enabled_txg" "feature@hole_birth"
-    "feature@extensible_dataset" "feature@bookmarks")
+    "feature@extensible_dataset" "feature@bookmarks" "feature@sha512"
+    "feature@skein" "feature@edonr")
 fi