git.proxmox.com Git - mirror_zfs.git/commitdiff
Update BLAKE3 for using the new impl handling
author Tino Reichardt <milky-zfs@mcmilk.de>
Mon, 27 Feb 2023 15:14:37 +0000 (16:14 +0100)
committer Brian Behlendorf <behlendorf1@llnl.gov>
Thu, 2 Mar 2023 21:52:27 +0000 (13:52 -0800)
This commit changes the BLAKE3 implementation handling and
also the calls to it from the ztest command.

Tested-by: Rich Ercolani <rincebrain@gmail.com>
Tested-by: Sebastian Gottschall <s.gottschall@dd-wrt.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Tino Reichardt <milky-zfs@mcmilk.de>
Closes #13741

cmd/ztest.c
include/sys/blake3.h
module/icp/algs/blake3/blake3.c
module/icp/algs/blake3/blake3_generic.c
module/icp/algs/blake3/blake3_impl.c
module/icp/algs/blake3/blake3_impl.h
module/icp/algs/blake3/blake3_x86-64.c [deleted file]
tests/zfs-tests/cmd/checksum/blake3_test.c

index 9dce486ee08c48bc13e05146623bc203bde348eb..790835363fef8bd0dd4d8f47e0e78dd0421d2dd5 100644 (file)
 #include <libnvpair.h>
 #include <libzutil.h>
 #include <sys/crypto/icp.h>
+#include <sys/zfs_impl.h>
 #if (__GLIBC__ && !__UCLIBC__)
 #include <execinfo.h> /* for backtrace() */
 #endif
@@ -6410,6 +6411,7 @@ ztest_blake3(ztest_ds_t *zd, uint64_t id)
        int i, *ptr;
        uint32_t size;
        BLAKE3_CTX ctx;
+       const zfs_impl_t *blake3 = zfs_impl_get_ops("blake3");
 
        size = ztest_random_blocksize();
        buf = umem_alloc(size, UMEM_NOFAIL);
@@ -6434,7 +6436,7 @@ ztest_blake3(ztest_ds_t *zd, uint64_t id)
                void *res2 = &zc_res2;
 
                /* BLAKE3_KEY_LEN = 32 */
-               VERIFY0(blake3_impl_setname("generic"));
+               VERIFY0(blake3->setname("generic"));
                templ = abd_checksum_blake3_tmpl_init(&salt);
                Blake3_InitKeyed(&ctx, salt_ptr);
                Blake3_Update(&ctx, buf, size);
@@ -6443,7 +6445,7 @@ ztest_blake3(ztest_ds_t *zd, uint64_t id)
                ZIO_CHECKSUM_BSWAP(&zc_ref2);
                abd_checksum_blake3_tmpl_free(templ);
 
-               VERIFY0(blake3_impl_setname("cycle"));
+               VERIFY0(blake3->setname("cycle"));
                while (run_count-- > 0) {
 
                        /* Test current implementation */
index ad65fc8db7b995b870d8b07999456a98139c794b..b981b18db94321aae43723f6c899fc79cf6bb509 100644 (file)
 /*
  * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
  * Copyright (c) 2019-2020 Samuel Neves and Jack O'Connor
- * Copyright (c) 2021 Tino Reichardt <milky-zfs@mcmilk.de>
+ * Copyright (c) 2021-2022 Tino Reichardt <milky-zfs@mcmilk.de>
  */
 
-#ifndef BLAKE3_H
-#define        BLAKE3_H
+#ifndef        _SYS_BLAKE3_H
+#define        _SYS_BLAKE3_H
 
 #ifdef  _KERNEL
 #include <sys/types.h>
@@ -97,26 +97,8 @@ extern void **blake3_per_cpu_ctx;
 extern void blake3_per_cpu_ctx_init(void);
 extern void blake3_per_cpu_ctx_fini(void);
 
-/* get count of supported implementations */
-extern uint32_t blake3_impl_getcnt(void);
-
-/* get id of selected implementation */
-extern uint32_t blake3_impl_getid(void);
-
-/* get name of selected implementation */
-extern const char *blake3_impl_getname(void);
-
-/* setup id as fastest implementation */
-extern void blake3_impl_set_fastest(uint32_t id);
-
-/* set implementation by id */
-extern void blake3_impl_setid(uint32_t id);
-
-/* set implementation by name */
-extern int blake3_impl_setname(const char *name);
-
 #ifdef __cplusplus
 }
 #endif
 
-#endif /* BLAKE3_H */
+#endif /* _SYS_BLAKE3_H */
index 8e441f454a725bc1cf829e69e76bbe2c30230618..4f93e4ff205168d4488175f572128bd880bb78c5 100644 (file)
@@ -432,7 +432,7 @@ static void hasher_init_base(BLAKE3_CTX *ctx, const uint32_t key[8],
        memcpy(ctx->key, key, BLAKE3_KEY_LEN);
        chunk_state_init(&ctx->chunk, key, flags);
        ctx->cv_stack_len = 0;
-       ctx->ops = blake3_impl_get_ops();
+       ctx->ops = blake3_get_ops();
 }
 
 /*
index 94a1f108236e70b8619f779ef0531f9b90c53c8e..ca7197a26f39a2cc9bf2ff0748a01eb54f027c58 100644 (file)
@@ -187,7 +187,8 @@ static inline void blake3_hash_many_generic(const uint8_t * const *inputs,
        }
 }
 
-static inline boolean_t blake3_is_generic_supported(void)
+/* the generic implementation is always okay */
+static boolean_t blake3_is_supported(void)
 {
        return (B_TRUE);
 }
@@ -196,7 +197,7 @@ const blake3_ops_t blake3_generic_impl = {
        .compress_in_place = blake3_compress_in_place_generic,
        .compress_xof = blake3_compress_xof_generic,
        .hash_many = blake3_hash_many_generic,
-       .is_supported = blake3_is_generic_supported,
+       .is_supported = blake3_is_supported,
        .degree = 4,
        .name = "generic"
 };
index 7bc4db2c9806a69b5c1f080e9c9299c0ceba2df7..f68a5edfeaa4c1d29f7f381fcc3677ecdae57e79 100644 (file)
  */
 
 #include <sys/zfs_context.h>
-#include <sys/zio_checksum.h>
+#include <sys/zfs_impl.h>
+#include <sys/blake3.h>
+#include <sys/simd.h>
 
 #include "blake3_impl.h"
 
-static const blake3_ops_t *const blake3_impls[] = {
-       &blake3_generic_impl,
 #if defined(__aarch64__) || \
        (defined(__x86_64) && defined(HAVE_SSE2)) || \
        (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
-       &blake3_sse2_impl,
-#endif
-#if defined(__aarch64__) || \
-       (defined(__x86_64) && defined(HAVE_SSE4_1)) || \
-       (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
-       &blake3_sse41_impl,
-#endif
-#if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2)
-       &blake3_avx2_impl,
-#endif
-#if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL)
-       &blake3_avx512_impl,
-#endif
-};
-
-/* Select BLAKE3 implementation */
-#define        IMPL_FASTEST    (UINT32_MAX)
-#define        IMPL_CYCLE      (UINT32_MAX - 1)
-
-#define        IMPL_READ(i)    (*(volatile uint32_t *) &(i))
-
-/* Indicate that benchmark has been done */
-static boolean_t blake3_initialized = B_FALSE;
-
-/* Implementation that contains the fastest methods */
-static blake3_ops_t blake3_fastest_impl = {
-       .name = "fastest"
-};
 
-/* Hold all supported implementations */
-static const blake3_ops_t *blake3_supp_impls[ARRAY_SIZE(blake3_impls)];
-static uint32_t blake3_supp_impls_cnt = 0;
+extern void zfs_blake3_compress_in_place_sse2(uint32_t cv[8],
+    const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+    uint64_t counter, uint8_t flags);
+
+extern void zfs_blake3_compress_xof_sse2(const uint32_t cv[8],
+    const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+    uint64_t counter, uint8_t flags, uint8_t out[64]);
+
+extern void zfs_blake3_hash_many_sse2(const uint8_t * const *inputs,
+    size_t num_inputs, size_t blocks, const uint32_t key[8],
+    uint64_t counter, boolean_t increment_counter, uint8_t flags,
+    uint8_t flags_start, uint8_t flags_end, uint8_t *out);
+
+static void blake3_compress_in_place_sse2(uint32_t cv[8],
+    const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+    uint64_t counter, uint8_t flags) {
+       kfpu_begin();
+       zfs_blake3_compress_in_place_sse2(cv, block, block_len, counter,
+           flags);
+       kfpu_end();
+}
 
-/* Currently selected implementation */
-static uint32_t blake3_impl_chosen = IMPL_FASTEST;
+static void blake3_compress_xof_sse2(const uint32_t cv[8],
+    const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+    uint64_t counter, uint8_t flags, uint8_t out[64]) {
+       kfpu_begin();
+       zfs_blake3_compress_xof_sse2(cv, block, block_len, counter, flags,
+           out);
+       kfpu_end();
+}
 
-static struct blake3_impl_selector {
-       const char *name;
-       uint32_t sel;
-} blake3_impl_selectors[] = {
-       { "cycle",      IMPL_CYCLE },
-       { "fastest",    IMPL_FASTEST }
-};
+static void blake3_hash_many_sse2(const uint8_t * const *inputs,
+    size_t num_inputs, size_t blocks, const uint32_t key[8],
+    uint64_t counter, boolean_t increment_counter, uint8_t flags,
+    uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
+       kfpu_begin();
+       zfs_blake3_hash_many_sse2(inputs, num_inputs, blocks, key, counter,
+           increment_counter, flags, flags_start, flags_end, out);
+       kfpu_end();
+}
 
-/* check the supported implementations */
-static void blake3_impl_init(void)
+static boolean_t blake3_is_sse2_supported(void)
 {
-       int i, c;
-
-       /* init only once */
-       if (likely(blake3_initialized))
-               return;
+#if defined(__x86_64)
+       return (kfpu_allowed() && zfs_sse2_available());
+#elif defined(__PPC64__)
+       return (kfpu_allowed() && zfs_vsx_available());
+#else
+       return (kfpu_allowed());
+#endif
+}
 
-       /* move supported implementations into blake3_supp_impls */
-       for (i = 0, c = 0; i < ARRAY_SIZE(blake3_impls); i++) {
-               const blake3_ops_t *impl = blake3_impls[i];
+const blake3_ops_t blake3_sse2_impl = {
+       .compress_in_place = blake3_compress_in_place_sse2,
+       .compress_xof = blake3_compress_xof_sse2,
+       .hash_many = blake3_hash_many_sse2,
+       .is_supported = blake3_is_sse2_supported,
+       .degree = 4,
+       .name = "sse2"
+};
+#endif
 
-               if (impl->is_supported && impl->is_supported())
-                       blake3_supp_impls[c++] = impl;
-       }
-       blake3_supp_impls_cnt = c;
+#if defined(__aarch64__) || \
+       (defined(__x86_64) && defined(HAVE_SSE2)) || \
+       (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
 
-       /* first init generic impl, may be changed via set_fastest() */
-       memcpy(&blake3_fastest_impl, blake3_impls[0],
-           sizeof (blake3_fastest_impl));
-       blake3_initialized = B_TRUE;
+extern void zfs_blake3_compress_in_place_sse41(uint32_t cv[8],
+    const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+    uint64_t counter, uint8_t flags);
+
+extern void zfs_blake3_compress_xof_sse41(const uint32_t cv[8],
+    const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+    uint64_t counter, uint8_t flags, uint8_t out[64]);
+
+extern void zfs_blake3_hash_many_sse41(const uint8_t * const *inputs,
+    size_t num_inputs, size_t blocks, const uint32_t key[8],
+    uint64_t counter, boolean_t increment_counter, uint8_t flags,
+    uint8_t flags_start, uint8_t flags_end, uint8_t *out);
+
+static void blake3_compress_in_place_sse41(uint32_t cv[8],
+    const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+    uint64_t counter, uint8_t flags) {
+       kfpu_begin();
+       zfs_blake3_compress_in_place_sse41(cv, block, block_len, counter,
+           flags);
+       kfpu_end();
 }
 
-/* get number of supported implementations */
-uint32_t
-blake3_impl_getcnt(void)
-{
-       blake3_impl_init();
-       return (blake3_supp_impls_cnt);
+static void blake3_compress_xof_sse41(const uint32_t cv[8],
+    const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+    uint64_t counter, uint8_t flags, uint8_t out[64]) {
+       kfpu_begin();
+       zfs_blake3_compress_xof_sse41(cv, block, block_len, counter, flags,
+           out);
+       kfpu_end();
 }
 
-/* get id of selected implementation */
-uint32_t
-blake3_impl_getid(void)
-{
-       return (IMPL_READ(blake3_impl_chosen));
+static void blake3_hash_many_sse41(const uint8_t * const *inputs,
+    size_t num_inputs, size_t blocks, const uint32_t key[8],
+    uint64_t counter, boolean_t increment_counter, uint8_t flags,
+    uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
+       kfpu_begin();
+       zfs_blake3_hash_many_sse41(inputs, num_inputs, blocks, key, counter,
+           increment_counter, flags, flags_start, flags_end, out);
+       kfpu_end();
 }
 
-/* get name of selected implementation */
-const char *
-blake3_impl_getname(void)
+static boolean_t blake3_is_sse41_supported(void)
 {
-       uint32_t impl = IMPL_READ(blake3_impl_chosen);
-
-       blake3_impl_init();
-       switch (impl) {
-       case IMPL_FASTEST:
-               return ("fastest");
-       case IMPL_CYCLE:
-               return ("cycle");
-       default:
-               return (blake3_supp_impls[impl]->name);
-       }
+#if defined(__x86_64)
+       return (kfpu_allowed() && zfs_sse4_1_available());
+#elif defined(__PPC64__)
+       return (kfpu_allowed() && zfs_vsx_available());
+#else
+       return (kfpu_allowed());
+#endif
 }
 
-/* setup id as fastest implementation */
-void
-blake3_impl_set_fastest(uint32_t id)
-{
-       /* setup fastest impl */
-       memcpy(&blake3_fastest_impl, blake3_supp_impls[id],
-           sizeof (blake3_fastest_impl));
+const blake3_ops_t blake3_sse41_impl = {
+       .compress_in_place = blake3_compress_in_place_sse41,
+       .compress_xof = blake3_compress_xof_sse41,
+       .hash_many = blake3_hash_many_sse41,
+       .is_supported = blake3_is_sse41_supported,
+       .degree = 4,
+       .name = "sse41"
+};
+#endif
+
+#if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2)
+extern void zfs_blake3_hash_many_avx2(const uint8_t * const *inputs,
+    size_t num_inputs, size_t blocks, const uint32_t key[8],
+    uint64_t counter, boolean_t increment_counter, uint8_t flags,
+    uint8_t flags_start, uint8_t flags_end, uint8_t *out);
+
+static void blake3_hash_many_avx2(const uint8_t * const *inputs,
+    size_t num_inputs, size_t blocks, const uint32_t key[8],
+    uint64_t counter, boolean_t increment_counter, uint8_t flags,
+    uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
+       kfpu_begin();
+       zfs_blake3_hash_many_avx2(inputs, num_inputs, blocks, key, counter,
+           increment_counter, flags, flags_start, flags_end, out);
+       kfpu_end();
 }
 
-/* set implementation by id */
-void
-blake3_impl_setid(uint32_t id)
+static boolean_t blake3_is_avx2_supported(void)
 {
-       blake3_impl_init();
-       switch (id) {
-       case IMPL_FASTEST:
-               atomic_swap_32(&blake3_impl_chosen, IMPL_FASTEST);
-               break;
-       case IMPL_CYCLE:
-               atomic_swap_32(&blake3_impl_chosen, IMPL_CYCLE);
-               break;
-       default:
-               ASSERT3U(id, <, blake3_supp_impls_cnt);
-               atomic_swap_32(&blake3_impl_chosen, id);
-               break;
-       }
+       return (kfpu_allowed() && zfs_sse4_1_available() &&
+           zfs_avx2_available());
 }
 
-/* set implementation by name */
-int
-blake3_impl_setname(const char *val)
-{
-       uint32_t impl = IMPL_READ(blake3_impl_chosen);
-       size_t val_len;
-       int i, err = -EINVAL;
-
-       blake3_impl_init();
-       val_len = strlen(val);
-       while ((val_len > 0) && !!isspace(val[val_len-1])) /* trim '\n' */
-               val_len--;
-
-       /* check mandatory implementations */
-       for (i = 0; i < ARRAY_SIZE(blake3_impl_selectors); i++) {
-               const char *name = blake3_impl_selectors[i].name;
-
-               if (val_len == strlen(name) &&
-                   strncmp(val, name, val_len) == 0) {
-                       impl = blake3_impl_selectors[i].sel;
-                       err = 0;
-                       break;
-               }
-       }
+const blake3_ops_t
+blake3_avx2_impl = {
+       .compress_in_place = blake3_compress_in_place_sse41,
+       .compress_xof = blake3_compress_xof_sse41,
+       .hash_many = blake3_hash_many_avx2,
+       .is_supported = blake3_is_avx2_supported,
+       .degree = 8,
+       .name = "avx2"
+};
+#endif
 
-       if (err != 0 && blake3_initialized) {
-               /* check all supported implementations */
-               for (i = 0; i < blake3_supp_impls_cnt; i++) {
-                       const char *name = blake3_supp_impls[i]->name;
-
-                       if (val_len == strlen(name) &&
-                           strncmp(val, name, val_len) == 0) {
-                               impl = i;
-                               err = 0;
-                               break;
-                       }
-               }
-       }
+#if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL)
+extern void zfs_blake3_compress_in_place_avx512(uint32_t cv[8],
+    const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+    uint64_t counter, uint8_t flags);
+
+extern void zfs_blake3_compress_xof_avx512(const uint32_t cv[8],
+    const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+    uint64_t counter, uint8_t flags, uint8_t out[64]);
+
+extern void zfs_blake3_hash_many_avx512(const uint8_t * const *inputs,
+    size_t num_inputs, size_t blocks, const uint32_t key[8],
+    uint64_t counter, boolean_t increment_counter, uint8_t flags,
+    uint8_t flags_start, uint8_t flags_end, uint8_t *out);
+
+static void blake3_compress_in_place_avx512(uint32_t cv[8],
+    const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+    uint64_t counter, uint8_t flags) {
+       kfpu_begin();
+       zfs_blake3_compress_in_place_avx512(cv, block, block_len, counter,
+           flags);
+       kfpu_end();
+}
 
-       if (err == 0) {
-               atomic_swap_32(&blake3_impl_chosen, impl);
-       }
+static void blake3_compress_xof_avx512(const uint32_t cv[8],
+    const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+    uint64_t counter, uint8_t flags, uint8_t out[64]) {
+       kfpu_begin();
+       zfs_blake3_compress_xof_avx512(cv, block, block_len, counter, flags,
+           out);
+       kfpu_end();
+}
 
-       return (err);
+static void blake3_hash_many_avx512(const uint8_t * const *inputs,
+    size_t num_inputs, size_t blocks, const uint32_t key[8],
+    uint64_t counter, boolean_t increment_counter, uint8_t flags,
+    uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
+       kfpu_begin();
+       zfs_blake3_hash_many_avx512(inputs, num_inputs, blocks, key, counter,
+           increment_counter, flags, flags_start, flags_end, out);
+       kfpu_end();
 }
 
-const blake3_ops_t *
-blake3_impl_get_ops(void)
+static boolean_t blake3_is_avx512_supported(void)
 {
-       const blake3_ops_t *ops = NULL;
-       uint32_t impl = IMPL_READ(blake3_impl_chosen);
-
-       blake3_impl_init();
-       switch (impl) {
-       case IMPL_FASTEST:
-               ASSERT(blake3_initialized);
-               ops = &blake3_fastest_impl;
-               break;
-       case IMPL_CYCLE:
-               /* Cycle through supported implementations */
-               ASSERT(blake3_initialized);
-               ASSERT3U(blake3_supp_impls_cnt, >, 0);
-               static uint32_t cycle_count = 0;
-               uint32_t idx = (++cycle_count) % blake3_supp_impls_cnt;
-               ops = blake3_supp_impls[idx];
-               break;
-       default:
-               ASSERT3U(blake3_supp_impls_cnt, >, 0);
-               ASSERT3U(impl, <, blake3_supp_impls_cnt);
-               ops = blake3_supp_impls[impl];
-               break;
-       }
-
-       ASSERT3P(ops, !=, NULL);
-       return (ops);
+       return (kfpu_allowed() && zfs_avx512f_available() &&
+           zfs_avx512vl_available());
 }
 
-#if defined(_KERNEL)
+const blake3_ops_t blake3_avx512_impl = {
+       .compress_in_place = blake3_compress_in_place_avx512,
+       .compress_xof = blake3_compress_xof_avx512,
+       .hash_many = blake3_hash_many_avx512,
+       .is_supported = blake3_is_avx512_supported,
+       .degree = 16,
+       .name = "avx512"
+};
+#endif
+
+extern const blake3_ops_t blake3_generic_impl;
+
+static const blake3_ops_t *const blake3_impls[] = {
+       &blake3_generic_impl,
+#if defined(__aarch64__) || \
+       (defined(__x86_64) && defined(HAVE_SSE2)) || \
+       (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
+       &blake3_sse2_impl,
+#endif
+#if defined(__aarch64__) || \
+       (defined(__x86_64) && defined(HAVE_SSE4_1)) || \
+       (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
+       &blake3_sse41_impl,
+#endif
+#if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2)
+       &blake3_avx2_impl,
+#endif
+#if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL)
+       &blake3_avx512_impl,
+#endif
+};
 
+/* use the generic implementation functions */
+#define        IMPL_NAME               "blake3"
+#define        IMPL_OPS_T              blake3_ops_t
+#define        IMPL_ARRAY              blake3_impls
+#define        IMPL_GET_OPS            blake3_get_ops
+#define        ZFS_IMPL_OPS            zfs_blake3_ops
+#include <generic_impl.c>
+
+#ifdef _KERNEL
 void **blake3_per_cpu_ctx;
 
 void
@@ -253,9 +297,6 @@ blake3_per_cpu_ctx_init(void)
                blake3_per_cpu_ctx[i] = kmem_alloc(sizeof (BLAKE3_CTX),
                    KM_SLEEP);
        }
-
-       /* init once in kernel mode */
-       blake3_impl_init();
 }
 
 void
@@ -276,7 +317,7 @@ blake3_per_cpu_ctx_fini(void)
 static int
 blake3_param_get(char *buffer, zfs_kernel_param_t *unused)
 {
-       const uint32_t impl = IMPL_READ(blake3_impl_chosen);
+       const uint32_t impl = IMPL_READ(generic_impl_chosen);
        char *fmt;
        int cnt = 0;
 
@@ -289,10 +330,11 @@ blake3_param_get(char *buffer, zfs_kernel_param_t *unused)
        cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt, "fastest");
 
        /* list all supported implementations */
-       for (uint32_t i = 0; i < blake3_supp_impls_cnt; ++i) {
+       generic_impl_init();
+       for (uint32_t i = 0; i < generic_supp_impls_cnt; ++i) {
                fmt = IMPL_FMT(impl, i);
                cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt,
-                   blake3_supp_impls[i]->name);
+                   blake3_impls[i]->name);
        }
 
        return (cnt);
@@ -302,7 +344,7 @@ static int
 blake3_param_set(const char *val, zfs_kernel_param_t *unused)
 {
        (void) unused;
-       return (blake3_impl_setname(val));
+       return (generic_impl_setname(val));
 }
 
 #elif defined(__FreeBSD__)
@@ -314,8 +356,9 @@ blake3_param(ZFS_MODULE_PARAM_ARGS)
 {
        int err;
 
+       generic_impl_init();
        if (req->newptr == NULL) {
-               const uint32_t impl = IMPL_READ(blake3_impl_chosen);
+               const uint32_t impl = IMPL_READ(generic_impl_chosen);
                const int init_buflen = 64;
                const char *fmt;
                struct sbuf *s;
@@ -331,9 +374,9 @@ blake3_param(ZFS_MODULE_PARAM_ARGS)
                (void) sbuf_printf(s, fmt, "fastest");
 
                /* list all supported implementations */
-               for (uint32_t i = 0; i < blake3_supp_impls_cnt; ++i) {
+               for (uint32_t i = 0; i < generic_supp_impls_cnt; ++i) {
                        fmt = IMPL_FMT(impl, i);
-                       (void) sbuf_printf(s, fmt, blake3_supp_impls[i]->name);
+                       (void) sbuf_printf(s, fmt, generic_supp_impls[i]->name);
                }
 
                err = sbuf_finish(s);
@@ -349,7 +392,7 @@ blake3_param(ZFS_MODULE_PARAM_ARGS)
                return (err);
        }
 
-       return (-blake3_impl_setname(buf));
+       return (-generic_impl_setname(buf));
 }
 #endif
 
index ecb51e3a301024bd38be8d65e112f2841bfda4c4..90d508fac08fd13b386b0d06fd2d9022d33bcef5 100644 (file)
  * Copyright (c) 2021-2022 Tino Reichardt <milky-zfs@mcmilk.de>
  */
 
-#ifndef BLAKE3_IMPL_H
+#ifndef        BLAKE3_IMPL_H
 #define        BLAKE3_IMPL_H
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <sys/types.h>
 #include <sys/blake3.h>
 #include <sys/simd.h>
 #include <sys/asm_linkage.h>
@@ -56,7 +55,7 @@ typedef void (*blake3_hash_many_f)(const uint8_t * const *inputs,
 
 typedef boolean_t (*blake3_is_supported_f)(void);
 
-typedef struct blake3_impl_ops {
+typedef struct {
        blake3_compress_in_place_f compress_in_place;
        blake3_compress_xof_f compress_xof;
        blake3_hash_many_f hash_many;
@@ -65,30 +64,8 @@ typedef struct blake3_impl_ops {
        const char *name;
 } blake3_ops_t;
 
-/* Return selected BLAKE3 implementation ops */
-extern const blake3_ops_t *blake3_impl_get_ops(void);
-
-extern const blake3_ops_t blake3_generic_impl;
-
-#if defined(__aarch64__) || \
-       (defined(__x86_64) && defined(HAVE_SSE2)) || \
-       (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
-extern const blake3_ops_t blake3_sse2_impl;
-#endif
-
-#if defined(__aarch64__) || \
-       (defined(__x86_64) && defined(HAVE_SSE4_1)) || \
-       (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
-extern const blake3_ops_t blake3_sse41_impl;
-#endif
-
-#if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2)
-extern const blake3_ops_t blake3_avx2_impl;
-#endif
-
-#if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL)
-extern const blake3_ops_t blake3_avx512_impl;
-#endif
+/* return selected BLAKE3 implementation ops */
+extern const blake3_ops_t *blake3_get_ops(void);
 
 #if defined(__x86_64)
 #define        MAX_SIMD_DEGREE 16
diff --git a/module/icp/algs/blake3/blake3_x86-64.c b/module/icp/algs/blake3/blake3_x86-64.c
deleted file mode 100644 (file)
index 04a8b33..0000000
+++ /dev/null
@@ -1,248 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License (the "License").
- * You may not use this file except in compliance with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or https://opensource.org/licenses/CDDL-1.0.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-
-/*
- * Copyright (c) 2021-2022 Tino Reichardt <milky-zfs@mcmilk.de>
- */
-
-#include "blake3_impl.h"
-
-#if defined(__aarch64__) || \
-       (defined(__x86_64) && defined(HAVE_SSE2)) || \
-       (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
-
-extern void ASMABI zfs_blake3_compress_in_place_sse2(uint32_t cv[8],
-    const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
-    uint64_t counter, uint8_t flags);
-
-extern void ASMABI zfs_blake3_compress_xof_sse2(const uint32_t cv[8],
-    const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
-    uint64_t counter, uint8_t flags, uint8_t out[64]);
-
-extern void ASMABI zfs_blake3_hash_many_sse2(const uint8_t * const *inputs,
-    size_t num_inputs, size_t blocks, const uint32_t key[8],
-    uint64_t counter, boolean_t increment_counter, uint8_t flags,
-    uint8_t flags_start, uint8_t flags_end, uint8_t *out);
-
-static void blake3_compress_in_place_sse2(uint32_t cv[8],
-    const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
-    uint64_t counter, uint8_t flags) {
-       kfpu_begin();
-       zfs_blake3_compress_in_place_sse2(cv, block, block_len, counter,
-           flags);
-       kfpu_end();
-}
-
-static void blake3_compress_xof_sse2(const uint32_t cv[8],
-    const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
-    uint64_t counter, uint8_t flags, uint8_t out[64]) {
-       kfpu_begin();
-       zfs_blake3_compress_xof_sse2(cv, block, block_len, counter, flags,
-           out);
-       kfpu_end();
-}
-
-static void blake3_hash_many_sse2(const uint8_t * const *inputs,
-    size_t num_inputs, size_t blocks, const uint32_t key[8],
-    uint64_t counter, boolean_t increment_counter, uint8_t flags,
-    uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
-       kfpu_begin();
-       zfs_blake3_hash_many_sse2(inputs, num_inputs, blocks, key, counter,
-           increment_counter, flags, flags_start, flags_end, out);
-       kfpu_end();
-}
-
-static boolean_t blake3_is_sse2_supported(void)
-{
-#if defined(__x86_64)
-       return (kfpu_allowed() && zfs_sse2_available());
-#elif defined(__PPC64__) && defined(__linux__)
-       return (kfpu_allowed() && zfs_vsx_available());
-#else
-       return (kfpu_allowed());
-#endif
-}
-
-const blake3_ops_t blake3_sse2_impl = {
-       .compress_in_place = blake3_compress_in_place_sse2,
-       .compress_xof = blake3_compress_xof_sse2,
-       .hash_many = blake3_hash_many_sse2,
-       .is_supported = blake3_is_sse2_supported,
-       .degree = 4,
-       .name = "sse2"
-};
-#endif
-
-#if defined(__aarch64__) || \
-       (defined(__x86_64) && defined(HAVE_SSE2)) || \
-       (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
-
-extern void ASMABI zfs_blake3_compress_in_place_sse41(uint32_t cv[8],
-    const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
-    uint64_t counter, uint8_t flags);
-
-extern void ASMABI zfs_blake3_compress_xof_sse41(const uint32_t cv[8],
-    const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
-    uint64_t counter, uint8_t flags, uint8_t out[64]);
-
-extern void ASMABI zfs_blake3_hash_many_sse41(const uint8_t * const *inputs,
-    size_t num_inputs, size_t blocks, const uint32_t key[8],
-    uint64_t counter, boolean_t increment_counter, uint8_t flags,
-    uint8_t flags_start, uint8_t flags_end, uint8_t *out);
-
-static void blake3_compress_in_place_sse41(uint32_t cv[8],
-    const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
-    uint64_t counter, uint8_t flags) {
-       kfpu_begin();
-       zfs_blake3_compress_in_place_sse41(cv, block, block_len, counter,
-           flags);
-       kfpu_end();
-}
-
-static void blake3_compress_xof_sse41(const uint32_t cv[8],
-    const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
-    uint64_t counter, uint8_t flags, uint8_t out[64]) {
-       kfpu_begin();
-       zfs_blake3_compress_xof_sse41(cv, block, block_len, counter, flags,
-           out);
-       kfpu_end();
-}
-
-static void blake3_hash_many_sse41(const uint8_t * const *inputs,
-    size_t num_inputs, size_t blocks, const uint32_t key[8],
-    uint64_t counter, boolean_t increment_counter, uint8_t flags,
-    uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
-       kfpu_begin();
-       zfs_blake3_hash_many_sse41(inputs, num_inputs, blocks, key, counter,
-           increment_counter, flags, flags_start, flags_end, out);
-       kfpu_end();
-}
-
-static boolean_t blake3_is_sse41_supported(void)
-{
-#if defined(__x86_64)
-       return (kfpu_allowed() && zfs_sse4_1_available());
-#elif defined(__PPC64__) && defined(__linux__)
-       return (kfpu_allowed() && zfs_vsx_available());
-#else
-       return (kfpu_allowed());
-#endif
-}
-
-const blake3_ops_t blake3_sse41_impl = {
-       .compress_in_place = blake3_compress_in_place_sse41,
-       .compress_xof = blake3_compress_xof_sse41,
-       .hash_many = blake3_hash_many_sse41,
-       .is_supported = blake3_is_sse41_supported,
-       .degree = 4,
-       .name = "sse41"
-};
-#endif
-
-#if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2)
-extern void ASMABI zfs_blake3_hash_many_avx2(const uint8_t * const *inputs,
-    size_t num_inputs, size_t blocks, const uint32_t key[8],
-    uint64_t counter, boolean_t increment_counter, uint8_t flags,
-    uint8_t flags_start, uint8_t flags_end, uint8_t *out);
-
-static void blake3_hash_many_avx2(const uint8_t * const *inputs,
-    size_t num_inputs, size_t blocks, const uint32_t key[8],
-    uint64_t counter, boolean_t increment_counter, uint8_t flags,
-    uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
-       kfpu_begin();
-       zfs_blake3_hash_many_avx2(inputs, num_inputs, blocks, key, counter,
-           increment_counter, flags, flags_start, flags_end, out);
-       kfpu_end();
-}
-
-static boolean_t blake3_is_avx2_supported(void)
-{
-       return (kfpu_allowed() && zfs_sse4_1_available() &&
-           zfs_avx2_available());
-}
-
-const blake3_ops_t blake3_avx2_impl = {
-       .compress_in_place = blake3_compress_in_place_sse41,
-       .compress_xof = blake3_compress_xof_sse41,
-       .hash_many = blake3_hash_many_avx2,
-       .is_supported = blake3_is_avx2_supported,
-       .degree = 8,
-       .name = "avx2"
-};
-#endif
-
-#if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL)
-extern void ASMABI zfs_blake3_compress_in_place_avx512(uint32_t cv[8],
-    const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
-    uint64_t counter, uint8_t flags);
-
-extern void ASMABI zfs_blake3_compress_xof_avx512(const uint32_t cv[8],
-    const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
-    uint64_t counter, uint8_t flags, uint8_t out[64]);
-
-extern void ASMABI zfs_blake3_hash_many_avx512(const uint8_t * const *inputs,
-    size_t num_inputs, size_t blocks, const uint32_t key[8],
-    uint64_t counter, boolean_t increment_counter, uint8_t flags,
-    uint8_t flags_start, uint8_t flags_end, uint8_t *out);
-
-static void blake3_compress_in_place_avx512(uint32_t cv[8],
-    const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
-    uint64_t counter, uint8_t flags) {
-       kfpu_begin();
-       zfs_blake3_compress_in_place_avx512(cv, block, block_len, counter,
-           flags);
-       kfpu_end();
-}
-
-static void blake3_compress_xof_avx512(const uint32_t cv[8],
-    const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
-    uint64_t counter, uint8_t flags, uint8_t out[64]) {
-       kfpu_begin();
-       zfs_blake3_compress_xof_avx512(cv, block, block_len, counter, flags,
-           out);
-       kfpu_end();
-}
-
-static void blake3_hash_many_avx512(const uint8_t * const *inputs,
-    size_t num_inputs, size_t blocks, const uint32_t key[8],
-    uint64_t counter, boolean_t increment_counter, uint8_t flags,
-    uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
-       kfpu_begin();
-       zfs_blake3_hash_many_avx512(inputs, num_inputs, blocks, key, counter,
-           increment_counter, flags, flags_start, flags_end, out);
-       kfpu_end();
-}
-
-static boolean_t blake3_is_avx512_supported(void)
-{
-       return (kfpu_allowed() && zfs_avx512f_available() &&
-           zfs_avx512vl_available());
-}
-
-const blake3_ops_t blake3_avx512_impl = {
-       .compress_in_place = blake3_compress_in_place_avx512,
-       .compress_xof = blake3_compress_xof_avx512,
-       .hash_many = blake3_hash_many_avx512,
-       .is_supported = blake3_is_avx512_supported,
-       .degree = 16,
-       .name = "avx512"
-};
-#endif
index 648e1faaaeb79f8eb0a75ffd72fc35ff4a378898..aebe0363cc6e3d56ab022670ca600258bbc01998 100644 (file)
@@ -31,6 +31,8 @@
 #include <sys/time.h>
 #include <sys/blake3.h>
 
+#include <sys/zfs_impl.h>
+
 /*
  * set it to a define for debugging
  */
@@ -485,10 +487,14 @@ main(int argc, char *argv[])
        uint8_t buffer[102400];
        uint64_t cpu_mhz = 0;
        int id, i, j;
+       const zfs_impl_t *blake3 = zfs_impl_get_ops("blake3");
 
        if (argc == 2)
                cpu_mhz = atoi(argv[1]);
 
+       if (!blake3)
+               return (1);
+
        /* fill test message */
        for (i = 0, j = 0; i < sizeof (buffer); i++, j++) {
                if (j == 251)
@@ -497,9 +503,9 @@ main(int argc, char *argv[])
        }
 
        (void) printf("Running algorithm correctness tests:\n");
-       for (id = 0; id < blake3_impl_getcnt(); id++) {
-               blake3_impl_setid(id);
-               const char *name = blake3_impl_getname();
+       for (id = 0; id < blake3->getcnt(); id++) {
+               blake3->setid(id);
+               const char *name = blake3->getname();
                dprintf("Result for BLAKE3-%s:\n", name);
                for (i = 0; TestArray[i].hash; i++) {
                        blake3_test_t *cur = &TestArray[i];
@@ -565,9 +571,9 @@ main(int argc, char *argv[])
        } while (0)
 
        printf("Running performance tests (hashing 1024 MiB of data):\n");
-       for (id = 0; id < blake3_impl_getcnt(); id++) {
-               blake3_impl_setid(id);
-               const char *name = blake3_impl_getname();
+       for (id = 0; id < blake3->getcnt(); id++) {
+               blake3->setid(id);
+               const char *name = blake3->getname();
                BLAKE3_PERF_TEST(name, 256);
        }