Add AVX512BW variant of fletcher

author Romain Dolbeau <romain.dolbeau@atos.net>

Wed, 30 Oct 2019 19:26:14 +0000 (20:26 +0100)

committer Brian Behlendorf <behlendorf1@llnl.gov>

Wed, 30 Oct 2019 19:26:14 +0000 (12:26 -0700)
author Romain Dolbeau <romain.dolbeau@atos.net>
Wed, 30 Oct 2019 19:26:14 +0000 (20:26 +0100)
committer Brian Behlendorf <behlendorf1@llnl.gov>
Wed, 30 Oct 2019 19:26:14 +0000 (12:26 -0700)
diff --git a/include/zfs_fletcher.h b/include/zfs_fletcher.h

index 5c7a61c56259073ba9d81fe3c2709abcf5b30d41..9e8b2cf7c7296e102ef4fb4d5cb50b396a5f36de 100644 (file)
--- a/include/zfs_fletcher.h
+++ b/include/zfs_fletcher.h
@@ -143,6 +143,10 @@ extern const fletcher_4_ops_t fletcher_4_avx2_ops;
  extern const fletcher_4_ops_t fletcher_4_avx512f_ops;
  #endif
  
+#if defined(__x86_64) && defined(HAVE_AVX512BW)
+extern const fletcher_4_ops_t fletcher_4_avx512bw_ops;
+#endif
+
  #if defined(__aarch64__)
  extern const fletcher_4_ops_t fletcher_4_aarch64_neon_ops;
  #endif
diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5

index c711f6de61bfd07e4c2ec4f57bac136ac3072815..1c773435c9fb1c529407b63e7594d7ba805ac896 100644 (file)
--- a/man/man5/zfs-module-parameters.5
+++ b/man/man5/zfs-module-parameters.5
@@ -1507,7 +1507,7 @@ Default value: \fB20\fR% of \fBzfs_dirty_data_max\fR.
  Select a fletcher 4 implementation.
  .sp
  Supported selectors are: \fBfastest\fR, \fBscalar\fR, \fBsse2\fR, \fBssse3\fR,
-\fBavx2\fR, \fBavx512f\fR, and \fBaarch64_neon\fR.
+\fBavx2\fR, \fBavx512f\fR, \fBavx512bw\fR, and \fBaarch64_neon\fR.
  All of the selectors except \fBfastest\fR and \fBscalar\fR require instruction
  set extensions to be available and will only appear if ZFS detects that they are
  present at runtime. If multiple implementations of fletcher 4 are available,
diff --git a/module/zcommon/zfs_fletcher.c b/module/zcommon/zfs_fletcher.c

index 1280ace31899bf7acd03d95c337f3b7464ce83f2..f955dc8d9e371889cd38ce5004eae97b05c2b341 100644 (file)
--- a/module/zcommon/zfs_fletcher.c
+++ b/module/zcommon/zfs_fletcher.c
@@ -184,6 +184,9 @@ static const fletcher_4_ops_t *fletcher_4_impls[] = {
  #if defined(__x86_64) && defined(HAVE_AVX512F)
         &fletcher_4_avx512f_ops,
  #endif
+#if defined(__x86_64) && defined(HAVE_AVX512BW)
+       &fletcher_4_avx512bw_ops,
+#endif
  #if defined(__aarch64__)
         &fletcher_4_aarch64_neon_ops,
  #endif
diff --git a/module/zcommon/zfs_fletcher_avx512.c b/module/zcommon/zfs_fletcher_avx512.c

index 43806f264e5ecbabf69c8479b2b31101d6f9601e..d33d2dc33f36cf62b3fec08aa59fca5825a46de2 100644 (file)
--- a/module/zcommon/zfs_fletcher_avx512.c
+++ b/module/zcommon/zfs_fletcher_avx512.c
@@ -171,4 +171,53 @@ const fletcher_4_ops_t fletcher_4_avx512f_ops = {
         .name = "avx512f"
  };
  
+#if defined(HAVE_AVX512BW)
+static void
+fletcher_4_avx512bw_byteswap(fletcher_4_ctx_t *ctx, const void *buf,
+    uint64_t size)
+{
+       static const zfs_fletcher_avx512_t mask = {
+               .v = { 0xFFFFFFFF00010203, 0xFFFFFFFF08090A0B,
+               0xFFFFFFFF00010203, 0xFFFFFFFF08090A0B,
+               0xFFFFFFFF00010203, 0xFFFFFFFF08090A0B,
+               0xFFFFFFFF00010203, 0xFFFFFFFF08090A0B }
+       };
+       const uint32_t *ip = buf;
+       const uint32_t *ipend = (uint32_t *)((uint8_t *)ip + size);
+
+       kfpu_begin();
+
+       FLETCHER_4_AVX512_RESTORE_CTX(ctx);
+
+       __asm("vmovdqu64 %0, %%zmm5" :: "m" (mask));
+
+       for (; ip < ipend; ip += 8) {
+               __asm("vpmovzxdq %0, %%zmm4"::"m" (*ip));
+
+               __asm("vpshufb %zmm5, %zmm4, %zmm4");
+
+               __asm("vpaddq %zmm4, %zmm0, %zmm0");
+               __asm("vpaddq %zmm0, %zmm1, %zmm1");
+               __asm("vpaddq %zmm1, %zmm2, %zmm2");
+               __asm("vpaddq %zmm2, %zmm3, %zmm3");
+       }
+
+       FLETCHER_4_AVX512_SAVE_CTX(ctx)
+
+       kfpu_end();
+}
+STACK_FRAME_NON_STANDARD(fletcher_4_avx512bw_byteswap);
+
+const fletcher_4_ops_t fletcher_4_avx512bw_ops = {
+       .init_native = fletcher_4_avx512f_init,
+       .fini_native = fletcher_4_avx512f_fini,
+       .compute_native = fletcher_4_avx512f_native,
+       .init_byteswap = fletcher_4_avx512f_init,
+       .fini_byteswap = fletcher_4_avx512f_fini,
+       .compute_byteswap = fletcher_4_avx512bw_byteswap,
+       .valid = fletcher_4_avx512f_valid,
+       .name = "avx512bw"
+};
+#endif
+
  #endif /* defined(__x86_64) && defined(HAVE_AVX512F) */
author	Romain Dolbeau <romain.dolbeau@atos.net>
	Wed, 30 Oct 2019 19:26:14 +0000 (20:26 +0100)
committer	Brian Behlendorf <behlendorf1@llnl.gov>
	Wed, 30 Oct 2019 19:26:14 +0000 (12:26 -0700)
include/zfs_fletcher.h		patch | blob | blame | history
man/man5/zfs-module-parameters.5		patch | blob | blame | history
module/zcommon/zfs_fletcher.c		patch | blob | blame | history
module/zcommon/zfs_fletcher_avx512.c		patch | blob | blame | history