git.proxmox.com Git - mirror_zfs.git/commitdiff
module/icp/asm-arm/sha2: enable non-SIMD asm kernels on armv5/6
author Shengqi Chen <harry-chen@outlook.com>
Tue, 5 Dec 2023 20:01:09 +0000 (04:01 +0800)
committer GitHub <noreply@github.com>
Tue, 5 Dec 2023 20:01:09 +0000 (12:01 -0800)
My merged pull request #15557 fixes compilation of the sha2 kernels on
ARMv5/6. However, the compiler guards only allow sha256/512_armv7_impl to
be used when __ARM_ARCH > 6. This patch enables these ASM kernels on all
ARM architectures. Some compiler guards are adjusted accordingly to
avoid unnecessarily compiling the SIMD (e.g., NEON, ARMv8-CE) kernels
on old architectures.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Shengqi Chen <harry-chen@outlook.com>
Closes #15623

module/icp/algs/sha2/sha256_impl.c
module/icp/algs/sha2/sha512_impl.c
module/icp/asm-arm/sha2/sha256-armv7.S
module/icp/asm-arm/sha2/sha512-armv7.S

index 01ce5cbd814c73d67d43a4b38673ce267bd18245..0f24319511d7271ba289c178b96b3d3ee0744790 100644 (file)
@@ -118,7 +118,15 @@ const sha256_ops_t sha256_shani_impl = {
 };
 #endif
 
-#elif defined(__aarch64__) || (defined(__arm__) && __ARM_ARCH > 6)
+#elif defined(__aarch64__) || defined(__arm__)
+extern void zfs_sha256_block_armv7(uint32_t s[8], const void *, size_t);
+const sha256_ops_t sha256_armv7_impl = {
+       .is_supported = sha2_is_supported,
+       .transform = zfs_sha256_block_armv7,
+       .name = "armv7"
+};
+
+#if __ARM_ARCH > 6
 static boolean_t sha256_have_neon(void)
 {
        return (kfpu_allowed() && zfs_neon_available());
@@ -129,13 +137,6 @@ static boolean_t sha256_have_armv8ce(void)
        return (kfpu_allowed() && zfs_sha256_available());
 }
 
-extern void zfs_sha256_block_armv7(uint32_t s[8], const void *, size_t);
-const sha256_ops_t sha256_armv7_impl = {
-       .is_supported = sha2_is_supported,
-       .transform = zfs_sha256_block_armv7,
-       .name = "armv7"
-};
-
 TF(zfs_sha256_block_neon, tf_sha256_neon);
 const sha256_ops_t sha256_neon_impl = {
        .is_supported = sha256_have_neon,
@@ -149,6 +150,7 @@ const sha256_ops_t sha256_armv8_impl = {
        .transform = tf_sha256_armv8ce,
        .name = "armv8-ce"
 };
+#endif
 
 #elif defined(__PPC64__)
 static boolean_t sha256_have_isa207(void)
@@ -192,11 +194,13 @@ static const sha256_ops_t *const sha256_impls[] = {
 #if defined(__x86_64) && defined(HAVE_SSE4_1)
        &sha256_shani_impl,
 #endif
-#if defined(__aarch64__) || (defined(__arm__) && __ARM_ARCH > 6)
+#if defined(__aarch64__) || defined(__arm__)
        &sha256_armv7_impl,
+#if __ARM_ARCH > 6
        &sha256_neon_impl,
        &sha256_armv8_impl,
 #endif
+#endif
 #if defined(__PPC64__)
        &sha256_ppc_impl,
        &sha256_power8_impl,
index 27b35a639a5418355f0af983c0de987381e34961..6291fbd77e361171c8e400ef175c440eee1f681b 100644 (file)
@@ -88,7 +88,7 @@ const sha512_ops_t sha512_avx2_impl = {
 };
 #endif
 
-#elif defined(__aarch64__)
+#elif defined(__aarch64__) || defined(__arm__)
 extern void zfs_sha512_block_armv7(uint64_t s[8], const void *, size_t);
 const sha512_ops_t sha512_armv7_impl = {
        .is_supported = sha2_is_supported,
@@ -96,6 +96,7 @@ const sha512_ops_t sha512_armv7_impl = {
        .name = "armv7"
 };
 
+#if defined(__aarch64__)
 static boolean_t sha512_have_armv8ce(void)
 {
        return (kfpu_allowed() && zfs_sha512_available());
@@ -107,15 +108,9 @@ const sha512_ops_t sha512_armv8_impl = {
        .transform = tf_sha512_armv8ce,
        .name = "armv8-ce"
 };
+#endif
 
-#elif defined(__arm__) && __ARM_ARCH > 6
-extern void zfs_sha512_block_armv7(uint64_t s[8], const void *, size_t);
-const sha512_ops_t sha512_armv7_impl = {
-       .is_supported = sha2_is_supported,
-       .transform = zfs_sha512_block_armv7,
-       .name = "armv7"
-};
-
+#if defined(__arm__) && __ARM_ARCH > 6
 static boolean_t sha512_have_neon(void)
 {
        return (kfpu_allowed() && zfs_neon_available());
@@ -127,6 +122,7 @@ const sha512_ops_t sha512_neon_impl = {
        .transform = tf_sha512_neon,
        .name = "neon"
 };
+#endif
 
 #elif defined(__PPC64__)
 TF(zfs_sha512_ppc, tf_sha512_ppc);
@@ -164,14 +160,15 @@ static const sha512_ops_t *const sha512_impls[] = {
 #if defined(__x86_64) && defined(HAVE_AVX2)
        &sha512_avx2_impl,
 #endif
-#if defined(__aarch64__)
+#if defined(__aarch64__) || defined(__arm__)
        &sha512_armv7_impl,
+#if defined(__aarch64__)
        &sha512_armv8_impl,
 #endif
 #if defined(__arm__) && __ARM_ARCH > 6
-       &sha512_armv7_impl,
        &sha512_neon_impl,
 #endif
+#endif
 #if defined(__PPC64__)
        &sha512_ppc_impl,
        &sha512_power8_impl,
index 190dbabc5ecb1f13a930390cbdc311cd21bc2b6b..3ae66626df31cccbbaef06d6b535e98224ffcc0c 100644 (file)
@@ -1837,6 +1837,7 @@ zfs_sha256_block_armv7:
 #endif
 .size  zfs_sha256_block_armv7,.-zfs_sha256_block_armv7
 
+#if __ARM_ARCH__ >= 7
 .arch  armv7-a
 .fpu   neon
 
@@ -1849,11 +1850,7 @@ zfs_sha256_block_neon:
        stmdb   sp!,{r4-r12,lr}
 
        sub     r11,sp,#16*4+16
-#if __ARM_ARCH__ >=7
        adr     r14,K256
-#else
-       ldr     r14,=K256
-#endif
        bic     r11,r11,#15             @ align for 128-bit stores
        mov     r12,sp
        mov     sp,r11                  @ alloca
@@ -2773,4 +2770,5 @@ zfs_sha256_block_armv8:
        bx      lr              @ bx lr
 .size  zfs_sha256_block_armv8,.-zfs_sha256_block_armv8
 
-#endif
+#endif // #if __ARM_ARCH__ >= 7
+#endif // #if defined(__arm__)
index 499cb6df9567d29fe88aededa41d1752309e09cd..66d7dd3cf0f7f4f3f96ffb7fbf6f812df82442e9 100644 (file)
@@ -493,6 +493,7 @@ zfs_sha512_block_armv7:
 #endif
 .size  zfs_sha512_block_armv7,.-zfs_sha512_block_armv7
 
+#if __ARM_ARCH__ >= 7
 .arch  armv7-a
 .fpu   neon
 
@@ -1822,4 +1823,5 @@ zfs_sha512_block_neon:
        VFP_ABI_POP
        bx      lr                              @ .word 0xe12fff1e
 .size  zfs_sha512_block_neon,.-zfs_sha512_block_neon
-#endif
+#endif // #if __ARM_ARCH__ >= 7
+#endif // #if defined(__arm__)