ICP: gcm: Allocate hash subkey table separately
author     Attila Fülöp <attila@fueloep.org>
           Fri, 30 Oct 2020 22:24:21 +0000 (23:24 +0100)
committer  Brian Behlendorf <behlendorf1@llnl.gov>
           Fri, 30 Oct 2020 23:06:09 +0000 (16:06 -0700)
While evaluating other assembler implementations, it turned out that
the precomputed hash subkey tables vary in size depending on the
implementation, from 8*16 bytes (avx2/avx512) up to 48*16 bytes
(avx512-vaes).

To be able to handle the size differences later, allocate
`gcm_Htable` dynamically rather than having a fixed-size array, and
adapt the consumers.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Attila Fülöp <attila@fueloep.org>
Closes #11102
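
In outline, the change ties the hash subkey table's lifetime to the GCM
context. A minimal userland analogue of the allocate/zero/free pattern
the hunks below introduce (malloc/free stand in for the kernel's
kmem_alloc/kmem_free; the struct and helper names are illustrative, not
from the ZFS tree):

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

/* Just the two fields this commit adds to gcm_ctx_t. */
typedef struct htab_ctx {
	uint64_t *gcm_Htable;
	size_t gcm_htab_len;
} htab_ctx_t;

/* Init path (cf. gcm_init_ctx()/gmac_init_ctx() below). */
static int
htab_alloc(htab_ctx_t *ctx, size_t htab_len)
{
	if (htab_len == 0)
		return (-1);	/* CRYPTO_MECHANISM_PARAM_INVALID */
	ctx->gcm_htab_len = htab_len;
	ctx->gcm_Htable = malloc(htab_len);
	if (ctx->gcm_Htable == NULL)
		return (-1);	/* CRYPTO_HOST_MEMORY */
	return (0);
}

/* Teardown (cf. crypto_free_mode_ctx() and the aes.c error paths). */
static void
htab_free(htab_ctx_t *ctx)
{
	if (ctx->gcm_Htable != NULL) {
		/* Zero before freeing: the table is key-derived material. */
		memset(ctx->gcm_Htable, 0, ctx->gcm_htab_len);
		free(ctx->gcm_Htable);
		ctx->gcm_Htable = NULL;
	}
}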

module/icp/algs/modes/gcm.c
module/icp/algs/modes/modes.c
module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S
module/icp/include/modes/modes.h
module/icp/io/aes.c

index 5553c55e11cd493c2faa3eca355e7ee93c2e64f5..23686c59e8ce1e1797d8f608f059b898a952e05a 100644 (file)
@@ -59,10 +59,12 @@ boolean_t gcm_avx_can_use_movbe = B_FALSE;
 static boolean_t gcm_use_avx = B_FALSE;
 #define        GCM_IMPL_USE_AVX        (*(volatile boolean_t *)&gcm_use_avx)
 
+extern boolean_t atomic_toggle_boolean_nv(volatile boolean_t *);
+
 static inline boolean_t gcm_avx_will_work(void);
 static inline void gcm_set_avx(boolean_t);
 static inline boolean_t gcm_toggle_avx(void);
-extern boolean_t atomic_toggle_boolean_nv(volatile boolean_t *);
+static inline size_t gcm_simd_get_htab_size(boolean_t);
 
 static int gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *, char *, size_t,
     crypto_data_t *, size_t);
@@ -629,6 +631,21 @@ gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
                            (volatile boolean_t *)&gcm_avx_can_use_movbe);
                }
        }
+       /* Allocate Htab memory as needed. */
+       if (gcm_ctx->gcm_use_avx == B_TRUE) {
+               size_t htab_len = gcm_simd_get_htab_size(gcm_ctx->gcm_use_avx);
+
+               if (htab_len == 0) {
+                       return (CRYPTO_MECHANISM_PARAM_INVALID);
+               }
+               gcm_ctx->gcm_htab_len = htab_len;
+               gcm_ctx->gcm_Htable =
+                   (uint64_t *)kmem_alloc(htab_len, gcm_ctx->gcm_kmflag);
+
+               if (gcm_ctx->gcm_Htable == NULL) {
+                       return (CRYPTO_HOST_MEMORY);
+               }
+       }
        /* Avx and non avx context initialization differs from here on. */
        if (gcm_ctx->gcm_use_avx == B_FALSE) {
 #endif /* ifdef CAN_USE_GCM_ASM */
@@ -689,6 +706,22 @@ gmac_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
        if (ks->ops->needs_byteswap == B_TRUE) {
                gcm_ctx->gcm_use_avx = B_FALSE;
        }
+       /* Allocate Htab memory as needed. */
+       if (gcm_ctx->gcm_use_avx == B_TRUE) {
+               size_t htab_len = gcm_simd_get_htab_size(gcm_ctx->gcm_use_avx);
+
+               if (htab_len == 0) {
+                       return (CRYPTO_MECHANISM_PARAM_INVALID);
+               }
+               gcm_ctx->gcm_htab_len = htab_len;
+               gcm_ctx->gcm_Htable =
+                   (uint64_t *)kmem_alloc(htab_len, gcm_ctx->gcm_kmflag);
+
+               if (gcm_ctx->gcm_Htable == NULL) {
+                       return (CRYPTO_HOST_MEMORY);
+               }
+       }
+
        /* Avx and non avx context initialization differs from here on. */
        if (gcm_ctx->gcm_use_avx == B_FALSE) {
 #endif /* ifdef CAN_USE_GCM_ASM */
@@ -1018,7 +1051,7 @@ MODULE_PARM_DESC(icp_gcm_impl, "Select gcm implementation.");
 /* Clear the FPU registers since they hold sensitive internal state. */
 #define        clear_fpu_regs() clear_fpu_regs_avx()
 #define        GHASH_AVX(ctx, in, len) \
-    gcm_ghash_avx((ctx)->gcm_ghash, (const uint64_t (*)[2])(ctx)->gcm_Htable, \
+    gcm_ghash_avx((ctx)->gcm_ghash, (const uint64_t *)(ctx)->gcm_Htable, \
     in, len)
 
 #define        gcm_incr_counter_block(ctx) gcm_incr_counter_block_by(ctx, 1)
@@ -1036,8 +1069,8 @@ extern void gcm_xor_avx(const uint8_t *src, uint8_t *dst);
 extern void aes_encrypt_intel(const uint32_t rk[], int nr,
     const uint32_t pt[4], uint32_t ct[4]);
 
-extern void gcm_init_htab_avx(uint64_t Htable[16][2], const uint64_t H[2]);
-extern void gcm_ghash_avx(uint64_t ghash[2], const uint64_t Htable[16][2],
+extern void gcm_init_htab_avx(uint64_t *Htable, const uint64_t H[2]);
+extern void gcm_ghash_avx(uint64_t ghash[2], const uint64_t *Htable,
     const uint8_t *in, size_t len);
 
 extern size_t aesni_gcm_encrypt(const uint8_t *, uint8_t *, size_t,
@@ -1073,6 +1106,18 @@ gcm_toggle_avx(void)
        }
 }
 
+static inline size_t
+gcm_simd_get_htab_size(boolean_t simd_mode)
+{
+       switch (simd_mode) {
+       case B_TRUE:
+               return (2 * 6 * 2 * sizeof (uint64_t));
+
+       default:
+               return (0);
+       }
+}
+
 /*
  * Clear sensitive data in the context.
  *
@@ -1088,7 +1133,6 @@ gcm_clear_ctx(gcm_ctx_t *ctx)
 {
        bzero(ctx->gcm_remainder, sizeof (ctx->gcm_remainder));
        bzero(ctx->gcm_H, sizeof (ctx->gcm_H));
-       bzero(ctx->gcm_Htable, sizeof (ctx->gcm_Htable));
        bzero(ctx->gcm_J0, sizeof (ctx->gcm_J0));
        bzero(ctx->gcm_tmp, sizeof (ctx->gcm_tmp));
 }
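
For reference, the size returned by gcm_simd_get_htab_size() in the avx
case works out to exactly the footprint of the fixed array removed from
modes.h further down: twelve 16-byte hash subkey entries. A standalone
sanity check (not part of the patch):

#include <assert.h>
#include <stdint.h>

int
main(void)
{
	/* 2 * 6 * 2 * 8 == 192 == 12 entries of 16 bytes each. */
	assert(2 * 6 * 2 * sizeof (uint64_t) == sizeof (uint64_t [12][2]));
	return (0);
}
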
index f07876a478e24a12cd90b6c2bf278ddfdc75a073..faae9722bd0495300445b9e11948fb532672dbe4 100644 (file)
@@ -152,6 +152,14 @@ crypto_free_mode_ctx(void *ctx)
                        vmem_free(((gcm_ctx_t *)ctx)->gcm_pt_buf,
                            ((gcm_ctx_t *)ctx)->gcm_pt_buf_len);
 
+#ifdef CAN_USE_GCM_ASM
+               if (((gcm_ctx_t *)ctx)->gcm_Htable != NULL) {
+                       gcm_ctx_t *gcm_ctx = (gcm_ctx_t *)ctx;
+                       bzero(gcm_ctx->gcm_Htable, gcm_ctx->gcm_htab_len);
+                       kmem_free(gcm_ctx->gcm_Htable, gcm_ctx->gcm_htab_len);
+               }
+#endif
+
                kmem_free(ctx, sizeof (gcm_ctx_t));
        }
 }
index 4e05255728f707455738b658ac15de34836ba1ea..dc71ae2c1c89f4e484d5518d8a40b53a3c7ecd96 100644 (file)
@@ -718,6 +718,8 @@ aesni_gcm_decrypt:
 .cfi_offset    %r14,-48
        pushq   %r15
 .cfi_offset    %r15,-56
+       pushq   %r9
+.cfi_offset    %r9,-64
        vzeroupper
 
        vmovdqu (%r8),%xmm1
@@ -730,7 +732,8 @@ aesni_gcm_decrypt:
        andq    $-128,%rsp
        vmovdqu (%r11),%xmm0
        leaq    128(%rcx),%rcx
-       leaq    32+32(%r9),%r9
+       movq    32(%r9),%r9
+       leaq    32(%r9),%r9
        movl    504-128(%rcx),%ebp      // ICP has a larger offset for rounds.
        vpshufb %xmm0,%xmm8,%xmm8
 
@@ -786,7 +789,9 @@ aesni_gcm_decrypt:
        vmovups %xmm14,-16(%rsi)
 
        vpshufb (%r11),%xmm8,%xmm8
-       vmovdqu %xmm8,-64(%r9)
+       movq    -56(%rax),%r9
+.cfi_restore   %r9
+       vmovdqu %xmm8,(%r9)
 
        vzeroupper
        movq    -48(%rax),%r15
@@ -924,6 +929,8 @@ aesni_gcm_encrypt:
 .cfi_offset    %r14,-48
        pushq   %r15
 .cfi_offset    %r15,-56
+       pushq   %r9
+.cfi_offset    %r9,-64
        vzeroupper
 
        vmovdqu (%r8),%xmm1
@@ -966,7 +973,8 @@ aesni_gcm_encrypt:
        call    _aesni_ctr32_6x
 
        vmovdqu (%r9),%xmm8
-       leaq    32+32(%r9),%r9
+       movq    32(%r9),%r9
+       leaq    32(%r9),%r9
        subq    $12,%rdx
        movq    $192,%r10
        vpshufb %xmm0,%xmm8,%xmm8
@@ -1157,7 +1165,9 @@ aesni_gcm_encrypt:
        vpxor   %xmm7,%xmm2,%xmm2
        vpxor   %xmm2,%xmm8,%xmm8
        vpshufb (%r11),%xmm8,%xmm8
-       vmovdqu %xmm8,-64(%r9)
+       movq    -56(%rax),%r9
+.cfi_restore   %r9
+       vmovdqu %xmm8,(%r9)
 
        vzeroupper
        movq    -48(%rax),%r15
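
The assembly change is the subtle part. Before the patch, %r9 pointed
at gcm_ghash inside the context and the table lived inline 32 bytes
further in, so `leaq 32+32(%r9),%r9` produced the biased table address
directly. Now the prologue pushes %r9, the code loads the heap pointer
stored at that same +32 offset (`movq 32(%r9),%r9`) and applies the
32-byte bias to the loaded pointer, and at the end it reloads the saved
%r9 from the stack to write the final GHASH value back to gcm_ghash. A
trimmed layout sketch of the invariant this bakes in (hypothetical
struct; the real gcm_ctx_t in modes.h has more fields around these):

#include <stddef.h>
#include <stdint.h>

/* Trimmed to the three fields the assembly cares about. */
typedef struct gcm_ctx_sketch {
	uint64_t gcm_ghash[2];	/* running GHASH value, %r9 on entry */
	uint64_t gcm_H[2];	/* hash subkey H */
	uint64_t *gcm_Htable;	/* was: uint64_t gcm_Htable[12][2] */
} gcm_ctx_sketch_t;

/* The +32 offset hard coded in aesni-gcm-x86_64.S (movq 32(%r9),%r9). */
_Static_assert(offsetof(gcm_ctx_sketch_t, gcm_Htable) -
    offsetof(gcm_ctx_sketch_t, gcm_ghash) == 32,
    "gcm_Htable must stay 32 bytes past gcm_ghash");
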
index 57a211ccf1bf9f2019a13ed15f13c23f6f959ddc..ab71197542eb851a4db323d59a389b05c47e889d 100644 (file)
@@ -219,14 +219,14 @@ typedef struct gcm_ctx {
        size_t gcm_pt_buf_len;
        uint32_t gcm_tmp[4];
        /*
-        * The relative positions of gcm_ghash, gcm_H and pre-computed
-        * gcm_Htable are hard coded in aesni-gcm-x86_64.S and ghash-x86_64.S,
-        * so please don't change (or adjust accordingly).
+        * The offset of gcm_Htable relative to gcm_ghash, (32), is hard coded
+        * in aesni-gcm-x86_64.S, so please don't change (or adjust there).
         */
        uint64_t gcm_ghash[2];
        uint64_t gcm_H[2];
 #ifdef CAN_USE_GCM_ASM
-       uint64_t gcm_Htable[12][2];
+       uint64_t *gcm_Htable;
+       size_t gcm_htab_len;
 #endif
        uint64_t gcm_J0[2];
        uint64_t gcm_len_a_len_c[2];
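
With the pointer + length pair in place, supporting the larger tables
named in the commit message becomes purely a sizing question. A
hypothetical sketch of where gcm_simd_get_htab_size() could go once
simd_mode outgrows boolean_t (the enum and the non-avx sizes are
illustrative, not part of this commit):

#include <stddef.h>

typedef enum {
	GCM_SIMD_NONE,
	GCM_SIMD_AVX,		/* this commit: 12 entries of 16 bytes */
	GCM_SIMD_AVX2,		/* illustrative */
	GCM_SIMD_AVX512_VAES	/* illustrative */
} gcm_simd_impl_t;

static inline size_t
gcm_simd_get_htab_size(gcm_simd_impl_t simd_mode)
{
	switch (simd_mode) {
	case GCM_SIMD_AVX:
		return (12 * 16);
	case GCM_SIMD_AVX2:
		return (8 * 16);	/* per the commit message */
	case GCM_SIMD_AVX512_VAES:
		return (48 * 16);	/* per the commit message */
	default:
		return (0);
	}
}
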
index 96fb6bb1af30e08b803ed7407608a2c2fd0c9230..e540af4473f730de16bbe0b01ef12d156f02a412 100644 (file)
@@ -1051,6 +1051,16 @@ out:
                bzero(aes_ctx.ac_keysched, aes_ctx.ac_keysched_len);
                kmem_free(aes_ctx.ac_keysched, aes_ctx.ac_keysched_len);
        }
+#ifdef CAN_USE_GCM_ASM
+       if (aes_ctx.ac_flags & (GCM_MODE|GMAC_MODE) &&
+           ((gcm_ctx_t *)&aes_ctx)->gcm_Htable != NULL) {
+
+               gcm_ctx_t *ctx = (gcm_ctx_t *)&aes_ctx;
+
+               bzero(ctx->gcm_Htable, ctx->gcm_htab_len);
+               kmem_free(ctx->gcm_Htable, ctx->gcm_htab_len);
+       }
+#endif
 
        return (ret);
 }
@@ -1209,6 +1219,14 @@ out:
                        vmem_free(((gcm_ctx_t *)&aes_ctx)->gcm_pt_buf,
                            ((gcm_ctx_t *)&aes_ctx)->gcm_pt_buf_len);
                }
+#ifdef CAN_USE_GCM_ASM
+               if (((gcm_ctx_t *)&aes_ctx)->gcm_Htable != NULL) {
+                       gcm_ctx_t *ctx = (gcm_ctx_t *)&aes_ctx;
+
+                       bzero(ctx->gcm_Htable, ctx->gcm_htab_len);
+                       kmem_free(ctx->gcm_Htable, ctx->gcm_htab_len);
+               }
+#endif
        }
 
        return (ret);