/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Copyright 2013 Saso Kiselkov. All rights reserved.
 */

/*
 * The basic framework for this code came from the reference
 * implementation for MD5. That implementation is Copyright (C)
 * 1991-2, RSA Data Security, Inc. Created 1991. All rights reserved.
 *
 * License to copy and use this software is granted provided that it
 * is identified as the "RSA Data Security, Inc. MD5 Message-Digest
 * Algorithm" in all material mentioning or referencing this software
 * or this function.
 *
 * License is also granted to make and use derivative works provided
 * that such works are identified as "derived from the RSA Data
 * Security, Inc. MD5 Message-Digest Algorithm" in all material
 * mentioning or referencing the derived work.
 *
 * RSA Data Security, Inc. makes no representations concerning either
 * the merchantability of this software or the suitability of this
 * software for any particular purpose. It is provided "as is"
 * without express or implied warranty of any kind.
 *
 * These notices must be retained in any copies of any part of this
 * documentation and/or software.
 *
 * NOTE: Cleaned-up and optimized version of SHA2, based on the FIPS 180-2
 * standard, available at
 * http://csrc.nist.gov/publications/fips/fips180-2/fips180-2.pdf
 * Not as fast as one would like -- further optimizations are encouraged
 * and appreciated.
 */

#include <sys/zfs_context.h>
#define	_SHA2_IMPL
#include <sys/sha2.h>
#include <sha2/sha2_consts.h>

#define	_RESTRICT_KYWD

#ifdef _LITTLE_ENDIAN
#include <sys/byteorder.h>
#define	HAVE_HTONL
#endif
#include <sys/isa_defs.h>	/* for _ILP32 */

static void Encode(uint8_t *, uint32_t *, size_t);
static void Encode64(uint8_t *, uint64_t *, size_t);

/* userspace only supports the generic version */
#if defined(__amd64) && defined(_KERNEL)
#define	SHA512Transform(ctx, in)	SHA512TransformBlocks((ctx), (in), 1)
#define	SHA256Transform(ctx, in)	SHA256TransformBlocks((ctx), (in), 1)

void SHA512TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num);
void SHA256TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num);

#else
static void SHA256Transform(SHA2_CTX *, const uint8_t *);
static void SHA512Transform(SHA2_CTX *, const uint8_t *);
#endif	/* __amd64 && _KERNEL */

static uint8_t PADDING[128] = { 0x80, /* all zeros */ };
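
/*
 * FIPS 180-2 padding: a single 0x80 byte followed by zeros; SHA2Final()
 * appends the big-endian message bit count after however many of these
 * pad bytes are needed.
 */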

/*
 * The low-level checksum routines use a lot of stack space. On systems where
 * small stacks are enforced (like 32-bit kernel builds), insert compiler memory
 * barriers to reduce stack frame size. This can reduce the SHA512Transform()
 * stack frame usage from 3k to <1k on ARM32, for example.
 */
#if defined(_ILP32) || defined(__powerpc)	/* small stack */
#define	SMALL_STACK_MEMORY_BARRIER	asm volatile("": : :"memory");
#else
#define	SMALL_STACK_MEMORY_BARRIER
#endif
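
/*
 * The barrier is compiler-only: the empty asm emits no instruction, but its
 * "memory" clobber keeps the optimizer from stretching value lifetimes
 * across it, which in practice bounds the spill area of the unrolled
 * transform loops below.
 */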

/* Ch and Maj are the basic SHA2 functions. */
#define	Ch(b, c, d)	(((b) & (c)) ^ ((~(b)) & (d)))
#define	Maj(b, c, d)	(((b) & (c)) ^ ((b) & (d)) ^ ((c) & (d)))

/* Rotates x right n bits. */
#define	ROTR(x, n)	\
	(((x) >> (n)) | ((x) << ((sizeof (x) * NBBY) - (n))))

/* Shift x right n bits */
#define	SHR(x, n)	((x) >> (n))

/* SHA256 Functions */
#define	BIGSIGMA0_256(x)	(ROTR((x), 2) ^ ROTR((x), 13) ^ ROTR((x), 22))
#define	BIGSIGMA1_256(x)	(ROTR((x), 6) ^ ROTR((x), 11) ^ ROTR((x), 25))
#define	SIGMA0_256(x)		(ROTR((x), 7) ^ ROTR((x), 18) ^ SHR((x), 3))
#define	SIGMA1_256(x)		(ROTR((x), 17) ^ ROTR((x), 19) ^ SHR((x), 10))

#define	SHA256ROUND(a, b, c, d, e, f, g, h, i, w) \
	T1 = h + BIGSIGMA1_256(e) + Ch(e, f, g) + SHA256_CONST(i) + w; \
	d += T1; \
	T2 = BIGSIGMA0_256(a) + Maj(a, b, c); \
	h = T1 + T2

/* SHA384/512 Functions */
#define	BIGSIGMA0(x)	(ROTR((x), 28) ^ ROTR((x), 34) ^ ROTR((x), 39))
#define	BIGSIGMA1(x)	(ROTR((x), 14) ^ ROTR((x), 18) ^ ROTR((x), 41))
#define	SIGMA0(x)	(ROTR((x), 1) ^ ROTR((x), 8) ^ SHR((x), 7))
#define	SIGMA1(x)	(ROTR((x), 19) ^ ROTR((x), 61) ^ SHR((x), 6))
#define	SHA512ROUND(a, b, c, d, e, f, g, h, i, w) \
	T1 = h + BIGSIGMA1(e) + Ch(e, f, g) + SHA512_CONST(i) + w; \
	d += T1; \
	T2 = BIGSIGMA0(a) + Maj(a, b, c); \
	h = T1 + T2; \
	SMALL_STACK_MEMORY_BARRIER;
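
/*
 * Note that the unrolled transforms below never shift the working variables
 * a..h between rounds; successive SHA256ROUND()/SHA512ROUND() invocations
 * simply rotate the argument order, so the "shift" is resolved at compile
 * time and costs no data movement at run time.
 */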

/*
 * sparc optimization:
 *
 * on the sparc, we can load big endian 32-bit data easily. note that
 * special care must be taken to ensure the address is 32-bit aligned.
 * in the interest of speed, we don't check to make sure, since
 * careful programming can guarantee this for us.
 */

#if defined(_BIG_ENDIAN)
#define	LOAD_BIG_32(addr)	(*(uint32_t *)(addr))
#define	LOAD_BIG_64(addr)	(*(uint64_t *)(addr))

#elif defined(HAVE_HTONL)
#define	LOAD_BIG_32(addr)	htonl(*((uint32_t *)(addr)))
#define	LOAD_BIG_64(addr)	htonll(*((uint64_t *)(addr)))

#else
/* little endian -- will work on big endian, but slowly */
#define	LOAD_BIG_32(addr) \
	(((addr)[0] << 24) | ((addr)[1] << 16) | ((addr)[2] << 8) | (addr)[3])
#define	LOAD_BIG_64(addr) \
	(((uint64_t)(addr)[0] << 56) | ((uint64_t)(addr)[1] << 48) | \
	((uint64_t)(addr)[2] << 40) | ((uint64_t)(addr)[3] << 32) | \
	((uint64_t)(addr)[4] << 24) | ((uint64_t)(addr)[5] << 16) | \
	((uint64_t)(addr)[6] << 8) | (uint64_t)(addr)[7])
#endif	/* _BIG_ENDIAN */
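
/*
 * Example: for the byte sequence { 0x01, 0x02, 0x03, 0x04 }, each of the
 * LOAD_BIG_32() variants above yields 0x01020304, whatever the host's
 * native byte order.
 */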


#if !defined(__amd64) || !defined(_KERNEL)
/* SHA256 Transform */

static void
SHA256Transform(SHA2_CTX *ctx, const uint8_t *blk)
{
	uint32_t a = ctx->state.s32[0];
	uint32_t b = ctx->state.s32[1];
	uint32_t c = ctx->state.s32[2];
	uint32_t d = ctx->state.s32[3];
	uint32_t e = ctx->state.s32[4];
	uint32_t f = ctx->state.s32[5];
	uint32_t g = ctx->state.s32[6];
	uint32_t h = ctx->state.s32[7];

	uint32_t w0, w1, w2, w3, w4, w5, w6, w7;
	uint32_t w8, w9, w10, w11, w12, w13, w14, w15;
	uint32_t T1, T2;

#if defined(__sparc)
	static const uint32_t sha256_consts[] = {
		SHA256_CONST_0, SHA256_CONST_1, SHA256_CONST_2,
		SHA256_CONST_3, SHA256_CONST_4, SHA256_CONST_5,
		SHA256_CONST_6, SHA256_CONST_7, SHA256_CONST_8,
		SHA256_CONST_9, SHA256_CONST_10, SHA256_CONST_11,
		SHA256_CONST_12, SHA256_CONST_13, SHA256_CONST_14,
		SHA256_CONST_15, SHA256_CONST_16, SHA256_CONST_17,
		SHA256_CONST_18, SHA256_CONST_19, SHA256_CONST_20,
		SHA256_CONST_21, SHA256_CONST_22, SHA256_CONST_23,
		SHA256_CONST_24, SHA256_CONST_25, SHA256_CONST_26,
		SHA256_CONST_27, SHA256_CONST_28, SHA256_CONST_29,
		SHA256_CONST_30, SHA256_CONST_31, SHA256_CONST_32,
		SHA256_CONST_33, SHA256_CONST_34, SHA256_CONST_35,
		SHA256_CONST_36, SHA256_CONST_37, SHA256_CONST_38,
		SHA256_CONST_39, SHA256_CONST_40, SHA256_CONST_41,
		SHA256_CONST_42, SHA256_CONST_43, SHA256_CONST_44,
		SHA256_CONST_45, SHA256_CONST_46, SHA256_CONST_47,
		SHA256_CONST_48, SHA256_CONST_49, SHA256_CONST_50,
		SHA256_CONST_51, SHA256_CONST_52, SHA256_CONST_53,
		SHA256_CONST_54, SHA256_CONST_55, SHA256_CONST_56,
		SHA256_CONST_57, SHA256_CONST_58, SHA256_CONST_59,
		SHA256_CONST_60, SHA256_CONST_61, SHA256_CONST_62,
		SHA256_CONST_63
	};
#endif	/* __sparc */

	if ((uintptr_t)blk & 0x3) {	/* not 4-byte aligned? */
		bcopy(blk, ctx->buf_un.buf32, sizeof (ctx->buf_un.buf32));
		blk = (uint8_t *)ctx->buf_un.buf32;
	}

	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w0 = LOAD_BIG_32(blk + 4 * 0);
	SHA256ROUND(a, b, c, d, e, f, g, h, 0, w0);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w1 = LOAD_BIG_32(blk + 4 * 1);
	SHA256ROUND(h, a, b, c, d, e, f, g, 1, w1);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w2 = LOAD_BIG_32(blk + 4 * 2);
	SHA256ROUND(g, h, a, b, c, d, e, f, 2, w2);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w3 = LOAD_BIG_32(blk + 4 * 3);
	SHA256ROUND(f, g, h, a, b, c, d, e, 3, w3);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w4 = LOAD_BIG_32(blk + 4 * 4);
	SHA256ROUND(e, f, g, h, a, b, c, d, 4, w4);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w5 = LOAD_BIG_32(blk + 4 * 5);
	SHA256ROUND(d, e, f, g, h, a, b, c, 5, w5);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w6 = LOAD_BIG_32(blk + 4 * 6);
	SHA256ROUND(c, d, e, f, g, h, a, b, 6, w6);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w7 = LOAD_BIG_32(blk + 4 * 7);
	SHA256ROUND(b, c, d, e, f, g, h, a, 7, w7);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w8 = LOAD_BIG_32(blk + 4 * 8);
	SHA256ROUND(a, b, c, d, e, f, g, h, 8, w8);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w9 = LOAD_BIG_32(blk + 4 * 9);
	SHA256ROUND(h, a, b, c, d, e, f, g, 9, w9);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w10 = LOAD_BIG_32(blk + 4 * 10);
	SHA256ROUND(g, h, a, b, c, d, e, f, 10, w10);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w11 = LOAD_BIG_32(blk + 4 * 11);
	SHA256ROUND(f, g, h, a, b, c, d, e, 11, w11);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w12 = LOAD_BIG_32(blk + 4 * 12);
	SHA256ROUND(e, f, g, h, a, b, c, d, 12, w12);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w13 = LOAD_BIG_32(blk + 4 * 13);
	SHA256ROUND(d, e, f, g, h, a, b, c, 13, w13);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w14 = LOAD_BIG_32(blk + 4 * 14);
	SHA256ROUND(c, d, e, f, g, h, a, b, 14, w14);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w15 = LOAD_BIG_32(blk + 4 * 15);
	SHA256ROUND(b, c, d, e, f, g, h, a, 15, w15);

	w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;
	SHA256ROUND(a, b, c, d, e, f, g, h, 16, w0);
	w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;
	SHA256ROUND(h, a, b, c, d, e, f, g, 17, w1);
	w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;
	SHA256ROUND(g, h, a, b, c, d, e, f, 18, w2);
	w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;
	SHA256ROUND(f, g, h, a, b, c, d, e, 19, w3);
	w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;
	SHA256ROUND(e, f, g, h, a, b, c, d, 20, w4);
	w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;
	SHA256ROUND(d, e, f, g, h, a, b, c, 21, w5);
	w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;
	SHA256ROUND(c, d, e, f, g, h, a, b, 22, w6);
	w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;
	SHA256ROUND(b, c, d, e, f, g, h, a, 23, w7);
	w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;
	SHA256ROUND(a, b, c, d, e, f, g, h, 24, w8);
	w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;
	SHA256ROUND(h, a, b, c, d, e, f, g, 25, w9);
	w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;
	SHA256ROUND(g, h, a, b, c, d, e, f, 26, w10);
	w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;
	SHA256ROUND(f, g, h, a, b, c, d, e, 27, w11);
	w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;
	SHA256ROUND(e, f, g, h, a, b, c, d, 28, w12);
	w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;
	SHA256ROUND(d, e, f, g, h, a, b, c, 29, w13);
	w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;
	SHA256ROUND(c, d, e, f, g, h, a, b, 30, w14);
	w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;
	SHA256ROUND(b, c, d, e, f, g, h, a, 31, w15);

	w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;
	SHA256ROUND(a, b, c, d, e, f, g, h, 32, w0);
	w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;
	SHA256ROUND(h, a, b, c, d, e, f, g, 33, w1);
	w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;
	SHA256ROUND(g, h, a, b, c, d, e, f, 34, w2);
	w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;
	SHA256ROUND(f, g, h, a, b, c, d, e, 35, w3);
	w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;
	SHA256ROUND(e, f, g, h, a, b, c, d, 36, w4);
	w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;
	SHA256ROUND(d, e, f, g, h, a, b, c, 37, w5);
	w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;
	SHA256ROUND(c, d, e, f, g, h, a, b, 38, w6);
	w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;
	SHA256ROUND(b, c, d, e, f, g, h, a, 39, w7);
	w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;
	SHA256ROUND(a, b, c, d, e, f, g, h, 40, w8);
	w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;
	SHA256ROUND(h, a, b, c, d, e, f, g, 41, w9);
	w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;
	SHA256ROUND(g, h, a, b, c, d, e, f, 42, w10);
	w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;
	SHA256ROUND(f, g, h, a, b, c, d, e, 43, w11);
	w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;
	SHA256ROUND(e, f, g, h, a, b, c, d, 44, w12);
	w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;
	SHA256ROUND(d, e, f, g, h, a, b, c, 45, w13);
	w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;
	SHA256ROUND(c, d, e, f, g, h, a, b, 46, w14);
	w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;
	SHA256ROUND(b, c, d, e, f, g, h, a, 47, w15);

	w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;
	SHA256ROUND(a, b, c, d, e, f, g, h, 48, w0);
	w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;
	SHA256ROUND(h, a, b, c, d, e, f, g, 49, w1);
	w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;
	SHA256ROUND(g, h, a, b, c, d, e, f, 50, w2);
	w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;
	SHA256ROUND(f, g, h, a, b, c, d, e, 51, w3);
	w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;
	SHA256ROUND(e, f, g, h, a, b, c, d, 52, w4);
	w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;
	SHA256ROUND(d, e, f, g, h, a, b, c, 53, w5);
	w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;
	SHA256ROUND(c, d, e, f, g, h, a, b, 54, w6);
	w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;
	SHA256ROUND(b, c, d, e, f, g, h, a, 55, w7);
	w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;
	SHA256ROUND(a, b, c, d, e, f, g, h, 56, w8);
	w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;
	SHA256ROUND(h, a, b, c, d, e, f, g, 57, w9);
	w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;
	SHA256ROUND(g, h, a, b, c, d, e, f, 58, w10);
	w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;
	SHA256ROUND(f, g, h, a, b, c, d, e, 59, w11);
	w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;
	SHA256ROUND(e, f, g, h, a, b, c, d, 60, w12);
	w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;
	SHA256ROUND(d, e, f, g, h, a, b, c, 61, w13);
	w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;
	SHA256ROUND(c, d, e, f, g, h, a, b, 62, w14);
	w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;
	SHA256ROUND(b, c, d, e, f, g, h, a, 63, w15);

	ctx->state.s32[0] += a;
	ctx->state.s32[1] += b;
	ctx->state.s32[2] += c;
	ctx->state.s32[3] += d;
	ctx->state.s32[4] += e;
	ctx->state.s32[5] += f;
	ctx->state.s32[6] += g;
	ctx->state.s32[7] += h;
}


/* SHA384 and SHA512 Transform */

static void
SHA512Transform(SHA2_CTX *ctx, const uint8_t *blk)
{
	uint64_t a = ctx->state.s64[0];
	uint64_t b = ctx->state.s64[1];
	uint64_t c = ctx->state.s64[2];
	uint64_t d = ctx->state.s64[3];
	uint64_t e = ctx->state.s64[4];
	uint64_t f = ctx->state.s64[5];
	uint64_t g = ctx->state.s64[6];
	uint64_t h = ctx->state.s64[7];

	uint64_t w0, w1, w2, w3, w4, w5, w6, w7;
	uint64_t w8, w9, w10, w11, w12, w13, w14, w15;
	uint64_t T1, T2;

#if defined(__sparc)
	static const uint64_t sha512_consts[] = {
		SHA512_CONST_0, SHA512_CONST_1, SHA512_CONST_2,
		SHA512_CONST_3, SHA512_CONST_4, SHA512_CONST_5,
		SHA512_CONST_6, SHA512_CONST_7, SHA512_CONST_8,
		SHA512_CONST_9, SHA512_CONST_10, SHA512_CONST_11,
		SHA512_CONST_12, SHA512_CONST_13, SHA512_CONST_14,
		SHA512_CONST_15, SHA512_CONST_16, SHA512_CONST_17,
		SHA512_CONST_18, SHA512_CONST_19, SHA512_CONST_20,
		SHA512_CONST_21, SHA512_CONST_22, SHA512_CONST_23,
		SHA512_CONST_24, SHA512_CONST_25, SHA512_CONST_26,
		SHA512_CONST_27, SHA512_CONST_28, SHA512_CONST_29,
		SHA512_CONST_30, SHA512_CONST_31, SHA512_CONST_32,
		SHA512_CONST_33, SHA512_CONST_34, SHA512_CONST_35,
		SHA512_CONST_36, SHA512_CONST_37, SHA512_CONST_38,
		SHA512_CONST_39, SHA512_CONST_40, SHA512_CONST_41,
		SHA512_CONST_42, SHA512_CONST_43, SHA512_CONST_44,
		SHA512_CONST_45, SHA512_CONST_46, SHA512_CONST_47,
		SHA512_CONST_48, SHA512_CONST_49, SHA512_CONST_50,
		SHA512_CONST_51, SHA512_CONST_52, SHA512_CONST_53,
		SHA512_CONST_54, SHA512_CONST_55, SHA512_CONST_56,
		SHA512_CONST_57, SHA512_CONST_58, SHA512_CONST_59,
		SHA512_CONST_60, SHA512_CONST_61, SHA512_CONST_62,
		SHA512_CONST_63, SHA512_CONST_64, SHA512_CONST_65,
		SHA512_CONST_66, SHA512_CONST_67, SHA512_CONST_68,
		SHA512_CONST_69, SHA512_CONST_70, SHA512_CONST_71,
		SHA512_CONST_72, SHA512_CONST_73, SHA512_CONST_74,
		SHA512_CONST_75, SHA512_CONST_76, SHA512_CONST_77,
		SHA512_CONST_78, SHA512_CONST_79
	};
#endif	/* __sparc */

	if ((uintptr_t)blk & 0x7) {	/* not 8-byte aligned? */
		bcopy(blk, ctx->buf_un.buf64, sizeof (ctx->buf_un.buf64));
		blk = (uint8_t *)ctx->buf_un.buf64;
	}

	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w0 = LOAD_BIG_64(blk + 8 * 0);
	SHA512ROUND(a, b, c, d, e, f, g, h, 0, w0);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w1 = LOAD_BIG_64(blk + 8 * 1);
	SHA512ROUND(h, a, b, c, d, e, f, g, 1, w1);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w2 = LOAD_BIG_64(blk + 8 * 2);
	SHA512ROUND(g, h, a, b, c, d, e, f, 2, w2);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w3 = LOAD_BIG_64(blk + 8 * 3);
	SHA512ROUND(f, g, h, a, b, c, d, e, 3, w3);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w4 = LOAD_BIG_64(blk + 8 * 4);
	SHA512ROUND(e, f, g, h, a, b, c, d, 4, w4);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w5 = LOAD_BIG_64(blk + 8 * 5);
	SHA512ROUND(d, e, f, g, h, a, b, c, 5, w5);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w6 = LOAD_BIG_64(blk + 8 * 6);
	SHA512ROUND(c, d, e, f, g, h, a, b, 6, w6);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w7 = LOAD_BIG_64(blk + 8 * 7);
	SHA512ROUND(b, c, d, e, f, g, h, a, 7, w7);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w8 = LOAD_BIG_64(blk + 8 * 8);
	SHA512ROUND(a, b, c, d, e, f, g, h, 8, w8);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w9 = LOAD_BIG_64(blk + 8 * 9);
	SHA512ROUND(h, a, b, c, d, e, f, g, 9, w9);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w10 = LOAD_BIG_64(blk + 8 * 10);
	SHA512ROUND(g, h, a, b, c, d, e, f, 10, w10);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w11 = LOAD_BIG_64(blk + 8 * 11);
	SHA512ROUND(f, g, h, a, b, c, d, e, 11, w11);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w12 = LOAD_BIG_64(blk + 8 * 12);
	SHA512ROUND(e, f, g, h, a, b, c, d, 12, w12);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w13 = LOAD_BIG_64(blk + 8 * 13);
	SHA512ROUND(d, e, f, g, h, a, b, c, 13, w13);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w14 = LOAD_BIG_64(blk + 8 * 14);
	SHA512ROUND(c, d, e, f, g, h, a, b, 14, w14);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	w15 = LOAD_BIG_64(blk + 8 * 15);
	SHA512ROUND(b, c, d, e, f, g, h, a, 15, w15);

	w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
	SHA512ROUND(a, b, c, d, e, f, g, h, 16, w0);
	w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
	SHA512ROUND(h, a, b, c, d, e, f, g, 17, w1);
	w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
	SHA512ROUND(g, h, a, b, c, d, e, f, 18, w2);
	w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
	SHA512ROUND(f, g, h, a, b, c, d, e, 19, w3);
	w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
	SHA512ROUND(e, f, g, h, a, b, c, d, 20, w4);
	w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
	SHA512ROUND(d, e, f, g, h, a, b, c, 21, w5);
	w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
	SHA512ROUND(c, d, e, f, g, h, a, b, 22, w6);
	w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
	SHA512ROUND(b, c, d, e, f, g, h, a, 23, w7);
	w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
	SHA512ROUND(a, b, c, d, e, f, g, h, 24, w8);
	w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
	SHA512ROUND(h, a, b, c, d, e, f, g, 25, w9);
	w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
	SHA512ROUND(g, h, a, b, c, d, e, f, 26, w10);
	w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
	SHA512ROUND(f, g, h, a, b, c, d, e, 27, w11);
	w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
	SHA512ROUND(e, f, g, h, a, b, c, d, 28, w12);
	w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
	SHA512ROUND(d, e, f, g, h, a, b, c, 29, w13);
	w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
	SHA512ROUND(c, d, e, f, g, h, a, b, 30, w14);
	w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
	SHA512ROUND(b, c, d, e, f, g, h, a, 31, w15);

	w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
	SHA512ROUND(a, b, c, d, e, f, g, h, 32, w0);
	w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
	SHA512ROUND(h, a, b, c, d, e, f, g, 33, w1);
	w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
	SHA512ROUND(g, h, a, b, c, d, e, f, 34, w2);
	w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
	SHA512ROUND(f, g, h, a, b, c, d, e, 35, w3);
	w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
	SHA512ROUND(e, f, g, h, a, b, c, d, 36, w4);
	w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
	SHA512ROUND(d, e, f, g, h, a, b, c, 37, w5);
	w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
	SHA512ROUND(c, d, e, f, g, h, a, b, 38, w6);
	w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
	SHA512ROUND(b, c, d, e, f, g, h, a, 39, w7);
	w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
	SHA512ROUND(a, b, c, d, e, f, g, h, 40, w8);
	w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
	SHA512ROUND(h, a, b, c, d, e, f, g, 41, w9);
	w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
	SHA512ROUND(g, h, a, b, c, d, e, f, 42, w10);
	w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
	SHA512ROUND(f, g, h, a, b, c, d, e, 43, w11);
	w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
	SHA512ROUND(e, f, g, h, a, b, c, d, 44, w12);
	w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
	SHA512ROUND(d, e, f, g, h, a, b, c, 45, w13);
	w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
	SHA512ROUND(c, d, e, f, g, h, a, b, 46, w14);
	w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
	SHA512ROUND(b, c, d, e, f, g, h, a, 47, w15);

	w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
	SHA512ROUND(a, b, c, d, e, f, g, h, 48, w0);
	w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
	SHA512ROUND(h, a, b, c, d, e, f, g, 49, w1);
	w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
	SHA512ROUND(g, h, a, b, c, d, e, f, 50, w2);
	w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
	SHA512ROUND(f, g, h, a, b, c, d, e, 51, w3);
	w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
	SHA512ROUND(e, f, g, h, a, b, c, d, 52, w4);
	w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
	SHA512ROUND(d, e, f, g, h, a, b, c, 53, w5);
	w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
	SHA512ROUND(c, d, e, f, g, h, a, b, 54, w6);
	w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
	SHA512ROUND(b, c, d, e, f, g, h, a, 55, w7);
	w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
	SHA512ROUND(a, b, c, d, e, f, g, h, 56, w8);
	w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
	SHA512ROUND(h, a, b, c, d, e, f, g, 57, w9);
	w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
	SHA512ROUND(g, h, a, b, c, d, e, f, 58, w10);
	w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
	SHA512ROUND(f, g, h, a, b, c, d, e, 59, w11);
	w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
	SHA512ROUND(e, f, g, h, a, b, c, d, 60, w12);
	w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
	SHA512ROUND(d, e, f, g, h, a, b, c, 61, w13);
	w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
	SHA512ROUND(c, d, e, f, g, h, a, b, 62, w14);
	w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
	SHA512ROUND(b, c, d, e, f, g, h, a, 63, w15);

	w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
	SHA512ROUND(a, b, c, d, e, f, g, h, 64, w0);
	w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
	SHA512ROUND(h, a, b, c, d, e, f, g, 65, w1);
	w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
	SHA512ROUND(g, h, a, b, c, d, e, f, 66, w2);
	w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
	SHA512ROUND(f, g, h, a, b, c, d, e, 67, w3);
	w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
	SHA512ROUND(e, f, g, h, a, b, c, d, 68, w4);
	w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
	SHA512ROUND(d, e, f, g, h, a, b, c, 69, w5);
	w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
	SHA512ROUND(c, d, e, f, g, h, a, b, 70, w6);
	w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
	SHA512ROUND(b, c, d, e, f, g, h, a, 71, w7);
	w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
	SHA512ROUND(a, b, c, d, e, f, g, h, 72, w8);
	w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
	SHA512ROUND(h, a, b, c, d, e, f, g, 73, w9);
	w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
	SHA512ROUND(g, h, a, b, c, d, e, f, 74, w10);
	w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
	SHA512ROUND(f, g, h, a, b, c, d, e, 75, w11);
	w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
	SHA512ROUND(e, f, g, h, a, b, c, d, 76, w12);
	w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
	SHA512ROUND(d, e, f, g, h, a, b, c, 77, w13);
	w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
	SHA512ROUND(c, d, e, f, g, h, a, b, 78, w14);
	w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
	SHA512ROUND(b, c, d, e, f, g, h, a, 79, w15);

	ctx->state.s64[0] += a;
	ctx->state.s64[1] += b;
	ctx->state.s64[2] += c;
	ctx->state.s64[3] += d;
	ctx->state.s64[4] += e;
	ctx->state.s64[5] += f;
	ctx->state.s64[6] += g;
	ctx->state.s64[7] += h;
}
#endif	/* !__amd64 || !_KERNEL */


/*
 * Encode()
 *
 * purpose: to store an array of 32-bit words in big-endian byte order
 *	    (a straight word copy on big-endian hosts, an explicit byte
 *	    swap everywhere else)
 * input:	uint8_t *	: place to store the big endian bytes
 *		uint32_t *	: place to get the 32-bit words to convert
 *		size_t		: the length of the input in bytes
 * output:	void
 */

static void
Encode(uint8_t *_RESTRICT_KYWD output, uint32_t *_RESTRICT_KYWD input,
    size_t len)
{
	size_t i, j;

#if defined(__sparc)
	if (IS_P2ALIGNED(output, sizeof (uint32_t))) {
		for (i = 0, j = 0; j < len; i++, j += 4) {
			/* LINTED E_BAD_PTR_CAST_ALIGN */
			*((uint32_t *)(output + j)) = input[i];
		}
	} else {
#endif	/* little endian -- will work on big endian, but slowly */
		for (i = 0, j = 0; j < len; i++, j += 4) {
			output[j] = (input[i] >> 24) & 0xff;
			output[j + 1] = (input[i] >> 16) & 0xff;
			output[j + 2] = (input[i] >> 8) & 0xff;
			output[j + 3] = input[i] & 0xff;
		}
#if defined(__sparc)
	}
#endif
}

static void
Encode64(uint8_t *_RESTRICT_KYWD output, uint64_t *_RESTRICT_KYWD input,
    size_t len)
{
	size_t i, j;

#if defined(__sparc)
	if (IS_P2ALIGNED(output, sizeof (uint64_t))) {
		for (i = 0, j = 0; j < len; i++, j += 8) {
			/* LINTED E_BAD_PTR_CAST_ALIGN */
			*((uint64_t *)(output + j)) = input[i];
		}
	} else {
#endif	/* little endian -- will work on big endian, but slowly */
		for (i = 0, j = 0; j < len; i++, j += 8) {
			output[j] = (input[i] >> 56) & 0xff;
			output[j + 1] = (input[i] >> 48) & 0xff;
			output[j + 2] = (input[i] >> 40) & 0xff;
			output[j + 3] = (input[i] >> 32) & 0xff;
			output[j + 4] = (input[i] >> 24) & 0xff;
			output[j + 5] = (input[i] >> 16) & 0xff;
			output[j + 6] = (input[i] >> 8) & 0xff;
			output[j + 7] = input[i] & 0xff;
		}
#if defined(__sparc)
	}
#endif
}


void
SHA2Init(uint64_t mech, SHA2_CTX *ctx)
{
	switch (mech) {
	case SHA256_MECH_INFO_TYPE:
	case SHA256_HMAC_MECH_INFO_TYPE:
	case SHA256_HMAC_GEN_MECH_INFO_TYPE:
		ctx->state.s32[0] = 0x6a09e667U;
		ctx->state.s32[1] = 0xbb67ae85U;
		ctx->state.s32[2] = 0x3c6ef372U;
		ctx->state.s32[3] = 0xa54ff53aU;
		ctx->state.s32[4] = 0x510e527fU;
		ctx->state.s32[5] = 0x9b05688cU;
		ctx->state.s32[6] = 0x1f83d9abU;
		ctx->state.s32[7] = 0x5be0cd19U;
		break;
	case SHA384_MECH_INFO_TYPE:
	case SHA384_HMAC_MECH_INFO_TYPE:
	case SHA384_HMAC_GEN_MECH_INFO_TYPE:
		ctx->state.s64[0] = 0xcbbb9d5dc1059ed8ULL;
		ctx->state.s64[1] = 0x629a292a367cd507ULL;
		ctx->state.s64[2] = 0x9159015a3070dd17ULL;
		ctx->state.s64[3] = 0x152fecd8f70e5939ULL;
		ctx->state.s64[4] = 0x67332667ffc00b31ULL;
		ctx->state.s64[5] = 0x8eb44a8768581511ULL;
		ctx->state.s64[6] = 0xdb0c2e0d64f98fa7ULL;
		ctx->state.s64[7] = 0x47b5481dbefa4fa4ULL;
		break;
	case SHA512_MECH_INFO_TYPE:
	case SHA512_HMAC_MECH_INFO_TYPE:
	case SHA512_HMAC_GEN_MECH_INFO_TYPE:
		ctx->state.s64[0] = 0x6a09e667f3bcc908ULL;
		ctx->state.s64[1] = 0xbb67ae8584caa73bULL;
		ctx->state.s64[2] = 0x3c6ef372fe94f82bULL;
		ctx->state.s64[3] = 0xa54ff53a5f1d36f1ULL;
		ctx->state.s64[4] = 0x510e527fade682d1ULL;
		ctx->state.s64[5] = 0x9b05688c2b3e6c1fULL;
		ctx->state.s64[6] = 0x1f83d9abfb41bd6bULL;
		ctx->state.s64[7] = 0x5be0cd19137e2179ULL;
		break;
	case SHA512_224_MECH_INFO_TYPE:
		ctx->state.s64[0] = 0x8C3D37C819544DA2ULL;
		ctx->state.s64[1] = 0x73E1996689DCD4D6ULL;
		ctx->state.s64[2] = 0x1DFAB7AE32FF9C82ULL;
		ctx->state.s64[3] = 0x679DD514582F9FCFULL;
		ctx->state.s64[4] = 0x0F6D2B697BD44DA8ULL;
		ctx->state.s64[5] = 0x77E36F7304C48942ULL;
		ctx->state.s64[6] = 0x3F9D85A86A1D36C8ULL;
		ctx->state.s64[7] = 0x1112E6AD91D692A1ULL;
		break;
	case SHA512_256_MECH_INFO_TYPE:
		ctx->state.s64[0] = 0x22312194FC2BF72CULL;
		ctx->state.s64[1] = 0x9F555FA3C84C64C2ULL;
		ctx->state.s64[2] = 0x2393B86B6F53B151ULL;
		ctx->state.s64[3] = 0x963877195940EABDULL;
		ctx->state.s64[4] = 0x96283EE2A88EFFE3ULL;
		ctx->state.s64[5] = 0xBE5E1E2553863992ULL;
		ctx->state.s64[6] = 0x2B0199FC2C85B8AAULL;
		ctx->state.s64[7] = 0x0EB72DDC81C52CA2ULL;
		break;
#ifdef _KERNEL
	default:
		cmn_err(CE_PANIC,
		    "sha2_init: failed to find a supported algorithm: 0x%x",
		    (uint32_t)mech);
#endif /* _KERNEL */
	}

	ctx->algotype = (uint32_t)mech;
	ctx->count.c64[0] = ctx->count.c64[1] = 0;
}
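
/*
 * Typical calling sequence (a sketch; SHA256 and SHA256_DIGEST_LENGTH are
 * assumed to carry their usual <sys/sha2.h> definitions):
 *
 *	SHA2_CTX ctx;
 *	uint8_t digest[SHA256_DIGEST_LENGTH];
 *
 *	SHA2Init(SHA256, &ctx);
 *	SHA2Update(&ctx, data, data_len);
 *	SHA2Final(digest, &ctx);
 *
 * SHA2Update() may be called any number of times before SHA2Final().
 */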

#ifndef _KERNEL

/* #pragma inline(SHA256Init, SHA384Init, SHA512Init) */
void
SHA256Init(SHA256_CTX *ctx)
{
	SHA2Init(SHA256, ctx);
}

void
SHA384Init(SHA384_CTX *ctx)
{
	SHA2Init(SHA384, ctx);
}

void
SHA512Init(SHA512_CTX *ctx)
{
	SHA2Init(SHA512, ctx);
}

#endif /* !_KERNEL */

/*
 * SHA2Update()
 *
 * purpose: continues an sha2 digest operation, using the message block
 *	    to update the context.
 * input:	SHA2_CTX *	: the context to update
 *		void *		: the message block
 *		size_t		: the length of the message block, in bytes
 * output:	void
 */

void
SHA2Update(SHA2_CTX *ctx, const void *inptr, size_t input_len)
{
	uint32_t i, buf_index, buf_len, buf_limit;
	const uint8_t *input = inptr;
	uint32_t algotype = ctx->algotype;

	/* check for noop */
	if (input_len == 0)
		return;

	if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
		buf_limit = 64;

		/* compute number of bytes mod 64 */
		buf_index = (ctx->count.c32[1] >> 3) & 0x3F;

		/* update number of bits */
		if ((ctx->count.c32[1] += (input_len << 3)) < (input_len << 3))
			ctx->count.c32[0]++;

		ctx->count.c32[0] += (input_len >> 29);

	} else {
		buf_limit = 128;

		/* compute number of bytes mod 128 */
		buf_index = (ctx->count.c64[1] >> 3) & 0x7F;

		/* update number of bits */
		if ((ctx->count.c64[1] += (input_len << 3)) < (input_len << 3))
			ctx->count.c64[0]++;

		/* carry the bits that overflow the low 64-bit count word */
		ctx->count.c64[0] += (input_len >> 61);
	}
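
	/*
	 * Worked example (SHA-256): if earlier calls supplied 100 bytes in
	 * total, then on entry count.c32[1] held 800 (bits) and buf_index =
	 * 100 % 64 = 36, so the first 64 - 36 = 28 bytes of new input will
	 * complete the partially filled block.
	 */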

	buf_len = buf_limit - buf_index;

	/* transform as many times as possible */
	i = 0;
	if (input_len >= buf_len) {

		/*
		 * general optimization:
		 *
		 * only do initial bcopy() and SHA2Transform() if
		 * buf_index != 0.  if buf_index == 0, we're just
		 * wasting our time doing the bcopy() since there
		 * wasn't any data left over from a previous call to
		 * SHA2Update().
		 */
		if (buf_index) {
			bcopy(input, &ctx->buf_un.buf8[buf_index], buf_len);
			if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE)
				SHA256Transform(ctx, ctx->buf_un.buf8);
			else
				SHA512Transform(ctx, ctx->buf_un.buf8);

			i = buf_len;
		}

#if !defined(__amd64) || !defined(_KERNEL)
		if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
			for (; i + buf_limit - 1 < input_len; i += buf_limit) {
				SHA256Transform(ctx, &input[i]);
			}
		} else {
			for (; i + buf_limit - 1 < input_len; i += buf_limit) {
				SHA512Transform(ctx, &input[i]);
			}
		}

#else
		uint32_t block_count;
		if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
			block_count = (input_len - i) >> 6;
			if (block_count > 0) {
				SHA256TransformBlocks(ctx, &input[i],
				    block_count);
				i += block_count << 6;
			}
		} else {
			block_count = (input_len - i) >> 7;
			if (block_count > 0) {
				SHA512TransformBlocks(ctx, &input[i],
				    block_count);
				i += block_count << 7;
			}
		}
#endif	/* !__amd64 || !_KERNEL */

		/*
		 * general optimization:
		 *
		 * if i and input_len are the same, return now instead
		 * of calling bcopy(), since the bcopy() in this case
		 * will be an expensive noop.
		 */
		if (input_len == i)
			return;

		buf_index = 0;
	}

	/* buffer remaining input */
	bcopy(&input[i], &ctx->buf_un.buf8[buf_index], input_len - i);
}


/*
 * SHA2Final()
 *
 * purpose: ends an sha2 digest operation, finalizing the message digest and
 *	    zeroing the context.
 * input:	uchar_t *	: a buffer to store the digest
 *			: The function actually uses void* because many
 *			: callers pass things other than uchar_t here.
 *		SHA2_CTX *	: the context to finalize, save, and zero
 * output:	void
 */

void
SHA2Final(void *digest, SHA2_CTX *ctx)
{
	uint8_t bitcount_be[sizeof (ctx->count.c32)];
	uint8_t bitcount_be64[sizeof (ctx->count.c64)];
	uint32_t index;
	uint32_t algotype = ctx->algotype;

	if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
		index = (ctx->count.c32[1] >> 3) & 0x3f;
		Encode(bitcount_be, ctx->count.c32, sizeof (bitcount_be));
		SHA2Update(ctx, PADDING, ((index < 56) ? 56 : 120) - index);
		SHA2Update(ctx, bitcount_be, sizeof (bitcount_be));
		Encode(digest, ctx->state.s32, sizeof (ctx->state.s32));
	} else {
		index = (ctx->count.c64[1] >> 3) & 0x7f;
		Encode64(bitcount_be64, ctx->count.c64,
		    sizeof (bitcount_be64));
		SHA2Update(ctx, PADDING, ((index < 112) ? 112 : 240) - index);
		SHA2Update(ctx, bitcount_be64, sizeof (bitcount_be64));
		if (algotype <= SHA384_HMAC_GEN_MECH_INFO_TYPE) {
			ctx->state.s64[6] = ctx->state.s64[7] = 0;
			Encode64(digest, ctx->state.s64,
			    sizeof (uint64_t) * 6);
		} else if (algotype == SHA512_224_MECH_INFO_TYPE) {
			uint8_t last[sizeof (uint64_t)];

			/*
			 * Since SHA-512/224 doesn't align well to 64-bit
			 * boundaries, we must do the encoding in three steps:
			 * 1) encode the three 64-bit words that fit neatly
			 * 2) encode the last 64-bit word to a temp buffer
			 * 3) take the first four bytes of that encoding (the
			 *    upper 32 bits of the word) and append them to
			 *    the digest
			 */
			Encode64(digest, ctx->state.s64, sizeof (uint64_t) * 3);
			Encode64(last, &ctx->state.s64[3], sizeof (uint64_t));
			bcopy(last, (uint8_t *)digest + 24, 4);
		} else if (algotype == SHA512_256_MECH_INFO_TYPE) {
			Encode64(digest, ctx->state.s64, sizeof (uint64_t) * 4);
		} else {
			Encode64(digest, ctx->state.s64,
			    sizeof (ctx->state.s64));
		}
	}

	/* zeroize sensitive information */
	bzero(ctx, sizeof (*ctx));
}

#ifdef _KERNEL
EXPORT_SYMBOL(SHA2Init);
EXPORT_SYMBOL(SHA2Update);
EXPORT_SYMBOL(SHA2Final);
#endif