module/icp/algs/sha2/sha2.c

   1 /*
   2  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
   3  * Use is subject to license terms.
   4  */
   5 /*
   6  * Copyright 2013 Saso Kiselkov.  All rights reserved.
   7  */
   8
   9 /*
  10  * The basic framework for this code came from the reference
  11  * implementation for MD5.  That implementation is Copyright (C)
  12  * 1991-2, RSA Data Security, Inc. Created 1991. All rights reserved.
  13  *
  14  * License to copy and use this software is granted provided that it
  15  * is identified as the "RSA Data Security, Inc. MD5 Message-Digest
  16  * Algorithm" in all material mentioning or referencing this software
  17  * or this function.
  18  *
  19  * License is also granted to make and use derivative works provided
  20  * that such works are identified as "derived from the RSA Data
  21  * Security, Inc. MD5 Message-Digest Algorithm" in all material
  22  * mentioning or referencing the derived work.
  23  *
  24  * RSA Data Security, Inc. makes no representations concerning either
  25  * the merchantability of this software or the suitability of this
  26  * software for any particular purpose. It is provided "as is"
  27  * without express or implied warranty of any kind.
  28  *
  29  * These notices must be retained in any copies of any part of this
  30  * documentation and/or software.
  31  *
  32  * NOTE: Cleaned-up and optimized version of SHA2, based on the FIPS 180-2
  33  * standard, available at
  34  * http://csrc.nist.gov/publications/fips/fips180-2/fips180-2.pdf
  35  * Not as fast as one would like -- further optimizations are encouraged
  36  * and appreciated.
  37  */
  38
  39 #include <sys/zfs_context.h>
  40 #define _SHA2_IMPL
  41 #include <sys/sha2.h>
  42 #include <sha2/sha2_consts.h>
  43
  44 #define _RESTRICT_KYWD
  45
  46 #ifdef _LITTLE_ENDIAN
  47 #include <sys/byteorder.h>
  48 #define HAVE_HTONL
  49 #endif
  50 #include <sys/isa_defs.h>       /* for _ILP32 */
  51
  52 static void Encode(uint8_t *, uint32_t *, size_t);
  53 static void Encode64(uint8_t *, uint64_t *, size_t);
     /*
      * Encode() and Encode64() are defined further down in this file; they
      * write arrays of 32-bit / 64-bit words out as big-endian byte strings.
      */
  54
     /*
      * Selection of the per-block transform routines.  With __amd64 defined,
      * SHA256Transform()/SHA512Transform() are macros that forward to the
      * external SHA256TransformBlocks()/SHA512TransformBlocks() routines with
      * a block count of 1; those routines are only declared here and are not
      * defined in the visible part of this file.  Otherwise the static C
      * versions defined later in this file are used.
      */
  55 #if     defined(__amd64)
  56 #define SHA512Transform(ctx, in) SHA512TransformBlocks((ctx), (in), 1)
  57 #define SHA256Transform(ctx, in) SHA256TransformBlocks((ctx), (in), 1)
  58
  59 void SHA512TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num);
  60 void SHA256TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num);
  61
  62 #else
  63 static void SHA256Transform(SHA2_CTX *, const uint8_t *);
  64 static void SHA512Transform(SHA2_CTX *, const uint8_t *);
  65 #endif  /* __amd64 */
  66
     /*
      * First byte 0x80, remaining 127 bytes implicitly zero.  The consumer is
      * not visible in this part of the file; presumably this is the message
      * padding appended at finalization (a single 1 bit followed by zeros) --
      * TODO confirm against the finalization code.
      */
  67 static uint8_t PADDING[128] = { 0x80, /* all zeros */ };
  68
  69 /*
  70  * The low-level checksum routines use a lot of stack space. On systems where
  71  * small stacks are enforced (like 32-bit kernel builds), insert compiler memory
  72  * barriers to reduce stack frame size. This can reduce the SHA512Transform()
  73  * stack frame usage from 3k to <1k on ARM32, for example.
      *
      * The barrier is an empty asm statement with a "memory" clobber.  Its
      * non-empty definition already ends in a semicolon, so writing
      * "SMALL_STACK_MEMORY_BARRIER;" expands to two statement terminators
      * (harmless inside a block).  On all other targets the macro expands to
      * nothing.  SHA512ROUND() is the only user in the visible part of this
      * file.
  74  */
  75 #if defined(_ILP32) || defined(__powerpc)       /* small stack */
  76 #define SMALL_STACK_MEMORY_BARRIER      asm volatile("": : :"memory");
  77 #else
  78 #define SMALL_STACK_MEMORY_BARRIER
  79 #endif
  80
  81 /* Ch and Maj are the basic SHA2 functions. */
  82 #define Ch(b, c, d)     (((b) & (c)) ^ ((~b) & (d)))
  83 #define Maj(b, c, d)    (((b) & (c)) ^ ((b) & (d)) ^ ((c) & (d)))
  84
  85 /* Rotates x right n bits. */
  86 #define ROTR(x, n)      \
  87         (((x) >> (n)) | ((x) << ((sizeof (x) * NBBY)-(n))))
  88
  89 /* Shift x right n bits */
  90 #define SHR(x, n)       ((x) >> (n))
  91
  92 /*
      * SHA256 Functions
      *
      * The four sigma functions used by the SHA-256 variant, all built from
      * ROTR/SHR on a 32-bit operand.  BIGSIGMA0_256/BIGSIGMA1_256 are applied
      * to working variables inside SHA256ROUND(); SIGMA0_256/SIGMA1_256 are
      * applied to earlier message-schedule words when SHA256Transform()
      * recomputes w0..w15 for rounds 16 and up.
      */
  93 #define BIGSIGMA0_256(x)        (ROTR((x), 2) ^ ROTR((x), 13) ^ ROTR((x), 22))
  94 #define BIGSIGMA1_256(x)        (ROTR((x), 6) ^ ROTR((x), 11) ^ ROTR((x), 25))
  95 #define SIGMA0_256(x)           (ROTR((x), 7) ^ ROTR((x), 18) ^ SHR((x), 3))
  96 #define SIGMA1_256(x)           (ROTR((x), 17) ^ ROTR((x), 19) ^ SHR((x), 10))
  97
     /*
      * One SHA-256 round.  Expands to four statements (the caller supplies the
      * final semicolon) that modify only d and h among the eight working
      * variables, using T1 and T2 from the calling scope as temporaries.
      * i selects the round constant through SHA256_CONST(), which is not
      * defined in this file's visible part; w is the message-schedule word
      * for the round.  Callers rotate the a..h argument list by one position
      * per round instead of moving the values between variables.
      *
      * This is a bare statement list, not a do { } while (0) block, so it
      * must not be used as the body of an unbraced if/else/loop.
      */
  98 #define SHA256ROUND(a, b, c, d, e, f, g, h, i, w)                       \
  99         T1 = h + BIGSIGMA1_256(e) + Ch(e, f, g) + SHA256_CONST(i) + w;  \
 100         d += T1;                                                        \
 101         T2 = BIGSIGMA0_256(a) + Maj(a, b, c);                           \
 102         h = T1 + T2
 103
 104 /*
      * SHA384/512 Functions
      *
      * The four sigma functions used by the 64-bit variants, all built from
      * ROTR/SHR on a 64-bit operand.  BIGSIGMA0/BIGSIGMA1 are applied to
      * working variables inside SHA512ROUND(); SIGMA0/SIGMA1 are applied to
      * earlier message-schedule words when SHA512Transform() recomputes
      * w0..w15 for rounds 16 and up.
      */
 105 #define BIGSIGMA0(x)    (ROTR((x), 28) ^ ROTR((x), 34) ^ ROTR((x), 39))
 106 #define BIGSIGMA1(x)    (ROTR((x), 14) ^ ROTR((x), 18) ^ ROTR((x), 41))
 107 #define SIGMA0(x)       (ROTR((x), 1) ^ ROTR((x), 8) ^ SHR((x), 7))
 108 #define SIGMA1(x)       (ROTR((x), 19) ^ ROTR((x), 61) ^ SHR((x), 6))
     /*
      * One SHA-384/512 round.  Same structure as SHA256ROUND(): only d and h
      * are modified among the eight working variables, T1 and T2 come from
      * the calling scope, i selects the round constant through SHA512_CONST()
      * (not defined in this file's visible part) and w is the schedule word.
      *
      * Unlike SHA256ROUND(), the expansion ends with
      * SMALL_STACK_MEMORY_BARRIER and its own semicolon, so "SHA512ROUND(...);"
      * leaves one or two empty statements behind.  It is a bare statement
      * list and must not be used as the body of an unbraced if/else/loop.
      */
 109 #define SHA512ROUND(a, b, c, d, e, f, g, h, i, w)                       \
 110         T1 = h + BIGSIGMA1(e) + Ch(e, f, g) + SHA512_CONST(i) + w;      \
 111         d += T1;                                                        \
 112         T2 = BIGSIGMA0(a) + Maj(a, b, c);                               \
 113         h = T1 + T2;                                                    \
 114         SMALL_STACK_MEMORY_BARRIER;
 115
 116 /*
 117  * sparc optimization:
 118  *
 119  * on the sparc, we can load big endian 32-bit data easily.  note that
 120  * special care must be taken to ensure the address is 32-bit aligned.
 121  * in the interest of speed, we don't check to make sure, since
 122  * careful programming can guarantee this for us.
 123  */
 124
 125 #if     defined(_BIG_ENDIAN)
 126 #define LOAD_BIG_32(addr)       (*(uint32_t *)(addr))
 127 #define LOAD_BIG_64(addr)       (*(uint64_t *)(addr))
 128
 129 #elif   defined(HAVE_HTONL)
 130 #define LOAD_BIG_32(addr) htonl(*((uint32_t *)(addr)))
 131 #define LOAD_BIG_64(addr) htonll(*((uint64_t *)(addr)))
 132
 133 #else
 134 /* little endian -- will work on big endian, but slowly */
 135 #define LOAD_BIG_32(addr)       \
 136         (((addr)[0] << 24) | ((addr)[1] << 16) | ((addr)[2] << 8) | (addr)[3])
 137 #define LOAD_BIG_64(addr)       \
 138         (((uint64_t)(addr)[0] << 56) | ((uint64_t)(addr)[1] << 48) |    \
 139             ((uint64_t)(addr)[2] << 40) | ((uint64_t)(addr)[3] << 32) | \
 140             ((uint64_t)(addr)[4] << 24) | ((uint64_t)(addr)[5] << 16) | \
 141             ((uint64_t)(addr)[6] << 8) | (uint64_t)(addr)[7])
 142 #endif  /* _BIG_ENDIAN */
 143
 144
 145 #if     !defined(__amd64)
 146 /*
      * SHA256 Transform
      *
      * Runs 64 SHA-256 rounds over one 64-byte message block and adds the
      * result into the running hash state.
      *
      *   ctx: hash context.  ctx->state.s32[0..7] is read on entry and
      *        updated on return.  ctx->buf_un.buf32 is overwritten when blk
      *        is not 4-byte aligned.
      *   blk: message block; sixteen big-endian 32-bit words are read from it.
      *
      * The rounds are fully unrolled.  w0..w15 act as a 16-word circular
      * message schedule, and the roles of a..h rotate by one argument
      * position per round instead of the values being moved between
      * variables.
      */
 147
 148 static void
 149 SHA256Transform(SHA2_CTX *ctx, const uint8_t *blk)
 150 {
             /* Working variables start out as the current hash state. */
 151         uint32_t a = ctx->state.s32[0];
 152         uint32_t b = ctx->state.s32[1];
 153         uint32_t c = ctx->state.s32[2];
 154         uint32_t d = ctx->state.s32[3];
 155         uint32_t e = ctx->state.s32[4];
 156         uint32_t f = ctx->state.s32[5];
 157         uint32_t g = ctx->state.s32[6];
 158         uint32_t h = ctx->state.s32[7];
 159
             /* Message schedule window and the temporaries used by SHA256ROUND. */
 160         uint32_t w0, w1, w2, w3, w4, w5, w6, w7;
 161         uint32_t w8, w9, w10, w11, w12, w13, w14, w15;
 162         uint32_t T1, T2;
 163
             /*
              * On sparc the 64 round constants are also laid out as a table.
              * Nothing in this function names the table directly; presumably
              * SHA256_CONST() (defined outside this file's visible part)
              * indexes it on sparc -- TODO confirm against sha2_consts.h.
              */
 164 #if     defined(__sparc)
 165         static const uint32_t sha256_consts[] = {
 166                 SHA256_CONST_0, SHA256_CONST_1, SHA256_CONST_2,
 167                 SHA256_CONST_3, SHA256_CONST_4, SHA256_CONST_5,
 168                 SHA256_CONST_6, SHA256_CONST_7, SHA256_CONST_8,
 169                 SHA256_CONST_9, SHA256_CONST_10, SHA256_CONST_11,
 170                 SHA256_CONST_12, SHA256_CONST_13, SHA256_CONST_14,
 171                 SHA256_CONST_15, SHA256_CONST_16, SHA256_CONST_17,
 172                 SHA256_CONST_18, SHA256_CONST_19, SHA256_CONST_20,
 173                 SHA256_CONST_21, SHA256_CONST_22, SHA256_CONST_23,
 174                 SHA256_CONST_24, SHA256_CONST_25, SHA256_CONST_26,
 175                 SHA256_CONST_27, SHA256_CONST_28, SHA256_CONST_29,
 176                 SHA256_CONST_30, SHA256_CONST_31, SHA256_CONST_32,
 177                 SHA256_CONST_33, SHA256_CONST_34, SHA256_CONST_35,
 178                 SHA256_CONST_36, SHA256_CONST_37, SHA256_CONST_38,
 179                 SHA256_CONST_39, SHA256_CONST_40, SHA256_CONST_41,
 180                 SHA256_CONST_42, SHA256_CONST_43, SHA256_CONST_44,
 181                 SHA256_CONST_45, SHA256_CONST_46, SHA256_CONST_47,
 182                 SHA256_CONST_48, SHA256_CONST_49, SHA256_CONST_50,
 183                 SHA256_CONST_51, SHA256_CONST_52, SHA256_CONST_53,
 184                 SHA256_CONST_54, SHA256_CONST_55, SHA256_CONST_56,
 185                 SHA256_CONST_57, SHA256_CONST_58, SHA256_CONST_59,
 186                 SHA256_CONST_60, SHA256_CONST_61, SHA256_CONST_62,
 187                 SHA256_CONST_63
 188         };
 189 #endif  /* __sparc */
 190
             /*
              * Two of the three LOAD_BIG_32() variants dereference blk as a
              * uint32_t *, so a misaligned block is first copied into the
              * context's buf32 scratch area and read from there.
              */
 191         if ((uintptr_t)blk & 0x3) {             /* not 4-byte aligned? */
 192                 bcopy(blk, ctx->buf_un.buf32,  sizeof (ctx->buf_un.buf32));
 193                 blk = (uint8_t *)ctx->buf_un.buf32;
 194         }
 195
             /* Rounds 0-15: schedule words are loaded straight from the block. */
 196         /* LINTED E_BAD_PTR_CAST_ALIGN */
 197         w0 =  LOAD_BIG_32(blk + 4 * 0);
 198         SHA256ROUND(a, b, c, d, e, f, g, h, 0, w0);
 199         /* LINTED E_BAD_PTR_CAST_ALIGN */
 200         w1 =  LOAD_BIG_32(blk + 4 * 1);
 201         SHA256ROUND(h, a, b, c, d, e, f, g, 1, w1);
 202         /* LINTED E_BAD_PTR_CAST_ALIGN */
 203         w2 =  LOAD_BIG_32(blk + 4 * 2);
 204         SHA256ROUND(g, h, a, b, c, d, e, f, 2, w2);
 205         /* LINTED E_BAD_PTR_CAST_ALIGN */
 206         w3 =  LOAD_BIG_32(blk + 4 * 3);
 207         SHA256ROUND(f, g, h, a, b, c, d, e, 3, w3);
 208         /* LINTED E_BAD_PTR_CAST_ALIGN */
 209         w4 =  LOAD_BIG_32(blk + 4 * 4);
 210         SHA256ROUND(e, f, g, h, a, b, c, d, 4, w4);
 211         /* LINTED E_BAD_PTR_CAST_ALIGN */
 212         w5 =  LOAD_BIG_32(blk + 4 * 5);
 213         SHA256ROUND(d, e, f, g, h, a, b, c, 5, w5);
 214         /* LINTED E_BAD_PTR_CAST_ALIGN */
 215         w6 =  LOAD_BIG_32(blk + 4 * 6);
 216         SHA256ROUND(c, d, e, f, g, h, a, b, 6, w6);
 217         /* LINTED E_BAD_PTR_CAST_ALIGN */
 218         w7 =  LOAD_BIG_32(blk + 4 * 7);
 219         SHA256ROUND(b, c, d, e, f, g, h, a, 7, w7);
 220         /* LINTED E_BAD_PTR_CAST_ALIGN */
 221         w8 =  LOAD_BIG_32(blk + 4 * 8);
 222         SHA256ROUND(a, b, c, d, e, f, g, h, 8, w8);
 223         /* LINTED E_BAD_PTR_CAST_ALIGN */
 224         w9 =  LOAD_BIG_32(blk + 4 * 9);
 225         SHA256ROUND(h, a, b, c, d, e, f, g, 9, w9);
 226         /* LINTED E_BAD_PTR_CAST_ALIGN */
 227         w10 =  LOAD_BIG_32(blk + 4 * 10);
 228         SHA256ROUND(g, h, a, b, c, d, e, f, 10, w10);
 229         /* LINTED E_BAD_PTR_CAST_ALIGN */
 230         w11 =  LOAD_BIG_32(blk + 4 * 11);
 231         SHA256ROUND(f, g, h, a, b, c, d, e, 11, w11);
 232         /* LINTED E_BAD_PTR_CAST_ALIGN */
 233         w12 =  LOAD_BIG_32(blk + 4 * 12);
 234         SHA256ROUND(e, f, g, h, a, b, c, d, 12, w12);
 235         /* LINTED E_BAD_PTR_CAST_ALIGN */
 236         w13 =  LOAD_BIG_32(blk + 4 * 13);
 237         SHA256ROUND(d, e, f, g, h, a, b, c, 13, w13);
 238         /* LINTED E_BAD_PTR_CAST_ALIGN */
 239         w14 =  LOAD_BIG_32(blk + 4 * 14);
 240         SHA256ROUND(c, d, e, f, g, h, a, b, 14, w14);
 241         /* LINTED E_BAD_PTR_CAST_ALIGN */
 242         w15 =  LOAD_BIG_32(blk + 4 * 15);
 243         SHA256ROUND(b, c, d, e, f, g, h, a, 15, w15);
 244
             /*
              * Rounds 16-31.  From here on each schedule word is recomputed
              * in place, with all indices taken modulo 16:
              *   W[t] = SIGMA1_256(W[t-2]) + W[t-7] + SIGMA0_256(W[t-15]) + W[t-16]
              */
 245         w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;
 246         SHA256ROUND(a, b, c, d, e, f, g, h, 16, w0);
 247         w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;
 248         SHA256ROUND(h, a, b, c, d, e, f, g, 17, w1);
 249         w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;
 250         SHA256ROUND(g, h, a, b, c, d, e, f, 18, w2);
 251         w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;
 252         SHA256ROUND(f, g, h, a, b, c, d, e, 19, w3);
 253         w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;
 254         SHA256ROUND(e, f, g, h, a, b, c, d, 20, w4);
 255         w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;
 256         SHA256ROUND(d, e, f, g, h, a, b, c, 21, w5);
 257         w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;
 258         SHA256ROUND(c, d, e, f, g, h, a, b, 22, w6);
 259         w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;
 260         SHA256ROUND(b, c, d, e, f, g, h, a, 23, w7);
 261         w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;
 262         SHA256ROUND(a, b, c, d, e, f, g, h, 24, w8);
 263         w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;
 264         SHA256ROUND(h, a, b, c, d, e, f, g, 25, w9);
 265         w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;
 266         SHA256ROUND(g, h, a, b, c, d, e, f, 26, w10);
 267         w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;
 268         SHA256ROUND(f, g, h, a, b, c, d, e, 27, w11);
 269         w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;
 270         SHA256ROUND(e, f, g, h, a, b, c, d, 28, w12);
 271         w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;
 272         SHA256ROUND(d, e, f, g, h, a, b, c, 29, w13);
 273         w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;
 274         SHA256ROUND(c, d, e, f, g, h, a, b, 30, w14);
 275         w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;
 276         SHA256ROUND(b, c, d, e, f, g, h, a, 31, w15);
 277
             /* Rounds 32-47: same schedule recurrence. */
 278         w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;
 279         SHA256ROUND(a, b, c, d, e, f, g, h, 32, w0);
 280         w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;
 281         SHA256ROUND(h, a, b, c, d, e, f, g, 33, w1);
 282         w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;
 283         SHA256ROUND(g, h, a, b, c, d, e, f, 34, w2);
 284         w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;
 285         SHA256ROUND(f, g, h, a, b, c, d, e, 35, w3);
 286         w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;
 287         SHA256ROUND(e, f, g, h, a, b, c, d, 36, w4);
 288         w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;
 289         SHA256ROUND(d, e, f, g, h, a, b, c, 37, w5);
 290         w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;
 291         SHA256ROUND(c, d, e, f, g, h, a, b, 38, w6);
 292         w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;
 293         SHA256ROUND(b, c, d, e, f, g, h, a, 39, w7);
 294         w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;
 295         SHA256ROUND(a, b, c, d, e, f, g, h, 40, w8);
 296         w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;
 297         SHA256ROUND(h, a, b, c, d, e, f, g, 41, w9);
 298         w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;
 299         SHA256ROUND(g, h, a, b, c, d, e, f, 42, w10);
 300         w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;
 301         SHA256ROUND(f, g, h, a, b, c, d, e, 43, w11);
 302         w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;
 303         SHA256ROUND(e, f, g, h, a, b, c, d, 44, w12);
 304         w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;
 305         SHA256ROUND(d, e, f, g, h, a, b, c, 45, w13);
 306         w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;
 307         SHA256ROUND(c, d, e, f, g, h, a, b, 46, w14);
 308         w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;
 309         SHA256ROUND(b, c, d, e, f, g, h, a, 47, w15);
 310
             /* Rounds 48-63: same schedule recurrence. */
 311         w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;
 312         SHA256ROUND(a, b, c, d, e, f, g, h, 48, w0);
 313         w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;
 314         SHA256ROUND(h, a, b, c, d, e, f, g, 49, w1);
 315         w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;
 316         SHA256ROUND(g, h, a, b, c, d, e, f, 50, w2);
 317         w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;
 318         SHA256ROUND(f, g, h, a, b, c, d, e, 51, w3);
 319         w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;
 320         SHA256ROUND(e, f, g, h, a, b, c, d, 52, w4);
 321         w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;
 322         SHA256ROUND(d, e, f, g, h, a, b, c, 53, w5);
 323         w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;
 324         SHA256ROUND(c, d, e, f, g, h, a, b, 54, w6);
 325         w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;
 326         SHA256ROUND(b, c, d, e, f, g, h, a, 55, w7);
 327         w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;
 328         SHA256ROUND(a, b, c, d, e, f, g, h, 56, w8);
 329         w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;
 330         SHA256ROUND(h, a, b, c, d, e, f, g, 57, w9);
 331         w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;
 332         SHA256ROUND(g, h, a, b, c, d, e, f, 58, w10);
 333         w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;
 334         SHA256ROUND(f, g, h, a, b, c, d, e, 59, w11);
 335         w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;
 336         SHA256ROUND(e, f, g, h, a, b, c, d, 60, w12);
 337         w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;
 338         SHA256ROUND(d, e, f, g, h, a, b, c, 61, w13);
 339         w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;
 340         SHA256ROUND(c, d, e, f, g, h, a, b, 62, w14);
 341         w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;
 342         SHA256ROUND(b, c, d, e, f, g, h, a, 63, w15);
 343
             /* Fold this block's result into the running hash state. */
 344         ctx->state.s32[0] += a;
 345         ctx->state.s32[1] += b;
 346         ctx->state.s32[2] += c;
 347         ctx->state.s32[3] += d;
 348         ctx->state.s32[4] += e;
 349         ctx->state.s32[5] += f;
 350         ctx->state.s32[6] += g;
 351         ctx->state.s32[7] += h;
 352 }
 353
 354
 355 /*
      * SHA384 and SHA512 Transform
      *
      * Runs 80 rounds over one 128-byte message block and adds the result
      * into the running 64-bit hash state.
      *
      *   ctx: hash context.  ctx->state.s64[0..7] is read on entry and
      *        updated on return.  ctx->buf_un.buf64 is overwritten when blk
      *        is not 8-byte aligned.
      *   blk: message block; sixteen big-endian 64-bit words are read from it.
      *
      * Structured exactly like SHA256Transform(): fully unrolled rounds,
      * w0..w15 as a 16-word circular message schedule, and a..h rotating by
      * one argument position per round.  Every SHA512ROUND() additionally
      * ends in SMALL_STACK_MEMORY_BARRIER.
      */
 356
 357 static void
 358 SHA512Transform(SHA2_CTX *ctx, const uint8_t *blk)
 359 {
 360
             /* Working variables start out as the current hash state. */
 361         uint64_t a = ctx->state.s64[0];
 362         uint64_t b = ctx->state.s64[1];
 363         uint64_t c = ctx->state.s64[2];
 364         uint64_t d = ctx->state.s64[3];
 365         uint64_t e = ctx->state.s64[4];
 366         uint64_t f = ctx->state.s64[5];
 367         uint64_t g = ctx->state.s64[6];
 368         uint64_t h = ctx->state.s64[7];
 369
             /* Message schedule window and the temporaries used by SHA512ROUND. */
 370         uint64_t w0, w1, w2, w3, w4, w5, w6, w7;
 371         uint64_t w8, w9, w10, w11, w12, w13, w14, w15;
 372         uint64_t T1, T2;
 373
             /*
              * On sparc the 80 round constants are also laid out as a table.
              * Nothing in this function names the table directly; presumably
              * SHA512_CONST() (defined outside this file's visible part)
              * indexes it on sparc -- TODO confirm against sha2_consts.h.
              */
 374 #if     defined(__sparc)
 375         static const uint64_t sha512_consts[] = {
 376                 SHA512_CONST_0, SHA512_CONST_1, SHA512_CONST_2,
 377                 SHA512_CONST_3, SHA512_CONST_4, SHA512_CONST_5,
 378                 SHA512_CONST_6, SHA512_CONST_7, SHA512_CONST_8,
 379                 SHA512_CONST_9, SHA512_CONST_10, SHA512_CONST_11,
 380                 SHA512_CONST_12, SHA512_CONST_13, SHA512_CONST_14,
 381                 SHA512_CONST_15, SHA512_CONST_16, SHA512_CONST_17,
 382                 SHA512_CONST_18, SHA512_CONST_19, SHA512_CONST_20,
 383                 SHA512_CONST_21, SHA512_CONST_22, SHA512_CONST_23,
 384                 SHA512_CONST_24, SHA512_CONST_25, SHA512_CONST_26,
 385                 SHA512_CONST_27, SHA512_CONST_28, SHA512_CONST_29,
 386                 SHA512_CONST_30, SHA512_CONST_31, SHA512_CONST_32,
 387                 SHA512_CONST_33, SHA512_CONST_34, SHA512_CONST_35,
 388                 SHA512_CONST_36, SHA512_CONST_37, SHA512_CONST_38,
 389                 SHA512_CONST_39, SHA512_CONST_40, SHA512_CONST_41,
 390                 SHA512_CONST_42, SHA512_CONST_43, SHA512_CONST_44,
 391                 SHA512_CONST_45, SHA512_CONST_46, SHA512_CONST_47,
 392                 SHA512_CONST_48, SHA512_CONST_49, SHA512_CONST_50,
 393                 SHA512_CONST_51, SHA512_CONST_52, SHA512_CONST_53,
 394                 SHA512_CONST_54, SHA512_CONST_55, SHA512_CONST_56,
 395                 SHA512_CONST_57, SHA512_CONST_58, SHA512_CONST_59,
 396                 SHA512_CONST_60, SHA512_CONST_61, SHA512_CONST_62,
 397                 SHA512_CONST_63, SHA512_CONST_64, SHA512_CONST_65,
 398                 SHA512_CONST_66, SHA512_CONST_67, SHA512_CONST_68,
 399                 SHA512_CONST_69, SHA512_CONST_70, SHA512_CONST_71,
 400                 SHA512_CONST_72, SHA512_CONST_73, SHA512_CONST_74,
 401                 SHA512_CONST_75, SHA512_CONST_76, SHA512_CONST_77,
 402                 SHA512_CONST_78, SHA512_CONST_79
 403         };
 404 #endif  /* __sparc */
 405
 406
             /*
              * Two of the three LOAD_BIG_64() variants dereference blk as a
              * uint64_t *, so a misaligned block is first copied into the
              * context's buf64 scratch area and read from there.
              */
 407         if ((uintptr_t)blk & 0x7) {             /* not 8-byte aligned? */
 408                 bcopy(blk, ctx->buf_un.buf64,  sizeof (ctx->buf_un.buf64));
 409                 blk = (uint8_t *)ctx->buf_un.buf64;
 410         }
 411
             /* Rounds 0-15: schedule words are loaded straight from the block. */
 412         /* LINTED E_BAD_PTR_CAST_ALIGN */
 413         w0 =  LOAD_BIG_64(blk + 8 * 0);
 414         SHA512ROUND(a, b, c, d, e, f, g, h, 0, w0);
 415         /* LINTED E_BAD_PTR_CAST_ALIGN */
 416         w1 =  LOAD_BIG_64(blk + 8 * 1);
 417         SHA512ROUND(h, a, b, c, d, e, f, g, 1, w1);
 418         /* LINTED E_BAD_PTR_CAST_ALIGN */
 419         w2 =  LOAD_BIG_64(blk + 8 * 2);
 420         SHA512ROUND(g, h, a, b, c, d, e, f, 2, w2);
 421         /* LINTED E_BAD_PTR_CAST_ALIGN */
 422         w3 =  LOAD_BIG_64(blk + 8 * 3);
 423         SHA512ROUND(f, g, h, a, b, c, d, e, 3, w3);
 424         /* LINTED E_BAD_PTR_CAST_ALIGN */
 425         w4 =  LOAD_BIG_64(blk + 8 * 4);
 426         SHA512ROUND(e, f, g, h, a, b, c, d, 4, w4);
 427         /* LINTED E_BAD_PTR_CAST_ALIGN */
 428         w5 =  LOAD_BIG_64(blk + 8 * 5);
 429         SHA512ROUND(d, e, f, g, h, a, b, c, 5, w5);
 430         /* LINTED E_BAD_PTR_CAST_ALIGN */
 431         w6 =  LOAD_BIG_64(blk + 8 * 6);
 432         SHA512ROUND(c, d, e, f, g, h, a, b, 6, w6);
 433         /* LINTED E_BAD_PTR_CAST_ALIGN */
 434         w7 =  LOAD_BIG_64(blk + 8 * 7);
 435         SHA512ROUND(b, c, d, e, f, g, h, a, 7, w7);
 436         /* LINTED E_BAD_PTR_CAST_ALIGN */
 437         w8 =  LOAD_BIG_64(blk + 8 * 8);
 438         SHA512ROUND(a, b, c, d, e, f, g, h, 8, w8);
 439         /* LINTED E_BAD_PTR_CAST_ALIGN */
 440         w9 =  LOAD_BIG_64(blk + 8 * 9);
 441         SHA512ROUND(h, a, b, c, d, e, f, g, 9, w9);
 442         /* LINTED E_BAD_PTR_CAST_ALIGN */
 443         w10 =  LOAD_BIG_64(blk + 8 * 10);
 444         SHA512ROUND(g, h, a, b, c, d, e, f, 10, w10);
 445         /* LINTED E_BAD_PTR_CAST_ALIGN */
 446         w11 =  LOAD_BIG_64(blk + 8 * 11);
 447         SHA512ROUND(f, g, h, a, b, c, d, e, 11, w11);
 448         /* LINTED E_BAD_PTR_CAST_ALIGN */
 449         w12 =  LOAD_BIG_64(blk + 8 * 12);
 450         SHA512ROUND(e, f, g, h, a, b, c, d, 12, w12);
 451         /* LINTED E_BAD_PTR_CAST_ALIGN */
 452         w13 =  LOAD_BIG_64(blk + 8 * 13);
 453         SHA512ROUND(d, e, f, g, h, a, b, c, 13, w13);
 454         /* LINTED E_BAD_PTR_CAST_ALIGN */
 455         w14 =  LOAD_BIG_64(blk + 8 * 14);
 456         SHA512ROUND(c, d, e, f, g, h, a, b, 14, w14);
 457         /* LINTED E_BAD_PTR_CAST_ALIGN */
 458         w15 =  LOAD_BIG_64(blk + 8 * 15);
 459         SHA512ROUND(b, c, d, e, f, g, h, a, 15, w15);
 460
             /*
              * Rounds 16-31.  From here on each schedule word is recomputed
              * in place, with all indices taken modulo 16:
              *   W[t] = SIGMA1(W[t-2]) + W[t-7] + SIGMA0(W[t-15]) + W[t-16]
              */
 461         w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
 462         SHA512ROUND(a, b, c, d, e, f, g, h, 16, w0);
 463         w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
 464         SHA512ROUND(h, a, b, c, d, e, f, g, 17, w1);
 465         w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
 466         SHA512ROUND(g, h, a, b, c, d, e, f, 18, w2);
 467         w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
 468         SHA512ROUND(f, g, h, a, b, c, d, e, 19, w3);
 469         w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
 470         SHA512ROUND(e, f, g, h, a, b, c, d, 20, w4);
 471         w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
 472         SHA512ROUND(d, e, f, g, h, a, b, c, 21, w5);
 473         w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
 474         SHA512ROUND(c, d, e, f, g, h, a, b, 22, w6);
 475         w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
 476         SHA512ROUND(b, c, d, e, f, g, h, a, 23, w7);
 477         w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
 478         SHA512ROUND(a, b, c, d, e, f, g, h, 24, w8);
 479         w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
 480         SHA512ROUND(h, a, b, c, d, e, f, g, 25, w9);
 481         w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
 482         SHA512ROUND(g, h, a, b, c, d, e, f, 26, w10);
 483         w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
 484         SHA512ROUND(f, g, h, a, b, c, d, e, 27, w11);
 485         w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
 486         SHA512ROUND(e, f, g, h, a, b, c, d, 28, w12);
 487         w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
 488         SHA512ROUND(d, e, f, g, h, a, b, c, 29, w13);
 489         w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
 490         SHA512ROUND(c, d, e, f, g, h, a, b, 30, w14);
 491         w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
 492         SHA512ROUND(b, c, d, e, f, g, h, a, 31, w15);
 493
             /* Rounds 32-47: same schedule recurrence. */
 494         w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
 495         SHA512ROUND(a, b, c, d, e, f, g, h, 32, w0);
 496         w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
 497         SHA512ROUND(h, a, b, c, d, e, f, g, 33, w1);
 498         w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
 499         SHA512ROUND(g, h, a, b, c, d, e, f, 34, w2);
 500         w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
 501         SHA512ROUND(f, g, h, a, b, c, d, e, 35, w3);
 502         w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
 503         SHA512ROUND(e, f, g, h, a, b, c, d, 36, w4);
 504         w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
 505         SHA512ROUND(d, e, f, g, h, a, b, c, 37, w5);
 506         w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
 507         SHA512ROUND(c, d, e, f, g, h, a, b, 38, w6);
 508         w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
 509         SHA512ROUND(b, c, d, e, f, g, h, a, 39, w7);
 510         w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
 511         SHA512ROUND(a, b, c, d, e, f, g, h, 40, w8);
 512         w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
 513         SHA512ROUND(h, a, b, c, d, e, f, g, 41, w9);
 514         w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
 515         SHA512ROUND(g, h, a, b, c, d, e, f, 42, w10);
 516         w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
 517         SHA512ROUND(f, g, h, a, b, c, d, e, 43, w11);
 518         w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
 519         SHA512ROUND(e, f, g, h, a, b, c, d, 44, w12);
 520         w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
 521         SHA512ROUND(d, e, f, g, h, a, b, c, 45, w13);
 522         w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
 523         SHA512ROUND(c, d, e, f, g, h, a, b, 46, w14);
 524         w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
 525         SHA512ROUND(b, c, d, e, f, g, h, a, 47, w15);
 526
             /* Rounds 48-63: same schedule recurrence. */
 527         w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
 528         SHA512ROUND(a, b, c, d, e, f, g, h, 48, w0);
 529         w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
 530         SHA512ROUND(h, a, b, c, d, e, f, g, 49, w1);
 531         w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
 532         SHA512ROUND(g, h, a, b, c, d, e, f, 50, w2);
 533         w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
 534         SHA512ROUND(f, g, h, a, b, c, d, e, 51, w3);
 535         w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
 536         SHA512ROUND(e, f, g, h, a, b, c, d, 52, w4);
 537         w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
 538         SHA512ROUND(d, e, f, g, h, a, b, c, 53, w5);
 539         w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
 540         SHA512ROUND(c, d, e, f, g, h, a, b, 54, w6);
 541         w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
 542         SHA512ROUND(b, c, d, e, f, g, h, a, 55, w7);
 543         w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
 544         SHA512ROUND(a, b, c, d, e, f, g, h, 56, w8);
 545         w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
 546         SHA512ROUND(h, a, b, c, d, e, f, g, 57, w9);
 547         w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
 548         SHA512ROUND(g, h, a, b, c, d, e, f, 58, w10);
 549         w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
 550         SHA512ROUND(f, g, h, a, b, c, d, e, 59, w11);
 551         w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
 552         SHA512ROUND(e, f, g, h, a, b, c, d, 60, w12);
 553         w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
 554         SHA512ROUND(d, e, f, g, h, a, b, c, 61, w13);
 555         w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
 556         SHA512ROUND(c, d, e, f, g, h, a, b, 62, w14);
 557         w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
 558         SHA512ROUND(b, c, d, e, f, g, h, a, 63, w15);
 559
             /* Rounds 64-79: same schedule recurrence. */
 560         w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
 561         SHA512ROUND(a, b, c, d, e, f, g, h, 64, w0);
 562         w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
 563         SHA512ROUND(h, a, b, c, d, e, f, g, 65, w1);
 564         w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
 565         SHA512ROUND(g, h, a, b, c, d, e, f, 66, w2);
 566         w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
 567         SHA512ROUND(f, g, h, a, b, c, d, e, 67, w3);
 568         w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
 569         SHA512ROUND(e, f, g, h, a, b, c, d, 68, w4);
 570         w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
 571         SHA512ROUND(d, e, f, g, h, a, b, c, 69, w5);
 572         w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
 573         SHA512ROUND(c, d, e, f, g, h, a, b, 70, w6);
 574         w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
 575         SHA512ROUND(b, c, d, e, f, g, h, a, 71, w7);
 576         w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
 577         SHA512ROUND(a, b, c, d, e, f, g, h, 72, w8);
 578         w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
 579         SHA512ROUND(h, a, b, c, d, e, f, g, 73, w9);
 580         w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
 581         SHA512ROUND(g, h, a, b, c, d, e, f, 74, w10);
 582         w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
 583         SHA512ROUND(f, g, h, a, b, c, d, e, 75, w11);
 584         w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
 585         SHA512ROUND(e, f, g, h, a, b, c, d, 76, w12);
 586         w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
 587         SHA512ROUND(d, e, f, g, h, a, b, c, 77, w13);
 588         w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
 589         SHA512ROUND(c, d, e, f, g, h, a, b, 78, w14);
 590         w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
 591         SHA512ROUND(b, c, d, e, f, g, h, a, 79, w15);
 592
             /* Fold this block's result into the running hash state. */
 593         ctx->state.s64[0] += a;
 594         ctx->state.s64[1] += b;
 595         ctx->state.s64[2] += c;
 596         ctx->state.s64[3] += d;
 597         ctx->state.s64[4] += e;
 598         ctx->state.s64[5] += f;
 599         ctx->state.s64[6] += g;
 600         ctx->state.s64[7] += h;
 601
 602 }
 603 #endif  /* !__amd64 */
 604
 605
 606 /*
 607  * Encode()
 608  *
 609  * purpose: to convert a list of numbers from little endian to big endian
 610  *   input: uint8_t *   : place to store the converted big endian numbers
 611  *          uint32_t *  : place to get numbers to convert from
 612  *          size_t      : the length of the input in bytes
 613  *  output: void
 614  */
 615
 616 static void
 617 Encode(uint8_t *_RESTRICT_KYWD output, uint32_t *_RESTRICT_KYWD input,
 618     size_t len)
 619 {
 620         size_t          i, j;
 621
 622 #if     defined(__sparc)
 623         if (IS_P2ALIGNED(output, sizeof (uint32_t))) {
 624                 for (i = 0, j = 0; j < len; i++, j += 4) {
 625                         /* LINTED E_BAD_PTR_CAST_ALIGN */
 626                         *((uint32_t *)(output + j)) = input[i];
 627                 }
 628         } else {
 629 #endif  /* little endian -- will work on big endian, but slowly */
 630                 for (i = 0, j = 0; j < len; i++, j += 4) {
 631                         output[j]       = (input[i] >> 24) & 0xff;
 632                         output[j + 1]   = (input[i] >> 16) & 0xff;
 633                         output[j + 2]   = (input[i] >>  8) & 0xff;
 634                         output[j + 3]   = input[i] & 0xff;
 635                 }
 636 #if     defined(__sparc)
 637         }
 638 #endif
 639 }
 640
 641 static void
 642 Encode64(uint8_t *_RESTRICT_KYWD output, uint64_t *_RESTRICT_KYWD input,
 643     size_t len)
 644 {
 645         size_t          i, j;
 646
 647 #if     defined(__sparc)
 648         if (IS_P2ALIGNED(output, sizeof (uint64_t))) {
 649                 for (i = 0, j = 0; j < len; i++, j += 8) {
 650                         /* LINTED E_BAD_PTR_CAST_ALIGN */
 651                         *((uint64_t *)(output + j)) = input[i];
 652                 }
 653         } else {
 654 #endif  /* little endian -- will work on big endian, but slowly */
 655                 for (i = 0, j = 0; j < len; i++, j += 8) {
 656
 657                         output[j]       = (input[i] >> 56) & 0xff;
 658                         output[j + 1]   = (input[i] >> 48) & 0xff;
 659                         output[j + 2]   = (input[i] >> 40) & 0xff;
 660                         output[j + 3]   = (input[i] >> 32) & 0xff;
 661                         output[j + 4]   = (input[i] >> 24) & 0xff;
 662                         output[j + 5]   = (input[i] >> 16) & 0xff;
 663                         output[j + 6]   = (input[i] >>  8) & 0xff;
 664                         output[j + 7]   = input[i] & 0xff;
 665                 }
 666 #if     defined(__sparc)
 667         }
 668 #endif
 669 }
 670
 671
 672 void
 673 SHA2Init(uint64_t mech, SHA2_CTX *ctx)
 674 {
 675
 676         switch (mech) {
 677         case SHA256_MECH_INFO_TYPE:
 678         case SHA256_HMAC_MECH_INFO_TYPE:
 679         case SHA256_HMAC_GEN_MECH_INFO_TYPE:
 680                 ctx->state.s32[0] = 0x6a09e667U;
 681                 ctx->state.s32[1] = 0xbb67ae85U;
 682                 ctx->state.s32[2] = 0x3c6ef372U;
 683                 ctx->state.s32[3] = 0xa54ff53aU;
 684                 ctx->state.s32[4] = 0x510e527fU;
 685                 ctx->state.s32[5] = 0x9b05688cU;
 686                 ctx->state.s32[6] = 0x1f83d9abU;
 687                 ctx->state.s32[7] = 0x5be0cd19U;
 688                 break;
 689         case SHA384_MECH_INFO_TYPE:
 690         case SHA384_HMAC_MECH_INFO_TYPE:
 691         case SHA384_HMAC_GEN_MECH_INFO_TYPE:
 692                 ctx->state.s64[0] = 0xcbbb9d5dc1059ed8ULL;
 693                 ctx->state.s64[1] = 0x629a292a367cd507ULL;
 694                 ctx->state.s64[2] = 0x9159015a3070dd17ULL;
 695                 ctx->state.s64[3] = 0x152fecd8f70e5939ULL;
 696                 ctx->state.s64[4] = 0x67332667ffc00b31ULL;
 697                 ctx->state.s64[5] = 0x8eb44a8768581511ULL;
 698                 ctx->state.s64[6] = 0xdb0c2e0d64f98fa7ULL;
 699                 ctx->state.s64[7] = 0x47b5481dbefa4fa4ULL;
 700                 break;
 701         case SHA512_MECH_INFO_TYPE:
 702         case SHA512_HMAC_MECH_INFO_TYPE:
 703         case SHA512_HMAC_GEN_MECH_INFO_TYPE:
 704                 ctx->state.s64[0] = 0x6a09e667f3bcc908ULL;
 705                 ctx->state.s64[1] = 0xbb67ae8584caa73bULL;
 706                 ctx->state.s64[2] = 0x3c6ef372fe94f82bULL;
 707                 ctx->state.s64[3] = 0xa54ff53a5f1d36f1ULL;
 708                 ctx->state.s64[4] = 0x510e527fade682d1ULL;
 709                 ctx->state.s64[5] = 0x9b05688c2b3e6c1fULL;
 710                 ctx->state.s64[6] = 0x1f83d9abfb41bd6bULL;
 711                 ctx->state.s64[7] = 0x5be0cd19137e2179ULL;
 712                 break;
 713         case SHA512_224_MECH_INFO_TYPE:
 714                 ctx->state.s64[0] = 0x8C3D37C819544DA2ULL;
 715                 ctx->state.s64[1] = 0x73E1996689DCD4D6ULL;
 716                 ctx->state.s64[2] = 0x1DFAB7AE32FF9C82ULL;
 717                 ctx->state.s64[3] = 0x679DD514582F9FCFULL;
 718                 ctx->state.s64[4] = 0x0F6D2B697BD44DA8ULL;
 719                 ctx->state.s64[5] = 0x77E36F7304C48942ULL;
 720                 ctx->state.s64[6] = 0x3F9D85A86A1D36C8ULL;
 721                 ctx->state.s64[7] = 0x1112E6AD91D692A1ULL;
 722                 break;
 723         case SHA512_256_MECH_INFO_TYPE:
 724                 ctx->state.s64[0] = 0x22312194FC2BF72CULL;
 725                 ctx->state.s64[1] = 0x9F555FA3C84C64C2ULL;
 726                 ctx->state.s64[2] = 0x2393B86B6F53B151ULL;
 727                 ctx->state.s64[3] = 0x963877195940EABDULL;
 728                 ctx->state.s64[4] = 0x96283EE2A88EFFE3ULL;
 729                 ctx->state.s64[5] = 0xBE5E1E2553863992ULL;
 730                 ctx->state.s64[6] = 0x2B0199FC2C85B8AAULL;
 731                 ctx->state.s64[7] = 0x0EB72DDC81C52CA2ULL;
 732                 break;
 733 #ifdef _KERNEL
 734         default:
 735                 cmn_err(CE_PANIC,
 736                     "sha2_init: failed to find a supported algorithm: 0x%x",
 737                     (uint32_t)mech);
 738
 739 #endif /* _KERNEL */
 740         }
 741
 742         ctx->algotype = (uint32_t)mech;
 743         ctx->count.c64[0] = ctx->count.c64[1] = 0;
 744 }
 745
#ifndef _KERNEL

/*
 * Convenience initializers, compiled only when _KERNEL is not defined.
 * Each one forwards its context to SHA2Init() with the SHA256, SHA384 or
 * SHA512 selector respectively.
 *
 * NOTE(review): the selectors and the SHA256_CTX / SHA384_CTX / SHA512_CTX
 * types are declared outside this file; presumably the context types are
 * aliases of SHA2_CTX -- confirm against sys/sha2.h.
 */

/* disabled: #pragma inline(SHA256Init, SHA384Init, SHA512Init) */
void
SHA256Init(SHA256_CTX *ctx)
{
        SHA2Init(SHA256, ctx);
}

void
SHA384Init(SHA384_CTX *ctx)
{
        SHA2Init(SHA384, ctx);
}

void
SHA512Init(SHA512_CTX *ctx)
{
        SHA2Init(SHA512, ctx);
}

#endif /* !_KERNEL */
 768
 769 /*
 770  * SHA2Update()
 771  *
 772  * purpose: continues an sha2 digest operation, using the message block
 773  *          to update the context.
 774  *   input: SHA2_CTX *  : the context to update
 775  *          void *      : the message block
 776  *          size_t      : the length of the message block, in bytes
 777  *  output: void
 778  */
 779
 780 void
 781 SHA2Update(SHA2_CTX *ctx, const void *inptr, size_t input_len)
 782 {
 783         uint32_t        i, buf_index, buf_len, buf_limit;
 784         const uint8_t   *input = inptr;
 785         uint32_t        algotype = ctx->algotype;
 786 #if defined(__amd64)
 787         uint32_t        block_count;
 788 #endif  /* !__amd64 */
 789
 790
 791         /* check for noop */
 792         if (input_len == 0)
 793                 return;
 794
 795         if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
 796                 buf_limit = 64;
 797
 798                 /* compute number of bytes mod 64 */
 799                 buf_index = (ctx->count.c32[1] >> 3) & 0x3F;
 800
 801                 /* update number of bits */
 802                 if ((ctx->count.c32[1] += (input_len << 3)) < (input_len << 3))
 803                         ctx->count.c32[0]++;
 804
 805                 ctx->count.c32[0] += (input_len >> 29);
 806
 807         } else {
 808                 buf_limit = 128;
 809
 810                 /* compute number of bytes mod 128 */
 811                 buf_index = (ctx->count.c64[1] >> 3) & 0x7F;
 812
 813                 /* update number of bits */
 814                 if ((ctx->count.c64[1] += (input_len << 3)) < (input_len << 3))
 815                         ctx->count.c64[0]++;
 816
 817                 ctx->count.c64[0] += (input_len >> 29);
 818         }
 819
 820         buf_len = buf_limit - buf_index;
 821
 822         /* transform as many times as possible */
 823         i = 0;
 824         if (input_len >= buf_len) {
 825
 826                 /*
 827                  * general optimization:
 828                  *
 829                  * only do initial bcopy() and SHA2Transform() if
 830                  * buf_index != 0.  if buf_index == 0, we're just
 831                  * wasting our time doing the bcopy() since there
 832                  * wasn't any data left over from a previous call to
 833                  * SHA2Update().
 834                  */
 835                 if (buf_index) {
 836                         bcopy(input, &ctx->buf_un.buf8[buf_index], buf_len);
 837                         if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE)
 838                                 SHA256Transform(ctx, ctx->buf_un.buf8);
 839                         else
 840                                 SHA512Transform(ctx, ctx->buf_un.buf8);
 841
 842                         i = buf_len;
 843                 }
 844
 845 #if !defined(__amd64)
 846                 if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
 847                         for (; i + buf_limit - 1 < input_len; i += buf_limit) {
 848                                 SHA256Transform(ctx, &input[i]);
 849                         }
 850                 } else {
 851                         for (; i + buf_limit - 1 < input_len; i += buf_limit) {
 852                                 SHA512Transform(ctx, &input[i]);
 853                         }
 854                 }
 855
 856 #else
 857                 if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
 858                         block_count = (input_len - i) >> 6;
 859                         if (block_count > 0) {
 860                                 SHA256TransformBlocks(ctx, &input[i],
 861                                     block_count);
 862                                 i += block_count << 6;
 863                         }
 864                 } else {
 865                         block_count = (input_len - i) >> 7;
 866                         if (block_count > 0) {
 867                                 SHA512TransformBlocks(ctx, &input[i],
 868                                     block_count);
 869                                 i += block_count << 7;
 870                         }
 871                 }
 872 #endif  /* !__amd64 */
 873
 874                 /*
 875                  * general optimization:
 876                  *
 877                  * if i and input_len are the same, return now instead
 878                  * of calling bcopy(), since the bcopy() in this case
 879                  * will be an expensive noop.
 880                  */
 881
 882                 if (input_len == i)
 883                         return;
 884
 885                 buf_index = 0;
 886         }
 887
 888         /* buffer remaining input */
 889         bcopy(&input[i], &ctx->buf_un.buf8[buf_index], input_len - i);
 890 }
 891
 892
 893 /*
 894  * SHA2Final()
 895  *
 896  * purpose: ends an sha2 digest operation, finalizing the message digest and
 897  *          zeroing the context.
 898  *   input: uchar_t *   : a buffer to store the digest
 899  *                      : The function actually uses void* because many
 900  *                      : callers pass things other than uchar_t here.
 901  *          SHA2_CTX *  : the context to finalize, save, and zero
 902  *  output: void
 903  */
 904
 905 void
 906 SHA2Final(void *digest, SHA2_CTX *ctx)
 907 {
 908         uint8_t         bitcount_be[sizeof (ctx->count.c32)];
 909         uint8_t         bitcount_be64[sizeof (ctx->count.c64)];
 910         uint32_t        index;
 911         uint32_t        algotype = ctx->algotype;
 912
 913         if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
 914                 index  = (ctx->count.c32[1] >> 3) & 0x3f;
 915                 Encode(bitcount_be, ctx->count.c32, sizeof (bitcount_be));
 916                 SHA2Update(ctx, PADDING, ((index < 56) ? 56 : 120) - index);
 917                 SHA2Update(ctx, bitcount_be, sizeof (bitcount_be));
 918                 Encode(digest, ctx->state.s32, sizeof (ctx->state.s32));
 919         } else {
 920                 index  = (ctx->count.c64[1] >> 3) & 0x7f;
 921                 Encode64(bitcount_be64, ctx->count.c64,
 922                     sizeof (bitcount_be64));
 923                 SHA2Update(ctx, PADDING, ((index < 112) ? 112 : 240) - index);
 924                 SHA2Update(ctx, bitcount_be64, sizeof (bitcount_be64));
 925                 if (algotype <= SHA384_HMAC_GEN_MECH_INFO_TYPE) {
 926                         ctx->state.s64[6] = ctx->state.s64[7] = 0;
 927                         Encode64(digest, ctx->state.s64,
 928                             sizeof (uint64_t) * 6);
 929                 } else if (algotype == SHA512_224_MECH_INFO_TYPE) {
 930                         uint8_t last[sizeof (uint64_t)];
 931                         /*
 932                          * Since SHA-512/224 doesn't align well to 64-bit
 933                          * boundaries, we must do the encoding in three steps:
 934                          * 1) encode the three 64-bit words that fit neatly
 935                          * 2) encode the last 64-bit word to a temp buffer
 936                          * 3) chop out the lower 32-bits from the temp buffer
 937                          *    and append them to the digest
 938                          */
 939                         Encode64(digest, ctx->state.s64, sizeof (uint64_t) * 3);
 940                         Encode64(last, &ctx->state.s64[3], sizeof (uint64_t));
 941                         bcopy(last, (uint8_t *)digest + 24, 4);
 942                 } else if (algotype == SHA512_256_MECH_INFO_TYPE) {
 943                         Encode64(digest, ctx->state.s64, sizeof (uint64_t) * 4);
 944                 } else {
 945                         Encode64(digest, ctx->state.s64,
 946                             sizeof (ctx->state.s64));
 947                 }
 948         }
 949
 950         /* zeroize sensitive information */
 951         bzero(ctx, sizeof (*ctx));
 952 }
 953
 954
 955
#ifdef _KERNEL
/*
 * Kernel builds only: hand the three public entry points of this file to
 * EXPORT_SYMBOL().
 * NOTE(review): EXPORT_SYMBOL is defined outside this file; presumably it
 * makes the symbols usable from other kernel modules -- confirm.
 */
EXPORT_SYMBOL(SHA2Init);
EXPORT_SYMBOL(SHA2Update);
EXPORT_SYMBOL(SHA2Final);
#endif /* _KERNEL */