crypto/gf128mul.c

   1 /* gf128mul.c - GF(2^128) multiplication functions
   2  *
   3  * Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.
   4  * Copyright (c) 2006, Rik Snel <rsnel@cube.dyndns.org>
   5  *
   6  * Based on Dr Brian Gladman's (GPL'd) work published at
   7  * http://gladman.plushost.co.uk/oldsite/cryptography_technology/index.php
   8  * See the original copyright notice below.
   9  *
  10  * This program is free software; you can redistribute it and/or modify it
  11  * under the terms of the GNU General Public License as published by the Free
  12  * Software Foundation; either version 2 of the License, or (at your option)
  13  * any later version.
  14  */
  15
  16 /*
  17  ---------------------------------------------------------------------------
  18  Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.   All rights reserved.
  19
  20  LICENSE TERMS
  21
  22  The free distribution and use of this software in both source and binary
  23  form is allowed (with or without changes) provided that:
  24
  25    1. distributions of this source code include the above copyright
  26       notice, this list of conditions and the following disclaimer;
  27
  28    2. distributions in binary form include the above copyright
  29       notice, this list of conditions and the following disclaimer
  30       in the documentation and/or other associated materials;
  31
  32    3. the copyright holder's name is not used to endorse products
  33       built using this software without specific written permission.
  34
  35  ALTERNATIVELY, provided that this notice is retained in full, this product
  36  may be distributed under the terms of the GNU General Public License (GPL),
  37  in which case the provisions of the GPL apply INSTEAD OF those given above.
  38
  39  DISCLAIMER
  40
  41  This software is provided 'as is' with no explicit or implied warranties
  42  in respect of its properties, including, but not limited to, correctness
  43  and/or fitness for purpose.
  44  ---------------------------------------------------------------------------
  45  Issue 31/01/2006
  46
  47  This file provides fast multiplication in GF(2^128) as required by several
  48  cryptographic authentication modes
  49 */
  50
  51 #include <crypto/gf128mul.h>
  52 #include <linux/kernel.h>
  53 #include <linux/module.h>
  54 #include <linux/slab.h>
  55
  56 #define gf128mul_dat(q) { \
  57         q(0x00), q(0x01), q(0x02), q(0x03), q(0x04), q(0x05), q(0x06), q(0x07),\
  58         q(0x08), q(0x09), q(0x0a), q(0x0b), q(0x0c), q(0x0d), q(0x0e), q(0x0f),\
  59         q(0x10), q(0x11), q(0x12), q(0x13), q(0x14), q(0x15), q(0x16), q(0x17),\
  60         q(0x18), q(0x19), q(0x1a), q(0x1b), q(0x1c), q(0x1d), q(0x1e), q(0x1f),\
  61         q(0x20), q(0x21), q(0x22), q(0x23), q(0x24), q(0x25), q(0x26), q(0x27),\
  62         q(0x28), q(0x29), q(0x2a), q(0x2b), q(0x2c), q(0x2d), q(0x2e), q(0x2f),\
  63         q(0x30), q(0x31), q(0x32), q(0x33), q(0x34), q(0x35), q(0x36), q(0x37),\
  64         q(0x38), q(0x39), q(0x3a), q(0x3b), q(0x3c), q(0x3d), q(0x3e), q(0x3f),\
  65         q(0x40), q(0x41), q(0x42), q(0x43), q(0x44), q(0x45), q(0x46), q(0x47),\
  66         q(0x48), q(0x49), q(0x4a), q(0x4b), q(0x4c), q(0x4d), q(0x4e), q(0x4f),\
  67         q(0x50), q(0x51), q(0x52), q(0x53), q(0x54), q(0x55), q(0x56), q(0x57),\
  68         q(0x58), q(0x59), q(0x5a), q(0x5b), q(0x5c), q(0x5d), q(0x5e), q(0x5f),\
  69         q(0x60), q(0x61), q(0x62), q(0x63), q(0x64), q(0x65), q(0x66), q(0x67),\
  70         q(0x68), q(0x69), q(0x6a), q(0x6b), q(0x6c), q(0x6d), q(0x6e), q(0x6f),\
  71         q(0x70), q(0x71), q(0x72), q(0x73), q(0x74), q(0x75), q(0x76), q(0x77),\
  72         q(0x78), q(0x79), q(0x7a), q(0x7b), q(0x7c), q(0x7d), q(0x7e), q(0x7f),\
  73         q(0x80), q(0x81), q(0x82), q(0x83), q(0x84), q(0x85), q(0x86), q(0x87),\
  74         q(0x88), q(0x89), q(0x8a), q(0x8b), q(0x8c), q(0x8d), q(0x8e), q(0x8f),\
  75         q(0x90), q(0x91), q(0x92), q(0x93), q(0x94), q(0x95), q(0x96), q(0x97),\
  76         q(0x98), q(0x99), q(0x9a), q(0x9b), q(0x9c), q(0x9d), q(0x9e), q(0x9f),\
  77         q(0xa0), q(0xa1), q(0xa2), q(0xa3), q(0xa4), q(0xa5), q(0xa6), q(0xa7),\
  78         q(0xa8), q(0xa9), q(0xaa), q(0xab), q(0xac), q(0xad), q(0xae), q(0xaf),\
  79         q(0xb0), q(0xb1), q(0xb2), q(0xb3), q(0xb4), q(0xb5), q(0xb6), q(0xb7),\
  80         q(0xb8), q(0xb9), q(0xba), q(0xbb), q(0xbc), q(0xbd), q(0xbe), q(0xbf),\
  81         q(0xc0), q(0xc1), q(0xc2), q(0xc3), q(0xc4), q(0xc5), q(0xc6), q(0xc7),\
  82         q(0xc8), q(0xc9), q(0xca), q(0xcb), q(0xcc), q(0xcd), q(0xce), q(0xcf),\
  83         q(0xd0), q(0xd1), q(0xd2), q(0xd3), q(0xd4), q(0xd5), q(0xd6), q(0xd7),\
  84         q(0xd8), q(0xd9), q(0xda), q(0xdb), q(0xdc), q(0xdd), q(0xde), q(0xdf),\
  85         q(0xe0), q(0xe1), q(0xe2), q(0xe3), q(0xe4), q(0xe5), q(0xe6), q(0xe7),\
  86         q(0xe8), q(0xe9), q(0xea), q(0xeb), q(0xec), q(0xed), q(0xee), q(0xef),\
  87         q(0xf0), q(0xf1), q(0xf2), q(0xf3), q(0xf4), q(0xf5), q(0xf6), q(0xf7),\
  88         q(0xf8), q(0xf9), q(0xfa), q(0xfb), q(0xfc), q(0xfd), q(0xfe), q(0xff) \
  89 }
  90
  91 /*      Given the value i in 0..255 as the byte overflow when a field element
  92     in GHASH is multiplied by x^8, this function will return the values that
  93     are generated in the lo 16-bit word of the field value by applying the
  94     modular polynomial. The values lo_byte and hi_byte are returned via the
  95     macro xp_fun(lo_byte, hi_byte) so that the values can be assembled into
  96     memory as required by a suitable definition of this macro operating on
  97     the table above
  98 */
  99
 100 #define xda_bbe(i) ( \
 101         (i & 0x80 ? 0x4380 : 0) ^ (i & 0x40 ? 0x21c0 : 0) ^ \
 102         (i & 0x20 ? 0x10e0 : 0) ^ (i & 0x10 ? 0x0870 : 0) ^ \
 103         (i & 0x08 ? 0x0438 : 0) ^ (i & 0x04 ? 0x021c : 0) ^ \
 104         (i & 0x02 ? 0x010e : 0) ^ (i & 0x01 ? 0x0087 : 0) \
 105 )
 106
 107 #define xda_lle(i) ( \
 108         (i & 0x80 ? 0xe100 : 0) ^ (i & 0x40 ? 0x7080 : 0) ^ \
 109         (i & 0x20 ? 0x3840 : 0) ^ (i & 0x10 ? 0x1c20 : 0) ^ \
 110         (i & 0x08 ? 0x0e10 : 0) ^ (i & 0x04 ? 0x0708 : 0) ^ \
 111         (i & 0x02 ? 0x0384 : 0) ^ (i & 0x01 ? 0x01c2 : 0) \
 112 )
 113
 114 static const u16 gf128mul_table_lle[256] = gf128mul_dat(xda_lle);
 115 static const u16 gf128mul_table_bbe[256] = gf128mul_dat(xda_bbe);
 116
 117 /*
 118  * The following functions multiply a field element by x or by x^8 in
 119  * the polynomial field representation.  They use 64-bit word operations
 120  * to gain speed but compensate for machine endianness and hence work
 121  * correctly on both styles of machine.
 122  */
 123
 124 static void gf128mul_x_lle(be128 *r, const be128 *x)
 125 {
 126         u64 a = be64_to_cpu(x->a);
 127         u64 b = be64_to_cpu(x->b);
 128         u64 _tt = gf128mul_table_lle[(b << 7) & 0xff];
 129
 130         r->b = cpu_to_be64((b >> 1) | (a << 63));
 131         r->a = cpu_to_be64((a >> 1) ^ (_tt << 48));
 132 }
 133
 134 static void gf128mul_x_bbe(be128 *r, const be128 *x)
 135 {
 136         u64 a = be64_to_cpu(x->a);
 137         u64 b = be64_to_cpu(x->b);
 138         u64 _tt = gf128mul_table_bbe[a >> 63];
 139
 140         r->a = cpu_to_be64((a << 1) | (b >> 63));
 141         r->b = cpu_to_be64((b << 1) ^ _tt);
 142 }
 143
 144 void gf128mul_x_ble(be128 *r, const be128 *x)
 145 {
 146         u64 a = le64_to_cpu(x->a);
 147         u64 b = le64_to_cpu(x->b);
 148         u64 _tt = gf128mul_table_bbe[b >> 63];
 149
 150         r->a = cpu_to_le64((a << 1) ^ _tt);
 151         r->b = cpu_to_le64((b << 1) | (a >> 63));
 152 }
 153 EXPORT_SYMBOL(gf128mul_x_ble);
 154
 155 static void gf128mul_x8_lle(be128 *x)
 156 {
 157         u64 a = be64_to_cpu(x->a);
 158         u64 b = be64_to_cpu(x->b);
 159         u64 _tt = gf128mul_table_lle[b & 0xff];
 160
 161         x->b = cpu_to_be64((b >> 8) | (a << 56));
 162         x->a = cpu_to_be64((a >> 8) ^ (_tt << 48));
 163 }
 164
 165 static void gf128mul_x8_bbe(be128 *x)
 166 {
 167         u64 a = be64_to_cpu(x->a);
 168         u64 b = be64_to_cpu(x->b);
 169         u64 _tt = gf128mul_table_bbe[a >> 56];
 170
 171         x->a = cpu_to_be64((a << 8) | (b >> 56));
 172         x->b = cpu_to_be64((b << 8) ^ _tt);
 173 }
 174
 175 void gf128mul_lle(be128 *r, const be128 *b)
 176 {
 177         be128 p[8];
 178         int i;
 179
 180         p[0] = *r;
 181         for (i = 0; i < 7; ++i)
 182                 gf128mul_x_lle(&p[i + 1], &p[i]);
 183
 184         memset(r, 0, sizeof(*r));
 185         for (i = 0;;) {
 186                 u8 ch = ((u8 *)b)[15 - i];
 187
 188                 if (ch & 0x80)
 189                         be128_xor(r, r, &p[0]);
 190                 if (ch & 0x40)
 191                         be128_xor(r, r, &p[1]);
 192                 if (ch & 0x20)
 193                         be128_xor(r, r, &p[2]);
 194                 if (ch & 0x10)
 195                         be128_xor(r, r, &p[3]);
 196                 if (ch & 0x08)
 197                         be128_xor(r, r, &p[4]);
 198                 if (ch & 0x04)
 199                         be128_xor(r, r, &p[5]);
 200                 if (ch & 0x02)
 201                         be128_xor(r, r, &p[6]);
 202                 if (ch & 0x01)
 203                         be128_xor(r, r, &p[7]);
 204
 205                 if (++i >= 16)
 206                         break;
 207
 208                 gf128mul_x8_lle(r);
 209         }
 210 }
 211 EXPORT_SYMBOL(gf128mul_lle);
 212
 213 void gf128mul_bbe(be128 *r, const be128 *b)
 214 {
 215         be128 p[8];
 216         int i;
 217
 218         p[0] = *r;
 219         for (i = 0; i < 7; ++i)
 220                 gf128mul_x_bbe(&p[i + 1], &p[i]);
 221
 222         memset(r, 0, sizeof(*r));
 223         for (i = 0;;) {
 224                 u8 ch = ((u8 *)b)[i];
 225
 226                 if (ch & 0x80)
 227                         be128_xor(r, r, &p[7]);
 228                 if (ch & 0x40)
 229                         be128_xor(r, r, &p[6]);
 230                 if (ch & 0x20)
 231                         be128_xor(r, r, &p[5]);
 232                 if (ch & 0x10)
 233                         be128_xor(r, r, &p[4]);
 234                 if (ch & 0x08)
 235                         be128_xor(r, r, &p[3]);
 236                 if (ch & 0x04)
 237                         be128_xor(r, r, &p[2]);
 238                 if (ch & 0x02)
 239                         be128_xor(r, r, &p[1]);
 240                 if (ch & 0x01)
 241                         be128_xor(r, r, &p[0]);
 242
 243                 if (++i >= 16)
 244                         break;
 245
 246                 gf128mul_x8_bbe(r);
 247         }
 248 }
 249 EXPORT_SYMBOL(gf128mul_bbe);
 250
 251 /*      This version uses 64k bytes of table space.
 252     A 16 byte buffer has to be multiplied by a 16 byte key
 253     value in GF(2^128).  If we consider a GF(2^128) value in
 254     the buffer's lowest byte, we can construct a table of
 255     the 256 16 byte values that result from the 256 values
 256     of this byte.  This requires 4096 bytes. But we also
 257     need tables for each of the 16 higher bytes in the
 258     buffer as well, which makes 64 kbytes in total.
 259 */
 260 /* additional explanation
 261  * t[0][BYTE] contains g*BYTE
 262  * t[1][BYTE] contains g*x^8*BYTE
 263  *  ..
 264  * t[15][BYTE] contains g*x^120*BYTE */
 265 struct gf128mul_64k *gf128mul_init_64k_bbe(const be128 *g)
 266 {
 267         struct gf128mul_64k *t;
 268         int i, j, k;
 269
 270         t = kzalloc(sizeof(*t), GFP_KERNEL);
 271         if (!t)
 272                 goto out;
 273
 274         for (i = 0; i < 16; i++) {
 275                 t->t[i] = kzalloc(sizeof(*t->t[i]), GFP_KERNEL);
 276                 if (!t->t[i]) {
 277                         gf128mul_free_64k(t);
 278                         t = NULL;
 279                         goto out;
 280                 }
 281         }
 282
 283         t->t[0]->t[1] = *g;
 284         for (j = 1; j <= 64; j <<= 1)
 285                 gf128mul_x_bbe(&t->t[0]->t[j + j], &t->t[0]->t[j]);
 286
 287         for (i = 0;;) {
 288                 for (j = 2; j < 256; j += j)
 289                         for (k = 1; k < j; ++k)
 290                                 be128_xor(&t->t[i]->t[j + k],
 291                                           &t->t[i]->t[j], &t->t[i]->t[k]);
 292
 293                 if (++i >= 16)
 294                         break;
 295
 296                 for (j = 128; j > 0; j >>= 1) {
 297                         t->t[i]->t[j] = t->t[i - 1]->t[j];
 298                         gf128mul_x8_bbe(&t->t[i]->t[j]);
 299                 }
 300         }
 301
 302 out:
 303         return t;
 304 }
 305 EXPORT_SYMBOL(gf128mul_init_64k_bbe);
 306
 307 void gf128mul_free_64k(struct gf128mul_64k *t)
 308 {
 309         int i;
 310
 311         for (i = 0; i < 16; i++)
 312                 kzfree(t->t[i]);
 313         kzfree(t);
 314 }
 315 EXPORT_SYMBOL(gf128mul_free_64k);
 316
 317 void gf128mul_64k_bbe(be128 *a, struct gf128mul_64k *t)
 318 {
 319         u8 *ap = (u8 *)a;
 320         be128 r[1];
 321         int i;
 322
 323         *r = t->t[0]->t[ap[15]];
 324         for (i = 1; i < 16; ++i)
 325                 be128_xor(r, r, &t->t[i]->t[ap[15 - i]]);
 326         *a = *r;
 327 }
 328 EXPORT_SYMBOL(gf128mul_64k_bbe);
 329
 330 /*      This version uses 4k bytes of table space.
 331     A 16 byte buffer has to be multiplied by a 16 byte key
 332     value in GF(2^128).  If we consider a GF(2^128) value in a
 333     single byte, we can construct a table of the 256 16 byte
 334     values that result from the 256 values of this byte.
 335     This requires 4096 bytes. If we take the highest byte in
 336     the buffer and use this table to get the result, we then
 337     have to multiply by x^120 to get the final value. For the
 338     next highest byte the result has to be multiplied by x^112
 339     and so on. But we can do this by accumulating the result
 340     in an accumulator starting with the result for the top
 341     byte.  We repeatedly multiply the accumulator value by
 342     x^8 and then add in (i.e. xor) the 16 bytes of the next
 343     lower byte in the buffer, stopping when we reach the
 344     lowest byte. This requires a 4096 byte table.
 345 */
 346 struct gf128mul_4k *gf128mul_init_4k_lle(const be128 *g)
 347 {
 348         struct gf128mul_4k *t;
 349         int j, k;
 350
 351         t = kzalloc(sizeof(*t), GFP_KERNEL);
 352         if (!t)
 353                 goto out;
 354
 355         t->t[128] = *g;
 356         for (j = 64; j > 0; j >>= 1)
 357                 gf128mul_x_lle(&t->t[j], &t->t[j+j]);
 358
 359         for (j = 2; j < 256; j += j)
 360                 for (k = 1; k < j; ++k)
 361                         be128_xor(&t->t[j + k], &t->t[j], &t->t[k]);
 362
 363 out:
 364         return t;
 365 }
 366 EXPORT_SYMBOL(gf128mul_init_4k_lle);
 367
 368 struct gf128mul_4k *gf128mul_init_4k_bbe(const be128 *g)
 369 {
 370         struct gf128mul_4k *t;
 371         int j, k;
 372
 373         t = kzalloc(sizeof(*t), GFP_KERNEL);
 374         if (!t)
 375                 goto out;
 376
 377         t->t[1] = *g;
 378         for (j = 1; j <= 64; j <<= 1)
 379                 gf128mul_x_bbe(&t->t[j + j], &t->t[j]);
 380
 381         for (j = 2; j < 256; j += j)
 382                 for (k = 1; k < j; ++k)
 383                         be128_xor(&t->t[j + k], &t->t[j], &t->t[k]);
 384
 385 out:
 386         return t;
 387 }
 388 EXPORT_SYMBOL(gf128mul_init_4k_bbe);
 389
 390 void gf128mul_4k_lle(be128 *a, struct gf128mul_4k *t)
 391 {
 392         u8 *ap = (u8 *)a;
 393         be128 r[1];
 394         int i = 15;
 395
 396         *r = t->t[ap[15]];
 397         while (i--) {
 398                 gf128mul_x8_lle(r);
 399                 be128_xor(r, r, &t->t[ap[i]]);
 400         }
 401         *a = *r;
 402 }
 403 EXPORT_SYMBOL(gf128mul_4k_lle);
 404
 405 void gf128mul_4k_bbe(be128 *a, struct gf128mul_4k *t)
 406 {
 407         u8 *ap = (u8 *)a;
 408         be128 r[1];
 409         int i = 0;
 410
 411         *r = t->t[ap[0]];
 412         while (++i < 16) {
 413                 gf128mul_x8_bbe(r);
 414                 be128_xor(r, r, &t->t[ap[i]]);
 415         }
 416         *a = *r;
 417 }
 418 EXPORT_SYMBOL(gf128mul_4k_bbe);
 419
 420 MODULE_LICENSE("GPL");
 421 MODULE_DESCRIPTION("Functions for multiplying elements of GF(2^128)");