[mirror_ubuntu-bionic-kernel.git] / crypto / gf128mul.c

/* gf128mul.c - GF(2^128) multiplication functions
 *
 * Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.
 * Copyright (c) 2006, Rik Snel <rsnel@cube.dyndns.org>
 *
 * Based on Dr Brian Gladman's (GPL'd) work published at
 * http://gladman.plushost.co.uk/oldsite/cryptography_technology/index.php
 * See the original copyright notice below.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 */

/*
 ---------------------------------------------------------------------------
 Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.   All rights reserved.

 LICENSE TERMS

 The free distribution and use of this software in both source and binary
 form is allowed (with or without changes) provided that:

   1. distributions of this source code include the above copyright
      notice, this list of conditions and the following disclaimer;

   2. distributions in binary form include the above copyright
      notice, this list of conditions and the following disclaimer
      in the documentation and/or other associated materials;

   3. the copyright holder's name is not used to endorse products
      built using this software without specific written permission.

 ALTERNATIVELY, provided that this notice is retained in full, this product
 may be distributed under the terms of the GNU General Public License (GPL),
 in which case the provisions of the GPL apply INSTEAD OF those given above.

 DISCLAIMER

 This software is provided 'as is' with no explicit or implied warranties
 in respect of its properties, including, but not limited to, correctness
 and/or fitness for purpose.
 ---------------------------------------------------------------------------
 Issue 31/01/2006

 This file provides fast multiplication in GF(2^128) as required by several
 cryptographic authentication modes
*/

#include <crypto/gf128mul.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/slab.h>

/*
 * Expands to a brace-enclosed initializer list with 256 entries, obtained
 * by applying the macro q to every byte value 0x00..0xff in ascending
 * order.  Used below to build the reduction lookup tables at compile time.
 */
#define gf128mul_dat(q) { \
	q(0x00), q(0x01), q(0x02), q(0x03), q(0x04), q(0x05), q(0x06), q(0x07),\
	q(0x08), q(0x09), q(0x0a), q(0x0b), q(0x0c), q(0x0d), q(0x0e), q(0x0f),\
	q(0x10), q(0x11), q(0x12), q(0x13), q(0x14), q(0x15), q(0x16), q(0x17),\
	q(0x18), q(0x19), q(0x1a), q(0x1b), q(0x1c), q(0x1d), q(0x1e), q(0x1f),\
	q(0x20), q(0x21), q(0x22), q(0x23), q(0x24), q(0x25), q(0x26), q(0x27),\
	q(0x28), q(0x29), q(0x2a), q(0x2b), q(0x2c), q(0x2d), q(0x2e), q(0x2f),\
	q(0x30), q(0x31), q(0x32), q(0x33), q(0x34), q(0x35), q(0x36), q(0x37),\
	q(0x38), q(0x39), q(0x3a), q(0x3b), q(0x3c), q(0x3d), q(0x3e), q(0x3f),\
	q(0x40), q(0x41), q(0x42), q(0x43), q(0x44), q(0x45), q(0x46), q(0x47),\
	q(0x48), q(0x49), q(0x4a), q(0x4b), q(0x4c), q(0x4d), q(0x4e), q(0x4f),\
	q(0x50), q(0x51), q(0x52), q(0x53), q(0x54), q(0x55), q(0x56), q(0x57),\
	q(0x58), q(0x59), q(0x5a), q(0x5b), q(0x5c), q(0x5d), q(0x5e), q(0x5f),\
	q(0x60), q(0x61), q(0x62), q(0x63), q(0x64), q(0x65), q(0x66), q(0x67),\
	q(0x68), q(0x69), q(0x6a), q(0x6b), q(0x6c), q(0x6d), q(0x6e), q(0x6f),\
	q(0x70), q(0x71), q(0x72), q(0x73), q(0x74), q(0x75), q(0x76), q(0x77),\
	q(0x78), q(0x79), q(0x7a), q(0x7b), q(0x7c), q(0x7d), q(0x7e), q(0x7f),\
	q(0x80), q(0x81), q(0x82), q(0x83), q(0x84), q(0x85), q(0x86), q(0x87),\
	q(0x88), q(0x89), q(0x8a), q(0x8b), q(0x8c), q(0x8d), q(0x8e), q(0x8f),\
	q(0x90), q(0x91), q(0x92), q(0x93), q(0x94), q(0x95), q(0x96), q(0x97),\
	q(0x98), q(0x99), q(0x9a), q(0x9b), q(0x9c), q(0x9d), q(0x9e), q(0x9f),\
	q(0xa0), q(0xa1), q(0xa2), q(0xa3), q(0xa4), q(0xa5), q(0xa6), q(0xa7),\
	q(0xa8), q(0xa9), q(0xaa), q(0xab), q(0xac), q(0xad), q(0xae), q(0xaf),\
	q(0xb0), q(0xb1), q(0xb2), q(0xb3), q(0xb4), q(0xb5), q(0xb6), q(0xb7),\
	q(0xb8), q(0xb9), q(0xba), q(0xbb), q(0xbc), q(0xbd), q(0xbe), q(0xbf),\
	q(0xc0), q(0xc1), q(0xc2), q(0xc3), q(0xc4), q(0xc5), q(0xc6), q(0xc7),\
	q(0xc8), q(0xc9), q(0xca), q(0xcb), q(0xcc), q(0xcd), q(0xce), q(0xcf),\
	q(0xd0), q(0xd1), q(0xd2), q(0xd3), q(0xd4), q(0xd5), q(0xd6), q(0xd7),\
	q(0xd8), q(0xd9), q(0xda), q(0xdb), q(0xdc), q(0xdd), q(0xde), q(0xdf),\
	q(0xe0), q(0xe1), q(0xe2), q(0xe3), q(0xe4), q(0xe5), q(0xe6), q(0xe7),\
	q(0xe8), q(0xe9), q(0xea), q(0xeb), q(0xec), q(0xed), q(0xee), q(0xef),\
	q(0xf0), q(0xf1), q(0xf2), q(0xf3), q(0xf4), q(0xf5), q(0xf6), q(0xf7),\
	q(0xf8), q(0xf9), q(0xfa), q(0xfb), q(0xfc), q(0xfd), q(0xfe), q(0xff) \
}

/*
 * Given a value i in 0..255 as the byte overflow when a field element
 * in GF(2^128) is multiplied by x^8, the following macro returns the
 * 16-bit value that must be XOR-ed into the low-degree end of the
 * product to reduce it modulo the polynomial x^128 + x^7 + x^2 + x + 1.
 *
 * There are two versions of the macro, and hence two tables: one for
 * the "be" convention where the highest-order bit is the coefficient of
 * the highest-degree polynomial term, and one for the "le" convention
 * where the highest-order bit is the coefficient of the lowest-degree
 * polynomial term.  In both cases the values are stored in CPU byte
 * endianness such that the coefficients are ordered consistently across
 * bytes, i.e. in the "be" table bits 15..0 of the stored value
 * correspond to the coefficients of x^15..x^0, and in the "le" table
 * bits 15..0 correspond to the coefficients of x^0..x^15.
 *
 * Therefore, provided that the appropriate byte endianness conversions
 * are done by the multiplication functions (and these must be in place
 * anyway to support both little endian and big endian CPUs), the "be"
 * table can be used for multiplications of both "bbe" and "ble"
 * elements, and the "le" table can be used for multiplications of both
 * "lle" and "lbe" elements.
 */

/*
 * xda_be(i): 16-bit reduction value for overflow byte i, "be" convention.
 *
 * Bit k of i (mask 1 << k) contributes 0x0087 << k; the contributions of
 * all set bits are XOR-ed together.  The parameter is parenthesized at
 * every use so that an expression argument (e.g. "a | b") is evaluated as
 * a whole before being masked.
 */
#define xda_be(i) ( \
	((i) & 0x80 ? 0x4380 : 0) ^ ((i) & 0x40 ? 0x21c0 : 0) ^ \
	((i) & 0x20 ? 0x10e0 : 0) ^ ((i) & 0x10 ? 0x0870 : 0) ^ \
	((i) & 0x08 ? 0x0438 : 0) ^ ((i) & 0x04 ? 0x021c : 0) ^ \
	((i) & 0x02 ? 0x010e : 0) ^ ((i) & 0x01 ? 0x0087 : 0) \
)

/*
 * xda_le(i): 16-bit reduction value for overflow byte i, "le" convention.
 *
 * Bit k of i (mask 1 << k) contributes 0xe100 >> (7 - k); the
 * contributions of all set bits are XOR-ed together.  The parameter is
 * parenthesized at every use so that an expression argument (e.g.
 * "a | b") is evaluated as a whole before being masked.
 */
#define xda_le(i) ( \
	((i) & 0x80 ? 0xe100 : 0) ^ ((i) & 0x40 ? 0x7080 : 0) ^ \
	((i) & 0x20 ? 0x3840 : 0) ^ ((i) & 0x10 ? 0x1c20 : 0) ^ \
	((i) & 0x08 ? 0x0e10 : 0) ^ ((i) & 0x04 ? 0x0708 : 0) ^ \
	((i) & 0x02 ? 0x0384 : 0) ^ ((i) & 0x01 ? 0x01c2 : 0) \
)

/*
 * Compile-time lookup tables, indexed by the overflow byte (0..255), that
 * hold the xda_le() / xda_be() reduction value for every possible byte.
 */
static const u16 gf128mul_table_le[256] = gf128mul_dat(xda_le);
static const u16 gf128mul_table_be[256] = gf128mul_dat(xda_be);

/*
 * The following functions multiply a field element by x^8 in
 * the polynomial field representation.  They use 64-bit word operations
 * to gain speed but compensate for machine endianness and hence work
 * correctly on both styles of machine.
 */

/*
 * Multiply *x by x^8 in place, "lle" convention.
 *
 * Both halves are converted to CPU order, the 128-bit quantity is shifted
 * right by one byte (the low byte of x->a moves into the top of x->b), and
 * the byte shifted out of x->b selects a gf128mul_table_le entry that is
 * XOR-ed into the top 16 bits of x->a.
 */
static void gf128mul_x8_lle(be128 *x)
{
	const u64 hi = be64_to_cpu(x->a);
	const u64 lo = be64_to_cpu(x->b);
	const u64 reduce = gf128mul_table_le[lo & 0xff];
	u64 new_hi, new_lo;

	new_lo = (hi << 56) | (lo >> 8);
	new_hi = (reduce << 48) ^ (hi >> 8);

	x->b = cpu_to_be64(new_lo);
	x->a = cpu_to_be64(new_hi);
}

/*
 * Multiply *x by x^8 in place, "bbe" convention.
 *
 * Both halves are converted to CPU order, the 128-bit quantity is shifted
 * left by one byte (the top byte of x->b moves into the bottom of x->a),
 * and the byte shifted out of x->a selects a gf128mul_table_be entry that
 * is XOR-ed into the low 16 bits of x->b.
 */
static void gf128mul_x8_bbe(be128 *x)
{
	const u64 hi = be64_to_cpu(x->a);
	const u64 lo = be64_to_cpu(x->b);
	const u64 reduce = gf128mul_table_be[hi >> 56];
	u64 new_hi, new_lo;

	new_hi = (lo >> 56) | (hi << 8);
	new_lo = reduce ^ (lo << 8);

	x->a = cpu_to_be64(new_hi);
	x->b = cpu_to_be64(new_lo);
}

/*
 * Compute *r = *x multiplied by x^8, "ble" convention.
 *
 * x->a is treated as the high-order and x->b as the low-order 64 bits,
 * each stored little endian.  The value is shifted left by one byte; the
 * top byte of x->a, which falls off, indexes gf128mul_table_be and the
 * resulting reduction value is XOR-ed into the low end of the result.
 *
 * Both input words are loaded before anything is stored, so r and x may
 * point to the same element.
 */
void gf128mul_x8_ble(le128 *r, const le128 *x)
{
	const u64 hi = le64_to_cpu(x->a);
	const u64 lo = le64_to_cpu(x->b);
	const u64 reduce = gf128mul_table_be[hi >> 56];
	u64 new_hi, new_lo;

	new_hi = (lo >> 56) | (hi << 8);
	new_lo = reduce ^ (lo << 8);

	r->a = cpu_to_le64(new_hi);
	r->b = cpu_to_le64(new_lo);
}
EXPORT_SYMBOL(gf128mul_x8_ble);

/*
 * gf128mul_lle - table-free multiplication of two field elements, "lle"
 * convention.
 * @r: first operand on entry; receives the product on return
 * @b: second operand (read only)
 *
 * p[0..7] hold *r after 0..7 successive gf128mul_x_lle() steps
 * (presumably one multiplication by x each).  The bytes of *b are then
 * consumed from index 15 down to 0: before each byte after the first the
 * accumulator is multiplied by x^8, and for every set bit of the byte
 * (mask 0x80 >> j) p[j] is XOR-ed in.
 *
 * The product is built in a local accumulator and stored to *r only at the
 * end, so *b is never read after *r has been modified and r == b is safe.
 */
void gf128mul_lle(be128 *r, const be128 *b)
{
	const u8 *bytes = (const u8 *)b;
	be128 p[8];
	be128 acc;
	int i, j;

	p[0] = *r;
	for (i = 0; i < 7; ++i)
		gf128mul_x_lle(&p[i + 1], &p[i]);

	memset(&acc, 0, sizeof(acc));
	for (i = 15; i >= 0; --i) {
		u8 ch = bytes[i];

		/* shift the partial product before adding the next byte */
		if (i < 15)
			gf128mul_x8_lle(&acc);

		for (j = 0; j < 8; ++j)
			if (ch & (0x80 >> j))
				be128_xor(&acc, &acc, &p[j]);
	}

	*r = acc;
}
EXPORT_SYMBOL(gf128mul_lle);

/*
 * gf128mul_bbe - table-free multiplication of two field elements, "bbe"
 * convention.
 * @r: first operand on entry; receives the product on return
 * @b: second operand (read only)
 *
 * p[0..7] hold *r after 0..7 successive gf128mul_x_bbe() steps
 * (presumably one multiplication by x each).  The bytes of *b are then
 * consumed from index 0 up to 15: before each byte after the first the
 * accumulator is multiplied by x^8, and for every set bit of the byte
 * (mask 1 << j) p[j] is XOR-ed in.
 *
 * The product is built in a local accumulator and stored to *r only at the
 * end, so *b is never read after *r has been modified and r == b is safe.
 */
void gf128mul_bbe(be128 *r, const be128 *b)
{
	const u8 *bytes = (const u8 *)b;
	be128 p[8];
	be128 acc;
	int i, j;

	p[0] = *r;
	for (i = 0; i < 7; ++i)
		gf128mul_x_bbe(&p[i + 1], &p[i]);

	memset(&acc, 0, sizeof(acc));
	for (i = 0; i < 16; ++i) {
		u8 ch = bytes[i];

		/* shift the partial product before adding the next byte */
		if (i > 0)
			gf128mul_x8_bbe(&acc);

		for (j = 0; j < 8; ++j)
			if (ch & (1 << j))
				be128_xor(&acc, &acc, &p[j]);
	}

	*r = acc;
}
EXPORT_SYMBOL(gf128mul_bbe);

/*      This version uses 64k bytes of table space.
    A 16 byte buffer has to be multiplied by a 16 byte key
    value in GF(2^128).  If we consider a GF(2^128) value in
    the buffer's lowest byte, we can construct a table of
    the 256 16 byte values that result from the 256 values
    of this byte.  This requires 4096 bytes. But we also
    need tables for each of the 15 higher bytes in the
    buffer as well, which makes 64 kbytes in total.
*/
/* additional explanation
 * t[0][BYTE] contains g*BYTE
 * t[1][BYTE] contains g*x^8*BYTE
 *  ..
 * t[15][BYTE] contains g*x^120*BYTE */
/*
 * gf128mul_init_64k_bbe - build the sixteen 256-entry tables for
 * gf128mul_64k_bbe().
 * @g: field element the tables are derived from
 *
 * Returns the newly allocated table set, or NULL if any allocation fails
 * (everything allocated so far is released via gf128mul_free_64k()).
 *
 * Table 0 is seeded with *g at index 1 and its gf128mul_x_bbe() images at
 * indices 2, 4, ..., 128.  For each following table the power-of-two
 * entries are copied from the previous table and multiplied by x^8.  In
 * every table the remaining entries are filled as XOR combinations:
 * entry[j + k] = entry[j] ^ entry[k] for each power of two j and k < j.
 */
struct gf128mul_64k *gf128mul_init_64k_bbe(const be128 *g)
{
	struct gf128mul_64k *tbl;
	int idx, bit, low;

	tbl = kzalloc(sizeof(*tbl), GFP_KERNEL);
	if (!tbl)
		return NULL;

	for (idx = 0; idx < 16; idx++) {
		tbl->t[idx] = kzalloc(sizeof(*tbl->t[idx]), GFP_KERNEL);
		if (!tbl->t[idx]) {
			gf128mul_free_64k(tbl);
			return NULL;
		}
	}

	/* seed the power-of-two entries of the first table */
	tbl->t[0]->t[1] = *g;
	for (bit = 1; bit <= 64; bit <<= 1)
		gf128mul_x_bbe(&tbl->t[0]->t[2 * bit], &tbl->t[0]->t[bit]);

	for (idx = 0; idx < 16; idx++) {
		be128 *cur = tbl->t[idx]->t;

		if (idx > 0) {
			const be128 *prev = tbl->t[idx - 1]->t;

			/* power-of-two entries: previous table times x^8 */
			for (bit = 128; bit > 0; bit >>= 1) {
				cur[bit] = prev[bit];
				gf128mul_x8_bbe(&cur[bit]);
			}
		}

		/* every other entry is the XOR of two earlier entries */
		for (bit = 2; bit < 256; bit += bit)
			for (low = 1; low < bit; ++low)
				be128_xor(&cur[bit + low], &cur[bit],
					  &cur[low]);
	}

	return tbl;
}
EXPORT_SYMBOL(gf128mul_init_64k_bbe);

/*
 * gf128mul_free_64k - release a table set from gf128mul_init_64k_bbe().
 * @t: table set to release; NULL is accepted and ignored
 *
 * Each of the sixteen sub-tables and then the container itself is released
 * with kzfree().  Sub-table pointers that are still NULL (a partially
 * constructed set) are passed through to kzfree() unchanged.
 */
void gf128mul_free_64k(struct gf128mul_64k *t)
{
	int i;

	/* match the usual free-function convention: NULL is a no-op */
	if (!t)
		return;

	for (i = 0; i < 16; i++)
		kzfree(t->t[i]);
	kzfree(t);
}
EXPORT_SYMBOL(gf128mul_free_64k);

/*
 * gf128mul_64k_bbe - multiply *a in place using a 64k table set.
 * @a: element to multiply; overwritten with the result
 * @t: table set built by gf128mul_init_64k_bbe()
 *
 * Byte 15 of *a indexes table 0, byte 14 indexes table 1, and so on down
 * to byte 0 indexing table 15; the sixteen looked-up entries are XOR-ed
 * together.  The sum is built in a local and copied to *a at the end, so
 * all bytes of *a are read before it is overwritten.
 */
void gf128mul_64k_bbe(be128 *a, const struct gf128mul_64k *t)
{
	const u8 *bytes = (const u8 *)a;
	be128 sum = t->t[0]->t[bytes[15]];
	int pos;

	for (pos = 14; pos >= 0; --pos)
		be128_xor(&sum, &sum, &t->t[15 - pos]->t[bytes[pos]]);

	*a = sum;
}
EXPORT_SYMBOL(gf128mul_64k_bbe);

/*      This version uses 4k bytes of table space.
    A 16 byte buffer has to be multiplied by a 16 byte key
    value in GF(2^128).  If we consider a GF(2^128) value in a
    single byte, we can construct a table of the 256 16 byte
    values that result from the 256 values of this byte.
    This requires 4096 bytes. If we take the highest byte in
    the buffer and use this table to get the result, we then
    have to multiply by x^120 to get the final value. For the
    next highest byte the result has to be multiplied by x^112
    and so on. But we can do this by accumulating the result
    in an accumulator starting with the result for the top
    byte.  We repeatedly multiply the accumulator value by
    x^8 and then add in (i.e. xor) the 16 bytes of the next
    lower byte in the buffer, stopping when we reach the
    lowest byte. This requires a 4096 byte table.
*/
struct gf128mul_4k *gf128mul_init_4k_lle(const be128 *g)
{
	struct gf128mul_4k *t;
	int j, k;

	t = kzalloc(sizeof(*t), GFP_KERNEL);
	if (!t)
		goto out;

	t->t[128] = *g;
	for (j = 64; j > 0; j >>= 1)
		gf128mul_x_lle(&t->t[j], &t->t[j+j]);

	for (j = 2; j < 256; j += j)
		for (k = 1; k < j; ++k)
			be128_xor(&t->t[j + k], &t->t[j], &t->t[k]);

out:
	return t;
}
EXPORT_SYMBOL(gf128mul_init_4k_lle);

struct gf128mul_4k *gf128mul_init_4k_bbe(const be128 *g)
{
	struct gf128mul_4k *t;
	int j, k;

	t = kzalloc(sizeof(*t), GFP_KERNEL);
	if (!t)
		goto out;

	t->t[1] = *g;
	for (j = 1; j <= 64; j <<= 1)
		gf128mul_x_bbe(&t->t[j + j], &t->t[j]);

	for (j = 2; j < 256; j += j)
		for (k = 1; k < j; ++k)
			be128_xor(&t->t[j + k], &t->t[j], &t->t[k]);

out:
	return t;
}
EXPORT_SYMBOL(gf128mul_init_4k_bbe);

/*
 * gf128mul_4k_lle - multiply *a in place using a 4k table, "lle"
 * convention.
 * @a: element to multiply; overwritten with the result
 * @t: table built by gf128mul_init_4k_lle()
 *
 * The accumulator starts as the table entry for byte 15 of *a.  For bytes
 * 14 down to 0 it is multiplied by x^8 and the table entry for that byte
 * is XOR-ed in.  The result is copied to *a only after the last byte has
 * been read.
 */
void gf128mul_4k_lle(be128 *a, const struct gf128mul_4k *t)
{
	const u8 *bytes = (const u8 *)a;
	be128 acc = t->t[bytes[15]];
	int pos;

	for (pos = 14; pos >= 0; --pos) {
		gf128mul_x8_lle(&acc);
		be128_xor(&acc, &acc, &t->t[bytes[pos]]);
	}

	*a = acc;
}
EXPORT_SYMBOL(gf128mul_4k_lle);

/*
 * gf128mul_4k_bbe - multiply *a in place using a 4k table, "bbe"
 * convention.
 * @a: element to multiply; overwritten with the result
 * @t: table built by gf128mul_init_4k_bbe()
 *
 * The accumulator starts as the table entry for byte 0 of *a.  For bytes
 * 1 up to 15 it is multiplied by x^8 and the table entry for that byte is
 * XOR-ed in.  The result is copied to *a only after the last byte has been
 * read.
 */
void gf128mul_4k_bbe(be128 *a, const struct gf128mul_4k *t)
{
	const u8 *bytes = (const u8 *)a;
	be128 acc = t->t[bytes[0]];
	int pos;

	for (pos = 1; pos < 16; ++pos) {
		gf128mul_x8_bbe(&acc);
		be128_xor(&acc, &acc, &t->t[bytes[pos]]);
	}

	*a = acc;
}
EXPORT_SYMBOL(gf128mul_4k_bbe);

/* Module metadata: license tag and one-line description. */
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Functions for multiplying elements of GF(2^128)");
Commit	Line	Data
c494e070 RS	1	/* gf128mul.c - GF(2^128) multiplication functions
	2	*
	3	* Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.
	4	* Copyright (c) 2006, Rik Snel <rsnel@cube.dyndns.org>
	5	*
	6	* Based on Dr Brian Gladman's (GPL'd) work published at
8c882f64	7	* http://gladman.plushost.co.uk/oldsite/cryptography_technology/index.php
c494e070 RS	8	* See the original copyright notice below.
	9	*
	10	* This program is free software; you can redistribute it and/or modify it
	11	* under the terms of the GNU General Public License as published by the Free
	12	* Software Foundation; either version 2 of the License, or (at your option)
	13	* any later version.
	14	*/
	15
	16	/*
	17	---------------------------------------------------------------------------
	18	Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. All rights reserved.
	19
	20	LICENSE TERMS
	21
	22	The free distribution and use of this software in both source and binary
	23	form is allowed (with or without changes) provided that:
	24
	25	1. distributions of this source code include the above copyright
	26	notice, this list of conditions and the following disclaimer;
	27
	28	2. distributions in binary form include the above copyright
	29	notice, this list of conditions and the following disclaimer
	30	in the documentation and/or other associated materials;
	31
	32	3. the copyright holder's name is not used to endorse products
	33	built using this software without specific written permission.
	34
	35	ALTERNATIVELY, provided that this notice is retained in full, this product
	36	may be distributed under the terms of the GNU General Public License (GPL),
	37	in which case the provisions of the GPL apply INSTEAD OF those given above.
	38
	39	DISCLAIMER
	40
	41	This software is provided 'as is' with no explicit or implied warranties
	42	in respect of its properties, including, but not limited to, correctness
	43	and/or fitness for purpose.
	44	---------------------------------------------------------------------------
	45	Issue 31/01/2006
	46
63be5b53	47	This file provides fast multiplication in GF(2^128) as required by several
c494e070 RS	48	cryptographic authentication modes
	49	*/
	50
	51	#include <crypto/gf128mul.h>
	52	#include <linux/kernel.h>
	53	#include <linux/module.h>
	54	#include <linux/slab.h>
	55
	56	#define gf128mul_dat(q) { \
	57	q(0x00), q(0x01), q(0x02), q(0x03), q(0x04), q(0x05), q(0x06), q(0x07),\
	58	q(0x08), q(0x09), q(0x0a), q(0x0b), q(0x0c), q(0x0d), q(0x0e), q(0x0f),\
	59	q(0x10), q(0x11), q(0x12), q(0x13), q(0x14), q(0x15), q(0x16), q(0x17),\
	60	q(0x18), q(0x19), q(0x1a), q(0x1b), q(0x1c), q(0x1d), q(0x1e), q(0x1f),\
	61	q(0x20), q(0x21), q(0x22), q(0x23), q(0x24), q(0x25), q(0x26), q(0x27),\
	62	q(0x28), q(0x29), q(0x2a), q(0x2b), q(0x2c), q(0x2d), q(0x2e), q(0x2f),\
	63	q(0x30), q(0x31), q(0x32), q(0x33), q(0x34), q(0x35), q(0x36), q(0x37),\
	64	q(0x38), q(0x39), q(0x3a), q(0x3b), q(0x3c), q(0x3d), q(0x3e), q(0x3f),\
	65	q(0x40), q(0x41), q(0x42), q(0x43), q(0x44), q(0x45), q(0x46), q(0x47),\
	66	q(0x48), q(0x49), q(0x4a), q(0x4b), q(0x4c), q(0x4d), q(0x4e), q(0x4f),\
	67	q(0x50), q(0x51), q(0x52), q(0x53), q(0x54), q(0x55), q(0x56), q(0x57),\
	68	q(0x58), q(0x59), q(0x5a), q(0x5b), q(0x5c), q(0x5d), q(0x5e), q(0x5f),\
	69	q(0x60), q(0x61), q(0x62), q(0x63), q(0x64), q(0x65), q(0x66), q(0x67),\
	70	q(0x68), q(0x69), q(0x6a), q(0x6b), q(0x6c), q(0x6d), q(0x6e), q(0x6f),\
	71	q(0x70), q(0x71), q(0x72), q(0x73), q(0x74), q(0x75), q(0x76), q(0x77),\
	72	q(0x78), q(0x79), q(0x7a), q(0x7b), q(0x7c), q(0x7d), q(0x7e), q(0x7f),\
	73	q(0x80), q(0x81), q(0x82), q(0x83), q(0x84), q(0x85), q(0x86), q(0x87),\
	74	q(0x88), q(0x89), q(0x8a), q(0x8b), q(0x8c), q(0x8d), q(0x8e), q(0x8f),\
	75	q(0x90), q(0x91), q(0x92), q(0x93), q(0x94), q(0x95), q(0x96), q(0x97),\
	76	q(0x98), q(0x99), q(0x9a), q(0x9b), q(0x9c), q(0x9d), q(0x9e), q(0x9f),\
	77	q(0xa0), q(0xa1), q(0xa2), q(0xa3), q(0xa4), q(0xa5), q(0xa6), q(0xa7),\
	78	q(0xa8), q(0xa9), q(0xaa), q(0xab), q(0xac), q(0xad), q(0xae), q(0xaf),\
	79	q(0xb0), q(0xb1), q(0xb2), q(0xb3), q(0xb4), q(0xb5), q(0xb6), q(0xb7),\
	80	q(0xb8), q(0xb9), q(0xba), q(0xbb), q(0xbc), q(0xbd), q(0xbe), q(0xbf),\
	81	q(0xc0), q(0xc1), q(0xc2), q(0xc3), q(0xc4), q(0xc5), q(0xc6), q(0xc7),\
	82	q(0xc8), q(0xc9), q(0xca), q(0xcb), q(0xcc), q(0xcd), q(0xce), q(0xcf),\
	83	q(0xd0), q(0xd1), q(0xd2), q(0xd3), q(0xd4), q(0xd5), q(0xd6), q(0xd7),\
	84	q(0xd8), q(0xd9), q(0xda), q(0xdb), q(0xdc), q(0xdd), q(0xde), q(0xdf),\
	85	q(0xe0), q(0xe1), q(0xe2), q(0xe3), q(0xe4), q(0xe5), q(0xe6), q(0xe7),\
	86	q(0xe8), q(0xe9), q(0xea), q(0xeb), q(0xec), q(0xed), q(0xee), q(0xef),\
	87	q(0xf0), q(0xf1), q(0xf2), q(0xf3), q(0xf4), q(0xf5), q(0xf6), q(0xf7),\
	88	q(0xf8), q(0xf9), q(0xfa), q(0xfb), q(0xfc), q(0xfd), q(0xfe), q(0xff) \
	89	}
	90
f33fd647 EB	91	/*
	92	* Given a value i in 0..255 as the byte overflow when a field element
	93	* in GF(2^128) is multiplied by x^8, the following macro returns the
	94	* 16-bit value that must be XOR-ed into the low-degree end of the
	95	* product to reduce it modulo the polynomial x^128 + x^7 + x^2 + x + 1.
	96	*
	97	* There are two versions of the macro, and hence two tables: one for
	98	* the "be" convention where the highest-order bit is the coefficient of
	99	* the highest-degree polynomial term, and one for the "le" convention
	100	* where the highest-order bit is the coefficient of the lowest-degree
	101	* polynomial term. In both cases the values are stored in CPU byte
	102	* endianness such that the coefficients are ordered consistently across
	103	* bytes, i.e. in the "be" table bits 15..0 of the stored value
	104	* correspond to the coefficients of x^15..x^0, and in the "le" table
	105	* bits 15..0 correspond to the coefficients of x^0..x^15.
	106	*
	107	* Therefore, provided that the appropriate byte endianness conversions
	108	* are done by the multiplication functions (and these must be in place
	109	* anyway to support both little endian and big endian CPUs), the "be"
	110	* table can be used for multiplications of both "bbe" and "ble"
	111	* elements, and the "le" table can be used for multiplications of both
	112	* "lle" and "lbe" elements.
	113	*/
c494e070	114
f33fd647	115	#define xda_be(i) ( \
2416e4fa EB	116	(i & 0x80 ? 0x4380 : 0) ^ (i & 0x40 ? 0x21c0 : 0) ^ \
	117	(i & 0x20 ? 0x10e0 : 0) ^ (i & 0x10 ? 0x0870 : 0) ^ \
	118	(i & 0x08 ? 0x0438 : 0) ^ (i & 0x04 ? 0x021c : 0) ^ \
	119	(i & 0x02 ? 0x010e : 0) ^ (i & 0x01 ? 0x0087 : 0) \
c494e070 RS	120	)
c494e070 RS	121
f33fd647	122	#define xda_le(i) ( \
2416e4fa EB	123	(i & 0x80 ? 0xe100 : 0) ^ (i & 0x40 ? 0x7080 : 0) ^ \
	124	(i & 0x20 ? 0x3840 : 0) ^ (i & 0x10 ? 0x1c20 : 0) ^ \
	125	(i & 0x08 ? 0x0e10 : 0) ^ (i & 0x04 ? 0x0708 : 0) ^ \
	126	(i & 0x02 ? 0x0384 : 0) ^ (i & 0x01 ? 0x01c2 : 0) \
c494e070 RS	127	)
c494e070 RS	128
f33fd647 EB	129	static const u16 gf128mul_table_le[256] = gf128mul_dat(xda_le);
f33fd647 EB	130	static const u16 gf128mul_table_be[256] = gf128mul_dat(xda_be);
c494e070	131
63be5b53	132	/*
acb9b159	133	* The following functions multiply a field element by x^8 in
63be5b53 EB	134	* the polynomial field representation. They use 64-bit word operations
63be5b53 EB	135	* to gain speed but compensate for machine endianness and hence work
c494e070 RS	136	* correctly on both styles of machine.
	137	*/
	138
c494e070 RS	139	static void gf128mul_x8_lle(be128 *x)
	140	{
	141	u64 a = be64_to_cpu(x->a);
	142	u64 b = be64_to_cpu(x->b);
f33fd647	143	u64 _tt = gf128mul_table_le[b & 0xff];
c494e070 RS	144
	145	x->b = cpu_to_be64((b >> 8) \| (a << 56));
	146	x->a = cpu_to_be64((a >> 8) ^ (_tt << 48));
	147	}
	148
	149	static void gf128mul_x8_bbe(be128 *x)
	150	{
	151	u64 a = be64_to_cpu(x->a);
	152	u64 b = be64_to_cpu(x->b);
f33fd647	153	u64 _tt = gf128mul_table_be[a >> 56];
c494e070 RS	154
	155	x->a = cpu_to_be64((a << 8) \| (b >> 56));
	156	x->b = cpu_to_be64((b << 8) ^ _tt);
	157	}
	158
acfc5878 HJ	159	void gf128mul_x8_ble(le128 r, const le128 x)
	160	{
	161	u64 a = le64_to_cpu(x->a);
	162	u64 b = le64_to_cpu(x->b);
	163
	164	/* equivalent to gf128mul_table_be[b >> 63] (see crypto/gf128mul.c): */
	165	u64 _tt = gf128mul_table_be[a >> 56];
	166
	167	r->a = cpu_to_le64((a << 8) \| (b >> 56));
	168	r->b = cpu_to_le64((b << 8) ^ _tt);
	169	}
	170	EXPORT_SYMBOL(gf128mul_x8_ble);
	171
c494e070 RS	172	void gf128mul_lle(be128 r, const be128 b)
	173	{
	174	be128 p[8];
	175	int i;
	176
	177	p[0] = *r;
	178	for (i = 0; i < 7; ++i)
	179	gf128mul_x_lle(&p[i + 1], &p[i]);
	180
62542663	181	memset(r, 0, sizeof(*r));
c494e070 RS	182	for (i = 0;;) {
	183	u8 ch = ((u8 *)b)[15 - i];
	184
	185	if (ch & 0x80)
	186	be128_xor(r, r, &p[0]);
	187	if (ch & 0x40)
	188	be128_xor(r, r, &p[1]);
	189	if (ch & 0x20)
	190	be128_xor(r, r, &p[2]);
	191	if (ch & 0x10)
	192	be128_xor(r, r, &p[3]);
	193	if (ch & 0x08)
	194	be128_xor(r, r, &p[4]);
	195	if (ch & 0x04)
	196	be128_xor(r, r, &p[5]);
	197	if (ch & 0x02)
	198	be128_xor(r, r, &p[6]);
	199	if (ch & 0x01)
	200	be128_xor(r, r, &p[7]);
	201
	202	if (++i >= 16)
	203	break;
	204
	205	gf128mul_x8_lle(r);
	206	}
	207	}
	208	EXPORT_SYMBOL(gf128mul_lle);
	209
	210	void gf128mul_bbe(be128 r, const be128 b)
	211	{
	212	be128 p[8];
	213	int i;
	214
	215	p[0] = *r;
	216	for (i = 0; i < 7; ++i)
	217	gf128mul_x_bbe(&p[i + 1], &p[i]);
	218
62542663	219	memset(r, 0, sizeof(*r));
c494e070 RS	220	for (i = 0;;) {
	221	u8 ch = ((u8 *)b)[i];
	222
	223	if (ch & 0x80)
	224	be128_xor(r, r, &p[7]);
	225	if (ch & 0x40)
	226	be128_xor(r, r, &p[6]);
	227	if (ch & 0x20)
	228	be128_xor(r, r, &p[5]);
	229	if (ch & 0x10)
	230	be128_xor(r, r, &p[4]);
	231	if (ch & 0x08)
	232	be128_xor(r, r, &p[3]);
	233	if (ch & 0x04)
	234	be128_xor(r, r, &p[2]);
	235	if (ch & 0x02)
	236	be128_xor(r, r, &p[1]);
	237	if (ch & 0x01)
	238	be128_xor(r, r, &p[0]);
	239
	240	if (++i >= 16)
	241	break;
	242
	243	gf128mul_x8_bbe(r);
	244	}
	245	}
	246	EXPORT_SYMBOL(gf128mul_bbe);
	247
	248	/* This version uses 64k bytes of table space.
	249	A 16 byte buffer has to be multiplied by a 16 byte key
63be5b53	250	value in GF(2^128). If we consider a GF(2^128) value in
c494e070 RS	251	the buffer's lowest byte, we can construct a table of
	252	the 256 16 byte values that result from the 256 values
	253	of this byte. This requires 4096 bytes. But we also
	254	need tables for each of the 16 higher bytes in the
	255	buffer as well, which makes 64 kbytes in total.
	256	*/
	257	/* additional explanation
	258	* t[0][BYTE] contains g*BYTE
	259	* t[1][BYTE] contains gx^8BYTE
	260	* ..
	261	* t[15][BYTE] contains gx^120BYTE */
c494e070 RS	262	struct gf128mul_64k gf128mul_init_64k_bbe(const be128 g)
	263	{
	264	struct gf128mul_64k *t;
	265	int i, j, k;
	266
	267	t = kzalloc(sizeof(*t), GFP_KERNEL);
	268	if (!t)
	269	goto out;
	270
	271	for (i = 0; i < 16; i++) {
	272	t->t[i] = kzalloc(sizeof(*t->t[i]), GFP_KERNEL);
	273	if (!t->t[i]) {
	274	gf128mul_free_64k(t);
	275	t = NULL;
	276	goto out;
	277	}
	278	}
	279
	280	t->t[0]->t[1] = *g;
	281	for (j = 1; j <= 64; j <<= 1)
	282	gf128mul_x_bbe(&t->t[0]->t[j + j], &t->t[0]->t[j]);
	283
	284	for (i = 0;;) {
	285	for (j = 2; j < 256; j += j)
	286	for (k = 1; k < j; ++k)
	287	be128_xor(&t->t[i]->t[j + k],
	288	&t->t[i]->t[j], &t->t[i]->t[k]);
	289
	290	if (++i >= 16)
	291	break;
	292
	293	for (j = 128; j > 0; j >>= 1) {
	294	t->t[i]->t[j] = t->t[i - 1]->t[j];
	295	gf128mul_x8_bbe(&t->t[i]->t[j]);
	296	}
	297	}
	298
	299	out:
	300	return t;
	301	}
	302	EXPORT_SYMBOL(gf128mul_init_64k_bbe);
	303
	304	void gf128mul_free_64k(struct gf128mul_64k *t)
	305	{
	306	int i;
	307
	308	for (i = 0; i < 16; i++)
75aa0a7c AC	309	kzfree(t->t[i]);
75aa0a7c AC	310	kzfree(t);
c494e070 RS	311	}
	312	EXPORT_SYMBOL(gf128mul_free_64k);
	313
3ea996dd	314	void gf128mul_64k_bbe(be128 a, const struct gf128mul_64k t)
c494e070 RS	315	{
	316	u8 ap = (u8 )a;
	317	be128 r[1];
	318	int i;
	319
	320	*r = t->t[0]->t[ap[15]];
	321	for (i = 1; i < 16; ++i)
	322	be128_xor(r, r, &t->t[i]->t[ap[15 - i]]);
	323	a = r;
	324	}
	325	EXPORT_SYMBOL(gf128mul_64k_bbe);
	326
	327	/* This version uses 4k bytes of table space.
	328	A 16 byte buffer has to be multiplied by a 16 byte key
63be5b53	329	value in GF(2^128). If we consider a GF(2^128) value in a
c494e070 RS	330	single byte, we can construct a table of the 256 16 byte
	331	values that result from the 256 values of this byte.
	332	This requires 4096 bytes. If we take the highest byte in
	333	the buffer and use this table to get the result, we then
	334	have to multiply by x^120 to get the final value. For the
	335	next highest byte the result has to be multiplied by x^112
	336	and so on. But we can do this by accumulating the result
	337	in an accumulator starting with the result for the top
	338	byte. We repeatedly multiply the accumulator value by
	339	x^8 and then add in (i.e. xor) the 16 bytes of the next
	340	lower byte in the buffer, stopping when we reach the
	341	lowest byte. This requires a 4096 byte table.
	342	*/
	343	struct gf128mul_4k gf128mul_init_4k_lle(const be128 g)
	344	{
	345	struct gf128mul_4k *t;
	346	int j, k;
	347
	348	t = kzalloc(sizeof(*t), GFP_KERNEL);
	349	if (!t)
	350	goto out;
	351
	352	t->t[128] = *g;
	353	for (j = 64; j > 0; j >>= 1)
	354	gf128mul_x_lle(&t->t[j], &t->t[j+j]);
	355
	356	for (j = 2; j < 256; j += j)
	357	for (k = 1; k < j; ++k)
	358	be128_xor(&t->t[j + k], &t->t[j], &t->t[k]);
	359
	360	out:
	361	return t;
	362	}
	363	EXPORT_SYMBOL(gf128mul_init_4k_lle);
	364
	365	struct gf128mul_4k gf128mul_init_4k_bbe(const be128 g)
	366	{
	367	struct gf128mul_4k *t;
	368	int j, k;
	369
	370	t = kzalloc(sizeof(*t), GFP_KERNEL);
	371	if (!t)
	372	goto out;
	373
	374	t->t[1] = *g;
	375	for (j = 1; j <= 64; j <<= 1)
	376	gf128mul_x_bbe(&t->t[j + j], &t->t[j]);
	377
	378	for (j = 2; j < 256; j += j)
	379	for (k = 1; k < j; ++k)
	380	be128_xor(&t->t[j + k], &t->t[j], &t->t[k]);
	381
	382	out:
	383	return t;
	384	}
	385	EXPORT_SYMBOL(gf128mul_init_4k_bbe);
	386
3ea996dd	387	void gf128mul_4k_lle(be128 a, const struct gf128mul_4k t)
c494e070 RS	388	{
	389	u8 ap = (u8 )a;
	390	be128 r[1];
	391	int i = 15;
	392
	393	*r = t->t[ap[15]];
	394	while (i--) {
	395	gf128mul_x8_lle(r);
	396	be128_xor(r, r, &t->t[ap[i]]);
	397	}
	398	a = r;
	399	}
	400	EXPORT_SYMBOL(gf128mul_4k_lle);
	401
3ea996dd	402	void gf128mul_4k_bbe(be128 a, const struct gf128mul_4k t)
c494e070 RS	403	{
	404	u8 ap = (u8 )a;
	405	be128 r[1];
	406	int i = 0;
	407
	408	*r = t->t[ap[0]];
	409	while (++i < 16) {
	410	gf128mul_x8_bbe(r);
	411	be128_xor(r, r, &t->t[ap[i]]);
	412	}
	413	a = r;
	414	}
	415	EXPORT_SYMBOL(gf128mul_4k_bbe);
	416
	417	MODULE_LICENSE("GPL");
	418	MODULE_DESCRIPTION("Functions for multiplying elements of GF(2^128)");