[mirror_ubuntu-bionic-kernel.git] / crypto / gf128mul.c

/* gf128mul.c - GF(2^128) multiplication functions
 *
 * Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.
 * Copyright (c) 2006, Rik Snel <rsnel@cube.dyndns.org>
 *
 * Based on Dr Brian Gladman's (GPL'd) work published at
 * http://gladman.plushost.co.uk/oldsite/cryptography_technology/index.php
 * See the original copyright notice below.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 */

/*
 ---------------------------------------------------------------------------
 Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.   All rights reserved.

 LICENSE TERMS

 The free distribution and use of this software in both source and binary
 form is allowed (with or without changes) provided that:

   1. distributions of this source code include the above copyright
      notice, this list of conditions and the following disclaimer;

   2. distributions in binary form include the above copyright
      notice, this list of conditions and the following disclaimer
      in the documentation and/or other associated materials;

   3. the copyright holder's name is not used to endorse products
      built using this software without specific written permission.

 ALTERNATIVELY, provided that this notice is retained in full, this product
 may be distributed under the terms of the GNU General Public License (GPL),
 in which case the provisions of the GPL apply INSTEAD OF those given above.

 DISCLAIMER

 This software is provided 'as is' with no explicit or implied warranties
 in respect of its properties, including, but not limited to, correctness
 and/or fitness for purpose.
 ---------------------------------------------------------------------------
 Issue 31/01/2006

 This file provides fast multiplication in GF(2^128) as required by several
 cryptographic authentication modes
*/

#include <crypto/gf128mul.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/slab.h>

#define gf128mul_dat(q) { \
	q(0x00), q(0x01), q(0x02), q(0x03), q(0x04), q(0x05), q(0x06), q(0x07),\
	q(0x08), q(0x09), q(0x0a), q(0x0b), q(0x0c), q(0x0d), q(0x0e), q(0x0f),\
	q(0x10), q(0x11), q(0x12), q(0x13), q(0x14), q(0x15), q(0x16), q(0x17),\
	q(0x18), q(0x19), q(0x1a), q(0x1b), q(0x1c), q(0x1d), q(0x1e), q(0x1f),\
	q(0x20), q(0x21), q(0x22), q(0x23), q(0x24), q(0x25), q(0x26), q(0x27),\
	q(0x28), q(0x29), q(0x2a), q(0x2b), q(0x2c), q(0x2d), q(0x2e), q(0x2f),\
	q(0x30), q(0x31), q(0x32), q(0x33), q(0x34), q(0x35), q(0x36), q(0x37),\
	q(0x38), q(0x39), q(0x3a), q(0x3b), q(0x3c), q(0x3d), q(0x3e), q(0x3f),\
	q(0x40), q(0x41), q(0x42), q(0x43), q(0x44), q(0x45), q(0x46), q(0x47),\
	q(0x48), q(0x49), q(0x4a), q(0x4b), q(0x4c), q(0x4d), q(0x4e), q(0x4f),\
	q(0x50), q(0x51), q(0x52), q(0x53), q(0x54), q(0x55), q(0x56), q(0x57),\
	q(0x58), q(0x59), q(0x5a), q(0x5b), q(0x5c), q(0x5d), q(0x5e), q(0x5f),\
	q(0x60), q(0x61), q(0x62), q(0x63), q(0x64), q(0x65), q(0x66), q(0x67),\
	q(0x68), q(0x69), q(0x6a), q(0x6b), q(0x6c), q(0x6d), q(0x6e), q(0x6f),\
	q(0x70), q(0x71), q(0x72), q(0x73), q(0x74), q(0x75), q(0x76), q(0x77),\
	q(0x78), q(0x79), q(0x7a), q(0x7b), q(0x7c), q(0x7d), q(0x7e), q(0x7f),\
	q(0x80), q(0x81), q(0x82), q(0x83), q(0x84), q(0x85), q(0x86), q(0x87),\
	q(0x88), q(0x89), q(0x8a), q(0x8b), q(0x8c), q(0x8d), q(0x8e), q(0x8f),\
	q(0x90), q(0x91), q(0x92), q(0x93), q(0x94), q(0x95), q(0x96), q(0x97),\
	q(0x98), q(0x99), q(0x9a), q(0x9b), q(0x9c), q(0x9d), q(0x9e), q(0x9f),\
	q(0xa0), q(0xa1), q(0xa2), q(0xa3), q(0xa4), q(0xa5), q(0xa6), q(0xa7),\
	q(0xa8), q(0xa9), q(0xaa), q(0xab), q(0xac), q(0xad), q(0xae), q(0xaf),\
	q(0xb0), q(0xb1), q(0xb2), q(0xb3), q(0xb4), q(0xb5), q(0xb6), q(0xb7),\
	q(0xb8), q(0xb9), q(0xba), q(0xbb), q(0xbc), q(0xbd), q(0xbe), q(0xbf),\
	q(0xc0), q(0xc1), q(0xc2), q(0xc3), q(0xc4), q(0xc5), q(0xc6), q(0xc7),\
	q(0xc8), q(0xc9), q(0xca), q(0xcb), q(0xcc), q(0xcd), q(0xce), q(0xcf),\
	q(0xd0), q(0xd1), q(0xd2), q(0xd3), q(0xd4), q(0xd5), q(0xd6), q(0xd7),\
	q(0xd8), q(0xd9), q(0xda), q(0xdb), q(0xdc), q(0xdd), q(0xde), q(0xdf),\
	q(0xe0), q(0xe1), q(0xe2), q(0xe3), q(0xe4), q(0xe5), q(0xe6), q(0xe7),\
	q(0xe8), q(0xe9), q(0xea), q(0xeb), q(0xec), q(0xed), q(0xee), q(0xef),\
	q(0xf0), q(0xf1), q(0xf2), q(0xf3), q(0xf4), q(0xf5), q(0xf6), q(0xf7),\
	q(0xf8), q(0xf9), q(0xfa), q(0xfb), q(0xfc), q(0xfd), q(0xfe), q(0xff) \
}

/*	Given the value i in 0..255 as the byte overflow when a field element
    in GHASH is multiplied by x^8, this function will return the values that
    are generated in the lo 16-bit word of the field value by applying the
    modular polynomial. The values lo_byte and hi_byte are returned via the
    macro xp_fun(lo_byte, hi_byte) so that the values can be assembled into
    memory as required by a suitable definition of this macro operating on
    the table above
*/

#define xx(p, q)	0x##p##q

#define xda_bbe(i) ( \
	(i & 0x80 ? xx(43, 80) : 0) ^ (i & 0x40 ? xx(21, c0) : 0) ^ \
	(i & 0x20 ? xx(10, e0) : 0) ^ (i & 0x10 ? xx(08, 70) : 0) ^ \
	(i & 0x08 ? xx(04, 38) : 0) ^ (i & 0x04 ? xx(02, 1c) : 0) ^ \
	(i & 0x02 ? xx(01, 0e) : 0) ^ (i & 0x01 ? xx(00, 87) : 0) \
)

#define xda_lle(i) ( \
	(i & 0x80 ? xx(e1, 00) : 0) ^ (i & 0x40 ? xx(70, 80) : 0) ^ \
	(i & 0x20 ? xx(38, 40) : 0) ^ (i & 0x10 ? xx(1c, 20) : 0) ^ \
	(i & 0x08 ? xx(0e, 10) : 0) ^ (i & 0x04 ? xx(07, 08) : 0) ^ \
	(i & 0x02 ? xx(03, 84) : 0) ^ (i & 0x01 ? xx(01, c2) : 0) \
)

static const u16 gf128mul_table_lle[256] = gf128mul_dat(xda_lle);
static const u16 gf128mul_table_bbe[256] = gf128mul_dat(xda_bbe);

/*
 * The following functions multiply a field element by x or by x^8 in
 * the polynomial field representation.  They use 64-bit word operations
 * to gain speed but compensate for machine endianness and hence work
 * correctly on both styles of machine.
 */

static void gf128mul_x_lle(be128 *r, const be128 *x)
{
	u64 a = be64_to_cpu(x->a);
	u64 b = be64_to_cpu(x->b);
	u64 _tt = gf128mul_table_lle[(b << 7) & 0xff];

	r->b = cpu_to_be64((b >> 1) | (a << 63));
	r->a = cpu_to_be64((a >> 1) ^ (_tt << 48));
}

static void gf128mul_x_bbe(be128 *r, const be128 *x)
{
	u64 a = be64_to_cpu(x->a);
	u64 b = be64_to_cpu(x->b);
	u64 _tt = gf128mul_table_bbe[a >> 63];

	r->a = cpu_to_be64((a << 1) | (b >> 63));
	r->b = cpu_to_be64((b << 1) ^ _tt);
}

void gf128mul_x_ble(be128 *r, const be128 *x)
{
	u64 a = le64_to_cpu(x->a);
	u64 b = le64_to_cpu(x->b);
	u64 _tt = gf128mul_table_bbe[b >> 63];

	r->a = cpu_to_le64((a << 1) ^ _tt);
	r->b = cpu_to_le64((b << 1) | (a >> 63));
}
EXPORT_SYMBOL(gf128mul_x_ble);

static void gf128mul_x8_lle(be128 *x)
{
	u64 a = be64_to_cpu(x->a);
	u64 b = be64_to_cpu(x->b);
	u64 _tt = gf128mul_table_lle[b & 0xff];

	x->b = cpu_to_be64((b >> 8) | (a << 56));
	x->a = cpu_to_be64((a >> 8) ^ (_tt << 48));
}

static void gf128mul_x8_bbe(be128 *x)
{
	u64 a = be64_to_cpu(x->a);
	u64 b = be64_to_cpu(x->b);
	u64 _tt = gf128mul_table_bbe[a >> 56];

	x->a = cpu_to_be64((a << 8) | (b >> 56));
	x->b = cpu_to_be64((b << 8) ^ _tt);
}

void gf128mul_lle(be128 *r, const be128 *b)
{
	be128 p[8];
	int i;

	p[0] = *r;
	for (i = 0; i < 7; ++i)
		gf128mul_x_lle(&p[i + 1], &p[i]);

	memset(r, 0, sizeof(*r));
	for (i = 0;;) {
		u8 ch = ((u8 *)b)[15 - i];

		if (ch & 0x80)
			be128_xor(r, r, &p[0]);
		if (ch & 0x40)
			be128_xor(r, r, &p[1]);
		if (ch & 0x20)
			be128_xor(r, r, &p[2]);
		if (ch & 0x10)
			be128_xor(r, r, &p[3]);
		if (ch & 0x08)
			be128_xor(r, r, &p[4]);
		if (ch & 0x04)
			be128_xor(r, r, &p[5]);
		if (ch & 0x02)
			be128_xor(r, r, &p[6]);
		if (ch & 0x01)
			be128_xor(r, r, &p[7]);

		if (++i >= 16)
			break;

		gf128mul_x8_lle(r);
	}
}
EXPORT_SYMBOL(gf128mul_lle);

void gf128mul_bbe(be128 *r, const be128 *b)
{
	be128 p[8];
	int i;

	p[0] = *r;
	for (i = 0; i < 7; ++i)
		gf128mul_x_bbe(&p[i + 1], &p[i]);

	memset(r, 0, sizeof(*r));
	for (i = 0;;) {
		u8 ch = ((u8 *)b)[i];

		if (ch & 0x80)
			be128_xor(r, r, &p[7]);
		if (ch & 0x40)
			be128_xor(r, r, &p[6]);
		if (ch & 0x20)
			be128_xor(r, r, &p[5]);
		if (ch & 0x10)
			be128_xor(r, r, &p[4]);
		if (ch & 0x08)
			be128_xor(r, r, &p[3]);
		if (ch & 0x04)
			be128_xor(r, r, &p[2]);
		if (ch & 0x02)
			be128_xor(r, r, &p[1]);
		if (ch & 0x01)
			be128_xor(r, r, &p[0]);

		if (++i >= 16)
			break;

		gf128mul_x8_bbe(r);
	}
}
EXPORT_SYMBOL(gf128mul_bbe);

/*      This version uses 64k bytes of table space.
    A 16 byte buffer has to be multiplied by a 16 byte key
    value in GF(2^128).  If we consider a GF(2^128) value in
    the buffer's lowest byte, we can construct a table of
    the 256 16 byte values that result from the 256 values
    of this byte.  This requires 4096 bytes. But we also
    need tables for each of the 16 higher bytes in the
    buffer as well, which makes 64 kbytes in total.
*/
/* additional explanation
 * t[0][BYTE] contains g*BYTE
 * t[1][BYTE] contains g*x^8*BYTE
 *  ..
 * t[15][BYTE] contains g*x^120*BYTE */
struct gf128mul_64k *gf128mul_init_64k_bbe(const be128 *g)
{
	struct gf128mul_64k *t;
	int i, j, k;

	t = kzalloc(sizeof(*t), GFP_KERNEL);
	if (!t)
		goto out;

	for (i = 0; i < 16; i++) {
		t->t[i] = kzalloc(sizeof(*t->t[i]), GFP_KERNEL);
		if (!t->t[i]) {
			gf128mul_free_64k(t);
			t = NULL;
			goto out;
		}
	}

	t->t[0]->t[1] = *g;
	for (j = 1; j <= 64; j <<= 1)
		gf128mul_x_bbe(&t->t[0]->t[j + j], &t->t[0]->t[j]);

	for (i = 0;;) {
		for (j = 2; j < 256; j += j)
			for (k = 1; k < j; ++k)
				be128_xor(&t->t[i]->t[j + k],
					  &t->t[i]->t[j], &t->t[i]->t[k]);

		if (++i >= 16)
			break;

		for (j = 128; j > 0; j >>= 1) {
			t->t[i]->t[j] = t->t[i - 1]->t[j];
			gf128mul_x8_bbe(&t->t[i]->t[j]);
		}
	}

out:
	return t;
}
EXPORT_SYMBOL(gf128mul_init_64k_bbe);

void gf128mul_free_64k(struct gf128mul_64k *t)
{
	int i;

	for (i = 0; i < 16; i++)
		kzfree(t->t[i]);
	kzfree(t);
}
EXPORT_SYMBOL(gf128mul_free_64k);

void gf128mul_64k_bbe(be128 *a, struct gf128mul_64k *t)
{
	u8 *ap = (u8 *)a;
	be128 r[1];
	int i;

	*r = t->t[0]->t[ap[15]];
	for (i = 1; i < 16; ++i)
		be128_xor(r, r, &t->t[i]->t[ap[15 - i]]);
	*a = *r;
}
EXPORT_SYMBOL(gf128mul_64k_bbe);

/*      This version uses 4k bytes of table space.
    A 16 byte buffer has to be multiplied by a 16 byte key
    value in GF(2^128).  If we consider a GF(2^128) value in a
    single byte, we can construct a table of the 256 16 byte
    values that result from the 256 values of this byte.
    This requires 4096 bytes. If we take the highest byte in
    the buffer and use this table to get the result, we then
    have to multiply by x^120 to get the final value. For the
    next highest byte the result has to be multiplied by x^112
    and so on. But we can do this by accumulating the result
    in an accumulator starting with the result for the top
    byte.  We repeatedly multiply the accumulator value by
    x^8 and then add in (i.e. xor) the 16 bytes of the next
    lower byte in the buffer, stopping when we reach the
    lowest byte. This requires a 4096 byte table.
*/
struct gf128mul_4k *gf128mul_init_4k_lle(const be128 *g)
{
	struct gf128mul_4k *t;
	int j, k;

	t = kzalloc(sizeof(*t), GFP_KERNEL);
	if (!t)
		goto out;

	t->t[128] = *g;
	for (j = 64; j > 0; j >>= 1)
		gf128mul_x_lle(&t->t[j], &t->t[j+j]);

	for (j = 2; j < 256; j += j)
		for (k = 1; k < j; ++k)
			be128_xor(&t->t[j + k], &t->t[j], &t->t[k]);

out:
	return t;
}
EXPORT_SYMBOL(gf128mul_init_4k_lle);

struct gf128mul_4k *gf128mul_init_4k_bbe(const be128 *g)
{
	struct gf128mul_4k *t;
	int j, k;

	t = kzalloc(sizeof(*t), GFP_KERNEL);
	if (!t)
		goto out;

	t->t[1] = *g;
	for (j = 1; j <= 64; j <<= 1)
		gf128mul_x_bbe(&t->t[j + j], &t->t[j]);

	for (j = 2; j < 256; j += j)
		for (k = 1; k < j; ++k)
			be128_xor(&t->t[j + k], &t->t[j], &t->t[k]);

out:
	return t;
}
EXPORT_SYMBOL(gf128mul_init_4k_bbe);

void gf128mul_4k_lle(be128 *a, struct gf128mul_4k *t)
{
	u8 *ap = (u8 *)a;
	be128 r[1];
	int i = 15;

	*r = t->t[ap[15]];
	while (i--) {
		gf128mul_x8_lle(r);
		be128_xor(r, r, &t->t[ap[i]]);
	}
	*a = *r;
}
EXPORT_SYMBOL(gf128mul_4k_lle);

void gf128mul_4k_bbe(be128 *a, struct gf128mul_4k *t)
{
	u8 *ap = (u8 *)a;
	be128 r[1];
	int i = 0;

	*r = t->t[ap[0]];
	while (++i < 16) {
		gf128mul_x8_bbe(r);
		be128_xor(r, r, &t->t[ap[i]]);
	}
	*a = *r;
}
EXPORT_SYMBOL(gf128mul_4k_bbe);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Functions for multiplying elements of GF(2^128)");
Commit	Line	Data
c494e070 RS	1	/* gf128mul.c - GF(2^128) multiplication functions
	2	*
	3	* Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.
	4	* Copyright (c) 2006, Rik Snel <rsnel@cube.dyndns.org>
	5	*
	6	* Based on Dr Brian Gladman's (GPL'd) work published at
8c882f64	7	* http://gladman.plushost.co.uk/oldsite/cryptography_technology/index.php
c494e070 RS	8	* See the original copyright notice below.
	9	*
	10	* This program is free software; you can redistribute it and/or modify it
	11	* under the terms of the GNU General Public License as published by the Free
	12	* Software Foundation; either version 2 of the License, or (at your option)
	13	* any later version.
	14	*/
	15
	16	/*
	17	---------------------------------------------------------------------------
	18	Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. All rights reserved.
	19
	20	LICENSE TERMS
	21
	22	The free distribution and use of this software in both source and binary
	23	form is allowed (with or without changes) provided that:
	24
	25	1. distributions of this source code include the above copyright
	26	notice, this list of conditions and the following disclaimer;
	27
	28	2. distributions in binary form include the above copyright
	29	notice, this list of conditions and the following disclaimer
	30	in the documentation and/or other associated materials;
	31
	32	3. the copyright holder's name is not used to endorse products
	33	built using this software without specific written permission.
	34
	35	ALTERNATIVELY, provided that this notice is retained in full, this product
	36	may be distributed under the terms of the GNU General Public License (GPL),
	37	in which case the provisions of the GPL apply INSTEAD OF those given above.
	38
	39	DISCLAIMER
	40
	41	This software is provided 'as is' with no explicit or implied warranties
	42	in respect of its properties, including, but not limited to, correctness
	43	and/or fitness for purpose.
	44	---------------------------------------------------------------------------
	45	Issue 31/01/2006
	46
63be5b53	47	This file provides fast multiplication in GF(2^128) as required by several
c494e070 RS	48	cryptographic authentication modes
	49	*/
	50
	51	#include <crypto/gf128mul.h>
	52	#include <linux/kernel.h>
	53	#include <linux/module.h>
	54	#include <linux/slab.h>
	55
	56	#define gf128mul_dat(q) { \
	57	q(0x00), q(0x01), q(0x02), q(0x03), q(0x04), q(0x05), q(0x06), q(0x07),\
	58	q(0x08), q(0x09), q(0x0a), q(0x0b), q(0x0c), q(0x0d), q(0x0e), q(0x0f),\
	59	q(0x10), q(0x11), q(0x12), q(0x13), q(0x14), q(0x15), q(0x16), q(0x17),\
	60	q(0x18), q(0x19), q(0x1a), q(0x1b), q(0x1c), q(0x1d), q(0x1e), q(0x1f),\
	61	q(0x20), q(0x21), q(0x22), q(0x23), q(0x24), q(0x25), q(0x26), q(0x27),\
	62	q(0x28), q(0x29), q(0x2a), q(0x2b), q(0x2c), q(0x2d), q(0x2e), q(0x2f),\
	63	q(0x30), q(0x31), q(0x32), q(0x33), q(0x34), q(0x35), q(0x36), q(0x37),\
	64	q(0x38), q(0x39), q(0x3a), q(0x3b), q(0x3c), q(0x3d), q(0x3e), q(0x3f),\
	65	q(0x40), q(0x41), q(0x42), q(0x43), q(0x44), q(0x45), q(0x46), q(0x47),\
	66	q(0x48), q(0x49), q(0x4a), q(0x4b), q(0x4c), q(0x4d), q(0x4e), q(0x4f),\
	67	q(0x50), q(0x51), q(0x52), q(0x53), q(0x54), q(0x55), q(0x56), q(0x57),\
	68	q(0x58), q(0x59), q(0x5a), q(0x5b), q(0x5c), q(0x5d), q(0x5e), q(0x5f),\
	69	q(0x60), q(0x61), q(0x62), q(0x63), q(0x64), q(0x65), q(0x66), q(0x67),\
	70	q(0x68), q(0x69), q(0x6a), q(0x6b), q(0x6c), q(0x6d), q(0x6e), q(0x6f),\
	71	q(0x70), q(0x71), q(0x72), q(0x73), q(0x74), q(0x75), q(0x76), q(0x77),\
	72	q(0x78), q(0x79), q(0x7a), q(0x7b), q(0x7c), q(0x7d), q(0x7e), q(0x7f),\
	73	q(0x80), q(0x81), q(0x82), q(0x83), q(0x84), q(0x85), q(0x86), q(0x87),\
	74	q(0x88), q(0x89), q(0x8a), q(0x8b), q(0x8c), q(0x8d), q(0x8e), q(0x8f),\
	75	q(0x90), q(0x91), q(0x92), q(0x93), q(0x94), q(0x95), q(0x96), q(0x97),\
	76	q(0x98), q(0x99), q(0x9a), q(0x9b), q(0x9c), q(0x9d), q(0x9e), q(0x9f),\
	77	q(0xa0), q(0xa1), q(0xa2), q(0xa3), q(0xa4), q(0xa5), q(0xa6), q(0xa7),\
	78	q(0xa8), q(0xa9), q(0xaa), q(0xab), q(0xac), q(0xad), q(0xae), q(0xaf),\
	79	q(0xb0), q(0xb1), q(0xb2), q(0xb3), q(0xb4), q(0xb5), q(0xb6), q(0xb7),\
	80	q(0xb8), q(0xb9), q(0xba), q(0xbb), q(0xbc), q(0xbd), q(0xbe), q(0xbf),\
	81	q(0xc0), q(0xc1), q(0xc2), q(0xc3), q(0xc4), q(0xc5), q(0xc6), q(0xc7),\
	82	q(0xc8), q(0xc9), q(0xca), q(0xcb), q(0xcc), q(0xcd), q(0xce), q(0xcf),\
	83	q(0xd0), q(0xd1), q(0xd2), q(0xd3), q(0xd4), q(0xd5), q(0xd6), q(0xd7),\
	84	q(0xd8), q(0xd9), q(0xda), q(0xdb), q(0xdc), q(0xdd), q(0xde), q(0xdf),\
	85	q(0xe0), q(0xe1), q(0xe2), q(0xe3), q(0xe4), q(0xe5), q(0xe6), q(0xe7),\
	86	q(0xe8), q(0xe9), q(0xea), q(0xeb), q(0xec), q(0xed), q(0xee), q(0xef),\
	87	q(0xf0), q(0xf1), q(0xf2), q(0xf3), q(0xf4), q(0xf5), q(0xf6), q(0xf7),\
	88	q(0xf8), q(0xf9), q(0xfa), q(0xfb), q(0xfc), q(0xfd), q(0xfe), q(0xff) \
	89	}
	90
	91	/* Given the value i in 0..255 as the byte overflow when a field element
25985edc	92	in GHASH is multiplied by x^8, this function will return the values that
c494e070 RS	93	are generated in the lo 16-bit word of the field value by applying the
	94	modular polynomial. The values lo_byte and hi_byte are returned via the
	95	macro xp_fun(lo_byte, hi_byte) so that the values can be assembled into
	96	memory as required by a suitable definition of this macro operating on
	97	the table above
	98	*/
	99
	100	#define xx(p, q) 0x##p##q
	101
	102	#define xda_bbe(i) ( \
	103	(i & 0x80 ? xx(43, 80) : 0) ^ (i & 0x40 ? xx(21, c0) : 0) ^ \
	104	(i & 0x20 ? xx(10, e0) : 0) ^ (i & 0x10 ? xx(08, 70) : 0) ^ \
	105	(i & 0x08 ? xx(04, 38) : 0) ^ (i & 0x04 ? xx(02, 1c) : 0) ^ \
	106	(i & 0x02 ? xx(01, 0e) : 0) ^ (i & 0x01 ? xx(00, 87) : 0) \
	107	)
	108
	109	#define xda_lle(i) ( \
	110	(i & 0x80 ? xx(e1, 00) : 0) ^ (i & 0x40 ? xx(70, 80) : 0) ^ \
	111	(i & 0x20 ? xx(38, 40) : 0) ^ (i & 0x10 ? xx(1c, 20) : 0) ^ \
	112	(i & 0x08 ? xx(0e, 10) : 0) ^ (i & 0x04 ? xx(07, 08) : 0) ^ \
	113	(i & 0x02 ? xx(03, 84) : 0) ^ (i & 0x01 ? xx(01, c2) : 0) \
	114	)
	115
	116	static const u16 gf128mul_table_lle[256] = gf128mul_dat(xda_lle);
	117	static const u16 gf128mul_table_bbe[256] = gf128mul_dat(xda_bbe);
	118
63be5b53 EB	119	/*
	120	* The following functions multiply a field element by x or by x^8 in
	121	* the polynomial field representation. They use 64-bit word operations
	122	* to gain speed but compensate for machine endianness and hence work
c494e070 RS	123	* correctly on both styles of machine.
	124	*/
	125
	126	static void gf128mul_x_lle(be128 r, const be128 x)
	127	{
	128	u64 a = be64_to_cpu(x->a);
	129	u64 b = be64_to_cpu(x->b);
	130	u64 _tt = gf128mul_table_lle[(b << 7) & 0xff];
	131
	132	r->b = cpu_to_be64((b >> 1) \| (a << 63));
	133	r->a = cpu_to_be64((a >> 1) ^ (_tt << 48));
	134	}
	135
	136	static void gf128mul_x_bbe(be128 r, const be128 x)
	137	{
	138	u64 a = be64_to_cpu(x->a);
	139	u64 b = be64_to_cpu(x->b);
	140	u64 _tt = gf128mul_table_bbe[a >> 63];
	141
	142	r->a = cpu_to_be64((a << 1) \| (b >> 63));
	143	r->b = cpu_to_be64((b << 1) ^ _tt);
	144	}
	145
f19f5111 RS	146	void gf128mul_x_ble(be128 r, const be128 x)
	147	{
	148	u64 a = le64_to_cpu(x->a);
	149	u64 b = le64_to_cpu(x->b);
	150	u64 _tt = gf128mul_table_bbe[b >> 63];
	151
	152	r->a = cpu_to_le64((a << 1) ^ _tt);
	153	r->b = cpu_to_le64((b << 1) \| (a >> 63));
	154	}
	155	EXPORT_SYMBOL(gf128mul_x_ble);
	156
c494e070 RS	157	static void gf128mul_x8_lle(be128 *x)
	158	{
	159	u64 a = be64_to_cpu(x->a);
	160	u64 b = be64_to_cpu(x->b);
	161	u64 _tt = gf128mul_table_lle[b & 0xff];
	162
	163	x->b = cpu_to_be64((b >> 8) \| (a << 56));
	164	x->a = cpu_to_be64((a >> 8) ^ (_tt << 48));
	165	}
	166
	167	static void gf128mul_x8_bbe(be128 *x)
	168	{
	169	u64 a = be64_to_cpu(x->a);
	170	u64 b = be64_to_cpu(x->b);
	171	u64 _tt = gf128mul_table_bbe[a >> 56];
	172
	173	x->a = cpu_to_be64((a << 8) \| (b >> 56));
	174	x->b = cpu_to_be64((b << 8) ^ _tt);
	175	}
	176
	177	void gf128mul_lle(be128 r, const be128 b)
	178	{
	179	be128 p[8];
	180	int i;
	181
	182	p[0] = *r;
	183	for (i = 0; i < 7; ++i)
	184	gf128mul_x_lle(&p[i + 1], &p[i]);
	185
62542663	186	memset(r, 0, sizeof(*r));
c494e070 RS	187	for (i = 0;;) {
	188	u8 ch = ((u8 *)b)[15 - i];
	189
	190	if (ch & 0x80)
	191	be128_xor(r, r, &p[0]);
	192	if (ch & 0x40)
	193	be128_xor(r, r, &p[1]);
	194	if (ch & 0x20)
	195	be128_xor(r, r, &p[2]);
	196	if (ch & 0x10)
	197	be128_xor(r, r, &p[3]);
	198	if (ch & 0x08)
	199	be128_xor(r, r, &p[4]);
	200	if (ch & 0x04)
	201	be128_xor(r, r, &p[5]);
	202	if (ch & 0x02)
	203	be128_xor(r, r, &p[6]);
	204	if (ch & 0x01)
	205	be128_xor(r, r, &p[7]);
	206
	207	if (++i >= 16)
	208	break;
	209
	210	gf128mul_x8_lle(r);
	211	}
	212	}
	213	EXPORT_SYMBOL(gf128mul_lle);
	214
	215	void gf128mul_bbe(be128 r, const be128 b)
	216	{
	217	be128 p[8];
	218	int i;
	219
	220	p[0] = *r;
	221	for (i = 0; i < 7; ++i)
	222	gf128mul_x_bbe(&p[i + 1], &p[i]);
	223
62542663	224	memset(r, 0, sizeof(*r));
c494e070 RS	225	for (i = 0;;) {
	226	u8 ch = ((u8 *)b)[i];
	227
	228	if (ch & 0x80)
	229	be128_xor(r, r, &p[7]);
	230	if (ch & 0x40)
	231	be128_xor(r, r, &p[6]);
	232	if (ch & 0x20)
	233	be128_xor(r, r, &p[5]);
	234	if (ch & 0x10)
	235	be128_xor(r, r, &p[4]);
	236	if (ch & 0x08)
	237	be128_xor(r, r, &p[3]);
	238	if (ch & 0x04)
	239	be128_xor(r, r, &p[2]);
	240	if (ch & 0x02)
	241	be128_xor(r, r, &p[1]);
	242	if (ch & 0x01)
	243	be128_xor(r, r, &p[0]);
	244
	245	if (++i >= 16)
	246	break;
	247
	248	gf128mul_x8_bbe(r);
	249	}
	250	}
	251	EXPORT_SYMBOL(gf128mul_bbe);
	252
	253	/* This version uses 64k bytes of table space.
	254	A 16 byte buffer has to be multiplied by a 16 byte key
63be5b53	255	value in GF(2^128). If we consider a GF(2^128) value in
c494e070 RS	256	the buffer's lowest byte, we can construct a table of
	257	the 256 16 byte values that result from the 256 values
	258	of this byte. This requires 4096 bytes. But we also
	259	need tables for each of the 16 higher bytes in the
	260	buffer as well, which makes 64 kbytes in total.
	261	*/
	262	/* additional explanation
	263	* t[0][BYTE] contains g*BYTE
	264	* t[1][BYTE] contains gx^8BYTE
	265	* ..
	266	* t[15][BYTE] contains gx^120BYTE */
c494e070 RS	267	struct gf128mul_64k gf128mul_init_64k_bbe(const be128 g)
	268	{
	269	struct gf128mul_64k *t;
	270	int i, j, k;
	271
	272	t = kzalloc(sizeof(*t), GFP_KERNEL);
	273	if (!t)
	274	goto out;
	275
	276	for (i = 0; i < 16; i++) {
	277	t->t[i] = kzalloc(sizeof(*t->t[i]), GFP_KERNEL);
	278	if (!t->t[i]) {
	279	gf128mul_free_64k(t);
	280	t = NULL;
	281	goto out;
	282	}
	283	}
	284
	285	t->t[0]->t[1] = *g;
	286	for (j = 1; j <= 64; j <<= 1)
	287	gf128mul_x_bbe(&t->t[0]->t[j + j], &t->t[0]->t[j]);
	288
	289	for (i = 0;;) {
	290	for (j = 2; j < 256; j += j)
	291	for (k = 1; k < j; ++k)
	292	be128_xor(&t->t[i]->t[j + k],
	293	&t->t[i]->t[j], &t->t[i]->t[k]);
	294
	295	if (++i >= 16)
	296	break;
	297
	298	for (j = 128; j > 0; j >>= 1) {
	299	t->t[i]->t[j] = t->t[i - 1]->t[j];
	300	gf128mul_x8_bbe(&t->t[i]->t[j]);
	301	}
	302	}
	303
	304	out:
	305	return t;
	306	}
	307	EXPORT_SYMBOL(gf128mul_init_64k_bbe);
	308
	309	void gf128mul_free_64k(struct gf128mul_64k *t)
	310	{
	311	int i;
	312
	313	for (i = 0; i < 16; i++)
75aa0a7c AC	314	kzfree(t->t[i]);
75aa0a7c AC	315	kzfree(t);
c494e070 RS	316	}
	317	EXPORT_SYMBOL(gf128mul_free_64k);
	318
c494e070 RS	319	void gf128mul_64k_bbe(be128 a, struct gf128mul_64k t)
	320	{
	321	u8 ap = (u8 )a;
	322	be128 r[1];
	323	int i;
	324
	325	*r = t->t[0]->t[ap[15]];
	326	for (i = 1; i < 16; ++i)
	327	be128_xor(r, r, &t->t[i]->t[ap[15 - i]]);
	328	a = r;
	329	}
	330	EXPORT_SYMBOL(gf128mul_64k_bbe);
	331
	332	/* This version uses 4k bytes of table space.
	333	A 16 byte buffer has to be multiplied by a 16 byte key
63be5b53	334	value in GF(2^128). If we consider a GF(2^128) value in a
c494e070 RS	335	single byte, we can construct a table of the 256 16 byte
	336	values that result from the 256 values of this byte.
	337	This requires 4096 bytes. If we take the highest byte in
	338	the buffer and use this table to get the result, we then
	339	have to multiply by x^120 to get the final value. For the
	340	next highest byte the result has to be multiplied by x^112
	341	and so on. But we can do this by accumulating the result
	342	in an accumulator starting with the result for the top
	343	byte. We repeatedly multiply the accumulator value by
	344	x^8 and then add in (i.e. xor) the 16 bytes of the next
	345	lower byte in the buffer, stopping when we reach the
	346	lowest byte. This requires a 4096 byte table.
	347	*/
	348	struct gf128mul_4k gf128mul_init_4k_lle(const be128 g)
	349	{
	350	struct gf128mul_4k *t;
	351	int j, k;
	352
	353	t = kzalloc(sizeof(*t), GFP_KERNEL);
	354	if (!t)
	355	goto out;
	356
	357	t->t[128] = *g;
	358	for (j = 64; j > 0; j >>= 1)
	359	gf128mul_x_lle(&t->t[j], &t->t[j+j]);
	360
	361	for (j = 2; j < 256; j += j)
	362	for (k = 1; k < j; ++k)
	363	be128_xor(&t->t[j + k], &t->t[j], &t->t[k]);
	364
	365	out:
	366	return t;
	367	}
	368	EXPORT_SYMBOL(gf128mul_init_4k_lle);
	369
	370	struct gf128mul_4k gf128mul_init_4k_bbe(const be128 g)
	371	{
	372	struct gf128mul_4k *t;
	373	int j, k;
	374
	375	t = kzalloc(sizeof(*t), GFP_KERNEL);
	376	if (!t)
	377	goto out;
	378
	379	t->t[1] = *g;
	380	for (j = 1; j <= 64; j <<= 1)
	381	gf128mul_x_bbe(&t->t[j + j], &t->t[j]);
	382
	383	for (j = 2; j < 256; j += j)
	384	for (k = 1; k < j; ++k)
	385	be128_xor(&t->t[j + k], &t->t[j], &t->t[k]);
	386
	387	out:
	388	return t;
	389	}
	390	EXPORT_SYMBOL(gf128mul_init_4k_bbe);
	391
	392	void gf128mul_4k_lle(be128 a, struct gf128mul_4k t)
	393	{
	394	u8 ap = (u8 )a;
	395	be128 r[1];
	396	int i = 15;
	397
	398	*r = t->t[ap[15]];
399	while (i--) {
400	gf128mul_x8_lle(r);
401	be128_xor(r, r, &t->t[ap[i]]);
402	}
403	a = r;
404	}
405	EXPORT_SYMBOL(gf128mul_4k_lle);
406
407	void gf128mul_4k_bbe(be128 a, struct gf128mul_4k t)
408	{
409	u8 ap = (u8 )a;
410	be128 r[1];
411	int i = 0;
412
413	*r = t->t[ap[0]];
414	while (++i < 16) {
415	gf128mul_x8_bbe(r);
416	be128_xor(r, r, &t->t[ap[i]]);
417	}
418	a = r;
419	}
420	EXPORT_SYMBOL(gf128mul_4k_bbe);
421
422	MODULE_LICENSE("GPL");
423	MODULE_DESCRIPTION("Functions for multiplying elements of GF(2^128)");