/*
 * Cryptographic API.
 *
 * Glue code for the SHA256 Secure Hash Algorithm assembler
 * implementation using supplemental SSE3 / AVX / AVX2 instructions.
 *
 * This file is based on sha256_generic.c
 *
 * Copyright (C) 2013 Intel Corporation.
 *
 * Author:
 *	Tim Chen <tim.c.chen@linux.intel.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */


#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <crypto/internal/hash.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/cryptohash.h>
#include <linux/types.h>
#include <crypto/sha.h>
#include <asm/byteorder.h>
#include <asm/i387.h>
#include <asm/xcr.h>
#include <asm/xsave.h>
#include <linux/string.h>

asmlinkage void sha256_transform_ssse3(const char *data, u32 *digest,
				       u64 rounds);
#ifdef CONFIG_AS_AVX
asmlinkage void sha256_transform_avx(const char *data, u32 *digest,
				     u64 rounds);
#endif
#ifdef CONFIG_AS_AVX2
asmlinkage void sha256_transform_rorx(const char *data, u32 *digest,
				      u64 rounds);
#endif

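/*
 * Pointer to the fastest usable block transform, selected once in
 * sha256_ssse3_mod_init() and never changed afterwards.  All three
 * back ends clobber SSE/AVX register state, so every call must be
 * bracketed by kernel_fpu_begin()/kernel_fpu_end().
 */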
static asmlinkage void (*sha256_transform_asm)(const char *, u32 *, u64);


static int sha256_ssse3_init(struct shash_desc *desc)
{
	struct sha256_state *sctx = shash_desc_ctx(desc);

	sctx->state[0] = SHA256_H0;
	sctx->state[1] = SHA256_H1;
	sctx->state[2] = SHA256_H2;
	sctx->state[3] = SHA256_H3;
	sctx->state[4] = SHA256_H4;
	sctx->state[5] = SHA256_H5;
	sctx->state[6] = SHA256_H6;
	sctx->state[7] = SHA256_H7;
	sctx->count = 0;

	return 0;
}

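/*
 * Core update step.  @partial is how many bytes are already buffered in
 * sctx->buf.  Worked example (illustrative only): with partial = 16 and
 * len = 200, the first 48 bytes of @data complete the buffered block
 * (done = 48), the next 128 bytes are hashed directly from @data as
 * rounds = 2 full blocks (done = 176), and the trailing 24 bytes are
 * copied into sctx->buf for a later update or final.
 */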
static int __sha256_ssse3_update(struct shash_desc *desc, const u8 *data,
				 unsigned int len, unsigned int partial)
{
	struct sha256_state *sctx = shash_desc_ctx(desc);
	unsigned int done = 0;

	sctx->count += len;

	if (partial) {
		done = SHA256_BLOCK_SIZE - partial;
		memcpy(sctx->buf + partial, data, done);
		sha256_transform_asm(sctx->buf, sctx->state, 1);
	}

	if (len - done >= SHA256_BLOCK_SIZE) {
		const unsigned int rounds = (len - done) / SHA256_BLOCK_SIZE;

		sha256_transform_asm(data + done, sctx->state, (u64) rounds);

		done += rounds * SHA256_BLOCK_SIZE;
	}

	memcpy(sctx->buf, data + done, len - done);

	return 0;
}

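/*
 * Update entry point.  Small fragments that do not complete a block are
 * only buffered, which keeps the FPU untouched on that path.  When SIMD
 * cannot be used (irq_fpu_usable() is false, e.g. in an interrupt that
 * preempted other FPU code), the call falls back to the generic C
 * implementation via crypto_sha256_update().
 */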
static int sha256_ssse3_update(struct shash_desc *desc, const u8 *data,
			       unsigned int len)
{
	struct sha256_state *sctx = shash_desc_ctx(desc);
	unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
	int res;

	/* Handle the fast case right here */
	if (partial + len < SHA256_BLOCK_SIZE) {
		sctx->count += len;
		memcpy(sctx->buf + partial, data, len);

		return 0;
	}

	if (!irq_fpu_usable()) {
		res = crypto_sha256_update(desc, data, len);
	} else {
		kernel_fpu_begin();
		res = __sha256_ssse3_update(desc, data, len, partial);
		kernel_fpu_end();
	}

	return res;
}


/* Add padding and return the message digest. */
static int sha256_ssse3_final(struct shash_desc *desc, u8 *out)
{
	struct sha256_state *sctx = shash_desc_ctx(desc);
	unsigned int i, index, padlen;
	__be32 *dst = (__be32 *)out;
	__be64 bits;
	static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, };

	bits = cpu_to_be64(sctx->count << 3);

	/* Pad out to 56 mod 64 and append length */
	index = sctx->count % SHA256_BLOCK_SIZE;
	padlen = (index < 56) ? (56 - index) : ((SHA256_BLOCK_SIZE+56)-index);

	if (!irq_fpu_usable()) {
		crypto_sha256_update(desc, padding, padlen);
		crypto_sha256_update(desc, (const u8 *)&bits, sizeof(bits));
	} else {
		kernel_fpu_begin();
		/* We need to fill a whole block for __sha256_ssse3_update() */
		if (padlen <= 56) {
			sctx->count += padlen;
			memcpy(sctx->buf + index, padding, padlen);
		} else {
			__sha256_ssse3_update(desc, padding, padlen, index);
		}
		__sha256_ssse3_update(desc, (const u8 *)&bits,
				      sizeof(bits), 56);
		kernel_fpu_end();
	}

	/* Store state in digest */
	for (i = 0; i < 8; i++)
		dst[i] = cpu_to_be32(sctx->state[i]);

	/* Wipe context */
	memset(sctx, 0, sizeof(*sctx));

	return 0;
}
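/*
 * The padded message produced by sha256_ssse3_final() above follows
 * FIPS 180-2:
 *
 *	message || 0x80 || 0x00 ... 0x00 || be64(bit length of message)
 *
 * with enough zero bytes that the total is a multiple of
 * SHA256_BLOCK_SIZE (64) and the 8-byte length field fills the last
 * bytes of the final block, starting at offset 56.
 */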

static int sha256_ssse3_export(struct shash_desc *desc, void *out)
{
	struct sha256_state *sctx = shash_desc_ctx(desc);

	memcpy(out, sctx, sizeof(*sctx));

	return 0;
}

static int sha256_ssse3_import(struct shash_desc *desc, const void *in)
{
	struct sha256_state *sctx = shash_desc_ctx(desc);

	memcpy(sctx, in, sizeof(*sctx));

	return 0;
}

static struct shash_alg alg = {
	.digestsize	=	SHA256_DIGEST_SIZE,
	.init		=	sha256_ssse3_init,
	.update		=	sha256_ssse3_update,
	.final		=	sha256_ssse3_final,
	.export		=	sha256_ssse3_export,
	.import		=	sha256_ssse3_import,
	.descsize	=	sizeof(struct sha256_state),
	.statesize	=	sizeof(struct sha256_state),
	.base		=	{
		.cra_name	=	"sha256",
		.cra_driver_name =	"sha256-ssse3",
		.cra_priority	=	150,
		.cra_flags	=	CRYPTO_ALG_TYPE_SHASH,
		.cra_blocksize	=	SHA256_BLOCK_SIZE,
		.cra_module	=	THIS_MODULE,
	}
};
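/*
 * cra_priority 150 ranks this driver above the generic C sha256
 * implementation, so once both are registered, lookups of "sha256"
 * resolve to this accelerated version; "sha256-ssse3" selects it
 * explicitly.
 */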

#ifdef CONFIG_AS_AVX
static bool __init avx_usable(void)
{
	u64 xcr0;

	if (!cpu_has_avx || !cpu_has_osxsave)
		return false;

	xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
	if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
		pr_info("AVX detected but unusable.\n");

		return false;
	}

	return true;
}
#endif
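/*
 * avx_usable() above checks more than the AVX CPUID bit: OSXSAVE
 * confirms that XGETBV/XSETBV are enabled, and the XCR0 read confirms
 * that the OS saves and restores both SSE (XMM) and YMM register state
 * on context switch.  Without both bits set, AVX instructions would
 * fault (#UD).
 */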

static int __init sha256_ssse3_mod_init(void)
{
	/* test for SSSE3 first */
	if (cpu_has_ssse3)
		sha256_transform_asm = sha256_transform_ssse3;

#ifdef CONFIG_AS_AVX
	/* allow AVX to override SSSE3, it's a little faster */
	if (avx_usable()) {
#ifdef CONFIG_AS_AVX2
		/* rorx is a BMI2 instruction, so require BMI2 as well */
		if (boot_cpu_has(X86_FEATURE_AVX2) &&
		    boot_cpu_has(X86_FEATURE_BMI2))
			sha256_transform_asm = sha256_transform_rorx;
		else
#endif
			sha256_transform_asm = sha256_transform_avx;
	}
#endif

	if (sha256_transform_asm) {
#ifdef CONFIG_AS_AVX
		if (sha256_transform_asm == sha256_transform_avx)
			pr_info("Using AVX optimized SHA-256 implementation\n");
#ifdef CONFIG_AS_AVX2
		else if (sha256_transform_asm == sha256_transform_rorx)
			pr_info("Using AVX2 optimized SHA-256 implementation\n");
#endif
		else
#endif
			pr_info("Using SSSE3 optimized SHA-256 implementation\n");
		return crypto_register_shash(&alg);
	}
	pr_info("Neither AVX nor SSSE3 is available/usable.\n");

	return -ENODEV;
}
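/*
 * Illustrative sketch (not part of this driver, error handling
 * omitted) of how a kernel caller reaches the transform registered
 * above through the synchronous hash API:
 *
 *	struct crypto_shash *tfm = crypto_alloc_shash("sha256", 0, 0);
 *	struct shash_desc *desc = kmalloc(sizeof(*desc) +
 *			crypto_shash_descsize(tfm), GFP_KERNEL);
 *	u8 digest[SHA256_DIGEST_SIZE];
 *
 *	desc->tfm = tfm;
 *	desc->flags = 0;
 *	crypto_shash_digest(desc, data, len, digest);
 *	kfree(desc);
 *	crypto_free_shash(tfm);
 */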

static void __exit sha256_ssse3_mod_fini(void)
{
	crypto_unregister_shash(&alg);
}

module_init(sha256_ssse3_mod_init);
module_exit(sha256_ssse3_mod_fini);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm, Supplemental SSE3 accelerated");

MODULE_ALIAS("sha256");