[mirror_ubuntu-focal-kernel.git] / arch / mips / math-emu / sp_maddf.c

/*
 * IEEE754 floating point arithmetic
 * single precision: MADDF.f (Fused Multiply Add)
 * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft])
 *
 * MIPS floating point support
 * Copyright (C) 2015 Imagination Technologies, Ltd.
 * Author: Markos Chandras <markos.chandras@imgtec.com>
 *
 *  This program is free software; you can distribute it and/or modify it
 *  under the terms of the GNU General Public License as published by the
 *  Free Software Foundation; version 2 of the License.
 */

#include "ieee754sp.h"


static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
				 union ieee754sp y, enum maddf_flags flags)
{
	int re;
	int rs;
	unsigned rm;
	uint64_t rm64;
	uint64_t zm64;
	int s;

	COMPXSP;
	COMPYSP;
	COMPZSP;

	EXPLODEXSP;
	EXPLODEYSP;
	EXPLODEZSP;

	FLUSHXSP;
	FLUSHYSP;
	FLUSHZSP;

	ieee754_clearcx();

	/*
	 * Handle the cases when at least one of x, y or z is a NaN.
	 * Order of precedence is sNaN, qNaN and z, x, y.
	 */
	if (zc == IEEE754_CLASS_SNAN)
		return ieee754sp_nanxcpt(z);
	if (xc == IEEE754_CLASS_SNAN)
		return ieee754sp_nanxcpt(x);
	if (yc == IEEE754_CLASS_SNAN)
		return ieee754sp_nanxcpt(y);
	if (zc == IEEE754_CLASS_QNAN)
		return z;
	if (xc == IEEE754_CLASS_QNAN)
		return x;
	if (yc == IEEE754_CLASS_QNAN)
		return y;

	if (zc == IEEE754_CLASS_DNORM)
		SPDNORMZ;
	/* ZERO z cases are handled separately below */

	switch (CLPAIR(xc, yc)) {


	/*
	 * Infinity handling
	 */
	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
		ieee754_setcx(IEEE754_INVALID_OPERATION);
		return ieee754sp_indef();

	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
		if ((zc == IEEE754_CLASS_INF) &&
		    ((!(flags & MADDF_NEGATE_PRODUCT) && (zs != (xs ^ ys))) ||
		     ((flags & MADDF_NEGATE_PRODUCT) && (zs == (xs ^ ys))))) {
			/*
			 * Cases of addition of infinities with opposite signs
			 * or subtraction of infinities with same signs.
			 */
			ieee754_setcx(IEEE754_INVALID_OPERATION);
			return ieee754sp_indef();
		}
		/*
		 * z is here either not an infinity, or an infinity having the
		 * same sign as product (x*y) (in case of MADDF.D instruction)
		 * or product -(x*y) (in MSUBF.D case). The result must be an
		 * infinity, and its sign is determined only by the value of
		 * (flags & MADDF_NEGATE_PRODUCT) and the signs of x and y.
		 */
		if (flags & MADDF_NEGATE_PRODUCT)
			return ieee754sp_inf(1 ^ (xs ^ ys));
		else
			return ieee754sp_inf(xs ^ ys);

	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
		if (zc == IEEE754_CLASS_INF)
			return ieee754sp_inf(zs);
		if (zc == IEEE754_CLASS_ZERO) {
			/* Handle cases +0 + (-0) and similar ones. */
			if ((!(flags & MADDF_NEGATE_PRODUCT)
					&& (zs == (xs ^ ys))) ||
			    ((flags & MADDF_NEGATE_PRODUCT)
					&& (zs != (xs ^ ys))))
				/*
				 * Cases of addition of zeros of equal signs
				 * or subtraction of zeroes of opposite signs.
				 * The sign of the resulting zero is in any
				 * such case determined only by the sign of z.
				 */
				return z;

			return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
		}
		/* x*y is here 0, and z is not 0, so just return z */
		return z;

	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
		SPDNORMX;

	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
		if (zc == IEEE754_CLASS_INF)
			return ieee754sp_inf(zs);
		SPDNORMY;
		break;

	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
		if (zc == IEEE754_CLASS_INF)
			return ieee754sp_inf(zs);
		SPDNORMX;
		break;

	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
		if (zc == IEEE754_CLASS_INF)
			return ieee754sp_inf(zs);
		/* fall through to real computations */
	}

	/* Finally get to do some computation */

	/*
	 * Do the multiplication bit first
	 *
	 * rm = xm * ym, re = xe + ye basically
	 *
	 * At this point xm and ym should have been normalized.
	 */

	/* rm = xm * ym, re = xe+ye basically */
	assert(xm & SP_HIDDEN_BIT);
	assert(ym & SP_HIDDEN_BIT);

	re = xe + ye;
	rs = xs ^ ys;
	if (flags & MADDF_NEGATE_PRODUCT)
		rs ^= 1;

	/* Multiple 24 bit xm and ym to give 48 bit results */
	rm64 = (uint64_t)xm * ym;

	/* Shunt to top of word */
	rm64 = rm64 << 16;

	/* Put explicit bit at bit 62 if necessary */
	if ((int64_t) rm64 < 0) {
		rm64 = rm64 >> 1;
		re++;
	}

	assert(rm64 & (1 << 62));

	if (zc == IEEE754_CLASS_ZERO) {
		/*
		 * Move explicit bit from bit 62 to bit 26 since the
		 * ieee754sp_format code expects the mantissa to be
		 * 27 bits wide (24 + 3 rounding bits).
		 */
		rm = XSPSRS64(rm64, (62 - 26));
		return ieee754sp_format(rs, re, rm);
	}

	/* Move explicit bit from bit 23 to bit 62 */
	zm64 = (uint64_t)zm << (62 - 23);
	assert(zm64 & (1 << 62));

	/* Make the exponents the same */
	if (ze > re) {
		/*
		 * Have to shift r fraction right to align.
		 */
		s = ze - re;
		rm64 = XSPSRS64(rm64, s);
		re += s;
	} else if (re > ze) {
		/*
		 * Have to shift z fraction right to align.
		 */
		s = re - ze;
		zm64 = XSPSRS64(zm64, s);
		ze += s;
	}
	assert(ze == re);
	assert(ze <= SP_EMAX);

	/* Do the addition */
	if (zs == rs) {
		/*
		 * Generate 64 bit result by adding two 63 bit numbers
		 * leaving result in zm64, zs and ze.
		 */
		zm64 = zm64 + rm64;
		if ((int64_t)zm64 < 0) {	/* carry out */
			zm64 = XSPSRS1(zm64);
			ze++;
		}
	} else {
		if (zm64 >= rm64) {
			zm64 = zm64 - rm64;
		} else {
			zm64 = rm64 - zm64;
			zs = rs;
		}
		if (zm64 == 0)
			return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);

		/*
		 * Put explicit bit at bit 62 if necessary.
		 */
		while ((zm64 >> 62) == 0) {
			zm64 <<= 1;
			ze--;
		}
	}

	/*
	 * Move explicit bit from bit 62 to bit 26 since the
	 * ieee754sp_format code expects the mantissa to be
	 * 27 bits wide (24 + 3 rounding bits).
	 */
	zm = XSPSRS64(zm64, (62 - 26));

	return ieee754sp_format(zs, ze, zm);
}

union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x,
				union ieee754sp y)
{
	return _sp_maddf(z, x, y, 0);
}

union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x,
				union ieee754sp y)
{
	return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
}
Commit	Line	Data
e24c3bec MC	1	/*
	2	* IEEE754 floating point arithmetic
	3	* single precision: MADDF.f (Fused Multiply Add)
	4	* MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft])
	5	*
	6	* MIPS floating point support
	7	* Copyright (C) 2015 Imagination Technologies, Ltd.
	8	* Author: Markos Chandras <markos.chandras@imgtec.com>
	9	*
	10	* This program is free software; you can distribute it and/or modify it
	11	* under the terms of the GNU General Public License as published by the
	12	* Free Software Foundation; version 2 of the License.
	13	*/
	14
	15	#include "ieee754sp.h"
	16
6162051e PB	17
	18	static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
	19	union ieee754sp y, enum maddf_flags flags)
e24c3bec MC	20	{
	21	int re;
	22	int rs;
	23	unsigned rm;
b3b8e1eb DL	24	uint64_t rm64;
b3b8e1eb DL	25	uint64_t zm64;
e24c3bec MC	26	int s;
	27
	28	COMPXSP;
	29	COMPYSP;
e2d11e1a	30	COMPZSP;
e24c3bec MC	31
	32	EXPLODEXSP;
	33	EXPLODEYSP;
e2d11e1a	34	EXPLODEZSP;
e24c3bec MC	35
	36	FLUSHXSP;
	37	FLUSHYSP;
e2d11e1a	38	FLUSHZSP;
e24c3bec MC	39
	40	ieee754_clearcx();
	41
e840be6e AM	42	/*
	43	* Handle the cases when at least one of x, y or z is a NaN.
	44	* Order of precedence is sNaN, qNaN and z, x, y.
	45	*/
	46	if (zc == IEEE754_CLASS_SNAN)
e24c3bec	47	return ieee754sp_nanxcpt(z);
e840be6e AM	48	if (xc == IEEE754_CLASS_SNAN)
	49	return ieee754sp_nanxcpt(x);
	50	if (yc == IEEE754_CLASS_SNAN)
e24c3bec	51	return ieee754sp_nanxcpt(y);
e840be6e AM	52	if (zc == IEEE754_CLASS_QNAN)
	53	return z;
	54	if (xc == IEEE754_CLASS_QNAN)
	55	return x;
	56	if (yc == IEEE754_CLASS_QNAN)
	57	return y;
e24c3bec	58
e840be6e AM	59	if (zc == IEEE754_CLASS_DNORM)
	60	SPDNORMZ;
	61	/* ZERO z cases are handled separately below */
e24c3bec	62
e840be6e	63	switch (CLPAIR(xc, yc)) {
e24c3bec	64
e24c3bec MC	65
	66	/*
	67	* Infinity handling
	68	*/
	69	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
	70	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
e24c3bec MC	71	ieee754_setcx(IEEE754_INVALID_OPERATION);
	72	return ieee754sp_indef();
	73
	74	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
	75	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
	76	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
	77	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
	78	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
0c64fe63	79	if ((zc == IEEE754_CLASS_INF) &&
ae11c061 AM	80	((!(flags & MADDF_NEGATE_PRODUCT) && (zs != (xs ^ ys))) ||
ae11c061 AM	81	((flags & MADDF_NEGATE_PRODUCT) && (zs == (xs ^ ys))))) {
0c64fe63 AM	82	/*
	83	* Cases of addition of infinities with opposite signs
	84	* or subtraction of infinities with same signs.
	85	*/
	86	ieee754_setcx(IEEE754_INVALID_OPERATION);
	87	return ieee754sp_indef();
	88	}
	89	/*
	90	* z is here either not an infinity, or an infinity having the
	91	* same sign as product (x*y) (in case of MADDF.D instruction)
	92	* or product -(x*y) (in MSUBF.D case). The result must be an
	93	* infinity, and its sign is determined only by the value of
ae11c061	94	* (flags & MADDF_NEGATE_PRODUCT) and the signs of x and y.
0c64fe63	95	*/
ae11c061	96	if (flags & MADDF_NEGATE_PRODUCT)
0c64fe63 AM	97	return ieee754sp_inf(1 ^ (xs ^ ys));
	98	else
	99	return ieee754sp_inf(xs ^ ys);
e24c3bec MC	100
	101	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
	102	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
	103	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
	104	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
	105	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
	106	if (zc == IEEE754_CLASS_INF)
	107	return ieee754sp_inf(zs);
7cf64ce4 AM	108	if (zc == IEEE754_CLASS_ZERO) {
7cf64ce4 AM	109	/* Handle cases +0 + (-0) and similar ones. */
ae11c061	110	if ((!(flags & MADDF_NEGATE_PRODUCT)
7cf64ce4	111	&& (zs == (xs ^ ys))) ||
ae11c061	112	((flags & MADDF_NEGATE_PRODUCT)
7cf64ce4 AM	113	&& (zs != (xs ^ ys))))
	114	/*
	115	* Cases of addition of zeros of equal signs
	116	* or subtraction of zeroes of opposite signs.
	117	* The sign of the resulting zero is in any
	118	* such case determined only by the sign of z.
	119	*/
	120	return z;
	121
	122	return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
	123	}
	124	/* x*y is here 0, and z is not 0, so just return z */
e24c3bec MC	125	return z;
	126
	127	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
	128	SPDNORMX;
	129
	130	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
e840be6e	131	if (zc == IEEE754_CLASS_INF)
e24c3bec MC	132	return ieee754sp_inf(zs);
	133	SPDNORMY;
	134	break;
	135
	136	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
e840be6e	137	if (zc == IEEE754_CLASS_INF)
e24c3bec MC	138	return ieee754sp_inf(zs);
	139	SPDNORMX;
	140	break;
	141
	142	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
e840be6e	143	if (zc == IEEE754_CLASS_INF)
e24c3bec MC	144	return ieee754sp_inf(zs);
	145	/* fall through to real computations */
	146	}
	147
	148	/* Finally get to do some computation */
	149
	150	/*
	151	* Do the multiplication bit first
	152	*
	153	* rm = xm * ym, re = xe + ye basically
	154	*
	155	* At this point xm and ym should have been normalized.
	156	*/
	157
	158	/* rm = xm * ym, re = xe+ye basically */
	159	assert(xm & SP_HIDDEN_BIT);
	160	assert(ym & SP_HIDDEN_BIT);
	161
	162	re = xe + ye;
	163	rs = xs ^ ys;
ae11c061	164	if (flags & MADDF_NEGATE_PRODUCT)
6162051e	165	rs ^= 1;
e24c3bec	166
b3b8e1eb DL	167	/* Multiple 24 bit xm and ym to give 48 bit results */
b3b8e1eb DL	168	rm64 = (uint64_t)xm * ym;
e24c3bec	169
b3b8e1eb DL	170	/* Shunt to top of word */
b3b8e1eb DL	171	rm64 = rm64 << 16;
e24c3bec	172
b3b8e1eb DL	173	/* Put explicit bit at bit 62 if necessary */
	174	if ((int64_t) rm64 < 0) {
	175	rm64 = rm64 >> 1;
e24c3bec	176	re++;
e24c3bec	177	}
e24c3bec	178
b3b8e1eb	179	assert(rm64 & (1 << 62));
e24c3bec	180
b3b8e1eb DL	181	if (zc == IEEE754_CLASS_ZERO) {
	182	/*
	183	* Move explicit bit from bit 62 to bit 26 since the
	184	* ieee754sp_format code expects the mantissa to be
	185	* 27 bits wide (24 + 3 rounding bits).
	186	*/
	187	rm = XSPSRS64(rm64, (62 - 26));
	188	return ieee754sp_format(rs, re, rm);
	189	}
e24c3bec	190
b3b8e1eb DL	191	/* Move explicit bit from bit 23 to bit 62 */
	192	zm64 = (uint64_t)zm << (62 - 23);
	193	assert(zm64 & (1 << 62));
e24c3bec	194
b3b8e1eb	195	/* Make the exponents the same */
e24c3bec MC	196	if (ze > re) {
e24c3bec MC	197	/*
db57f29d	198	* Have to shift r fraction right to align.
e24c3bec MC	199	*/
e24c3bec MC	200	s = ze - re;
b3b8e1eb	201	rm64 = XSPSRS64(rm64, s);
db57f29d	202	re += s;
e24c3bec MC	203	} else if (re > ze) {
e24c3bec MC	204	/*
db57f29d	205	* Have to shift z fraction right to align.
e24c3bec MC	206	*/
e24c3bec MC	207	s = re - ze;
b3b8e1eb	208	zm64 = XSPSRS64(zm64, s);
db57f29d	209	ze += s;
e24c3bec MC	210	}
	211	assert(ze == re);
	212	assert(ze <= SP_EMAX);
	213
b3b8e1eb	214	/* Do the addition */
e24c3bec MC	215	if (zs == rs) {
e24c3bec MC	216	/*
b3b8e1eb DL	217	* Generate 64 bit result by adding two 63 bit numbers
b3b8e1eb DL	218	* leaving result in zm64, zs and ze.
e24c3bec	219	*/
b3b8e1eb DL	220	zm64 = zm64 + rm64;
	221	if ((int64_t)zm64 < 0) { /* carry out */
	222	zm64 = XSPSRS1(zm64);
db57f29d	223	ze++;
e24c3bec MC	224	}
e24c3bec MC	225	} else {
b3b8e1eb DL	226	if (zm64 >= rm64) {
b3b8e1eb DL	227	zm64 = zm64 - rm64;
e24c3bec	228	} else {
b3b8e1eb	229	zm64 = rm64 - zm64;
e24c3bec MC	230	zs = rs;
e24c3bec MC	231	}
b3b8e1eb	232	if (zm64 == 0)
e24c3bec MC	233	return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
	234
	235	/*
b3b8e1eb	236	* Put explicit bit at bit 62 if necessary.
e24c3bec	237	*/
b3b8e1eb DL	238	while ((zm64 >> 62) == 0) {
b3b8e1eb DL	239	zm64 <<= 1;
e24c3bec MC	240	ze--;
e24c3bec MC	241	}
e24c3bec	242	}
b3b8e1eb DL	243
	244	/*
	245	* Move explicit bit from bit 62 to bit 26 since the
	246	* ieee754sp_format code expects the mantissa to be
	247	* 27 bits wide (24 + 3 rounding bits).
	248	*/
	249	zm = XSPSRS64(zm64, (62 - 26));
	250
e24c3bec MC	251	return ieee754sp_format(zs, ze, zm);
e24c3bec MC	252	}
6162051e PB	253
	254	union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x,
	255	union ieee754sp y)
	256	{
	257	return _sp_maddf(z, x, y, 0);
	258	}
	259
	260	union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x,
	261	union ieee754sp y)
	262	{
ae11c061	263	return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
6162051e	264	}