[rustc.git] / vendor / compiler_builtins / compiler-rt / lib / builtins / hexagon / dfdiv.S

//===----------------------Hexagon builtin routine ------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

/* Double Precision Divide */

/*
 * Operand register pairs. A (r1:0) is the dividend and also receives the
 * result; B (r3:2) is the divisor. Further down both pairs are re-aliased
 * as REM and DENOM while the quotient is being developed.
 */
#define A r1:0
#define AH r1
#define AL r0

#define B r3:2
#define BH r3
#define BL r2

/* 64-bit integer quotient accumulator; converted to double at the end. */
#define Q r5:4
#define QH r5
#define QL r4

/* Scratch pair: holds the 32x32 products inside the divide iterations. */
#define PROD r7:6
#define PRODHI r7
#define PRODLO r6

/* Single-precision values used to build the initial reciprocal estimate. */
#define SFONE r8
#define SFDEN r9
#define SFERROR r10
#define SFRECIP r11

/* Exponent words of B (high half) and A (low half) of one register pair. */
#define EXPBA r13:12
#define EXPB r13
#define EXPA r12

/* Second partial product subtracted from the remainder in DIV_ITER1B. */
#define REMSUB2 r15:14


/* XOR of the operand high words; only its sign bit is tested. */
#define SIGN r28

/* Predicate register roles on the main path. */
#define Q_POSITIVE p3
#define NORMAL p2
#define NO_OVF_UNF p1
#define P_TMP p0

/* RECIPEST_SHIFT: bit offset at which the float mantissa is inserted into RECIPEST. */
/* QADJ: constant subtracted from the exponent difference before it is added to the converted quotient. */
#define RECIPEST_SHIFT 3
#define QADJ 61

/*
 * Class masks passed to dfclass(). DFCLASS_NUMBER is used below as the
 * "not a NaN" test; the remaining class bit (0x10) is used literally in
 * .Ldiv_nan.
 */
#define DFCLASS_NORMAL 0x02
#define DFCLASS_NUMBER 0x0F
#define DFCLASS_INFINITE 0x08
#define DFCLASS_ZERO 0x01
#define DFCLASS_NONZERO (DFCLASS_NUMBER ^ DFCLASS_ZERO)
#define DFCLASS_NONINFINITE (DFCLASS_NUMBER ^ DFCLASS_INFINITE)

/* Field widths of the double (DF) and single (SF) formats, and the double exponent bias. */
#define DF_MANTBITS 52
#define DF_EXPBITS 11
#define SF_MANTBITS 23
#define SF_EXPBITS 8
#define DF_BIAS 0x3ff

/* Bit offset of the 2-bit USR field that .Ldiv_ovf reads to pick the overflow result. */
#define SR_ROUND_OFF 22

/*
 * Each *_ALIAS(TAG) exports one more global symbol and binds it to
 * __hexagon_<TAG>. END(TAG) emits the .size directive for TAG.
 */
#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
#define END(TAG) .size TAG,.-TAG

	.text
	.global __hexagon_divdf3
	.type __hexagon_divdf3,@function
	Q6_ALIAS(divdf3)
        FAST_ALIAS(divdf3)
        FAST2_ALIAS(divdf3)
	.p2align 5
/*
 * __hexagon_divdf3: double-precision divide, A / B.
 *   In:   r1:0 = A (dividend), r3:2 = B (divisor)
 *   Out:  r1:0 = quotient; returns with jumpr r31
 *   Uses: r0-r15, r28 and p0-p3 as scratch; some exceptional paths also
 *         read and write USR.
 * Also exported as __qdsp_divdf3, __hexagon_fast_divdf3 and
 * __hexagon_fast2_divdf3 through the three alias macro invocations.
 *
 * NOTE: several packets in this routine write the same predicate twice and
 * rely on the result being the AND of both conditions, and on every
 * instruction of a packet reading register values from before the packet.
 */
__hexagon_divdf3:
	{
		// NORMAL ends up true only when both A and B are normal numbers
		NORMAL = dfclass(A,#DFCLASS_NORMAL)
		NORMAL = dfclass(B,#DFCLASS_NORMAL)
		EXPBA = combine(BH,AH)		// EXPB = BH, EXPA = AH; exponent fields are extracted below
		SIGN = xor(AH,BH)		// bit 31 = sign of the quotient
	}
// From here on r1:0 and r3:2 are handled as integer remainder / denominator.
#undef A
#undef AH
#undef AL
#undef B
#undef BH
#undef BL
#define REM r1:0
#define REMHI r1
#define REMLO r0
#define DENOM r3:2
#define DENOMHI r3
#define DENOMLO r2
	{
		// The other two instructions of this packet execute even when the
		// jump is taken; .Ldiv_abnormal relies on SFONE being loaded here.
		if (!NORMAL) jump .Ldiv_abnormal
		// top SF_MANTBITS bits of the divisor mantissa
		PROD = extractu(DENOM,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS)
		// 1.0f bit pattern with the mantissa LSB set; and(SFONE,#-2) below
		// clears that bit again to obtain exactly 0x3f800000
		SFONE = ##0x3f800001
	}
	{
		// float with the exponent of 1.0f carrying the leading divisor mantissa bits
		SFDEN = or(SFONE,PRODLO)
		EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-32)	// biased exponent field of B
		EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-32)	// biased exponent field of A
		Q_POSITIVE = cmp.gt(SIGN,#-1)	// true when the sign bit of SIGN is clear
	}
#undef SIGN
// r28 is reused: .Ldenorm_continue loads the constant 1 into it
#define ONE r28
/*
 * Common continuation for the normal fast path and for operands that were
 * pre-normalized by .Ldiv_abnormal. Expects SFONE, SFDEN, EXPA, EXPB,
 * Q_POSITIVE and the mantissas in REM / DENOM to be set up.
 *
 * Builds an integer reciprocal estimate of the divisor in RECIPEST from a
 * single-precision sfrecipa seed that is refined twice with
 *     error = 1.0 - recip * den;  recip += recip * error
 * and prepares REM, DENOM, Q and the exponent difference for the divide
 * iterations that follow.
 */
.Ldenorm_continue:
	{
		SFRECIP,P_TMP = sfrecipa(SFONE,SFDEN)
		SFERROR = and(SFONE,#-2)		// 1.0f (0x3f800000)
		ONE = #1
		EXPA = sub(EXPA,EXPB)			// exponent difference of the operands
	}
#undef EXPB
#define RECIPEST r13
	{
		SFERROR -= sfmpy(SFRECIP,SFDEN):lib	// error = 1.0 - recip * den
		// overwrite sign + exponent (top 12 bits) with 0x001: clears the sign
		// and sets bit 52 of REM, making the leading mantissa 1 explicit
		REMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32)
		// the bit just above the 23 mantissa bits that are inserted below
		RECIPEST = ##0x00800000 << RECIPEST_SHIFT
	}
	{
		SFRECIP += sfmpy(SFRECIP,SFERROR):lib	// recip += recip * error
		DENOMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32)	// same treatment for DENOM
		SFERROR = and(SFONE,#-2)		// reset to 1.0f for the second refinement
	}
	{
		SFERROR -= sfmpy(SFRECIP,SFDEN):lib
		// bounds for the exponent difference, compared in the next packet
		QH = #-DF_BIAS+1
		QL = #DF_BIAS-1
	}
	{
		SFRECIP += sfmpy(SFRECIP,SFERROR):lib
		// NO_OVF_UNF = (EXPA > QH) and not (EXPA > QL)
		NO_OVF_UNF = cmp.gt(EXPA,QH)
		NO_OVF_UNF = !cmp.gt(EXPA,QL)
	}
	{
		// place the 23 mantissa bits of the refined reciprocal at bit RECIPEST_SHIFT
		RECIPEST = insert(SFRECIP,#SF_MANTBITS,#RECIPEST_SHIFT)
		Q = #0
		EXPA = add(EXPA,#-QADJ)
	}
#undef SFERROR
#undef SFRECIP
#define TMP r10
#define TMP1 r11
	{
		// lower the estimate by 3 units of the inserted mantissa LSB
		// NOTE(review): presumably so the estimate never exceeds the true
		// reciprocal - confirm against the algorithm description
		RECIPEST = add(RECIPEST,#((-3) << RECIPEST_SHIFT))
	}

/*
 * DIV_ITER1B: one quotient-development step, expanded as three packets.
 *   1. PROD = RECIPEST * REMHI (unsigned 32x32 -> 64) and REM <<= REMSHIFT.
 *      Both are in one packet, so the multiply sees REMHI from before the
 *      shift.
 *   2. PRODLO = 0, so only PRODHI (the partial quotient) is kept;
 *      REM -= PRODHI * DENOMLO; REMSUB2 = PRODHI * DENOMHI.
 *   3. Q += QSHIFTINSN(PROD, QSHIFT); REM -= REMSUB2 << 32; then EXTRA.
 * Packets 2 and 3 together subtract PRODHI * DENOM from the shifted REM.
 *
 * The four expansions below shift REM left by 15 each time and add each
 * partial quotient 15 bits lower in Q than the previous one (ASL 14, then
 * ASR 1, ASR 16, ASR 31). The last expansion also clears PROD, which the
 * final correction after the iterations relies on.
 */
#define DIV_ITER1B(QSHIFTINSN,QSHIFT,REMSHIFT,EXTRA) \
	{ \
		PROD = mpyu(RECIPEST,REMHI); \
		REM = asl(REM,# ## ( REMSHIFT )); \
	}; \
	{ \
		PRODLO = # ## 0; \
		REM -= mpyu(PRODHI,DENOMLO); \
		REMSUB2 = mpyu(PRODHI,DENOMHI); \
	}; \
	{ \
		Q += QSHIFTINSN(PROD, # ## ( QSHIFT )); \
		REM -= asl(REMSUB2, # ## 32); \
		EXTRA \
	}


	DIV_ITER1B(ASL,14,15,)
	DIV_ITER1B(ASR,1,15,)
	DIV_ITER1B(ASR,16,15,)
	DIV_ITER1B(ASR,31,15,PROD=# ( 0 );)

/*
 * Final correction and normal return. PROD is zero on entry (cleared by
 * the last DIV_ITER1B expansion). One more conditional subtract of DENOM
 * adjusts Q, a nonzero remainder is recorded in bit 0 of Q, the sign is
 * applied, Q is converted to double and the exponent difference is added
 * to the exponent field of the result.
 */
#undef REMSUB2
#define TMPPAIR r15:14
#define TMPPAIRHI r15
#define TMPPAIRLO r14
#undef RECIPEST
#define EXPB r13
	{
		// compare or sub with carry
		TMPPAIR = sub(REM,DENOM)
		P_TMP = cmp.gtu(DENOM,REM)
		// set up amt to add to q
		if (!P_TMP.new) PRODLO  = #2
	}
	{
		Q = add(Q,PROD)			// PROD is 0, or 2 when REM >= DENOM
		if (!P_TMP) REM = TMPPAIR	// keep REM - DENOM in that case
		TMPPAIR = #0
	}
	{
		P_TMP = cmp.eq(REM,TMPPAIR)		// remainder exactly zero?
		// if not, set bit 0 of Q (presumably a sticky bit so the
		// conversion below sees the result as inexact)
		if (!P_TMP.new) QL = or(QL,ONE)
	}
	{
		PROD = neg(Q)
	}
	{
		if (!Q_POSITIVE) Q = PROD	// apply the sign of the quotient
	}
// r1:0 and r3:2 are floating-point values again from here on.
#undef REM
#undef REMHI
#undef REMLO
#undef DENOM
#undef DENOMLO
#undef DENOMHI
#define A r1:0
#define AH r1
#define AL r0
#define B r3:2
#define BH r3
#define BL r2
	{
		// the conversion executes whether or not the jump is taken, so
		// .Ldiv_ovf_unf starts with A = convert_d2df(Q)
		A = convert_d2df(Q)
		if (!NO_OVF_UNF) jump .Ldiv_ovf_unf
	}
	{
		AH += asl(EXPA,#DF_MANTBITS-32)		// add EXPA into the exponent field
		jumpr r31
	}

/*
 * Reached when the exponent difference failed the NO_OVF_UNF range check.
 * On entry A = convert_d2df(Q) and EXPA is the adjusted exponent
 * difference. Computes the biased exponent the result would have and
 * dispatches to .Ldiv_ovf, to .Lpossible_unf, or falls through into the
 * underflow handling.
 */
.Ldiv_ovf_unf:
	{
		AH += asl(EXPA,#DF_MANTBITS-32)
		// exponent field of the converted quotient (reads AH from before the add)
		EXPB = extractu(AH,#DF_EXPBITS,#DF_MANTBITS-32)
	}
	{
		PROD = abs(Q)
		EXPA = add(EXPA,EXPB)		// exponent of the result before range limiting
	}
	{
		P_TMP = cmp.gt(EXPA,##DF_BIAS+DF_BIAS)		// overflow
		if (P_TMP.new) jump:nt .Ldiv_ovf
	}
	{
		P_TMP = cmp.gt(EXPA,#0)
		if (P_TMP.new) jump:nt .Lpossible_unf		// round up to normal possible...
	}
	/* Underflow */
	/* We know what the infinite range exponent should be (EXPA) */
	/* Q is 2's complement, PROD is abs(Q) */
	/* Normalize Q, shift right, add a high bit, convert, change exponent */

#define FUDGE1 7	// how much to shift right
#define FUDGE2 4	// how many guard/round to keep at lsbs

	{
		// EXPB = left shift that moves the leading 1 of PROD to bit 62
		EXPB = add(clb(PROD),#-1)			// doesn't need to be added in since
		EXPA = sub(#FUDGE1,EXPA)			// we extract post-converted exponent
		TMP = USR
		TMP1 = #63
	}
	{
		EXPB = min(EXPA,TMP1)		// right-shift amount, capped at 63
		TMP1 = or(TMP,#0x030)		// USR value with the Unf+Inexact bits (0x30) set
		PROD = asl(PROD,EXPB)		// normalize; uses EXPB from the previous packet
		EXPA = #0			// low half of EXPBA: extract offset 0
	}
	{
		TMPPAIR = extractu(PROD,EXPBA)				// bits that will get shifted out
		PROD = lsr(PROD,EXPB)					// shift out bits
		B = #1
	}
	{
		P_TMP = cmp.gtu(B,TMPPAIR)		// true when the shifted-out bits are all zero
		if (!P_TMP.new) PRODLO = or(BL,PRODLO)	// otherwise record them in bit 0
		PRODHI = setbit(PRODHI,#DF_MANTBITS-32+FUDGE2)	// the "high bit" of the recipe above
	}
	{
		Q = neg(PROD)
		P_TMP = bitsclr(PRODLO,#(1<<FUDGE2)-1)	// are the low FUDGE2 bits all zero?
		if (!P_TMP.new) TMP = TMP1		// if not, use the USR value with 0x30 set
	}
	{
		USR = TMP
		if (Q_POSITIVE) Q = PROD
		TMP = #-DF_BIAS-(DF_MANTBITS+FUDGE2)	// exponent correction added after the conversion
	}
	{
		A = convert_d2df(Q)
	}
	{
		AH += asl(TMP,#DF_MANTBITS-32)
		jumpr r31
	}


/*
 * Reached from .Ldiv_ovf_unf when the computed exponent is greater than 0
 * but the exponent difference had failed the range check. A already holds
 * the final result; PROD holds abs(Q). Only decides whether the
 * Underflow + Inexact bits must additionally be set in USR.
 */
.Lpossible_unf:
	/* If upper parts of Q were all F's, but abs(A) == 0x00100000_00000000, we rounded up to min_normal */
	/* The answer is correct, but we need to raise Underflow */
	{
		B = extractu(A,#63,#0)		// abs(A): drop the sign bit
		TMPPAIR = combine(##0x00100000,#0)		// min normal
		TMP = #0x7FFF
	}
	{
		// both conditions must hold for P_TMP to be true
		P_TMP = dfcmp.eq(TMPPAIR,B)		// Is everything zero in the rounded value...
		P_TMP = bitsset(PRODHI,TMP)		// but a bunch of bits set in the unrounded abs(quotient)?
	}

	// NOTE(review): for __HEXAGON_ARCH__ == 60 the same two instructions are
	// issued outside a packet; the reason is not visible in this file.
#if (__HEXAGON_ARCH__ == 60)
		TMP = USR		// Read USR in case we want to set Unf+Inexact
		if (!P_TMP) jumpr r31   // If not, just return
					// Note that inexact is already set...
#else
	{
		if (!P_TMP) jumpr r31			// If not, just return
		TMP = USR				// Else, we want to set Unf+Inexact
	}						// Note that inexact is already set...
#endif
	{
		TMP = or(TMP,#0x30)
	}
	{
		USR = TMP
	}
	{
		p0 = dfcmp.eq(A,A)
		jumpr r31
	}

/*
 * Overflow result. Sets bits 0x28 in USR and returns either infinity or
 * the largest finite magnitude, with the sign taken from Q_POSITIVE. The
 * choice depends on the 2-bit USR field at SR_ROUND_OFF and the sign.
 */
.Ldiv_ovf:
	/*
	 * Raise Overflow, and choose the correct overflow value (saturated normal or infinity)
	 */
	{
		TMP = USR
		B = combine(##0x7fefffff,#-1)		// largest finite magnitude
		AH = mux(Q_POSITIVE,#0,#-1)		// bit 31 of AH = sign of the result
	}
	{
		PROD = combine(##0x7ff00000,#0)		// infinity
		QH = extractu(TMP,#2,#SR_ROUND_OFF)	// 2-bit rounding field of USR
		// presumably overflow (0x08) plus the inexact bit (0x20) that is
		// also part of the 0x30 used for Unf+Inexact - TODO confirm
		TMP = or(TMP,#0x28)
	}
	{
		USR = TMP
		QH ^= lsr(AH,#31)		// flip bit 0 of the field for a negative result
		QL = QH				// QL keeps the field as read from USR
	}
	{
		// all writes to p0 in this packet must hold for p0 to be true
		p0 = !cmp.eq(QL,#1)		// if not round-to-zero
		p0 = !cmp.eq(QH,#2)		// and not rounding the other way
		if (p0.new) B = PROD		// go to inf
		p0 = dfcmp.eq(B,B)		// get exceptions
	}
	{
		A = insert(B,#63,#0)		// keep bit 63 (sign) of A, take the rest from B
		jumpr r31
	}

/*
 * Reached from the entry when A and B are not both normal numbers. r28
 * still holds SIGN from the first packet, and SFONE was loaded by the
 * packet that branched here. Dispatches NaN, invalid, zero-result and
 * infinite-result cases; what remains has at least one subnormal operand,
 * which is normalized by hand before rejoining .Ldenorm_continue.
 */
#undef ONE
#define SIGN r28
#undef NORMAL
#undef NO_OVF_UNF
#define P_INF p1
#define P_ZERO p2
.Ldiv_abnormal:
	{
		// P_TMP = both operands are numbers (neither is a NaN)
		P_TMP = dfclass(A,#DFCLASS_NUMBER)
		P_TMP = dfclass(B,#DFCLASS_NUMBER)
		Q_POSITIVE = cmp.gt(SIGN,#-1)	// the fast-path packet that sets this was skipped
	}
	{
		// P_INF = both operands are infinite
		P_INF = dfclass(A,#DFCLASS_INFINITE)
		P_INF = dfclass(B,#DFCLASS_INFINITE)
	}
	{
		// P_ZERO = both operands are zero
		P_ZERO = dfclass(A,#DFCLASS_ZERO)
		P_ZERO = dfclass(B,#DFCLASS_ZERO)
	}
	{
		if (!P_TMP) jump .Ldiv_nan
		if (P_INF) jump .Ldiv_invalid		// inf / inf
	}
	{
		if (P_ZERO) jump .Ldiv_invalid		// 0 / 0
	}
	{
		// P_ZERO is reused: false when A is zero or B is infinite
		P_ZERO = dfclass(A,#DFCLASS_NONZERO)		// nonzero
		P_ZERO = dfclass(B,#DFCLASS_NONINFINITE)	// non-infinite
	}
	{
		// P_INF is reused: false when A is infinite or B is zero
		P_INF = dfclass(A,#DFCLASS_NONINFINITE)	// non-infinite
		P_INF = dfclass(B,#DFCLASS_NONZERO)	// nonzero
	}
	{
		if (!P_ZERO) jump .Ldiv_zero_result
		if (!P_INF) jump .Ldiv_inf_result
	}
	/* Now we've narrowed it down to (de)normal / (de)normal */
	/* Set up A/EXPA B/EXPB and go back */
#undef P_ZERO
#undef P_INF
#define P_TMP2 p1
	{
		P_TMP = dfclass(A,#DFCLASS_NORMAL)
		P_TMP2 = dfclass(B,#DFCLASS_NORMAL)
		TMP = ##0x00100000			// hidden-bit position in the high word
	}
	{
		EXPBA = combine(BH,AH)			// save the original high words
		AH = insert(TMP,#DF_EXPBITS+1,#DF_MANTBITS-32)		// clear out hidden bit, sign bit
		BH = insert(TMP,#DF_EXPBITS+1,#DF_MANTBITS-32)		// clear out hidden bit, sign bit
	}
	{
		if (P_TMP) AH = or(AH,TMP)				// if normal, add back in hidden bit
		if (P_TMP2) BH = or(BH,TMP)				// if normal, add back in hidden bit
	}
	{
		// left shift that brings the leading 1 of each mantissa to bit 52
		QH = add(clb(A),#-DF_EXPBITS)
		QL = add(clb(B),#-DF_EXPBITS)
		TMP = #1
	}
	{
		EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-32)
		EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-32)
	}
	{
		A = asl(A,QH)
		B = asl(B,QL)
		// for an operand that is not normal, the exponent becomes 1 - shift
		if (!P_TMP) EXPA = sub(TMP,QH)
		if (!P_TMP2) EXPB = sub(TMP,QL)
	}	// recreate values needed by resume code
	{
		PROD = extractu(B,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS)
	}
	{
		SFDEN = or(SFONE,PRODLO)
		jump .Ldenorm_continue
	}

/*
 * Result is zero (reached when A is zero or B is infinite). Returns a zero
 * whose sign bit is the XOR of the operand sign bits.
 */
.Ldiv_zero_result:
	{
		AH = xor(AH,BH)			// bit 31 = sign of the result
		B = #0
	}
	{
		A = insert(B,#63,#0)		// keep bit 63 of A, clear the other 63 bits
		jumpr r31
	}
/*
 * Result is infinity (reached when A is infinite or B is zero). When B is
 * zero and A is not infinite the DBZ bit (0x04) is set in USR first.
 * Returns infinity whose sign bit is the XOR of the operand sign bits.
 */
.Ldiv_inf_result:
	{
		// p2 = B is zero and A is not infinite
		p2 = dfclass(B,#DFCLASS_ZERO)
		p2 = dfclass(A,#DFCLASS_NONINFINITE)
	}
	{
		// the USR read and the sign computation happen on both paths
		TMP = USR
		if (!p2) jump 1f
		AH = xor(AH,BH)
	}
	{
		TMP = or(TMP,#0x04)		// DBZ
	}
	{
		USR = TMP
	}
1:
	{
		B = combine(##0x7ff00000,#0)	// infinity
		// compares B as it was before this packet
		p0 = dfcmp.uo(B,B)		// take possible exception
	}
	{
		A = insert(B,#63,#0)		// keep bit 63 (sign) of A, take the rest from B
		jumpr r31
	}
/*
 * At least one operand failed the DFCLASS_NUMBER test. Class 0x10 is the
 * one class bit not included in DFCLASS_NUMBER (presumably NaN). Both
 * operands are passed through convert_df2sf for its exception side effect,
 * then the all-ones 64-bit pattern is returned.
 */
.Ldiv_nan:
	{
		p0 = dfclass(A,#0x10)
		p1 = dfclass(B,#0x10)
		// an operand that is not in class 0x10 is replaced by the other one
		if (!p0.new) A = B
		if (!p1.new) B = A
	}
	{
		QH = convert_df2sf(A)	// get possible invalid exceptions
		QL = convert_df2sf(B)
	}
	{
		A = #-1			// all ones: exponent all ones, mantissa nonzero
		jumpr r31
	}

/*
 * Invalid operation (reached for inf / inf and 0 / 0). Converts a
 * single-precision NaN bit pattern to double so that the conversion itself
 * raises the exception and produces the NaN that is returned.
 */
.Ldiv_invalid:
	{
		// single-precision pattern: exponent all ones, mantissa = 1
		TMP = ##0x7f800001
	}
	{
		A = convert_sf2df(TMP)		// get invalid, get DF qNaN
		jumpr r31
	}
END(__hexagon_divdf3)
Commit	Line	Data
8faf50e0 XL	1	//===----------------------Hexagon builtin routine ------------------------===//
	2	//
	3	// The LLVM Compiler Infrastructure
	4	//
	5	// This file is dual licensed under the MIT and the University of Illinois Open
	6	// Source Licenses. See LICENSE.TXT for details.
	7	//
	8	//===----------------------------------------------------------------------===//
	9
	10	/* Double Precision Divide */
	11
	12	#define A r1:0
	13	#define AH r1
	14	#define AL r0
	15
	16	#define B r3:2
	17	#define BH r3
	18	#define BL r2
	19
	20	#define Q r5:4
	21	#define QH r5
	22	#define QL r4
	23
	24	#define PROD r7:6
	25	#define PRODHI r7
	26	#define PRODLO r6
	27
	28	#define SFONE r8
	29	#define SFDEN r9
	30	#define SFERROR r10
	31	#define SFRECIP r11
	32
	33	#define EXPBA r13:12
	34	#define EXPB r13
	35	#define EXPA r12
	36
	37	#define REMSUB2 r15:14
	38
	39
	40
	41	#define SIGN r28
	42
	43	#define Q_POSITIVE p3
	44	#define NORMAL p2
	45	#define NO_OVF_UNF p1
	46	#define P_TMP p0
	47
	48	#define RECIPEST_SHIFT 3
	49	#define QADJ 61
	50
	51	#define DFCLASS_NORMAL 0x02
	52	#define DFCLASS_NUMBER 0x0F
	53	#define DFCLASS_INFINITE 0x08
	54	#define DFCLASS_ZERO 0x01
	55	#define DFCLASS_NONZERO (DFCLASS_NUMBER ^ DFCLASS_ZERO)
	56	#define DFCLASS_NONINFINITE (DFCLASS_NUMBER ^ DFCLASS_INFINITE)
	57
	58	#define DF_MANTBITS 52
	59	#define DF_EXPBITS 11
	60	#define SF_MANTBITS 23
	61	#define SF_EXPBITS 8
	62	#define DF_BIAS 0x3ff
	63
	64	#define SR_ROUND_OFF 22
65
66	#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
67	#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
68	#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
69	#define END(TAG) .size TAG,.-TAG
70
71	.text
72	.global __hexagon_divdf3
73	.type __hexagon_divdf3,@function
74	Q6_ALIAS(divdf3)
75	FAST_ALIAS(divdf3)
76	FAST2_ALIAS(divdf3)
77	.p2align 5
78	__hexagon_divdf3:
79	{
80	NORMAL = dfclass(A,#DFCLASS_NORMAL)
81	NORMAL = dfclass(B,#DFCLASS_NORMAL)
82	EXPBA = combine(BH,AH)
83	SIGN = xor(AH,BH)
84	}
85	#undef A
86	#undef AH
87	#undef AL
88	#undef B
89	#undef BH
90	#undef BL
91	#define REM r1:0
92	#define REMHI r1
93	#define REMLO r0
94	#define DENOM r3:2
95	#define DENOMHI r3
96	#define DENOMLO r2
97	{
98	if (!NORMAL) jump .Ldiv_abnormal
99	PROD = extractu(DENOM,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS)
100	SFONE = ##0x3f800001
101	}
102	{
103	SFDEN = or(SFONE,PRODLO)
104	EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-32)
105	EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-32)
106	Q_POSITIVE = cmp.gt(SIGN,#-1)
107	}
108	#undef SIGN
109	#define ONE r28
110	.Ldenorm_continue:
111	{
112	SFRECIP,P_TMP = sfrecipa(SFONE,SFDEN)
113	SFERROR = and(SFONE,#-2)
114	ONE = #1
115	EXPA = sub(EXPA,EXPB)
116	}
117	#undef EXPB
118	#define RECIPEST r13
119	{
120	SFERROR -= sfmpy(SFRECIP,SFDEN):lib
121	REMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32)
122	RECIPEST = ##0x00800000 << RECIPEST_SHIFT
123	}
124	{
125	SFRECIP += sfmpy(SFRECIP,SFERROR):lib
126	DENOMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32)
127	SFERROR = and(SFONE,#-2)
128	}
129	{
130	SFERROR -= sfmpy(SFRECIP,SFDEN):lib
131	QH = #-DF_BIAS+1
132	QL = #DF_BIAS-1
133	}
134	{
135	SFRECIP += sfmpy(SFRECIP,SFERROR):lib
136	NO_OVF_UNF = cmp.gt(EXPA,QH)
137	NO_OVF_UNF = !cmp.gt(EXPA,QL)
138	}
139	{
140	RECIPEST = insert(SFRECIP,#SF_MANTBITS,#RECIPEST_SHIFT)
141	Q = #0
142	EXPA = add(EXPA,#-QADJ)
143	}
144	#undef SFERROR
145	#undef SFRECIP
146	#define TMP r10
147	#define TMP1 r11
148	{
149	RECIPEST = add(RECIPEST,#((-3) << RECIPEST_SHIFT))
150	}
151
152	#define DIV_ITER1B(QSHIFTINSN,QSHIFT,REMSHIFT,EXTRA) \
153	{ \
154	PROD = mpyu(RECIPEST,REMHI); \
155	REM = asl(REM,# ## ( REMSHIFT )); \
156	}; \
157	{ \
158	PRODLO = # ## 0; \
159	REM -= mpyu(PRODHI,DENOMLO); \
160	REMSUB2 = mpyu(PRODHI,DENOMHI); \
161	}; \
162	{ \
163	Q += QSHIFTINSN(PROD, # ## ( QSHIFT )); \
164	REM -= asl(REMSUB2, # ## 32); \
165	EXTRA \
166	}
167
168
169	DIV_ITER1B(ASL,14,15,)
170	DIV_ITER1B(ASR,1,15,)
171	DIV_ITER1B(ASR,16,15,)
172	DIV_ITER1B(ASR,31,15,PROD=# ( 0 );)
173
174	#undef REMSUB2
175	#define TMPPAIR r15:14
176	#define TMPPAIRHI r15
177	#define TMPPAIRLO r14
178	#undef RECIPEST
179	#define EXPB r13
180	{
181	// compare or sub with carry
182	TMPPAIR = sub(REM,DENOM)
183	P_TMP = cmp.gtu(DENOM,REM)
184	// set up amt to add to q
185	if (!P_TMP.new) PRODLO = #2
186	}
187	{
188	Q = add(Q,PROD)
189	if (!P_TMP) REM = TMPPAIR
190	TMPPAIR = #0
191	}
192	{
193	P_TMP = cmp.eq(REM,TMPPAIR)
194	if (!P_TMP.new) QL = or(QL,ONE)
195	}
196	{
197	PROD = neg(Q)
198	}
199	{
200	if (!Q_POSITIVE) Q = PROD
201	}
202	#undef REM
203	#undef REMHI
204	#undef REMLO
205	#undef DENOM
206	#undef DENOMLO
207	#undef DENOMHI
208	#define A r1:0
209	#define AH r1
210	#define AL r0
211	#define B r3:2
212	#define BH r3
213	#define BL r2
214	{
215	A = convert_d2df(Q)
216	if (!NO_OVF_UNF) jump .Ldiv_ovf_unf
217	}
218	{
219	AH += asl(EXPA,#DF_MANTBITS-32)
220	jumpr r31
221	}
222
223	.Ldiv_ovf_unf:
224	{
225	AH += asl(EXPA,#DF_MANTBITS-32)
226	EXPB = extractu(AH,#DF_EXPBITS,#DF_MANTBITS-32)
227	}
228	{
229	PROD = abs(Q)
230	EXPA = add(EXPA,EXPB)
231	}
232	{
233	P_TMP = cmp.gt(EXPA,##DF_BIAS+DF_BIAS) // overflow
234	if (P_TMP.new) jump:nt .Ldiv_ovf
235	}
236	{
237	P_TMP = cmp.gt(EXPA,#0)
238	if (P_TMP.new) jump:nt .Lpossible_unf // round up to normal possible...
239	}
240	/* Underflow */
241	/* We know what the infinite range exponent should be (EXPA) */
242	/* Q is 2's complement, PROD is abs(Q) */
243	/* Normalize Q, shift right, add a high bit, convert, change exponent */
244
245	#define FUDGE1 7 // how much to shift right
246	#define FUDGE2 4 // how many guard/round to keep at lsbs
247
248	{
249	EXPB = add(clb(PROD),#-1) // doesn't need to be added in since
250	EXPA = sub(#FUDGE1,EXPA) // we extract post-converted exponent
251	TMP = USR
252	TMP1 = #63
253	}
254	{
255	EXPB = min(EXPA,TMP1)
256	TMP1 = or(TMP,#0x030)
257	PROD = asl(PROD,EXPB)
258	EXPA = #0
259	}
260	{
261	TMPPAIR = extractu(PROD,EXPBA) // bits that will get shifted out
262	PROD = lsr(PROD,EXPB) // shift out bits
263	B = #1
264	}
265	{
266	P_TMP = cmp.gtu(B,TMPPAIR)
267	if (!P_TMP.new) PRODLO = or(BL,PRODLO)
268	PRODHI = setbit(PRODHI,#DF_MANTBITS-32+FUDGE2)
269	}
270	{
271	Q = neg(PROD)
272	P_TMP = bitsclr(PRODLO,#(1<<FUDGE2)-1)
273	if (!P_TMP.new) TMP = TMP1
274	}
275	{
276	USR = TMP
277	if (Q_POSITIVE) Q = PROD
278	TMP = #-DF_BIAS-(DF_MANTBITS+FUDGE2)
279	}
280	{
281	A = convert_d2df(Q)
282	}
283	{
284	AH += asl(TMP,#DF_MANTBITS-32)
285	jumpr r31
286	}
287
288
289	.Lpossible_unf:
290	/* If upper parts of Q were all F's, but abs(A) == 0x00100000_00000000, we rounded up to min_normal */
291	/* The answer is correct, but we need to raise Underflow */
292	{
293	B = extractu(A,#63,#0)
294	TMPPAIR = combine(##0x00100000,#0) // min normal
295	TMP = #0x7FFF
296	}
297	{
298	P_TMP = dfcmp.eq(TMPPAIR,B) // Is everything zero in the rounded value...
299	P_TMP = bitsset(PRODHI,TMP) // but a bunch of bits set in the unrounded abs(quotient)?
300	}
301
302	#if (__HEXAGON_ARCH__ == 60)
303	TMP = USR // If not, just return
304	if (!P_TMP) jumpr r31 // Else, we want to set Unf+Inexact
305	// Note that inexact is already set...
306	#else
307	{
308	if (!P_TMP) jumpr r31 // If not, just return
309	TMP = USR // Else, we want to set Unf+Inexact
310	} // Note that inexact is already set...
311	#endif
312	{
313	TMP = or(TMP,#0x30)
314	}
315	{
316	USR = TMP
317	}
318	{
319	p0 = dfcmp.eq(A,A)
320	jumpr r31
321	}
322
323	.Ldiv_ovf:
324	/*
325	* Raise Overflow, and choose the correct overflow value (saturated normal or infinity)
326	*/
327	{
328	TMP = USR
329	B = combine(##0x7fefffff,#-1)
330	AH = mux(Q_POSITIVE,#0,#-1)
331	}
332	{
333	PROD = combine(##0x7ff00000,#0)
334	QH = extractu(TMP,#2,#SR_ROUND_OFF)
335	TMP = or(TMP,#0x28)
336	}
337	{
338	USR = TMP
339	QH ^= lsr(AH,#31)
340	QL = QH
341	}
342	{
343	p0 = !cmp.eq(QL,#1) // if not round-to-zero
344	p0 = !cmp.eq(QH,#2) // and not rounding the other way
345	if (p0.new) B = PROD // go to inf
346	p0 = dfcmp.eq(B,B) // get exceptions
347	}
348	{
349	A = insert(B,#63,#0)
350	jumpr r31
351	}
352
353	#undef ONE
354	#define SIGN r28
355	#undef NORMAL
356	#undef NO_OVF_UNF
357	#define P_INF p1
358	#define P_ZERO p2
359	.Ldiv_abnormal:
360	{
361	P_TMP = dfclass(A,#DFCLASS_NUMBER)
362	P_TMP = dfclass(B,#DFCLASS_NUMBER)
363	Q_POSITIVE = cmp.gt(SIGN,#-1)
364	}
365	{
366	P_INF = dfclass(A,#DFCLASS_INFINITE)
367	P_INF = dfclass(B,#DFCLASS_INFINITE)
368	}
369	{
370	P_ZERO = dfclass(A,#DFCLASS_ZERO)
371	P_ZERO = dfclass(B,#DFCLASS_ZERO)
372	}
373	{
374	if (!P_TMP) jump .Ldiv_nan
375	if (P_INF) jump .Ldiv_invalid
376	}
377	{
378	if (P_ZERO) jump .Ldiv_invalid
379	}
380	{
381	P_ZERO = dfclass(A,#DFCLASS_NONZERO) // nonzero
382	P_ZERO = dfclass(B,#DFCLASS_NONINFINITE) // non-infinite
383	}
384	{
385	P_INF = dfclass(A,#DFCLASS_NONINFINITE) // non-infinite
386	P_INF = dfclass(B,#DFCLASS_NONZERO) // nonzero
387	}
388	{
389	if (!P_ZERO) jump .Ldiv_zero_result
390	if (!P_INF) jump .Ldiv_inf_result
391	}
392	/* Now we've narrowed it down to (de)normal / (de)normal */
393	/* Set up A/EXPA B/EXPB and go back */
394	#undef P_ZERO
395	#undef P_INF
396	#define P_TMP2 p1
397	{
398	P_TMP = dfclass(A,#DFCLASS_NORMAL)
399	P_TMP2 = dfclass(B,#DFCLASS_NORMAL)
400	TMP = ##0x00100000
401	}
402	{
403	EXPBA = combine(BH,AH)
404	AH = insert(TMP,#DF_EXPBITS+1,#DF_MANTBITS-32) // clear out hidden bit, sign bit
405	BH = insert(TMP,#DF_EXPBITS+1,#DF_MANTBITS-32) // clear out hidden bit, sign bit
406	}
407	{
408	if (P_TMP) AH = or(AH,TMP) // if normal, add back in hidden bit
409	if (P_TMP2) BH = or(BH,TMP) // if normal, add back in hidden bit
410	}
411	{
412	QH = add(clb(A),#-DF_EXPBITS)
413	QL = add(clb(B),#-DF_EXPBITS)
414	TMP = #1
415	}
416	{
417	EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-32)
418	EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-32)
419	}
420	{
421	A = asl(A,QH)
422	B = asl(B,QL)
423	if (!P_TMP) EXPA = sub(TMP,QH)
424	if (!P_TMP2) EXPB = sub(TMP,QL)
	425	} // recreate values needed by resume code
426	{
427	PROD = extractu(B,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS)
428	}
429	{
430	SFDEN = or(SFONE,PRODLO)
431	jump .Ldenorm_continue
432	}
433
434	.Ldiv_zero_result:
435	{
436	AH = xor(AH,BH)
437	B = #0
438	}
439	{
440	A = insert(B,#63,#0)
441	jumpr r31
442	}
443	.Ldiv_inf_result:
444	{
445	p2 = dfclass(B,#DFCLASS_ZERO)
446	p2 = dfclass(A,#DFCLASS_NONINFINITE)
447	}
448	{
449	TMP = USR
450	if (!p2) jump 1f
451	AH = xor(AH,BH)
452	}
453	{
454	TMP = or(TMP,#0x04) // DBZ
455	}
456	{
457	USR = TMP
458	}
459	1:
460	{
461	B = combine(##0x7ff00000,#0)
462	p0 = dfcmp.uo(B,B) // take possible exception
463	}
464	{
465	A = insert(B,#63,#0)
466	jumpr r31
467	}
468	.Ldiv_nan:
469	{
470	p0 = dfclass(A,#0x10)
471	p1 = dfclass(B,#0x10)
472	if (!p0.new) A = B
473	if (!p1.new) B = A
474	}
475	{
476	QH = convert_df2sf(A) // get possible invalid exceptions
477	QL = convert_df2sf(B)
478	}
479	{
480	A = #-1
481	jumpr r31
482	}
483
484	.Ldiv_invalid:
485	{
486	TMP = ##0x7f800001
487	}
488	{
489	A = convert_sf2df(TMP) // get invalid, get DF qNaN
490	jumpr r31
491	}
492	END(__hexagon_divdf3)