]> git.proxmox.com Git - rustc.git/blame - vendor/compiler_builtins/compiler-rt/lib/builtins/hexagon/dfdiv.S
New upstream version 1.36.0+dfsg1
[rustc.git] / vendor / compiler_builtins / compiler-rt / lib / builtins / hexagon / dfdiv.S
CommitLineData
8faf50e0
XL
1//===----------------------Hexagon builtin routine ------------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is dual licensed under the MIT and the University of Illinois Open
6// Source Licenses. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10/* Double Precision Divide */
11
/* Register aliases used at the entry of __hexagon_divdf3.
   Several of these names are #undef'd and the same registers re-#defined under
   other names further down, so an alias is only valid in its textual region. */

/* A: operand held in r1:0 (AH = high word, AL = low word). It is read at entry
   as the dividend and is also the pair the result is written to on return. */
12#define A r1:0
13#define AH r1
14#define AL r0
15
/* B: operand held in r3:2 (BH = high word, BL = low word); the divisor. */
16#define B r3:2
17#define BH r3
18#define BL r2
19
/* Q: 64-bit integer quotient accumulator (QH/QL are also used as scratch). */
20#define Q r5:4
21#define QH r5
22#define QL r4
23
/* PROD: 64-bit scratch pair for products and per-iteration quotient pieces. */
24#define PROD r7:6
25#define PRODHI r7
26#define PRODLO r6
27
/* Single-precision working values for the reciprocal estimate of the divisor. */
28#define SFONE r8
29#define SFDEN r9
30#define SFERROR r10
31#define SFRECIP r11
32
/* EXPBA: pair view of EXPB (r13) and EXPA (r12), the operand exponents. */
33#define EXPBA r13:12
34#define EXPB r13
35#define EXPA r12
36
/* REMSUB2: second partial product subtracted from the remainder in DIV_ITER1B. */
37#define REMSUB2 r15:14
38
39
40
/* SIGN: xor of the operand high words; its top bit is the result sign. */
41#define SIGN r28
42
/* Predicate register roles. */
43#define Q_POSITIVE p3
44#define NORMAL p2
45#define NO_OVF_UNF p1
46#define P_TMP p0
47
/* RECIPEST_SHIFT: bit offset at which the single-float reciprocal mantissa is
   inserted into RECIPEST.
   QADJ: amount subtracted from the exponent difference before that difference
   is added to the high word of the converted quotient. */
48#define RECIPEST_SHIFT 3
49#define QADJ 61
50
/* Class masks passed as the immediate operand of dfclass. DFCLASS_NUMBER is
   0x0F; the two derived masks remove one bit from it:
   DFCLASS_NONZERO = 0x0E, DFCLASS_NONINFINITE = 0x07. */
51#define DFCLASS_NORMAL 0x02
52#define DFCLASS_NUMBER 0x0F
53#define DFCLASS_INFINITE 0x08
54#define DFCLASS_ZERO 0x01
55#define DFCLASS_NONZERO (DFCLASS_NUMBER ^ DFCLASS_ZERO)
56#define DFCLASS_NONINFINITE (DFCLASS_NUMBER ^ DFCLASS_INFINITE)
57
/* Field widths of the double (DF) and single (SF) formats and the double
   exponent bias. SF_EXPBITS is not referenced in the visible source. */
58#define DF_MANTBITS 52
59#define DF_EXPBITS 11
60#define SF_MANTBITS 23
61#define SF_EXPBITS 8
62#define DF_BIAS 0x3ff
63
/* SR_ROUND_OFF: bit offset in USR of the 2-bit field read by .Ldiv_ovf
   (used there as the rounding mode). */
64#define SR_ROUND_OFF 22
65
/* Symbol helpers. Each *_ALIAS(TAG) macro declares one extra global name
   (__qdsp_TAG, __hexagon_fast_TAG, __hexagon_fast2_TAG) and .set's it equal to
   __hexagon_TAG. END(TAG) emits the .size directive for TAG, measured from the
   label TAG to the current location. */
66#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
67#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
68#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
69#define END(TAG) .size TAG,.-TAG
70
/*
 * __hexagon_divdf3: double-precision divide.
 * In:  A (r1:0) = dividend, B (r3:2) = divisor.
 * Out: A (r1:0) = quotient; returns with jumpr r31.
 * Also exported as __qdsp_divdf3, __hexagon_fast_divdf3 and
 * __hexagon_fast2_divdf3 through the alias macros below.
 * Scratch registers visible in this file: r4-r15, r28, p0-p3; some paths write
 * USR. NOTE(review): whether all of these are caller-saved is an ABI question
 * not answered by this file - confirm against the Hexagon ABI.
 */
71 .text
72 .global __hexagon_divdf3
73 .type __hexagon_divdf3,@function
74 Q6_ALIAS(divdf3)
75 FAST_ALIAS(divdf3)
76 FAST2_ALIAS(divdf3)
77 .p2align 5
78__hexagon_divdf3:
79 {
 // Both dfclass results are written to NORMAL in the same packet. Per the
 // Hexagon ISA, same-packet writes to one predicate are ANDed, so NORMAL is
 // true only when A and B are both normal numbers.
80 NORMAL = dfclass(A,#DFCLASS_NORMAL)
81 NORMAL = dfclass(B,#DFCLASS_NORMAL)
 // EXPB:EXPA = high words of B and A; the exponent fields are extracted later.
82 EXPBA = combine(BH,AH)
 // Top bit of SIGN is the sign of the quotient.
83 SIGN = xor(AH,BH)
84 }
/* From here on r1:0 holds the running remainder and r3:2 the denominator. */
85#undef A
86#undef AH
87#undef AL
88#undef B
89#undef BH
90#undef BL
91#define REM r1:0
92#define REMHI r1
93#define REMLO r0
94#define DENOM r3:2
95#define DENOMHI r3
96#define DENOMLO r2
97 {
 // The other two instructions of this packet sit alongside the jump, so
 // .Ldiv_abnormal can rely on SFONE (it reuses SFONE before rejoining).
98 if (!NORMAL) jump .Ldiv_abnormal
 // Top SF_MANTBITS bits of the denominator mantissa, right-justified in PROD.
99 PROD = extractu(DENOM,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS)
 // Single-precision 1.0 bit pattern with the lowest mantissa bit also set;
 // the bit is masked off again (and #-2) wherever an exact 1.0 is needed.
100 SFONE = ##0x3f800001
101 }
102 {
 // Single-float image of the denominator mantissa with the exponent of 1.0.
103 SFDEN = or(SFONE,PRODLO)
 // Reduce the saved high words to their DF_EXPBITS-wide exponent fields.
104 EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-32)
105 EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-32)
 // True when the sign bit of SIGN is clear (SIGN > -1 as a signed word).
106 Q_POSITIVE = cmp.gt(SIGN,#-1)
107 }
/* r28 stops being SIGN (already folded into Q_POSITIVE) and becomes ONE. */
108#undef SIGN
109#define ONE r28
/*
 * Common continuation for the normal path and for .Ldiv_abnormal after it has
 * normalised denormal inputs. Expects on entry: REM/DENOM mantissas, EXPA/EXPB
 * exponents, SFONE, SFDEN and Q_POSITIVE.
 * Produces RECIPEST (a scaled integer reciprocal estimate of the denominator),
 * EXPA (exponent difference minus QADJ), NO_OVF_UNF, and Q = 0.
 */
110.Ldenorm_continue:
111 {
 // Initial reciprocal approximation of SFDEN; the predicate result in P_TMP
 // is not read before P_TMP is overwritten.
112 SFRECIP,P_TMP = sfrecipa(SFONE,SFDEN)
 // SFONE with bit 0 cleared, i.e. the 1.0 pattern 0x3f800000.
113 SFERROR = and(SFONE,#-2)
114 ONE = #1
 // Exponent of the quotient before normalisation: difference of the fields.
115 EXPA = sub(EXPA,EXPB)
116 }
/* r13 is reused: EXPB is no longer needed once EXPA holds the difference. */
117#undef EXPB
118#define RECIPEST r13
119 {
 // First refinement step: error = 1.0 - recip * den.
120 SFERROR -= sfmpy(SFRECIP,SFDEN):lib
 // Overwrite the sign+exponent field (DF_EXPBITS+1 bits at bit 20 of the high
 // word) with the value 1: sign and exponent cleared, hidden bit set.
121 REMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32)
 // Seed with the single-float hidden-bit position, pre-shifted.
122 RECIPEST = ##0x00800000 << RECIPEST_SHIFT
123 }
124 {
 // recip += recip * error.
125 SFRECIP += sfmpy(SFRECIP,SFERROR):lib
 // Same field replacement for the denominator high word.
126 DENOMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32)
127 SFERROR = and(SFONE,#-2)
128 }
129 {
 // Second refinement step, same form as the first.
130 SFERROR -= sfmpy(SFRECIP,SFDEN):lib
 // Bounds for the exponent range check in the next packet.
131 QH = #-DF_BIAS+1
132 QL = #DF_BIAS-1
133 }
134 {
135 SFRECIP += sfmpy(SFRECIP,SFERROR):lib
 // Two writes to one predicate in a packet are ANDed (Hexagon ISA):
 // NO_OVF_UNF = (EXPA > -DF_BIAS+1) && !(EXPA > DF_BIAS-1).
136 NO_OVF_UNF = cmp.gt(EXPA,QH)
137 NO_OVF_UNF = !cmp.gt(EXPA,QL)
138 }
139 {
 // Place the refined single-float mantissa under the pre-set hidden bit.
140 RECIPEST = insert(SFRECIP,#SF_MANTBITS,#RECIPEST_SHIFT)
 // Clear the quotient accumulator (also discards the QH/QL bounds above).
141 Q = #0
142 EXPA = add(EXPA,#-QADJ)
143 }
/* r10/r11 become general temporaries. */
144#undef SFERROR
145#undef SFRECIP
146#define TMP r10
147#define TMP1 r11
148 {
 // Lower the estimate by 3 units of the inserted mantissa.
 // NOTE(review): presumably so each trial quotient digit never overshoots and
 // REM stays non-negative - confirm against the algorithm's error analysis.
149 RECIPEST = add(RECIPEST,#((-3) << RECIPEST_SHIFT))
150 }
151
/*
 * DIV_ITER1B(QSHIFTINSN,QSHIFT,REMSHIFT,EXTRA): one quotient-digit step,
 * expanded as three packets.
 *   1. PROD = RECIPEST * REMHI (trial quotient bits), and REM <<= REMSHIFT.
 *      Both are in one packet; per Hexagon packet semantics the multiply reads
 *      REMHI as it was before the shift.
 *   2. PRODLO is cleared so that only PRODHI is kept as the quotient digit;
 *      REM -= PRODHI * DENOMLO, and REMSUB2 = PRODHI * DENOMHI.
 *   3. Q += QSHIFTINSN(PROD, QSHIFT) positions the digit in the accumulator;
 *      REM -= REMSUB2 << 32 completes the subtraction of PRODHI * DENOM.
 *      EXTRA is an optional instruction placed in this last packet.
 * The body uses `;` as the statement separator because the continuation lines
 * are joined into one logical line by the preprocessor.
 */
152#define DIV_ITER1B(QSHIFTINSN,QSHIFT,REMSHIFT,EXTRA) \
153 { \
154 PROD = mpyu(RECIPEST,REMHI); \
155 REM = asl(REM,# ## ( REMSHIFT )); \
156 }; \
157 { \
158 PRODLO = # ## 0; \
159 REM -= mpyu(PRODHI,DENOMLO); \
160 REMSUB2 = mpyu(PRODHI,DENOMHI); \
161 }; \
162 { \
163 Q += QSHIFTINSN(PROD, # ## ( QSHIFT )); \
164 REM -= asl(REMSUB2, # ## 32); \
165 EXTRA \
166 }
167
168
/* Four steps. REM advances by 15 bits each time and the digit position in Q
   drops by 15 bits each time: shifted left 14, then right 1, 16 and 31. */
169 DIV_ITER1B(ASL,14,15,)
170 DIV_ITER1B(ASR,1,15,)
171 DIV_ITER1B(ASR,16,15,)
/* The last step also zeroes PROD in its final packet; the code that follows
   relies on PROD being 0 unless it explicitly sets PRODLO. */
172 DIV_ITER1B(ASR,31,15,PROD=# ( 0 );)
173
/*
 * Final correction after the four DIV_ITER1B steps, then the common exit:
 * one conditional subtract of DENOM from REM with a matching adjustment of Q,
 * a sticky bit for a non-zero remainder, sign application, conversion of the
 * integer quotient to double, and the exponent fix-up.
 * r15:14 and r13 are renamed first.
 */
174#undef REMSUB2
175#define TMPPAIR r15:14
176#define TMPPAIRHI r15
177#define TMPPAIRLO r14
178#undef RECIPEST
179#define EXPB r13
180 {
181 // compare or sub with carry
182 TMPPAIR = sub(REM,DENOM)
 // P_TMP is true when DENOM > REM (unsigned), i.e. no further subtract fits.
183 P_TMP = cmp.gtu(DENOM,REM)
184 // set up amt to add to q
 // PROD was zeroed by the last DIV_ITER1B, so PROD becomes 2 when REM >= DENOM
 // and stays 0 otherwise.
185 if (!P_TMP.new) PRODLO = #2
186 }
187 {
188 Q = add(Q,PROD)
 // Keep the reduced remainder only when the subtract was valid.
189 if (!P_TMP) REM = TMPPAIR
 // Zero used as the comparison operand in the next packet.
190 TMPPAIR = #0
191 }
192 {
 // Non-zero remainder: OR a 1 into the lowest quotient bit.
 // NOTE(review): presumably a sticky bit so the conversion below rounds an
 // inexact result correctly - confirm.
193 P_TMP = cmp.eq(REM,TMPPAIR)
194 if (!P_TMP.new) QL = or(QL,ONE)
195 }
196 {
197 PROD = neg(Q)
198 }
199 {
 // Apply the result sign as a two's-complement negation of the quotient.
200 if (!Q_POSITIVE) Q = PROD
201 }
/* r1:0 and r3:2 go back to their operand/result names. */
202#undef REM
203#undef REMHI
204#undef REMLO
205#undef DENOM
206#undef DENOMLO
207#undef DENOMHI
208#define A r1:0
209#define AH r1
210#define AL r0
211#define B r3:2
212#define BH r3
213#define BL r2
214 {
 // Signed 64-bit integer to double conversion of the quotient.
215 A = convert_d2df(Q)
 // Exponent difference fell outside the range checked at .Ldenorm_continue.
216 if (!NO_OVF_UNF) jump .Ldiv_ovf_unf
217 }
218 {
 // Add EXPA (exponent difference minus QADJ) into the exponent field of the
 // converted value and return.
219 AH += asl(EXPA,#DF_MANTBITS-32)
220 jumpr r31
221 }
222
/*
 * Reached when NO_OVF_UNF is false. On entry A holds convert_d2df(Q) and EXPA
 * the adjusted exponent difference. Computes the exponent the result would
 * have with unlimited range, then dispatches: above 2*DF_BIAS goes to
 * .Ldiv_ovf, above 0 goes to .Lpossible_unf, otherwise falls into the
 * underflow code below.
 */
223.Ldiv_ovf_unf:
224 {
225 AH += asl(EXPA,#DF_MANTBITS-32)
 // Same packet as the update above, so per Hexagon packet semantics this
 // reads AH before the add: the exponent field produced by the conversion.
226 EXPB = extractu(AH,#DF_EXPBITS,#DF_MANTBITS-32)
227 }
228 {
229 PROD = abs(Q)
 // Adjusted difference plus the conversion exponent.
230 EXPA = add(EXPA,EXPB)
231 }
232 {
233 P_TMP = cmp.gt(EXPA,##DF_BIAS+DF_BIAS) // overflow
234 if (P_TMP.new) jump:nt .Ldiv_ovf
235 }
236 {
237 P_TMP = cmp.gt(EXPA,#0)
238 if (P_TMP.new) jump:nt .Lpossible_unf // round up to normal possible...
239 }
240 /* Underflow */
241 /* We know what the infinite range exponent should be (EXPA) */
242 /* Q is 2's complement, PROD is abs(Q) */
243 /* Normalize Q, shift right, add a high bit, convert, change exponent */
244
245#define FUDGE1 7 // how much to shift right
246#define FUDGE2 4 // how many guard/round to keep at lsbs
247
248 {
249 EXPB = add(clb(PROD),#-1) // doesn't need to be added in since
250 EXPA = sub(#FUDGE1,EXPA) // we extract post-converted exponent
 // Snapshot of the user status register; written back below, possibly with
 // extra bits set.
251 TMP = USR
 // Upper bound for the right-shift count.
252 TMP1 = #63
253 }
254 {
 // Right-shift count = min(FUDGE1 - exponent, 63). The asl in this packet
 // still uses the previous EXPB (clb(PROD) - 1), which normalises PROD.
255 EXPB = min(EXPA,TMP1)
 // Candidate USR value with bits 0x30 set (the .Lpossible_unf comments call
 // this pattern Unf+Inexact).
256 TMP1 = or(TMP,#0x030)
257 PROD = asl(PROD,EXPB)
 // EXPBA now reads as (shift count : 0), the width:offset pair used by the
 // register form of extractu in the next packet.
258 EXPA = #0
259 }
260 {
261 TMPPAIR = extractu(PROD,EXPBA) // bits that will get shifted out
262 PROD = lsr(PROD,EXPB) // shift out bits
 // Constant 1: the unsigned comparison operand and the sticky bit to OR in.
263 B = #1
264 }
265 {
 // P_TMP is true when the shifted-out bits are all zero (1 > TMPPAIR).
266 P_TMP = cmp.gtu(B,TMPPAIR)
 // Otherwise fold them into the lowest bit.
267 if (!P_TMP.new) PRODLO = or(BL,PRODLO)
 // Set a fixed high bit above the mantissa and guard bits (the "add a high
 // bit" step from the comment above).
268 PRODHI = setbit(PRODHI,#DF_MANTBITS-32+FUDGE2)
269 }
270 {
271 Q = neg(PROD)
 // If any of the low FUDGE2 bits is set, adopt the USR value with 0x30 set.
272 P_TMP = bitsclr(PRODLO,#(1<<FUDGE2)-1)
273 if (!P_TMP.new) TMP = TMP1
274 }
275 {
276 USR = TMP
 // Pick the signed form of the shifted magnitude.
277 if (Q_POSITIVE) Q = PROD
 // Exponent adjustment applied after conversion; TMP is free again because
 // the USR write in this packet reads its previous value.
278 TMP = #-DF_BIAS-(DF_MANTBITS+FUDGE2)
279 }
280 {
281 A = convert_d2df(Q)
282 }
283 {
284 AH += asl(TMP,#DF_MANTBITS-32)
285 jumpr r31
286 }
287
288
/*
 * Reached from .Ldiv_ovf_unf when the computed exponent is greater than 0.
 * A already holds the returned value; this path only decides whether USR
 * needs the extra bits 0x30 ORed in before returning.
 */
289.Lpossible_unf:
290 /* If upper parts of Q were all F's, but abs(A) == 0x00100000_00000000, we rounded up to min_normal */
291 /* The answer is correct, but we need to raise Underflow */
292 {
 // B = A with the sign bit dropped (low 63 bits).
293 B = extractu(A,#63,#0)
294 TMPPAIR = combine(##0x00100000,#0) // min normal
 // Mask tested against PRODHI (high word of abs(Q)) in the next packet.
295 TMP = #0x7FFF
296 }
297 {
 // Both results go to P_TMP in one packet and are ANDed (Hexagon ISA).
298 P_TMP = dfcmp.eq(TMPPAIR,B) // Is everything zero in the rounded value...
299 P_TMP = bitsset(PRODHI,TMP) // but a bunch of bits set in the unrounded abs(quotient)?
300 }
301
 /* NOTE(review): in the __HEXAGON_ARCH__ == 60 branch the two instructions are
    not enclosed in packet braces, unlike the #else branch, and their order is
    swapped relative to it. This looks deliberate for that architecture, but
    the reason is not stated in this file - confirm. */
302#if (__HEXAGON_ARCH__ == 60)
303 TMP = USR // If not, just return
304 if (!P_TMP) jumpr r31 // Else, we want to set Unf+Inexact
305 // Note that inexact is already set...
306#else
307 {
308 if (!P_TMP) jumpr r31 // If not, just return
309 TMP = USR // Else, we want to set Unf+Inexact
310 } // Note that inexact is already set...
311#endif
312 {
313 TMP = or(TMP,#0x30)
314 }
315 {
316 USR = TMP
317 }
318 {
 // Self-comparison of the result; p0 is not read afterwards.
 // NOTE(review): presumably issued so the flags just written to USR take
 // effect as exceptions, as the "get exceptions" comment on the same idiom
 // in .Ldiv_ovf suggests - confirm.
319 p0 = dfcmp.eq(A,A)
320 jumpr r31
321 }
322
/*
 * Overflow exit. Builds both candidate magnitudes (largest finite value in B,
 * infinity in PROD), ORs 0x28 into USR, selects between the candidates from
 * the 2-bit USR field at SR_ROUND_OFF combined with the result sign, and
 * returns the selection with the sign taken from AH.
 */
323.Ldiv_ovf:
324 /*
325 * Raise Overflow, and choose the correct overflow value (saturated normal or infinity)
326 */
327 {
328 TMP = USR
 // 0x7fefffff_ffffffff: largest finite double magnitude.
329 B = combine(##0x7fefffff,#-1)
 // AH = 0 for a positive result, all ones for a negative one; only its top
 // bit is used below.
330 AH = mux(Q_POSITIVE,#0,#-1)
331 }
332 {
 // 0x7ff00000_00000000: infinity.
333 PROD = combine(##0x7ff00000,#0)
 // 2-bit rounding-mode field of USR.
334 QH = extractu(TMP,#2,#SR_ROUND_OFF)
 // NOTE(review): 0x28 is presumably the overflow and inexact flag bits,
 // going by the block comment above - confirm against the USR layout.
335 TMP = or(TMP,#0x28)
336 }
337 {
338 USR = TMP
 // QH = mode xor sign bit. QL, assigned in the same packet, receives QH as
 // it was before the xor, i.e. the raw mode.
339 QH ^= lsr(AH,#31)
340 QL = QH
341 }
342 {
343 p0 = !cmp.eq(QL,#1) // if not round-to-zero
344 p0 = !cmp.eq(QH,#2) // and not rounding the other way
345 if (p0.new) B = PROD // go to inf
346 p0 = dfcmp.eq(B,B) // get exceptions
347 }
348 {
 // Copy the low 63 bits of the chosen magnitude into A; bit 63 of A (the
 // sign set through AH above) is kept.
349 A = insert(B,#63,#0)
350 jumpr r31
351 }
352
/*
 * Slow path taken from the entry when A and B are not both normal.
 * Dispatch order as coded:
 *   - not both in DFCLASS_NUMBER     -> .Ldiv_nan
 *   - both infinite, or both zero    -> .Ldiv_invalid
 *   - A zero or B infinite           -> .Ldiv_zero_result
 *   - A infinite or B zero           -> .Ldiv_inf_result
 *   - otherwise (at least one denormal): normalise the mantissas, rebuild the
 *     exponents and rejoin the main path at .Ldenorm_continue.
 * Each pair of dfclass writes to one predicate in one packet is ANDed
 * (Hexagon ISA). r28 is SIGN again here and p1/p2 get new names.
 */
353#undef ONE
354#define SIGN r28
355#undef NORMAL
356#undef NO_OVF_UNF
357#define P_INF p1
358#define P_ZERO p2
359.Ldiv_abnormal:
360 {
361 P_TMP = dfclass(A,#DFCLASS_NUMBER)
362 P_TMP = dfclass(B,#DFCLASS_NUMBER)
 // The main-path packet that sets Q_POSITIVE was skipped by the jump here,
 // so it is computed on this path as well.
363 Q_POSITIVE = cmp.gt(SIGN,#-1)
364 }
365 {
 // True only when both operands are infinite.
366 P_INF = dfclass(A,#DFCLASS_INFINITE)
367 P_INF = dfclass(B,#DFCLASS_INFINITE)
368 }
369 {
 // True only when both operands are zero.
370 P_ZERO = dfclass(A,#DFCLASS_ZERO)
371 P_ZERO = dfclass(B,#DFCLASS_ZERO)
372 }
373 {
374 if (!P_TMP) jump .Ldiv_nan
375 if (P_INF) jump .Ldiv_invalid
376 }
377 {
378 if (P_ZERO) jump .Ldiv_invalid
379 }
380 {
 // P_ZERO is reused: false when A is zero or B is infinite.
381 P_ZERO = dfclass(A,#DFCLASS_NONZERO) // nonzero
382 P_ZERO = dfclass(B,#DFCLASS_NONINFINITE) // non-infinite
383 }
384 {
 // P_INF is reused: false when A is infinite or B is zero.
385 P_INF = dfclass(A,#DFCLASS_NONINFINITE) // non-infinite
386 P_INF = dfclass(B,#DFCLASS_NONZERO) // nonzero
387 }
388 {
389 if (!P_ZERO) jump .Ldiv_zero_result
390 if (!P_INF) jump .Ldiv_inf_result
391 }
392 /* Now we've narrowed it down to (de)normal / (de)normal */
393 /* Set up A/EXPA B/EXPB and go back */
394#undef P_ZERO
395#undef P_INF
396#define P_TMP2 p1
397 {
398 P_TMP = dfclass(A,#DFCLASS_NORMAL)
399 P_TMP2 = dfclass(B,#DFCLASS_NORMAL)
 // Hidden-bit position in the high word. Its low 12 bits are zero, which is
 // what the insert instructions below write into the sign+exponent field.
400 TMP = ##0x00100000
401 }
402 {
 // Save the original high words before the fields are cleared (the combine
 // shares a packet with the inserts, so it reads the old AH/BH).
403 EXPBA = combine(BH,AH)
404 AH = insert(TMP,#DF_EXPBITS+1,#DF_MANTBITS-32) // clear out hidden bit, sign bit
405 BH = insert(TMP,#DF_EXPBITS+1,#DF_MANTBITS-32) // clear out hidden bit, sign bit
406 }
407 {
408 if (P_TMP) AH = or(AH,TMP) // if normal, add back in hidden bit
409 if (P_TMP2) BH = or(BH,TMP) // if normal, add back in hidden bit
410 }
411 {
 // Left-shift counts that bring each leading mantissa bit to the hidden-bit
 // position; 0 for an operand that already has the hidden bit set.
412 QH = add(clb(A),#-DF_EXPBITS)
413 QL = add(clb(B),#-DF_EXPBITS)
414 TMP = #1
415 }
416 {
417 EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-32)
418 EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-32)
419 }
420 {
421 A = asl(A,QH)
422 B = asl(B,QL)
 // For a non-normal operand the exponent becomes 1 minus its shift count.
423 if (!P_TMP) EXPA = sub(TMP,QH)
424 if (!P_TMP2) EXPB = sub(TMP,QL)
425 } // recreate values needed by resume code
426 {
 // Same extraction as on the main path, now from the normalised divisor.
427 PROD = extractu(B,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS)
428 }
429 {
 // SFONE was loaded in the packet that jumped to this path.
430 SFDEN = or(SFONE,PRODLO)
431 jump .Ldenorm_continue
432 }
433
/* Reached from .Ldiv_abnormal when A is zero or B is infinite (with the
   invalid combinations already excluded). Returns a zero carrying the sign of
   the quotient. */
434.Ldiv_zero_result:
435 {
 // Bit 31 of AH becomes the xor of the operand signs.
436 AH = xor(AH,BH)
437 B = #0
438 }
439 {
 // Clear the low 63 bits of A, keeping only the sign bit.
440 A = insert(B,#63,#0)
441 jumpr r31
442 }
/* Reached from .Ldiv_abnormal when A is infinite or B is zero (with the
   invalid combinations already excluded). Returns an infinity carrying the
   sign of the quotient; when B is zero and A is not infinite, USR bit 0x04 is
   ORed in first. */
443.Ldiv_inf_result:
444 {
 // ANDed in one packet: true for (non-infinite A) / (zero B).
445 p2 = dfclass(B,#DFCLASS_ZERO)
446 p2 = dfclass(A,#DFCLASS_NONINFINITE)
447 }
448 {
449 TMP = USR
 // The rest of this packet sits alongside the jump, so the sign is placed in
 // AH on both paths.
450 if (!p2) jump 1f
451 AH = xor(AH,BH)
452 }
453 {
454 TMP = or(TMP,#0x04) // DBZ
455 }
456 {
457 USR = TMP
458 }
4591:
460 {
 // Infinity bit pattern. The compare shares this packet, so per Hexagon
 // packet semantics it reads B as it was before this write (the divisor).
461 B = combine(##0x7ff00000,#0)
462 p0 = dfcmp.uo(B,B) // take possible exception
463 }
464 {
 // Low 63 bits from the infinity pattern; the sign bit of A is kept.
465 A = insert(B,#63,#0)
466 jumpr r31
467 }
/* Reached from .Ldiv_abnormal when the operands are not both in
   DFCLASS_NUMBER. Returns the all-ones bit pattern in A.
   NOTE(review): dfclass mask 0x10 has no named constant in this file; it is
   presumably the NaN class, being the bit just above DFCLASS_NUMBER - confirm. */
468.Ldiv_nan:
469 {
470 p0 = dfclass(A,#0x10)
471 p1 = dfclass(B,#0x10)
 // An operand that is not in class 0x10 is replaced by the other operand, so
 // both conversions below are fed class-0x10 values. Both copies share this
 // packet and therefore read the original A and B.
472 if (!p0.new) A = B
473 if (!p1.new) B = A
474 }
475 {
 // The converted values in QH/QL are never used; the conversions are issued
 // for their exception side effect only.
476 QH = convert_df2sf(A) // get possible invalid exceptions
477 QL = convert_df2sf(B)
478 }
479 {
 // Result: every bit of r1:0 set.
480 A = #-1
481 jumpr r31
482 }
483
/* Reached from .Ldiv_abnormal for infinity/infinity and zero/zero. Produces
   the result by converting a single-precision bit pattern to double. */
484.Ldiv_invalid:
485 {
 // Exponent field all ones with a non-zero mantissa: a single-precision NaN
 // pattern with the top mantissa bit clear.
486 TMP = ##0x7f800001
487 }
488 {
489 A = convert_sf2df(TMP) // get invalid, get DF qNaN
490 jumpr r31
491 }
/* Emits the .size directive for __hexagon_divdf3 (see the END macro). */
492END(__hexagon_divdf3)