]>
Commit | Line | Data |
---|---|---|
8faf50e0 XL |
1 | //===----------------------Hexagon builtin routine ------------------------===// |
2 | // | |
3 | // The LLVM Compiler Infrastructure | |
4 | // | |
5 | // This file is dual licensed under the MIT and the University of Illinois Open | |
6 | // Source Licenses. See LICENSE.TXT for details. | |
7 | // | |
8 | //===----------------------------------------------------------------------===// | |
9 | ||
10 | /* Double Precision Divide */ | |
11 | ||
// Register aliases used by the divide routine. r1:0 (A) and r3:2 (B) carry the
// two 64-bit operands; the routine computes A / B and returns it in r1:0.
12 | #define A r1:0 | |
13 | #define AH r1 | |
14 | #define AL r0 | |
15 | ||
16 | #define B r3:2 | |
17 | #define BH r3 | |
18 | #define BL r2 | |
19 | ||
// Q accumulates the quotient as a 64-bit integer before it is converted to double.
20 | #define Q r5:4 | |
21 | #define QH r5 | |
22 | #define QL r4 | |
23 | ||
// PROD is a scratch pair for products and temporaries.
24 | #define PROD r7:6 | |
25 | #define PRODHI r7 | |
26 | #define PRODLO r6 | |
27 | ||
// Single-precision working values for the reciprocal estimate:
// SFONE = constant 0x3f800001, SFDEN = float built from the divisor's leading
// mantissa bits, SFERROR = refinement residual, SFRECIP = reciprocal estimate.
28 | #define SFONE r8 | |
29 | #define SFDEN r9 | |
30 | #define SFERROR r10 | |
31 | #define SFRECIP r11 | |
32 | ||
// Exponent working registers; they start as copies of the operands' high words.
33 | #define EXPBA r13:12 | |
34 | #define EXPB r13 | |
35 | #define EXPA r12 | |
36 | ||
// Second partial product subtracted from the remainder inside DIV_ITER1B.
37 | #define REMSUB2 r15:14 | |
38 | ||
39 | ||
40 | ||
// xor of the operands' high words; bit 31 is the sign of the result.
// r28 is re-aliased as ONE once the sign has been captured in Q_POSITIVE.
41 | #define SIGN r28 | |
42 | ||
// Predicates: Q_POSITIVE = result sign bit is clear; NORMAL = both operands are
// normal; NO_OVF_UNF = exponent difference is inside the range that needs no
// overflow/underflow handling; P_TMP = scratch.
43 | #define Q_POSITIVE p3 | |
44 | #define NORMAL p2 | |
45 | #define NO_OVF_UNF p1 | |
46 | #define P_TMP p0 | |
47 | ||
// RECIPEST_SHIFT: bit offset at which the reciprocal mantissa is placed in RECIPEST.
// QADJ: constant subtracted from the exponent difference before that difference
// is added to the exponent field of the converted quotient.
48 | #define RECIPEST_SHIFT 3 | |
49 | #define QADJ 61 | |
50 | ||
// Class masks passed to dfclass. The NaN class (0x10) is used as a literal in .Ldiv_nan.
51 | #define DFCLASS_NORMAL 0x02 | |
52 | #define DFCLASS_NUMBER 0x0F | |
53 | #define DFCLASS_INFINITE 0x08 | |
54 | #define DFCLASS_ZERO 0x01 | |
55 | #define DFCLASS_NONZERO (DFCLASS_NUMBER ^ DFCLASS_ZERO) | |
56 | #define DFCLASS_NONINFINITE (DFCLASS_NUMBER ^ DFCLASS_INFINITE) | |
57 | ||
// Field widths of double (DF) and single (SF) precision, and the double exponent bias.
58 | #define DF_MANTBITS 52 | |
59 | #define DF_EXPBITS 11 | |
60 | #define SF_MANTBITS 23 | |
61 | #define SF_EXPBITS 8 | |
62 | #define DF_BIAS 0x3ff | |
63 | ||
// Bit offset of the 2-bit rounding-mode field read from USR in .Ldiv_ovf.
64 | #define SR_ROUND_OFF 22 | |
65 | ||
// Each *_ALIAS(TAG) exports one more global name bound to __hexagon_TAG;
// END(TAG) records the size of symbol TAG.
66 | #define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG | |
67 | #define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG | |
68 | #define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG | |
69 | #define END(TAG) .size TAG,.-TAG | |
70 | ||
// __hexagon_divdf3: double-precision divide.
//   In:   r1:0 = A (dividend), r3:2 = B (divisor)
//   Out:  r1:0 = A / B; returns with jumpr r31
//   Uses: r0-r15, r28, p0-p3 (see the register aliases); the exceptional
//         paths also read and write USR.
// Also exported as __qdsp_divdf3, __hexagon_fast_divdf3 and __hexagon_fast2_divdf3.
// The straight-line path handles two normal operands; anything else is routed
// through .Ldiv_abnormal, which may rejoin at .Ldenorm_continue.
71 | .text | |
72 | .global __hexagon_divdf3 | |
73 | .type __hexagon_divdf3,@function | |
74 | Q6_ALIAS(divdf3) | |
75 | FAST_ALIAS(divdf3) | |
76 | FAST2_ALIAS(divdf3) | |
77 | .p2align 5 | |
78 | __hexagon_divdf3: | |
// NORMAL is written twice in one packet; on Hexagon the two results are ANDed,
// so NORMAL is true only when both A and B are normal numbers.
// EXPBA keeps copies of both high words (exponents are extracted from them
// below) and SIGN = AH ^ BH carries the result sign in bit 31.
79 | { | |
80 | NORMAL = dfclass(A,#DFCLASS_NORMAL) | |
81 | NORMAL = dfclass(B,#DFCLASS_NORMAL) | |
82 | EXPBA = combine(BH,AH) | |
83 | SIGN = xor(AH,BH) | |
84 | } | |
// From here on r1:0 is treated as the running remainder and r3:2 as the denominator.
85 | #undef A | |
86 | #undef AH | |
87 | #undef AL | |
88 | #undef B | |
89 | #undef BH | |
90 | #undef BL | |
91 | #define REM r1:0 | |
92 | #define REMHI r1 | |
93 | #define REMLO r0 | |
94 | #define DENOM r3:2 | |
95 | #define DENOMHI r3 | |
96 | #define DENOMLO r2 | |
// Leave the fast path unless both operands are normal. The other two
// instructions share the packet with the jump, so SFONE is already loaded when
// .Ldiv_abnormal runs (that path uses SFONE without setting it).
// PROD = the top SF_MANTBITS bits of the denominator's mantissa.
97 | { | |
98 | if (!NORMAL) jump .Ldiv_abnormal | |
99 | PROD = extractu(DENOM,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS) | |
100 | SFONE = ##0x3f800001 | |
101 | } | |
// SFDEN = SFONE | leading mantissa bits: a single-precision bit pattern with the
// exponent of 1.0f and the denominator's leading mantissa (lsb forced to 1).
// EXPA/EXPB are reduced to the DF_EXPBITS-wide exponent fields of A and B.
// Q_POSITIVE is true when bit 31 of SIGN is clear.
102 | { | |
103 | SFDEN = or(SFONE,PRODLO) | |
104 | EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-32) | |
105 | EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-32) | |
106 | Q_POSITIVE = cmp.gt(SIGN,#-1) | |
107 | } | |
108 | #undef SIGN | |
109 | #define ONE r28 | |
// Re-entry point for .Ldiv_abnormal once denormal operands have been normalized
// and SFDEN, EXPA, EXPB and Q_POSITIVE have been recreated.
110 | .Ldenorm_continue: | |
// Seed the reciprocal of SFDEN with sfrecipa. SFERROR = SFONE with its lsb
// cleared (0x3f800000, i.e. 1.0f). EXPA becomes the exponent difference A - B.
111 | { | |
112 | SFRECIP,P_TMP = sfrecipa(SFONE,SFDEN) | |
113 | SFERROR = and(SFONE,#-2) | |
114 | ONE = #1 | |
115 | EXPA = sub(EXPA,EXPB) | |
116 | } | |
117 | #undef EXPB | |
118 | #define RECIPEST r13 | |
// First refinement: SFERROR = 1.0 - SFRECIP*SFDEN.
// REMHI bits 31..20 are replaced by 0x001: sign and exponent are cleared and the
// implicit leading one is made explicit at bit 20.
// RECIPEST is seeded with a single one bit at position 23 + RECIPEST_SHIFT.
119 | { | |
120 | SFERROR -= sfmpy(SFRECIP,SFDEN):lib | |
121 | REMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32) | |
122 | RECIPEST = ##0x00800000 << RECIPEST_SHIFT | |
123 | } | |
// SFRECIP += SFRECIP*SFERROR completes the first refinement; DENOMHI gets the
// same sign/exponent replacement as REMHI; SFERROR is reset to 1.0f.
124 | { | |
125 | SFRECIP += sfmpy(SFRECIP,SFERROR):lib | |
126 | DENOMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32) | |
127 | SFERROR = and(SFONE,#-2) | |
128 | } | |
// Second refinement residual. QH/QL are loaded with the bounds used for the
// exponent range check in the next packet.
129 | { | |
130 | SFERROR -= sfmpy(SFRECIP,SFDEN):lib | |
131 | QH = #-DF_BIAS+1 | |
132 | QL = #DF_BIAS-1 | |
133 | } | |
// Second refinement applied.
// NO_OVF_UNF = (EXPA > -DF_BIAS+1) AND NOT (EXPA > DF_BIAS-1).
134 | { | |
135 | SFRECIP += sfmpy(SFRECIP,SFERROR):lib | |
136 | NO_OVF_UNF = cmp.gt(EXPA,QH) | |
137 | NO_OVF_UNF = !cmp.gt(EXPA,QL) | |
138 | } | |
// Place the SF_MANTBITS mantissa bits of SFRECIP into RECIPEST at bit
// RECIPEST_SHIFT, directly below the one bit seeded earlier. Clear the quotient
// accumulator and subtract QADJ from the exponent difference.
139 | { | |
140 | RECIPEST = insert(SFRECIP,#SF_MANTBITS,#RECIPEST_SHIFT) | |
141 | Q = #0 | |
142 | EXPA = add(EXPA,#-QADJ) | |
143 | } | |
144 | #undef SFERROR | |
145 | #undef SFRECIP | |
146 | #define TMP r10 | |
147 | #define TMP1 r11 | |
// Lower the reciprocal estimate by 3 units of its mantissa lsb.
// NOTE(review): presumably this biases the estimate low so that the partial
// quotients in DIV_ITER1B never overshoot -- confirm.
148 | { | |
149 | RECIPEST = add(RECIPEST,#((-3) << RECIPEST_SHIFT)) | |
150 | } | |
151 | ||
// DIV_ITER1B(QSHIFTINSN,QSHIFT,REMSHIFT,EXTRA): one quotient-refinement step,
// emitted as three packets.
//   1. PROD = RECIPEST * REMHI (unsigned); REM is shifted left by REMSHIFT in
//      the same packet, so the product is formed from the pre-shift REMHI.
//      PRODHI is the partial quotient.
//   2. PRODLO is cleared so PROD == PRODHI << 32; PRODHI*DENOMLO is subtracted
//      from REM and PRODHI*DENOMHI is saved in REMSUB2.
//   3. PROD, shifted by QSHIFT with QSHIFTINSN (ASL or ASR), is added into Q;
//      REMSUB2 << 32 is subtracted from REM, completing REM -= PRODHI*DENOM.
//      EXTRA is an optional extra instruction for this last packet.
152 | #define DIV_ITER1B(QSHIFTINSN,QSHIFT,REMSHIFT,EXTRA) \ | |
153 | { \ | |
154 | PROD = mpyu(RECIPEST,REMHI); \ | |
155 | REM = asl(REM,# ## ( REMSHIFT )); \ | |
156 | }; \ | |
157 | { \ | |
158 | PRODLO = # ## 0; \ | |
159 | REM -= mpyu(PRODHI,DENOMLO); \ | |
160 | REMSUB2 = mpyu(PRODHI,DENOMHI); \ | |
161 | }; \ | |
162 | { \ | |
163 | Q += QSHIFTINSN(PROD, # ## ( QSHIFT )); \ | |
164 | REM -= asl(REMSUB2, # ## 32); \ | |
165 | EXTRA \ | |
166 | } | |
167 | ||
168 | ||
// Four steps. Each shifts REM left by 15 and adds its partial quotient into Q
// 15 bit positions lower than the previous step (<<14, >>1, >>16, >>31).
// The last step also zeroes PROD, which the following code adds to Q after
// optionally setting PRODLO.
169 | DIV_ITER1B(ASL,14,15,) | |
170 | DIV_ITER1B(ASR,1,15,) | |
171 | DIV_ITER1B(ASR,16,15,) | |
172 | DIV_ITER1B(ASR,31,15,PROD=# ( 0 );) | |
173 | ||
// Final correction of the integer quotient, sign application, conversion to
// double and the normal-range return. r15:14 and r13 are re-aliased as scratch.
174 | #undef REMSUB2 | |
175 | #define TMPPAIR r15:14 | |
176 | #define TMPPAIRHI r15 | |
177 | #define TMPPAIRLO r14 | |
178 | #undef RECIPEST | |
179 | #define EXPB r13 | |
// If REM >= DENOM (unsigned), one more denominator still fits: TMPPAIR holds
// REM - DENOM and PRODLO is set to 2 (PROD was zeroed by the last DIV_ITER1B).
180 | { | |
181 | // compare or sub with carry | |
182 | TMPPAIR = sub(REM,DENOM) | |
183 | P_TMP = cmp.gtu(DENOM,REM) | |
184 | // set up amt to add to q | |
185 | if (!P_TMP.new) PRODLO = #2 | |
186 | } | |
// Q += 0 or 2; commit the reduced remainder when the subtraction applied.
// TMPPAIR is zeroed for the comparison in the next packet.
187 | { | |
188 | Q = add(Q,PROD) | |
189 | if (!P_TMP) REM = TMPPAIR | |
190 | TMPPAIR = #0 | |
191 | } | |
// A non-zero remainder sets bit 0 of Q (ONE == 1).
// NOTE(review): bit 0 appears to act as a sticky/inexact bit for the rounding
// done by convert_d2df, which would explain the +2 correction above -- confirm.
192 | { | |
193 | P_TMP = cmp.eq(REM,TMPPAIR) | |
194 | if (!P_TMP.new) QL = or(QL,ONE) | |
195 | } | |
// Apply the result sign: Q = -Q when Q_POSITIVE is false.
196 | { | |
197 | PROD = neg(Q) | |
198 | } | |
199 | { | |
200 | if (!Q_POSITIVE) Q = PROD | |
201 | } | |
// r1:0 / r3:2 revert to their A / B names for producing the result.
202 | #undef REM | |
203 | #undef REMHI | |
204 | #undef REMLO | |
205 | #undef DENOM | |
206 | #undef DENOMLO | |
207 | #undef DENOMHI | |
208 | #define A r1:0 | |
209 | #define AH r1 | |
210 | #define AL r0 | |
211 | #define B r3:2 | |
212 | #define BH r3 | |
213 | #define BL r2 | |
// Convert the signed integer quotient to double. The conversion shares the
// packet with the jump, so .Ldiv_ovf_unf also starts from the converted A.
214 | { | |
215 | A = convert_d2df(Q) | |
216 | if (!NO_OVF_UNF) jump .Ldiv_ovf_unf | |
217 | } | |
// In-range result: add the adjusted exponent difference into the exponent
// field of the high word and return.
218 | { | |
219 | AH += asl(EXPA,#DF_MANTBITS-32) | |
220 | jumpr r31 | |
221 | } | |
222 | ||
// Reached when the exponent difference failed the NO_OVF_UNF range check.
// Works out the exponent the result would have with unlimited range, then
// dispatches to overflow, "possibly underflowed", or the underflow code below.
223 | .Ldiv_ovf_unf: | |
// Apply the same exponent adjustment as the normal path, and capture in EXPB
// the exponent field of the converted quotient (both instructions read the
// AH value from before this packet).
224 | { | |
225 | AH += asl(EXPA,#DF_MANTBITS-32) | |
226 | EXPB = extractu(AH,#DF_EXPBITS,#DF_MANTBITS-32) | |
227 | } | |
// PROD = |Q|; EXPA = adjustment + converted exponent.
228 | { | |
229 | PROD = abs(Q) | |
230 | EXPA = add(EXPA,EXPB) | |
231 | } | |
232 | { | |
233 | P_TMP = cmp.gt(EXPA,##DF_BIAS+DF_BIAS) // overflow | |
234 | if (P_TMP.new) jump:nt .Ldiv_ovf | |
235 | } | |
236 | { | |
237 | P_TMP = cmp.gt(EXPA,#0) | |
238 | if (P_TMP.new) jump:nt .Lpossible_unf // round up to normal possible... | |
239 | } | |
240 | /* Underflow */ | |
241 | /* We know what the infinite range exponent should be (EXPA) */ | |
242 | /* Q is 2's complement, PROD is abs(Q) */ | |
243 | /* Normalize Q, shift right, add a high bit, convert, change exponent */ | |
244 | ||
245 | #define FUDGE1 7 // how much to shift right | |
246 | #define FUDGE2 4 // how many guard/round to keep at lsbs | |
247 | ||
// EXPB = left shift that normalizes PROD (leading-bit count minus one);
// EXPA = FUDGE1 - EXPA, the right shift to apply afterwards;
// TMP = current USR; TMP1 = 63, the cap for that right shift.
248 | { | |
249 | EXPB = add(clb(PROD),#-1) // doesn't need to be added in since | |
250 | EXPA = sub(#FUDGE1,EXPA) // we extract post-converted exponent | |
251 | TMP = USR | |
252 | TMP1 = #63 | |
253 | } | |
// PROD is normalized with the EXPB from the previous packet, while EXPB is
// reloaded with min(right shift, 63). TMP1 = USR | 0x30, the value written back
// if the result turns out inexact. EXPA = 0 becomes the offset half of EXPBA.
254 | { | |
255 | EXPB = min(EXPA,TMP1) | |
256 | TMP1 = or(TMP,#0x030) | |
257 | PROD = asl(PROD,EXPB) | |
258 | EXPA = #0 | |
259 | } | |
// EXPBA supplies width = EXPB and offset = 0, so TMPPAIR receives exactly the
// low bits that the right shift discards.
260 | { | |
261 | TMPPAIR = extractu(PROD,EXPBA) // bits that will get shifted out | |
262 | PROD = lsr(PROD,EXPB) // shift out bits | |
263 | B = #1 | |
264 | } | |
// If any discarded bit was set (TMPPAIR >= 1), OR a sticky 1 into PRODLO.
// Bit DF_MANTBITS-32+FUDGE2 of PRODHI is set as the "high bit" mentioned in the
// underflow notes above.
265 | { | |
266 | P_TMP = cmp.gtu(B,TMPPAIR) | |
267 | if (!P_TMP.new) PRODLO = or(BL,PRODLO) | |
268 | PRODHI = setbit(PRODHI,#DF_MANTBITS-32+FUDGE2) | |
269 | } | |
// Q = -PROD (replaced by +PROD in the next packet for positive results).
// If any of the low FUDGE2 guard bits is set the result is inexact, so select
// the USR value that has the 0x30 bits set.
270 | { | |
271 | Q = neg(PROD) | |
272 | P_TMP = bitsclr(PRODLO,#(1<<FUDGE2)-1) | |
273 | if (!P_TMP.new) TMP = TMP1 | |
274 | } | |
// Write USR back, pick the signed integer to convert, and load the exponent
// correction that is added after the conversion.
275 | { | |
276 | USR = TMP | |
277 | if (Q_POSITIVE) Q = PROD | |
278 | TMP = #-DF_BIAS-(DF_MANTBITS+FUDGE2) | |
279 | } | |
280 | { | |
281 | A = convert_d2df(Q) | |
282 | } | |
// Add the exponent correction into the exponent field of the high word and return.
283 | { | |
284 | AH += asl(TMP,#DF_MANTBITS-32) | |
285 | jumpr r31 | |
286 | } | |
287 | ||
288 | ||
// Reached from .Ldiv_ovf_unf when the computed exponent is positive and not an
// overflow. A already holds the value to return; only the USR flags may need
// updating.
289 | .Lpossible_unf: | |
290 | /* If upper parts of Q were all F's, but abs(A) == 0x00100000_00000000, we rounded up to min_normal */ | |
291 | /* The answer is correct, but we need to raise Underflow */ | |
// B = A with the sign bit dropped (low 63 bits); TMPPAIR = the minimum normal
// double; TMP = 0x7FFF, the bit mask tested against PRODHI below.
292 | { | |
293 | B = extractu(A,#63,#0) | |
294 | TMPPAIR = combine(##0x00100000,#0) // min normal | |
295 | TMP = #0x7FFF | |
296 | } | |
// P_TMP is written twice in this packet, so it is true only when both tests
// hold: |A| == min normal AND all bits of mask 0x7FFF are set in PRODHI.
297 | { | |
298 | P_TMP = dfcmp.eq(TMPPAIR,B) // Is everything zero in the rounded value... | |
299 | P_TMP = bitsset(PRODHI,TMP) // but a bunch of bits set in the unrounded abs(quotient)? | |
300 | } | |
301 | ||
// For __HEXAGON_ARCH__ == 60 the USR read and the conditional return are
// emitted as two separate instructions instead of one packet; both variants
// perform the same two operations. (SOURCE does not state why.)
302 | #if (__HEXAGON_ARCH__ == 60) | |
303 | TMP = USR // If not, just return | |
304 | if (!P_TMP) jumpr r31 // Else, we want to set Unf+Inexact | |
305 | // Note that inexact is already set... | |
306 | #else | |
307 | { | |
308 | if (!P_TMP) jumpr r31 // If not, just return | |
309 | TMP = USR // Else, we want to set Unf+Inexact | |
310 | } // Note that inexact is already set... | |
311 | #endif | |
// Set the underflow and inexact bits (0x30) in USR and write it back.
312 | { | |
313 | TMP = or(TMP,#0x30) | |
314 | } | |
315 | { | |
316 | USR = TMP | |
317 | } | |
// NOTE(review): the compare result in p0 is not used; presumably this floating
// point operation is issued so that the newly set flags can take effect as an
// exception (compare the "get exceptions" note in .Ldiv_ovf) -- confirm.
318 | { | |
319 | p0 = dfcmp.eq(A,A) | |
320 | jumpr r31 | |
321 | } | |
322 | ||
323 | .Ldiv_ovf: | |
324 | /* | |
325 | * Raise Overflow, and choose the correct overflow value (saturated normal or infinity) | |
326 | */ | |
// TMP = current USR; B = largest finite magnitude (0x7fefffff_ffffffff);
// AH = 0 for a positive result, all ones for a negative one (only bit 31 of it
// survives the final insert).
327 | { | |
328 | TMP = USR | |
329 | B = combine(##0x7fefffff,#-1) | |
330 | AH = mux(Q_POSITIVE,#0,#-1) | |
331 | } | |
// PROD = infinity bit pattern; QH = 2-bit rounding mode taken from USR;
// TMP = USR | 0x28.
// NOTE(review): 0x28 is presumably the overflow and inexact flag bits --
// confirm against the USR layout.
332 | { | |
333 | PROD = combine(##0x7ff00000,#0) | |
334 | QH = extractu(TMP,#2,#SR_ROUND_OFF) | |
335 | TMP = or(TMP,#0x28) | |
336 | } | |
// Write the flags. QL keeps the raw rounding mode (it reads QH before this
// packet's update); QH becomes rounding mode XOR result sign bit.
337 | { | |
338 | USR = TMP | |
339 | QH ^= lsr(AH,#31) | |
340 | QL = QH | |
341 | } | |
// Choose infinity unless the rounding mode says to stay at the largest finite
// value: raw mode 1 (round-to-zero), or sign-adjusted mode 2 (a directed mode
// pointing away from this result's sign).
342 | { | |
343 | p0 = !cmp.eq(QL,#1) // if not round-to-zero | |
344 | p0 = !cmp.eq(QH,#2) // and not rounding the other way | |
345 | if (p0.new) B = PROD // go to inf | |
346 | p0 = dfcmp.eq(B,B) // get exceptions | |
347 | } | |
// Copy the low 63 bits of B into A; bit 63 keeps the sign prepared in AH.
348 | { | |
349 | A = insert(B,#63,#0) | |
350 | jumpr r31 | |
351 | } | |
352 | ||
// Slow path entered when at least one operand is not a normal number.
// It dispatches NaN, inf/inf, 0/0, zero-result and infinite-result cases, and
// otherwise normalizes denormal operands and rejoins the main path at
// .Ldenorm_continue. r28 still holds AH ^ BH from the entry packet; p1 and p2
// are re-aliased for this path.
353 | #undef ONE | |
354 | #define SIGN r28 | |
355 | #undef NORMAL | |
356 | #undef NO_OVF_UNF | |
357 | #define P_INF p1 | |
358 | #define P_ZERO p2 | |
359 | .Ldiv_abnormal: | |
// P_TMP = both operands are numbers (neither is NaN). Q_POSITIVE is computed
// here because the main-path packet that sets it was skipped.
360 | { | |
361 | P_TMP = dfclass(A,#DFCLASS_NUMBER) | |
362 | P_TMP = dfclass(B,#DFCLASS_NUMBER) | |
363 | Q_POSITIVE = cmp.gt(SIGN,#-1) | |
364 | } | |
// P_INF = both operands infinite.
365 | { | |
366 | P_INF = dfclass(A,#DFCLASS_INFINITE) | |
367 | P_INF = dfclass(B,#DFCLASS_INFINITE) | |
368 | } | |
// P_ZERO = both operands zero.
369 | { | |
370 | P_ZERO = dfclass(A,#DFCLASS_ZERO) | |
371 | P_ZERO = dfclass(B,#DFCLASS_ZERO) | |
372 | } | |
// A NaN operand goes to .Ldiv_nan; inf/inf is an invalid operation.
373 | { | |
374 | if (!P_TMP) jump .Ldiv_nan | |
375 | if (P_INF) jump .Ldiv_invalid | |
376 | } | |
// 0/0 is an invalid operation as well.
377 | { | |
378 | if (P_ZERO) jump .Ldiv_invalid | |
379 | } | |
// P_ZERO is reused: true when A is non-zero AND B is not infinite.
380 | { | |
381 | P_ZERO = dfclass(A,#DFCLASS_NONZERO) // nonzero | |
382 | P_ZERO = dfclass(B,#DFCLASS_NONINFINITE) // non-infinite | |
383 | } | |
// P_INF is reused: true when A is not infinite AND B is non-zero.
384 | { | |
385 | P_INF = dfclass(A,#DFCLASS_NONINFINITE) // non-infinite | |
386 | P_INF = dfclass(B,#DFCLASS_NONZERO) // nonzero | |
387 | } | |
// A == 0 or B == inf gives a zero result; A == inf or B == 0 gives an infinite
// result. With 0/0 and inf/inf already removed, at most one jump is taken.
388 | { | |
389 | if (!P_ZERO) jump .Ldiv_zero_result | |
390 | if (!P_INF) jump .Ldiv_inf_result | |
391 | } | |
392 | /* Now we've narrowed it down to (de)normal / (de)normal */ | |
393 | /* Set up A/EXPA B/EXPB and go back */ | |
394 | #undef P_ZERO | |
395 | #undef P_INF | |
396 | #define P_TMP2 p1 | |
// P_TMP / P_TMP2 = A / B is normal (as opposed to denormal);
// TMP = 0x00100000, the implicit-one bit position in a high word.
397 | { | |
398 | P_TMP = dfclass(A,#DFCLASS_NORMAL) | |
399 | P_TMP2 = dfclass(B,#DFCLASS_NORMAL) | |
400 | TMP = ##0x00100000 | |
401 | } | |
// Save the original high words in EXPBA, then overwrite bits 31..20 of each
// high word with the (all-zero) low DF_EXPBITS+1 bits of TMP.
402 | { | |
403 | EXPBA = combine(BH,AH) | |
404 | AH = insert(TMP,#DF_EXPBITS+1,#DF_MANTBITS-32) // clear out hidden bit, sign bit | |
405 | BH = insert(TMP,#DF_EXPBITS+1,#DF_MANTBITS-32) // clear out hidden bit, sign bit | |
406 | } | |
407 | { | |
408 | if (P_TMP) AH = or(AH,TMP) // if normal, add back in hidden bit | |
409 | if (P_TMP2) BH = or(BH,TMP) // if normal, add back in hidden bit | |
410 | } | |
// QH / QL = left shift that brings the leading one of A / B up to bit
// DF_MANTBITS (leading-bit count minus DF_EXPBITS); TMP = 1.
411 | { | |
412 | QH = add(clb(A),#-DF_EXPBITS) | |
413 | QL = add(clb(B),#-DF_EXPBITS) | |
414 | TMP = #1 | |
415 | } | |
// Exponent fields taken from the saved high words (as on the main path).
416 | { | |
417 | EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-32) | |
418 | EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-32) | |
419 | } | |
// Normalize both mantissas. For a denormal operand the exponent is replaced
// by 1 minus the normalizing shift.
420 | { | |
421 | A = asl(A,QH) | |
422 | B = asl(B,QL) | |
423 | if (!P_TMP) EXPA = sub(TMP,QH) | |
424 | if (!P_TMP2) EXPB = sub(TMP,QL) | |
425 | } // recreate values needed by resume code | |
// Rebuild PROD and SFDEN from the normalized divisor exactly as the main path
// does. SFONE was loaded in the packet that branched to .Ldiv_abnormal.
426 | { | |
427 | PROD = extractu(B,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS) | |
428 | } | |
429 | { | |
430 | SFDEN = or(SFONE,PRODLO) | |
431 | jump .Ldenorm_continue | |
432 | } | |
433 | ||
// Zero result (reached from .Ldiv_abnormal when A is zero or B is infinite):
// return a zero carrying the sign of A xor B.
434 | .Ldiv_zero_result: | |
// AH bit 31 = result sign; B = 0 supplies the magnitude bits.
435 | { | |
436 | AH = xor(AH,BH) | |
437 | B = #0 | |
438 | } | |
// Overwrite the low 63 bits of A with zero, leaving only the sign bit.
439 | { | |
440 | A = insert(B,#63,#0) | |
441 | jumpr r31 | |
442 | } | |
// Infinite result (reached from .Ldiv_abnormal when A is infinite or B is
// zero): return an infinity carrying the sign of A xor B, setting the
// divide-by-zero flag when a non-infinite A is divided by zero.
443 | .Ldiv_inf_result: | |
// p2 = B is zero AND A is not infinite, i.e. a true division by zero.
444 | { | |
445 | p2 = dfclass(B,#DFCLASS_ZERO) | |
446 | p2 = dfclass(A,#DFCLASS_NONINFINITE) | |
447 | } | |
// Read USR and compute the result sign; skip the flag update for inf / x.
448 | { | |
449 | TMP = USR | |
450 | if (!p2) jump 1f | |
451 | AH = xor(AH,BH) | |
452 | } | |
453 | { | |
454 | TMP = or(TMP,#0x04) // DBZ | |
455 | } | |
456 | { | |
457 | USR = TMP | |
458 | } | |
459 | 1: | |
// B = infinity bit pattern; the compare result in p0 is not used.
460 | { | |
461 | B = combine(##0x7ff00000,#0) | |
462 | p0 = dfcmp.uo(B,B) // take possible exception | |
463 | } | |
// Copy the low 63 bits of B into A; bit 63 keeps the sign computed above.
464 | { | |
465 | A = insert(B,#63,#0) | |
466 | jumpr r31 | |
467 | } | |
// At least one operand is NaN (class mask 0x10): touch the NaN operand(s) so
// that any invalid exception is raised, then return the all-ones bit pattern.
468 | .Ldiv_nan: | |
// Replace a non-NaN operand with the other one, so both A and B hold a NaN.
469 | { | |
470 | p0 = dfclass(A,#0x10) | |
471 | p1 = dfclass(B,#0x10) | |
472 | if (!p0.new) A = B | |
473 | if (!p1.new) B = A | |
474 | } | |
// The converted values in QH/QL are discarded; only the side effect matters.
475 | { | |
476 | QH = convert_df2sf(A) // get possible invalid exceptions | |
477 | QL = convert_df2sf(B) | |
478 | } | |
// Result = 0xffffffff_ffffffff.
479 | { | |
480 | A = #-1 | |
481 | jumpr r31 | |
482 | } | |
483 | ||
// Invalid operation (reached for inf/inf and 0/0): produce a NaN result and
// raise the invalid exception by converting a single-precision NaN pattern.
484 | .Ldiv_invalid: | |
// 0x7f800001: single-precision pattern with an all-ones exponent and a
// non-zero mantissa.
485 | { | |
486 | TMP = ##0x7f800001 | |
487 | } | |
488 | { | |
489 | A = convert_sf2df(TMP) // get invalid, get DF qNaN | |
490 | jumpr r31 | |
491 | } | |
// Record the size of __hexagon_divdf3 for the symbol table.
492 | END(__hexagon_divdf3) |