1 |~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2 |MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
3 |M68000 Hi-Performance Microprocessor Division
4 |M68060 Software Package
5 |Production Release P1.00 -- October 10, 1994
6 |
7 |M68060 Software Package Copyright © 1993, 1994 Motorola Inc. All rights reserved.
8 |
9 |THE SOFTWARE is provided on an "AS IS" basis and without warranty.
10 |To the maximum extent permitted by applicable law,
11 |MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
12 |INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
13 |and any warranty against infringement with regard to the SOFTWARE
14 |(INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials.
15 |
16 |To the maximum extent permitted by applicable law,
17 |IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
18 |(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
19 |BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
20 |ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
21 |Motorola assumes no responsibility for the maintenance and support of the SOFTWARE.
22 |
23 |You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE
24 |so long as this entire notice is retained without alteration in any modified and/or
25 |redistributed versions, and that such modified versions are clearly identified as such.
26 |No licenses are granted by implication, estoppel or otherwise under any patents
27 |or trademarks of Motorola, Inc.
28 |~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
29 # litop.s:
30 # This file is appended to the top of the 060ILSP package
31 # and contains the entry points into the package. The user, in
32 # effect, branches to one of the branch table entries located here.
33 #
34
35 bra.l _060LSP__idivs64_
36 short 0x0000
37 bra.l _060LSP__idivu64_
38 short 0x0000
39
40 bra.l _060LSP__imuls64_
41 short 0x0000
42 bra.l _060LSP__imulu64_
43 short 0x0000
44
45 bra.l _060LSP__cmp2_Ab_
46 short 0x0000
47 bra.l _060LSP__cmp2_Aw_
48 short 0x0000
49 bra.l _060LSP__cmp2_Al_
50 short 0x0000
51 bra.l _060LSP__cmp2_Db_
52 short 0x0000
53 bra.l _060LSP__cmp2_Dw_
54 short 0x0000
55 bra.l _060LSP__cmp2_Dl_
56 short 0x0000
57
58 # leave room for possible future additions.
59 align 0x200
60
61 #########################################################################
62 # XDEF **************************************************************** #
63 # _060LSP__idivu64_(): Emulate 64-bit unsigned div instruction. #
64 # _060LSP__idivs64_(): Emulate 64-bit signed div instruction. #
65 # #
66 # This is the library version which is accessed as a subroutine #
67 # and therefore does not work exactly like the 680X0 div{s,u}.l #
68 # 64-bit divide instruction. #
69 # #
70 # XREF **************************************************************** #
71 # None. #
72 # #
73 # INPUT *************************************************************** #
74 # 0x4(sp) = divisor #
75 # 0x8(sp) = hi(dividend) #
76 # 0xc(sp) = lo(dividend) #
77 # 0x10(sp) = pointer to location to place quotient/remainder #
78 # #
79 # OUTPUT ************************************************************** #
80 # 0x10(sp) = points to location of remainder/quotient. #
81 # remainder is in first longword, quotient is in 2nd. #
82 # #
83 # ALGORITHM *********************************************************** #
84 # If the operands are signed, make them unsigned and save the #
85 # sign info for later. Separate out special cases like divide-by-zero #
86 # or 32-bit divides if possible. Else, use a special math algorithm #
87 # to calculate the result. #
88 # Restore sign info if signed instruction. Set the condition #
89 # codes before performing the final "rts". If the divisor was equal to #
90 # zero, then force a divide-by-zero exception using the implemented 16-bit #
91 # divide instruction. This way, the operating system can record that the #
92 # event occurred, even though the reported address may not be the original. #
93 # #
94 #########################################################################
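#
# As an illustrative C model of the flow described above (a sketch, not
# part of the original package; the function name div64_emul and the
# result[] parameter are made up, and the real entry points take their
# arguments on the stack as listed in the INPUT section):
#
#	#include <stdint.h>
#
#	/* returns 0 on success, 1 on overflow, -1 for a zero divisor.
#	 * result[0] = remainder, result[1] = quotient, as documented above. */
#	static int div64_emul(int32_t divisor, int32_t hi, uint32_t lo,
#	                      uint32_t result[2], int is_signed)
#	{
#	        uint64_t ud = ((uint64_t)(uint32_t)hi << 32) | lo;
#	        uint32_t uv = (uint32_t)divisor;
#	        uint32_t q, r;
#	        int neg_dd = 0, neg_dv = 0;
#
#	        if (divisor == 0)
#	                return -1;              /* caller forces a divide-by-zero */
#	        if (is_signed) {
#	                if (divisor < 0) { uv = -uv; neg_dv = 1; }
#	                if (hi < 0)      { ud = -ud; neg_dd = 1; }
#	        }
#	        if ((uint32_t)(ud >> 32) >= uv)
#	                return 1;               /* quotient cannot fit in 32 bits */
#
#	        q = (uint32_t)(ud / uv);        /* done by hand in the asm below */
#	        r = (uint32_t)(ud % uv);
#
#	        if (is_signed) {
#	                if (neg_dd)
#	                        r = -r;         /* remainder takes the dividend's sign */
#	                if (neg_dd ^ neg_dv) {
#	                        if (q > 0x80000000UL)
#	                                return 1;       /* -q must fit in 32 bits */
#	                        q = -q;
#	                } else if (q & 0x80000000UL) {
#	                        return 1;               /* +q must fit in 32 bits */
#	                }
#	        }
#	        result[0] = r;
#	        result[1] = q;
#	        return 0;
#	}
#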
95
96 set POSNEG, -1
97 set NDIVISOR, -2
98 set NDIVIDEND, -3
99 set DDSECOND, -4
100 set DDNORMAL, -8
101 set DDQUOTIENT, -12
102 set DIV64_CC, -16
103
104 ##########
105 # divs.l #
106 ##########
107 global _060LSP__idivs64_
108 _060LSP__idivs64_:
109 # PROLOGUE BEGIN ########################################################
110 link.w %a6,&-16
111 movm.l &0x3f00,-(%sp) # save d2-d7
112 # fmovm.l &0x0,-(%sp) # save no fpregs
113 # PROLOGUE END ##########################################################
114
115 mov.w %cc,DIV64_CC(%a6)
116 st POSNEG(%a6) # signed operation
117 bra.b ldiv64_cont
118
119 ##########
120 # divu.l #
121 ##########
122 global _060LSP__idivu64_
123 _060LSP__idivu64_:
124 # PROLOGUE BEGIN ########################################################
125 link.w %a6,&-16
126 movm.l &0x3f00,-(%sp) # save d2-d7
127 # fmovm.l &0x0,-(%sp) # save no fpregs
128 # PROLOGUE END ##########################################################
129
130 mov.w %cc,DIV64_CC(%a6)
131 sf POSNEG(%a6) # unsigned operation
132
133 ldiv64_cont:
134 mov.l 0x8(%a6),%d7 # fetch divisor
135
136 beq.w ldiv64eq0 # divisor is = 0!!!
137
138 mov.l 0xc(%a6), %d5 # get dividend hi
139 mov.l 0x10(%a6), %d6 # get dividend lo
140
141 # separate signed and unsigned divide
142 tst.b POSNEG(%a6) # signed or unsigned?
143 beq.b ldspecialcases # use positive divide
144
145 # save the sign of the divisor
146 # make divisor unsigned if it's negative
147 tst.l %d7 # chk sign of divisor
148 slt NDIVISOR(%a6) # save sign of divisor
149 bpl.b ldsgndividend
150 neg.l %d7 # complement negative divisor
151
152 # save the sign of the dividend
153 # make dividend unsigned if it's negative
154 ldsgndividend:
155 tst.l %d5 # chk sign of hi(dividend)
156 slt NDIVIDEND(%a6) # save sign of dividend
157 bpl.b ldspecialcases
158
159 mov.w &0x0, %cc # clear 'X' cc bit
160 negx.l %d6 # complement signed dividend
161 negx.l %d5
162
163 # extract some special cases:
164 # - is (dividend == 0) ?
165 # - is (hi(dividend) == 0 && (divisor <= lo(dividend))) ? (32-bit div)
166 ldspecialcases:
167 tst.l %d5 # is (hi(dividend) == 0)
168 bne.b ldnormaldivide # no, so try it the long way
169
170 tst.l %d6 # is (lo(dividend) == 0), too
171 beq.w lddone # yes, so (dividend == 0)
172
173 cmp.l %d7,%d6 # is (divisor <= lo(dividend))
174 bls.b ld32bitdivide # yes, so use 32 bit divide
175
176 exg %d5,%d6 # q = 0, r = dividend
177 bra.w ldivfinish # can't divide, we're done.
178
179 ld32bitdivide:
180 tdivu.l %d7, %d5:%d6 # it's only a 32/32 bit div!
181
182 bra.b ldivfinish
183
184 ldnormaldivide:
185 # last special case:
186 # - is hi(dividend) >= divisor ? if yes, then overflow
187 cmp.l %d7,%d5
188 bls.b lddovf # answer won't fit in 32 bits
189
190 # perform the divide algorithm:
191 bsr.l ldclassical # do int divide
192
193 # separate into signed and unsigned finishes.
194 ldivfinish:
195 tst.b POSNEG(%a6) # do divs, divu separately
196 beq.b lddone # divu has no processing!!!
197
198 # it was a divs.l, so ccode setting is a little more complicated...
199 tst.b NDIVIDEND(%a6) # remainder has same sign
200 beq.b ldcc # as dividend.
201 neg.l %d5 # sgn(rem) = sgn(dividend)
202 ldcc:
203 mov.b NDIVISOR(%a6), %d0
204 eor.b %d0, NDIVIDEND(%a6) # chk if quotient is negative
205 beq.b ldqpos # branch to quot positive
206
207 # 0x80000000 is the 32-bit negative number with the largest magnitude.
208 # the negative of 0x80000000 is 0x80000000 itself.
209 cmpi.l %d6, &0x80000000 # will (-quot) fit in 32 bits?
210 bhi.b lddovf
211
212 neg.l %d6 # make (-quot) 2's comp
213
214 bra.b lddone
215
216 ldqpos:
217 btst &0x1f, %d6 # will (+quot) fit in 32 bits?
218 bne.b lddovf
219
220 lddone:
221 # the remainder is stored in the first longword and the quotient in the
222 # second, matching the result layout described in the OUTPUT section above.
223 andi.w &0x10,DIV64_CC(%a6)
224 mov.w DIV64_CC(%a6),%cc
225 tst.l %d6 # may set 'N' ccode bit
226
227 # here, the remainder is in d5 and the quotient is in d6. the current
228 # strategy is to save them at the location pointed to by the stacked result pointer.
229 # use movm here to not disturb the condition codes.
230 ldexit:
231 movm.l &0x0060,([0x14,%a6]) # save result
232
233 # EPILOGUE BEGIN ########################################################
234 # fmovm.l (%sp)+,&0x0 # restore no fpregs
235 movm.l (%sp)+,&0x00fc # restore d2-d7
236 unlk %a6
237 # EPILOGUE END ##########################################################
238
239 rts
240
241 # the result should be the unchanged dividend
242 lddovf:
243 mov.l 0xc(%a6), %d5 # get dividend hi
244 mov.l 0x10(%a6), %d6 # get dividend lo
245
246 andi.w &0x1c,DIV64_CC(%a6)
247 ori.w &0x02,DIV64_CC(%a6) # set 'V' ccode bit
248 mov.w DIV64_CC(%a6),%cc
249
250 bra.b ldexit
251
252 ldiv64eq0:
253 mov.l 0xc(%a6),([0x14,%a6])
254 mov.l 0x10(%a6),([0x14,%a6],0x4)
255
256 mov.w DIV64_CC(%a6),%cc
257
258 # EPILOGUE BEGIN ########################################################
259 # fmovm.l (%sp)+,&0x0 # restore no fpregs
260 movm.l (%sp)+,&0x00fc # restore d2-d7
261 unlk %a6
262 # EPILOGUE END ##########################################################
263
264 divu.w &0x0,%d0 # force a divbyzero exception
265 rts
266
267 ###########################################################################
268 #########################################################################
269 # This routine uses the 'classical' Algorithm D from Donald Knuth's #
270 # Art of Computer Programming, vol II, Seminumerical Algorithms. #
271 # For this implementation b=2**16, and the target is U1U2U3U4/V1V2, #
272 # where U,V are words of the quadword dividend and longword divisor, #
273 # and U1, V1 are the most significant words. #
274 # #
275 # The most sig. longword of the 64 bit dividend must be in %d5, least #
276 # in %d6. The divisor must be in the variable ddivisor, and the #
277 # signed/unsigned flag ddusign must be set (0=unsigned,1=signed). #
278 # The quotient is returned in %d6, remainder in %d5, unless the #
279 # v (overflow) bit is set in the saved %ccr. If overflow, the dividend #
280 # is unchanged. #
281 #########################################################################
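#
# As a reminder of the estimate-and-correct step from Knuth (illustrative
# notation only, with b = 2**16, dividend digits u1..u4 and divisor
# digits v1,v2 after normalization):
#
#	qhat = min((u[j]*b + u[j+1]) / v1, b - 1)
#	while (v2*qhat > (u[j]*b + u[j+1] - qhat*v1)*b + u[j+2])
#		qhat = qhat - 1			# executed at most twice
#
# after the multiply/subtract of qhat*(v1v2), the rare "add back"
# correction is handled below, after the bcc to ldd2nd.
#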
282 ldclassical:
283 # if the divisor msw is 0, use a simpler algorithm than the full-blown
284 # one at lddknuth:
285
286 cmpi.l %d7, &0xffff
287 bhi.b lddknuth # go use D. Knuth algorithm
288
289 # Since the divisor is only a word (and larger than the mslw of the dividend),
290 # a simpler algorithm may be used :
291 # In the general case, four quotient words would be created by
292 # dividing the divisor word into each dividend word. In this case,
293 # the first two quotient words must be zero, or overflow would occur.
294 # Since we already checked this case above, we can treat the most significant
295 # longword of the dividend as (0) remainder (see Knuth) and merely complete
296 # the last two divisions to get a quotient longword and word remainder:
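#
# in C terms (an illustrative sketch only; u3,u4 are the two 16-bit
# halves of lo(dividend), v is the word-sized divisor, and
# hi(dividend) < v is already known at this point):
#
#	r  = hi;			/* hi longword is already a remainder */
#	t  = (r << 16) | u3;
#	q3 = t / v;  r = t % v;		/* first divu.w */
#	t  = (r << 16) | u4;
#	q4 = t / v;  r = t % v;		/* second divu.w */
#	quotient  = (q3 << 16) | q4;
#	remainder = r;
#
# each partial quotient fits in a word because t < v*2**16.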
297
298 clr.l %d1
299 swap %d5 # same as r*b if previous step rqd
300 swap %d6 # get u3 to lsw position
301 mov.w %d6, %d5 # rb + u3
302
303 divu.w %d7, %d5
304
305 mov.w %d5, %d1 # first quotient word
306 swap %d6 # get u4
307 mov.w %d6, %d5 # rb + u4
308
309 divu.w %d7, %d5
310
311 swap %d1
312 mov.w %d5, %d1 # 2nd quotient 'digit'
313 clr.w %d5
314 swap %d5 # now remainder
315 mov.l %d1, %d6 # and quotient
316
317 rts
318
319 lddknuth:
320 # In this algorithm, the divisor is treated as a 2 digit (word) number
321 # which is divided into a 3 digit (word) dividend to get one quotient
322 # digit (word). After subtraction, the dividend is shifted and the
323 # process repeated. Before beginning, the divisor and dividend are
324 # 'normalized' so that the process of estimating the quotient digit
325 # will yield verifiably correct results.
326
327 clr.l DDNORMAL(%a6) # count of shifts for normalization
328 clr.b DDSECOND(%a6) # clear flag for quotient digits
329 clr.l %d1 # %d1 will hold trial quotient
330 lddnchk:
331 btst &31, %d7 # must we normalize? first word of
332 bne.b lddnormalized # divisor (V1) must be >= 65536/2
333 addq.l &0x1, DDNORMAL(%a6) # count normalization shifts
334 lsl.l &0x1, %d7 # shift the divisor
335 lsl.l &0x1, %d6 # shift u4,u3 with overflow to u2
336 roxl.l &0x1, %d5 # shift u1,u2
337 bra.w lddnchk
338 lddnormalized:
339
340 # Now calculate an estimate of the quotient words (msw first, then lsw).
341 # The comments use subscripts for the first quotient digit determination.
342 mov.l %d7, %d3 # divisor
343 mov.l %d5, %d2 # dividend mslw
344 swap %d2
345 swap %d3
346 cmp.w %d2, %d3 # V1 = U1 ?
347 bne.b lddqcalc1
348 mov.w &0xffff, %d1 # use max trial quotient word
349 bra.b lddadj0
350 lddqcalc1:
351 mov.l %d5, %d1
352
353 divu.w %d3, %d1 # use quotient of mslw/msw
354
355 andi.l &0x0000ffff, %d1 # zero any remainder
356 lddadj0:
357
358 # now test the trial quotient and adjust. This step plus the
359 # normalization assures (according to Knuth) that the trial
360 # quotient will be at worst 1 too large.
361 mov.l %d6, -(%sp)
362 clr.w %d6 # word u3 left
363 swap %d6 # in lsw position
364 lddadj1: mov.l %d7, %d3
365 mov.l %d1, %d2
366 mulu.w %d7, %d2 # V2q
367 swap %d3
368 mulu.w %d1, %d3 # V1q
369 mov.l %d5, %d4 # U1U2
370 sub.l %d3, %d4 # U1U2 - V1q
371
372 swap %d4
373
374 mov.w %d4,%d0
375 mov.w %d6,%d4 # insert lower word (U3)
376
377 tst.w %d0 # is upper word set?
378 bne.w lddadjd1
379
380 # add.l %d6, %d4 # (U1U2 - V1q) + U3
381
382 cmp.l %d2, %d4
383 bls.b lddadjd1 # is V2q > (U1U2-V1q) + U3 ?
384 subq.l &0x1, %d1 # yes, decrement and recheck
385 bra.b lddadj1
386 lddadjd1:
387 # now test the word by multiplying it by the divisor (V1V2) and comparing
388 # the 3 digit (word) result with the current dividend words
389 mov.l %d5, -(%sp) # save %d5 (%d6 already saved)
390 mov.l %d1, %d6
391 swap %d6 # shift answer to ms 3 words
392 mov.l %d7, %d5
393 bsr.l ldmm2
394 mov.l %d5, %d2 # now %d2,%d3 are trial*divisor
395 mov.l %d6, %d3
396 mov.l (%sp)+, %d5 # restore dividend
397 mov.l (%sp)+, %d6
398 sub.l %d3, %d6
399 subx.l %d2, %d5 # subtract double precision
400 bcc ldd2nd # no carry, do next quotient digit
401 subq.l &0x1, %d1 # q is one too large
402 # need to add back divisor longword to current ms 3 digits of dividend
403 # - according to Knuth, this is done only 2 out of 65536 times for random
404 # divisor, dividend selection.
405 clr.l %d2
406 mov.l %d7, %d3
407 swap %d3
408 clr.w %d3 # %d3 now ls word of divisor
409 add.l %d3, %d6 # aligned with 3rd word of dividend
410 addx.l %d2, %d5
411 mov.l %d7, %d3
412 clr.w %d3 # %d3 now ms word of divisor
413 swap %d3 # aligned with 2nd word of dividend
414 add.l %d3, %d5
415 ldd2nd:
416 tst.b DDSECOND(%a6) # both q words done?
417 bne.b lddremain
418 # first quotient digit now correct. store digit and shift the
419 # (subtracted) dividend
420 mov.w %d1, DDQUOTIENT(%a6)
421 clr.l %d1
422 swap %d5
423 swap %d6
424 mov.w %d6, %d5
425 clr.w %d6
426 st DDSECOND(%a6) # second digit
427 bra.w lddnormalized
428 lddremain:
429 # add 2nd word to quotient, get the remainder.
430 mov.w %d1, DDQUOTIENT+2(%a6)
431 # shift down one word/digit to renormalize remainder.
432 mov.w %d5, %d6
433 swap %d6
434 swap %d5
435 mov.l DDNORMAL(%a6), %d7 # get norm shift count
436 beq.b lddrn
437 subq.l &0x1, %d7 # set for loop count
438 lddnlp:
439 lsr.l &0x1, %d5 # shift into %d6
440 roxr.l &0x1, %d6
441 dbf %d7, lddnlp
442 lddrn:
443 mov.l %d6, %d5 # remainder
444 mov.l DDQUOTIENT(%a6), %d6 # quotient
445
446 rts
447 ldmm2:
448 # factors for the 32X32->64 multiplication are in %d5 and %d6.
449 # returns 64 bit result in %d5 (hi) %d6(lo).
450 # destroys %d2,%d3,%d4.
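#
# the identity being computed (illustrative): with x = xh*2**16 + xl
# and y = yh*2**16 + yl,
#
#	x*y = (xh*yh)*2**32 + (xh*yl + xl*yh)*2**16 + (xl*yl)
#
# the two mixed products straddle both result longwords, so their low
# words are added into the low longword and any carries are propagated
# into the msw*msw product with addx below.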
451
452 # multiply hi,lo words of each factor to get 4 intermediate products
453 mov.l %d6, %d2
454 mov.l %d6, %d3
455 mov.l %d5, %d4
456 swap %d3
457 swap %d4
458 mulu.w %d5, %d6 # %d6 <- lsw*lsw
459 mulu.w %d3, %d5 # %d5 <- msw-dest*lsw-source
460 mulu.w %d4, %d2 # %d2 <- msw-source*lsw-dest
461 mulu.w %d4, %d3 # %d3 <- msw*msw
462 # now use swap and addx to consolidate to two longwords
463 clr.l %d4
464 swap %d6
465 add.w %d5, %d6 # add msw of l*l to lsw of m*l product
466 addx.w %d4, %d3 # add any carry to m*m product
467 add.w %d2, %d6 # add in lsw of other m*l product
468 addx.w %d4, %d3 # add any carry to m*m product
469 swap %d6 # %d6 is low 32 bits of final product
470 clr.w %d5
471 clr.w %d2 # lsw of two mixed products used,
472 swap %d5 # now use msws of longwords
473 swap %d2
474 add.l %d2, %d5
475 add.l %d3, %d5 # %d5 now ms 32 bits of final product
476 rts
477
478 #########################################################################
479 # XDEF **************************************************************** #
480 # _060LSP__imulu64_(): Emulate 64-bit unsigned mul instruction #
481 # _060LSP__imuls64_(): Emulate 64-bit signed mul instruction. #
482 # #
483 # This is the library version which is accessed as a subroutine #
484 # and therefore does not work exactly like the 680X0 mul{s,u}.l #
485 # 64-bit multiply instruction. #
486 # #
487 # XREF **************************************************************** #
488 # None #
489 # #
490 # INPUT *************************************************************** #
491 # 0x4(sp) = multiplier #
492 # 0x8(sp) = multiplicand #
493 # 0xc(sp) = pointer to location to place 64-bit result #
494 # #
495 # OUTPUT ************************************************************** #
496 # 0xc(sp) = points to location of 64-bit result #
497 # #
498 # ALGORITHM *********************************************************** #
499 # Perform the multiply in pieces using 16x16->32 unsigned #
500 # multiplies and "add" instructions. #
501 # Set the condition codes as appropriate before performing an #
502 # "rts". #
503 # #
504 #########################################################################
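#
# An illustrative C equivalent of the 16x16->32 scheme described above
# (a sketch, not part of the package; the name mul32x32_model is made up):
#
#	#include <stdint.h>
#
#	static void mul32x32_model(uint32_t a, uint32_t b,
#	                           uint32_t *hi, uint32_t *lo)
#	{
#	        uint32_t al = a & 0xffff, ah = a >> 16;
#	        uint32_t bl = b & 0xffff, bh = b >> 16;
#
#	        uint32_t ll = al * bl;          /* [1] lo * lo */
#	        uint32_t hl = ah * bl;          /* [2] hi * lo */
#	        uint32_t lh = al * bh;          /* [3] lo * hi */
#	        uint32_t hh = ah * bh;          /* [4] hi * hi */
#
#	        /* middle column: high half of [1] plus low halves of [2],[3] */
#	        uint32_t mid = (ll >> 16) + (hl & 0xffff) + (lh & 0xffff);
#
#	        *lo = (ll & 0xffff) | (mid << 16);
#	        *hi = hh + (hl >> 16) + (lh >> 16) + (mid >> 16);
#	}
#
# on a big-endian 68k, storing *hi first and *lo second reproduces the
# memory image the movm.l at the end of the routine writes.
#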
505
506 set MUL64_CC, -4
507
508 global _060LSP__imulu64_
509 _060LSP__imulu64_:
510
511 # PROLOGUE BEGIN ########################################################
512 link.w %a6,&-4
513 movm.l &0x3800,-(%sp) # save d2-d4
514 # fmovm.l &0x0,-(%sp) # save no fpregs
515 # PROLOGUE END ##########################################################
516
517 mov.w %cc,MUL64_CC(%a6) # save incoming ccodes
518
519 mov.l 0x8(%a6),%d0 # store multiplier in d0
520 beq.w mulu64_zero # handle zero separately
521
522 mov.l 0xc(%a6),%d1 # get multiplicand in d1
523 beq.w mulu64_zero # handle zero separately
524
525 #########################################################################
526 # 63 32 0 #
527 # ---------------------------- #
528 # | hi(mplier) * hi(mplicand)| #
529 # ---------------------------- #
530 # ----------------------------- #
531 # | hi(mplier) * lo(mplicand) | #
532 # ----------------------------- #
533 # ----------------------------- #
534 # | lo(mplier) * hi(mplicand) | #
535 # ----------------------------- #
536 # | ----------------------------- #
537 # --|-- | lo(mplier) * lo(mplicand) | #
538 # | ----------------------------- #
539 # ======================================================== #
540 # -------------------------------------------------------- #
541 # | hi(result) | lo(result) | #
542 # -------------------------------------------------------- #
543 #########################################################################
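# a small worked example of the picture above (illustrative numbers):
# mplier = 0x00020003, mplicand = 0x00040005
#	[1] lo*lo = 0x0003 * 0x0005 = 0x0000000f
#	[2] hi*lo = 0x0002 * 0x0005 = 0x0000000a
#	[3] lo*hi = 0x0003 * 0x0004 = 0x0000000c
#	[4] hi*hi = 0x0002 * 0x0004 = 0x00000008
# result = ([4] << 32) + (([2] + [3]) << 16) + [1]
#	 = hi 0x00000008, lo 0x0016000f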
544 mulu64_alg:
545 # load temp registers with operands
546 mov.l %d0,%d2 # mr in d2
547 mov.l %d0,%d3 # mr in d3
548 mov.l %d1,%d4 # md in d4
549 swap %d3 # hi(mr) in lo d3
550 swap %d4 # hi(md) in lo d4
551
552 # complete necessary multiplies:
553 mulu.w %d1,%d0 # [1] lo(mr) * lo(md)
554 mulu.w %d3,%d1 # [2] hi(mr) * lo(md)
555 mulu.w %d4,%d2 # [3] lo(mr) * hi(md)
556 mulu.w %d4,%d3 # [4] hi(mr) * hi(md)
557
558 # add lo portions of [2],[3] to hi portion of [1].
559 # add carries produced from these adds to [4].
560 # lo([1]) is the final lo 16 bits of the result.
561 clr.l %d4 # load d4 w/ zero value
562 swap %d0 # hi([1]) <==> lo([1])
563 add.w %d1,%d0 # hi([1]) + lo([2])
564 addx.l %d4,%d3 # [4] + carry
565 add.w %d2,%d0 # hi([1]) + lo([3])
566 addx.l %d4,%d3 # [4] + carry
567 swap %d0 # lo([1]) <==> hi([1])
568
569 # lo portions of [2],[3] have been added into the final result.
570 # now, clear lo, put hi in lo reg, and add to [4]
571 clr.w %d1 # clear lo([2])
572 clr.w %d2 # clear hi([3])
573 swap %d1 # hi([2]) in lo d1
574 swap %d2 # hi([3]) in lo d2
575 add.l %d2,%d1 # [4] + hi([2])
576 add.l %d3,%d1 # [4] + hi([3])
577
578 # now, grab the condition codes. the only one that can be set is 'N'.
579 # 'N' can be set even for an unsigned operation, if bit 63 of the result is set.
580 mov.w MUL64_CC(%a6),%d4
581 andi.b &0x10,%d4 # keep old 'X' bit
582 tst.l %d1 # may set 'N' bit
583 bpl.b mulu64_ddone
584 ori.b &0x8,%d4 # set 'N' bit
585 mulu64_ddone:
586 mov.w %d4,%cc
587
588 # here, the result is in d1 and d0. the current strategy is to save
589 # the values at the location pointed to by the stacked result pointer.
590 # use movm here to not disturb the condition codes.
591 mulu64_end:
592 exg %d1,%d0
593 movm.l &0x0003,([0x10,%a6]) # save result
594
595 # EPILOGUE BEGIN ########################################################
596 # fmovm.l (%sp)+,&0x0 # restore no fpregs
597 movm.l (%sp)+,&0x001c # restore d2-d4
598 unlk %a6
599 # EPILOGUE END ##########################################################
600
601 rts
602
603 # one or both of the operands is zero so the result is also zero.
604 # save the zero result to the result location and set the 'Z' ccode bit.
605 mulu64_zero:
606 clr.l %d0
607 clr.l %d1
608
609 mov.w MUL64_CC(%a6),%d4
610 andi.b &0x10,%d4
611 ori.b &0x4,%d4
612 mov.w %d4,%cc # set 'Z' ccode bit
613
614 bra.b mulu64_end
615
616 ##########
617 # muls.l #
618 ##########
619 global _060LSP__imuls64_
620 _060LSP__imuls64_:
621
622 # PROLOGUE BEGIN ########################################################
623 link.w %a6,&-4
624 movm.l &0x3c00,-(%sp) # save d2-d5
625 # fmovm.l &0x0,-(%sp) # save no fpregs
626 # PROLOGUE END ##########################################################
627
628 mov.w %cc,MUL64_CC(%a6) # save incoming ccodes
629
630 mov.l 0x8(%a6),%d0 # store multiplier in d0
631 beq.b mulu64_zero # handle zero separately
632
633 mov.l 0xc(%a6),%d1 # get multiplicand in d1
634 beq.b mulu64_zero # handle zero separately
635
636 clr.b %d5 # clear sign tag
637 tst.l %d0 # is multiplier negative?
638 bge.b muls64_chk_md_sgn # no
639 neg.l %d0 # make multiplier positive
640
641 ori.b &0x1,%d5 # save multiplier sgn
642
643 # the result sign is the exclusive or of the operand sign bits.
644 muls64_chk_md_sgn:
645 tst.l %d1 # is multiplicand negative?
646 bge.b muls64_alg # no
647 neg.l %d1 # make multiplicand positive
648
649 eori.b &0x1,%d5 # calculate correct sign
650
651 #########################################################################
652 # 63 32 0 #
653 # ---------------------------- #
654 # | hi(mplier) * hi(mplicand)| #
655 # ---------------------------- #
656 # ----------------------------- #
657 # | hi(mplier) * lo(mplicand) | #
658 # ----------------------------- #
659 # ----------------------------- #
660 # | lo(mplier) * hi(mplicand) | #
661 # ----------------------------- #
662 # | ----------------------------- #
663 # --|-- | lo(mplier) * lo(mplicand) | #
664 # | ----------------------------- #
665 # ======================================================== #
666 # -------------------------------------------------------- #
667 # | hi(result) | lo(result) | #
668 # -------------------------------------------------------- #
669 #########################################################################
670 muls64_alg:
671 # load temp registers with operands
672 mov.l %d0,%d2 # mr in d2
673 mov.l %d0,%d3 # mr in d3
674 mov.l %d1,%d4 # md in d4
675 swap %d3 # hi(mr) in lo d3
676 swap %d4 # hi(md) in lo d4
677
678 # complete necessary multiplies:
679 mulu.w %d1,%d0 # [1] lo(mr) * lo(md)
680 mulu.w %d3,%d1 # [2] hi(mr) * lo(md)
681 mulu.w %d4,%d2 # [3] lo(mr) * hi(md)
682 mulu.w %d4,%d3 # [4] hi(mr) * hi(md)
683
684 # add lo portions of [2],[3] to hi portion of [1].
685 # add carries produced from these adds to [4].
686 # lo([1]) is the final lo 16 bits of the result.
687 clr.l %d4 # load d4 w/ zero value
688 swap %d0 # hi([1]) <==> lo([1])
689 add.w %d1,%d0 # hi([1]) + lo([2])
690 addx.l %d4,%d3 # [4] + carry
691 add.w %d2,%d0 # hi([1]) + lo([3])
692 addx.l %d4,%d3 # [4] + carry
693 swap %d0 # lo([1]) <==> hi([1])
694
695 # lo portions of [2],[3] have been added into the final result.
696 # now, clear lo, put hi in lo reg, and add to [4]
697 clr.w %d1 # clear lo([2])
698 clr.w %d2 # clear hi([3])
699 swap %d1 # hi([2]) in lo d1
700 swap %d2 # hi([3]) in lo d2
701 add.l %d2,%d1 # [4] + hi([2])
702 add.l %d3,%d1 # [4] + hi([3])
703
704 tst.b %d5 # should result be signed?
705 beq.b muls64_done # no
706
707 # result should be a signed negative number.
708 # compute 2's complement of the unsigned number:
709 # -negate all bits and add 1
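# i.e. -x = ~x + 1; split across two longwords (illustrative):
#	lo' = ~lo + 1
#	hi' = ~hi + carry out of (~lo + 1)
# which is what the not/addq/addx sequence computes (%d4 is still zero
# here, so the addx adds only the carry).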
710 muls64_neg:
711 not.l %d0 # negate lo(result) bits
712 not.l %d1 # negate hi(result) bits
713 addq.l &1,%d0 # add 1 to lo(result)
714 addx.l %d4,%d1 # add carry to hi(result)
715
716 muls64_done:
717 mov.w MUL64_CC(%a6),%d4
718 andi.b &0x10,%d4 # keep old 'X' bit
719 tst.l %d1 # may set 'N' bit
720 bpl.b muls64_ddone
721 ori.b &0x8,%d4 # set 'N' bit
722 muls64_ddone:
723 mov.w %d4,%cc
724
725 # here, the result is in d1 and d0. the current strategy is to save
726 # the values at the location pointed to by the stacked result pointer.
727 # use movm here to not disturb the condition codes.
728 muls64_end:
729 exg %d1,%d0
730 movm.l &0x0003,([0x10,%a6]) # save result
731
732 # EPILOGUE BEGIN ########################################################
733 # fmovm.l (%sp)+,&0x0 # restore no fpregs
734 movm.l (%sp)+,&0x003c # restore d2-d5
735 unlk %a6
736 # EPILOGUE END ##########################################################
737
738 rts
739
740 # one or both of the operands is zero so the result is also zero.
741 # save the zero result to the result location and set the 'Z' ccode bit.
742 muls64_zero:
743 clr.l %d0
744 clr.l %d1
745
746 mov.w MUL64_CC(%a6),%d4
747 andi.b &0x10,%d4
748 ori.b &0x4,%d4
749 mov.w %d4,%cc # set 'Z' ccode bit
750
751 bra.b muls64_end
752
753 #########################################################################
754 # XDEF **************************************************************** #
755 # _060LSP__cmp2_Ab_(): Emulate "cmp2.b An,<ea>". #
756 # _060LSP__cmp2_Aw_(): Emulate "cmp2.w An,<ea>". #
757 # _060LSP__cmp2_Al_(): Emulate "cmp2.l An,<ea>". #
758 # _060LSP__cmp2_Db_(): Emulate "cmp2.b Dn,<ea>". #
759 # _060LSP__cmp2_Dw_(): Emulate "cmp2.w Dn,<ea>". #
760 # _060LSP__cmp2_Dl_(): Emulate "cmp2.l Dn,<ea>". #
761 # #
762 # This is the library version which is accessed as a subroutine #
763 # and therefore does not work exactly like the 680X0 "cmp2" #
764 # instruction. #
765 # #
766 # XREF **************************************************************** #
767 # None #
768 # #
769 # INPUT *************************************************************** #
770 # 0x4(sp) = Rn #
771 # 0x8(sp) = pointer to boundary pair #
772 # #
773 # OUTPUT ************************************************************** #
774 # cc = condition codes are set correctly #
775 # #
776 # ALGORITHM *********************************************************** #
777 # In the interest of simplicity, all operands are converted to #
778 # longword size whether the operation is byte, word, or long. The #
779 # bounds are sign extended accordingly. If Rn is a data register, Rn is #
780 # also sign extended. If Rn is an address register, it need not be sign #
781 # extended since the full register is always used. #
782 # The condition codes are set correctly before the final "rts". #
783 # #
784 #########################################################################
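#
# An illustrative C model of what the routines below compute (a sketch;
# the name cmp2_model and the int flag outputs are made up, while the
# real routines return their answer in the CCR as described above):
#
#	#include <stdint.h>
#
#	static void cmp2_model(int32_t rn, int32_t lo, int32_t hi,
#	                       int *z, int *c)
#	{
#	        /* byte/word operands are assumed already sign extended */
#	        *z = (rn == lo) || (rn == hi);          /* matched a bound */
#	        *c = ((uint32_t)rn - (uint32_t)lo) >
#	             ((uint32_t)hi - (uint32_t)lo);     /* out of bounds   */
#	}
#
# the single unsigned comparison of (Rn - lo) against (hi - lo) covers
# both the signed and unsigned interpretations of the bound pair.
#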
785
786 set CMP2_CC, -4
787
788 global _060LSP__cmp2_Ab_
789 _060LSP__cmp2_Ab_:
790
791 # PROLOGUE BEGIN ########################################################
792 link.w %a6,&-4
793 movm.l &0x3800,-(%sp) # save d2-d4
794 # fmovm.l &0x0,-(%sp) # save no fpregs
795 # PROLOGUE END ##########################################################
796
797 mov.w %cc,CMP2_CC(%a6)
798 mov.l 0x8(%a6), %d2 # get regval
799
800 mov.b ([0xc,%a6],0x0),%d0
801 mov.b ([0xc,%a6],0x1),%d1
802
803 extb.l %d0 # sign extend lo bnd
804 extb.l %d1 # sign extend hi bnd
805 bra.w l_cmp2_cmp # go do the compare emulation
806
807 global _060LSP__cmp2_Aw_
808 _060LSP__cmp2_Aw_:
809
810 # PROLOGUE BEGIN ########################################################
811 link.w %a6,&-4
812 movm.l &0x3800,-(%sp) # save d2-d4
813 # fmovm.l &0x0,-(%sp) # save no fpregs
814 # PROLOGUE END ##########################################################
815
816 mov.w %cc,CMP2_CC(%a6)
817 mov.l 0x8(%a6), %d2 # get regval
818
819 mov.w ([0xc,%a6],0x0),%d0
820 mov.w ([0xc,%a6],0x2),%d1
821
822 ext.l %d0 # sign extend lo bnd
823 ext.l %d1 # sign extend hi bnd
824 bra.w l_cmp2_cmp # go do the compare emulation
825
826 global _060LSP__cmp2_Al_
827 _060LSP__cmp2_Al_:
828
829 # PROLOGUE BEGIN ########################################################
830 link.w %a6,&-4
831 movm.l &0x3800,-(%sp) # save d2-d4
832 # fmovm.l &0x0,-(%sp) # save no fpregs
833 # PROLOGUE END ##########################################################
834
835 mov.w %cc,CMP2_CC(%a6)
836 mov.l 0x8(%a6), %d2 # get regval
837
838 mov.l ([0xc,%a6],0x0),%d0
839 mov.l ([0xc,%a6],0x4),%d1
840 bra.w l_cmp2_cmp # go do the compare emulation
841
842 global _060LSP__cmp2_Db_
843 _060LSP__cmp2_Db_:
844
845 # PROLOGUE BEGIN ########################################################
846 link.w %a6,&-4
847 movm.l &0x3800,-(%sp) # save d2-d4
848 # fmovm.l &0x0,-(%sp) # save no fpregs
849 # PROLOGUE END ##########################################################
850
851 mov.w %cc,CMP2_CC(%a6)
852 mov.l 0x8(%a6), %d2 # get regval
853
854 mov.b ([0xc,%a6],0x0),%d0
855 mov.b ([0xc,%a6],0x1),%d1
856
857 extb.l %d0 # sign extend lo bnd
858 extb.l %d1 # sign extend hi bnd
859
860 # operation is a data register compare.
861 # sign extend byte to long so we can do simple longword compares.
862 extb.l %d2 # sign extend data byte
863 bra.w l_cmp2_cmp # go do the compare emulation
864
865 global _060LSP__cmp2_Dw_
866 _060LSP__cmp2_Dw_:
867
868 # PROLOGUE BEGIN ########################################################
869 link.w %a6,&-4
870 movm.l &0x3800,-(%sp) # save d2-d4
871 # fmovm.l &0x0,-(%sp) # save no fpregs
872 # PROLOGUE END ##########################################################
873
874 mov.w %cc,CMP2_CC(%a6)
875 mov.l 0x8(%a6), %d2 # get regval
876
877 mov.w ([0xc,%a6],0x0),%d0
878 mov.w ([0xc,%a6],0x2),%d1
879
880 ext.l %d0 # sign extend lo bnd
881 ext.l %d1 # sign extend hi bnd
882
883 # operation is a data register compare.
884 # sign extend word to long so we can do simple longword compares.
885 ext.l %d2 # sign extend data word
886 bra.w l_cmp2_cmp # go emulate compare
887
888 global _060LSP__cmp2_Dl_
889 _060LSP__cmp2_Dl_:
890
891 # PROLOGUE BEGIN ########################################################
892 link.w %a6,&-4
893 movm.l &0x3800,-(%sp) # save d2-d4
894 # fmovm.l &0x0,-(%sp) # save no fpregs
895 # PROLOGUE END ##########################################################
896
897 mov.w %cc,CMP2_CC(%a6)
898 mov.l 0x8(%a6), %d2 # get regval
899
900 mov.l ([0xc,%a6],0x0),%d0
901 mov.l ([0xc,%a6],0x4),%d1
902
903 #
904 # To set the ccodes correctly:
905 # (1) save 'Z' bit from (Rn - lo)
906 # (2) save 'Z' and 'C' bits from ((hi - lo) - (Rn - lo))
907 # (3) keep 'X', 'N', and 'V' from before instruction
908 # (4) combine ccodes
909 #
910 l_cmp2_cmp:
911 sub.l %d0, %d2 # (Rn - lo)
912 mov.w %cc, %d3 # fetch resulting ccodes
913 andi.b &0x4, %d3 # keep 'Z' bit
914 sub.l %d0, %d1 # (hi - lo)
915 cmp.l %d1,%d2 # ((hi - lo) - (Rn - lo))
916
917 mov.w %cc, %d4 # fetch resulting ccodes
918 or.b %d4, %d3 # combine w/ earlier ccodes
919 andi.b &0x5, %d3 # keep 'Z' and 'C'
920
921 mov.w CMP2_CC(%a6), %d4 # fetch old ccodes
922 andi.b &0x1a, %d4 # keep 'X','N','V' bits
923 or.b %d3, %d4 # insert new ccodes
924 mov.w %d4,%cc # save new ccodes
925
926 # EPILOGUE BEGIN ########################################################
927 # fmovm.l (%sp)+,&0x0 # restore no fpregs
928 movm.l (%sp)+,&0x001c # restore d2-d4
929 unlk %a6
930 # EPILOGUE END ##########################################################
931
932 rts