]> git.proxmox.com Git - rustc.git/blob - src/libcompiler_builtins/compiler-rt/lib/builtins/arm/udivsi3.S
New upstream version 1.25.0+dfsg1
[rustc.git] / src / libcompiler_builtins / compiler-rt / lib / builtins / arm / udivsi3.S
1 /*===-- udivsi3.S - 32-bit unsigned integer divide ------------------------===//
2 *
3 * The LLVM Compiler Infrastructure
4 *
5 * This file is dual licensed under the MIT and the University of Illinois Open
6 * Source Licenses. See LICENSE.TXT for details.
7 *
8 *===----------------------------------------------------------------------===//
9 *
10 * This file implements the __udivsi3 (32-bit unsigned integer divide)
11 * function for the ARM 32-bit architecture.
12 *
13 *===----------------------------------------------------------------------===*/
14
15 #include "../assembly.h"
16
17 .syntax unified /* unified ARM/Thumb assembler syntax for the whole file */
18 .text
19
20 DEFINE_CODE_STATE /* macro from ../assembly.h; presumably selects ARM vs. Thumb state -- confirm there */
21
22 .p2align 2 /* align the function entry to 2^2 = 4 bytes */
23 DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_uidiv, __udivsi3) /* presumably exports __aeabi_uidiv as an alias of __udivsi3 -- confirm in assembly.h */
24
25 @ unsigned int __udivsi3(unsigned int dividend, unsigned int divisor)
26 @ Calculate and return the quotient of the (unsigned) division.
27 /* In: r0 = dividend, r1 = divisor. Out: r0 = quotient. r2, r3 and ip are used as scratch on the non-UDIV paths. On division by zero r0 is set to 0 and, under __ARM_EABI__, __aeabi_idiv0 is invoked. */
28 DEFINE_COMPILERRT_FUNCTION(__udivsi3)
29 #if __ARM_ARCH_EXT_IDIV__
30 tst r1, r1 /* Z set iff divisor == 0 */
31 beq LOCAL_LABEL(divby0)
32 udiv r0, r0, r1 /* hardware unsigned divide */
33 bx lr
34
35 LOCAL_LABEL(divby0):
36 movs r0, #0 /* movs, not mov: the flag-setting form has a 16-bit Thumb encoding, so this also assembles for Thumb-1-style cores that provide UDIV (e.g. ARMv8-M Baseline); the flags are dead here */
37 # ifdef __ARM_EABI__
38 b __aeabi_idiv0 /* plain branch, lr untouched: the handler returns to our caller */
39 # else
40 JMP(lr)
41 # endif
42
43 #else /* ! __ARM_ARCH_EXT_IDIV__ */
44 cmp r1, #1 /* C clear iff r1 == 0; Z set iff r1 == 1 */
45 bcc LOCAL_LABEL(divby0)
46 #if defined(USE_THUMB_1)
47 bne LOCAL_LABEL(num_neq_denom)
48 JMP(lr) /* divisor == 1: the quotient is r0 unchanged */
49 LOCAL_LABEL(num_neq_denom):
50 #else
51 IT(eq)
52 JMPc(lr, eq) /* divisor == 1: the quotient is r0 unchanged */
53 #endif
54 cmp r0, r1 /* dividend < divisor (unsigned): the quotient is 0 */
55 #if defined(USE_THUMB_1)
56 bhs LOCAL_LABEL(num_ge_denom)
57 movs r0, #0
58 JMP(lr)
59 LOCAL_LABEL(num_ge_denom):
60 #else
61 ITT(cc)
62 movcc r0, #0
63 JMPc(lr, cc)
64 #endif
65
66 /*
67 * Implement division using binary long division algorithm.
68 *
69 * r0 is the numerator, r1 the denominator.
70 *
71 * The code before JMP computes the correct shift I, so that
72 * r0 and (r1 << I) have the highest bit set in the same position.
73 * At the time of JMP, ip := .Ldiv0block - (block size) * I.
74 * This depends on the fixed instruction size of block.
75 * For ARM mode, this is 12 Bytes, for Thumb-2 mode 14 Bytes, for Thumb-1 mode 10 Bytes (BLOCK_SIZE).
76 *
77 * block(shift) implements the test-and-update-quotient core.
78 * It assumes (r1 << shift) can be computed without overflow and
79 * that r0 < 2 * (r1 << shift). The quotient is stored in r3.
80 */
81
82 # if defined(__ARM_FEATURE_CLZ)
83 clz ip, r0
84 clz r3, r1
85 /* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */
86 sub r3, r3, ip /* r3 = I, the shift that lines up the top bits */
87 # if defined(USE_THUMB_2)
88 adr ip, LOCAL_LABEL(div0block) + 1 /* +1 sets bit 0 so that bx stays in Thumb state */
89 sub ip, ip, r3, lsl #1 /* Thumb-2 only: extra 2 * I, giving 14 * I in total */
90 # else
91 adr ip, LOCAL_LABEL(div0block)
92 # endif
93 sub ip, ip, r3, lsl #2 /* ip -= 4 * I */
94 sub ip, ip, r3, lsl #3 /* ip -= 8 * I (12 * I together with the line above) */
95 mov r3, #0 /* quotient accumulator starts at 0 */
96 bx ip /* enter the unrolled chain at block(I) */
97 # else /* No CLZ Feature */
98 # if defined(USE_THUMB_2)
99 # error THUMB mode requires CLZ or UDIV
100 # endif
101 # if defined(USE_THUMB_1)
102 # define BLOCK_SIZE 10
103 # else
104 # define BLOCK_SIZE 12
105 # endif
106
107 mov r2, r0 /* r2 = working copy of the numerator for the shift search */
108 # if defined(USE_THUMB_1)
109 mov ip, r0 /* Thumb-1: park the numerator in ip; r0 accumulates the jump target instead */
110 adr r0, LOCAL_LABEL(div0block)
111 adds r0, #1 /* bit 0 set: the target is Thumb code */
112 # else
113 adr ip, LOCAL_LABEL(div0block)
114 # endif
115 lsrs r3, r2, #16 /* if (r2 >> 16) >= r1: r2 >>= 16 and start 16 blocks earlier */
116 cmp r3, r1
117 # if defined(USE_THUMB_1)
118 blo LOCAL_LABEL(skip_16)
119 movs r2, r3
120 subs r0, r0, #(16 * BLOCK_SIZE)
121 LOCAL_LABEL(skip_16):
122 # else
123 movhs r2, r3
124 subhs ip, ip, #(16 * BLOCK_SIZE)
125 # endif
126
127 lsrs r3, r2, #8 /* same test for a further shift of 8 */
128 cmp r3, r1
129 # if defined(USE_THUMB_1)
130 blo LOCAL_LABEL(skip_8)
131 movs r2, r3
132 subs r0, r0, #(8 * BLOCK_SIZE)
133 LOCAL_LABEL(skip_8):
134 # else
135 movhs r2, r3
136 subhs ip, ip, #(8 * BLOCK_SIZE)
137 # endif
138
139 lsrs r3, r2, #4 /* same test for a further shift of 4 */
140 cmp r3, r1
141 # if defined(USE_THUMB_1)
142 blo LOCAL_LABEL(skip_4)
143 movs r2, r3
144 subs r0, r0, #(4 * BLOCK_SIZE)
145 LOCAL_LABEL(skip_4):
146 # else
147 movhs r2, r3
148 subhs ip, ip, #(4 * BLOCK_SIZE)
149 # endif
150
151 lsrs r3, r2, #2 /* same test for a further shift of 2 */
152 cmp r3, r1
153 # if defined(USE_THUMB_1)
154 blo LOCAL_LABEL(skip_2)
155 movs r2, r3
156 subs r0, r0, #(2 * BLOCK_SIZE)
157 LOCAL_LABEL(skip_2):
158 # else
159 movhs r2, r3
160 subhs ip, ip, #(2 * BLOCK_SIZE)
161 # endif
162
163 /* Last block, no need to update r2 or r3. */
164 # if defined(USE_THUMB_1)
165 lsrs r3, r2, #1
166 cmp r3, r1
167 blo LOCAL_LABEL(skip_1)
168 subs r0, r0, #(1 * BLOCK_SIZE)
169 LOCAL_LABEL(skip_1):
170 movs r2, r0 /* r2 = computed entry address */
171 mov r0, ip /* restore the numerator into r0 for the blocks */
172 movs r3, #0 /* quotient accumulator starts at 0 */
173 JMP (r2)
174
175 # else
176 cmp r1, r2, lsr #1 /* ls <=> r1 <= (r2 >> 1): one more shift step fits */
177 subls ip, ip, #(1 * BLOCK_SIZE)
178
179 movs r3, #0 /* quotient accumulator starts at 0 */
180
181 JMP(ip)
182 # endif
183 # endif /* __ARM_FEATURE_CLZ */
184
185 /* IMM stands for a literal '#': a bare '#' inside a function-like macro body would be taken as a preprocessor operator. */
186 #define IMM #
187 /* due to the range limit of branch in Thumb1, we have to place the
188 block closer */
189 LOCAL_LABEL(divby0):
190 movs r0, #0
191 # if defined(__ARM_EABI__)
192 push {r7, lr} /* save lr across the call; r7 presumably only keeps the stack 8-byte aligned -- confirm */
193 bl __aeabi_idiv0 // due to relocation limit, can't use b.
194 pop {r7, pc}
195 # else
196 JMP(lr)
197 # endif
198
199
200 #if defined(USE_THUMB_1)
201 #define block(shift) \
202 lsls r2, r1, IMM shift; \
203 cmp r0, r2; \
204 blo LOCAL_LABEL(block_skip_##shift); \
205 subs r0, r0, r2; \
206 LOCAL_LABEL(block_skip_##shift) :; \
207 adcs r3, r3 /* same as ((r3 << 1) | Carry). Carry is set if r0 >= r2. */
208
209 /* TODO: if current location counter is not word aligned, we don't
210 need the .p2align and nop */
211 /* Label div0block must be word-aligned. First align block 31 */
212 .p2align 2
213 nop /* Padding to align div0block as 31 blocks = 310 bytes */
214
215 #else
216 #define block(shift) \
217 cmp r0, r1, lsl IMM shift; \
218 ITT(hs); \
219 WIDE(addhs) r3, r3, IMM (1 << shift); \
220 WIDE(subhs) r0, r0, r1, lsl IMM shift
221 #endif
222
223 block(31)
224 block(30)
225 block(29)
226 block(28)
227 block(27)
228 block(26)
229 block(25)
230 block(24)
231 block(23)
232 block(22)
233 block(21)
234 block(20)
235 block(19)
236 block(18)
237 block(17)
238 block(16)
239 block(15)
240 block(14)
241 block(13)
242 block(12)
243 block(11)
244 block(10)
245 block(9)
246 block(8)
247 block(7)
248 block(6)
249 block(5)
250 block(4)
251 block(3)
252 block(2)
253 block(1)
254 LOCAL_LABEL(div0block):
255 block(0)
256
257 mov r0, r3 /* return the accumulated quotient */
258 JMP(lr)
259 #endif /* __ARM_ARCH_EXT_IDIV__ */
260
261 END_COMPILERRT_FUNCTION(__udivsi3)
262
263 NO_EXEC_STACK_DIRECTIVE /* macro from ../assembly.h; presumably marks the object as not needing an executable stack -- confirm there */
264