1 /*===-- udivsi3.S - 32-bit unsigned integer divide ------------------------===//
3 * The LLVM Compiler Infrastructure
5 * This file is dual licensed under the MIT and the University of Illinois Open
6 * Source Licenses. See LICENSE.TXT for details.
8 *===----------------------------------------------------------------------===//
10 * This file implements the __udivsi3 (32-bit unsigned integer divide)
11 * function for the ARM 32-bit architecture.
13 *===----------------------------------------------------------------------===*/
15 #include "../assembly.h"
/* NOTE(review): presumably makes __aeabi_uidiv a second name for __udivsi3.
 * The macro is defined outside this file (likely in ../assembly.h); its exact
 * expansion is not visible here -- confirm. */
23 DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_uidiv, __udivsi3)
25 @ unsigned int __udivsi3(unsigned int dividend, unsigned int divisor)
26 @ Calculate and return the quotient of the (unsigned) division.
/*
 * __udivsi3: 32-bit unsigned integer divide for 32-bit ARM.
 *
 * Two implementations are selected by the preprocessor:
 *   - __ARM_ARCH_EXT_IDIV__ non-zero: a short path (only its divide-by-zero
 *     branch is visible in this excerpt);
 *   - otherwise: binary long division built from a table of fixed-size
 *     block(shift) expansions that ends at the div0block label. The code
 *     computes an address inside that table and jumps to it.
 *
 * NOTE(review): this listing is an excerpt. Many instructions and several
 * #else/#endif lines of the original file are not visible, so the comments
 * added here describe only what the visible lines establish and hedge
 * everything else.
 */
28 DEFINE_COMPILERRT_FUNCTION(__udivsi3)
/* Taken when __ARM_ARCH_EXT_IDIV__ is defined to a non-zero value. */
29 #if __ARM_ARCH_EXT_IDIV__
/* Branch to divby0 on "equal". The flag-setting instruction is not visible
 * here (presumably a zero test of the divisor -- confirm). */
31 beq LOCAL_LABEL(divby0)
43 #else /* ! __ARM_ARCH_EXT_IDIV__ */
/* Branch to divby0 on carry clear (unsigned lower). The compare that sets the
 * flags is not visible in this excerpt. */
45 bcc LOCAL_LABEL(divby0)
/* Thumb-1 variant: bne jumps to num_neq_denom, skipping whatever lies between
 * the branch and the label (not visible here). */
46 #if defined(USE_THUMB_1)
47 bne LOCAL_LABEL(num_neq_denom)
49 LOCAL_LABEL(num_neq_denom):
/* Thumb-1 variant: bhs (unsigned higher-or-same) jumps to num_ge_denom; the
 * skipped instructions are not visible in this excerpt. */
55 #if defined(USE_THUMB_1)
56 bhs LOCAL_LABEL(num_ge_denom)
59 LOCAL_LABEL(num_ge_denom):
/* Algorithm overview from the original file follows (its comment delimiters
 * are not visible in this excerpt). */
67 * Implement division using binary long division algorithm.
69 * r0 is the numerator, r1 the denominator.
71 * The code before JMP computes the correct shift I, so that
72 * r0 and (r1 << I) have the highest bit set in the same position.
73 * At the time of JMP, ip := .Ldiv0block - 12 * I.
74 * This depends on the fixed instruction size of block.
75 * For ARM mode, this is 12 Bytes, for THUMB mode 14 Bytes.
77 * block(shift) implements the test-and-update-quotient core.
78 * It assumes (r0 << shift) can be computed without overflow and
79 * that (r0 << shift) < 2 * r1. The quotient is stored in r3.
/* CLZ variant: the jump address is derived from r3. Judging by the comment
 * below, r3 and ip are loaded with clz results by instructions that are not
 * visible here. */
82 # if defined(__ARM_FEATURE_CLZ)
85 /* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */
/* ip = address of div0block minus a multiple of r3.
 * The two subtractions by (r3 << 2) and (r3 << 3) remove 12 * r3. The extra
 * (r3 << 1) in the USE_THUMB_2 branch brings the total to 14 * r3, matching
 * the 12-byte and 14-byte block sizes quoted in the overview above.
 * NOTE(review): the #else/#endif lines are not visible; this assumes the
 * last two subtractions are shared by both branches -- confirm.
 * The "+ 1" on the Thumb-2 adr presumably sets the Thumb bit of the jump
 * target -- confirm. */
87 # if defined(USE_THUMB_2)
88 adr ip, LOCAL_LABEL(div0block) + 1
89 sub ip, ip, r3, lsl #1
91 adr ip, LOCAL_LABEL(div0block)
93 sub ip, ip, r3, lsl #2
94 sub ip, ip, r3, lsl #3
97 # else /* No CLZ Feature */
/* Building for Thumb-2 without CLZ is rejected at preprocessing time. */
98 # if defined(USE_THUMB_2)
99 # error THUMB mode requires CLZ or UDIV
/* BLOCK_SIZE is the byte size of one block(shift) expansion: 10 for Thumb-1,
 * 12 otherwise. The overview above lists only 12 (ARM) and 14 (THUMB); the
 * 10-byte Thumb-1 size agrees with the "31 blocks = 310 bytes" padding note
 * further down. */
101 # if defined(USE_THUMB_1)
102 # define BLOCK_SIZE 10
104 # define BLOCK_SIZE 12
/* Load the address of div0block as the starting jump target: into r0 for
 * Thumb-1, into ip otherwise. */
108 # if defined(USE_THUMB_1)
110 adr r0, LOCAL_LABEL(div0block)
113 adr ip, LOCAL_LABEL(div0block)
/* Binary search for the shift, in steps of 16, 8, 4, 2 and 1 blocks.
 * Each step conditionally moves the jump target back by k * BLOCK_SIZE bytes.
 * Thumb-1 branches over the subtraction with blo (unsigned lower); the other
 * variant uses a conditionally executed subhs on ip. The compares that set the
 * flags for each step are not visible in this excerpt. */
117 # if defined(USE_THUMB_1)
118 blo LOCAL_LABEL(skip_16)
120 subs r0, r0, #(16 * BLOCK_SIZE)
121 LOCAL_LABEL(skip_16):
124 subhs ip, ip, #(16 * BLOCK_SIZE)
/* Step of 8 blocks. */
129 # if defined(USE_THUMB_1)
130 blo LOCAL_LABEL(skip_8)
132 subs r0, r0, #(8 * BLOCK_SIZE)
136 subhs ip, ip, #(8 * BLOCK_SIZE)
/* Step of 4 blocks. The subhs here uses the two-operand form, with ip as both
 * destination and first operand. */
141 # if defined(USE_THUMB_1)
142 blo LOCAL_LABEL(skip_4)
144 subs r0, r0, #(4 * BLOCK_SIZE)
148 subhs ip, #(4 * BLOCK_SIZE)
/* Step of 2 blocks. */
153 # if defined(USE_THUMB_1)
154 blo LOCAL_LABEL(skip_2)
156 subs r0, r0, #(2 * BLOCK_SIZE)
160 subhs ip, ip, #(2 * BLOCK_SIZE)
163 /* Last block, no need to update r2 or r3. */
/* Step of 1 block. The non-Thumb-1 variant uses subls (unsigned lower-or-same)
 * instead of subhs; presumably its compare has the operands in the opposite
 * order -- the compare is not visible here, confirm. */
164 # if defined(USE_THUMB_1)
167 blo LOCAL_LABEL(skip_1)
168 subs r0, r0, #(1 * BLOCK_SIZE)
177 subls ip, ip, #(1 * BLOCK_SIZE)
183 # endif /* __ARM_FEATURE_CLZ */
/* The lines below, up to the first block(shift) definition, include a call to
 * __aeabi_idiv0 when __ARM_EABI__ is defined. */
187 /* due to the range limit of branch in Thumb1, we have to place the
191 # if defined(__ARM_EABI__)
193 bl __aeabi_idiv0 // due to relocation limit, can't use b.
200 #if defined(USE_THUMB_1)
201 #define block(shift) \
202 lsls r2, r1, IMM shift; \
204 blo LOCAL_LABEL(block_skip_##shift); \
206 LOCAL_LABEL(block_skip_##shift) :; \
207 adcs r3, r3 /* same as ((r3 << 1) | Carry). Carry is set if r0 >= r2. */
/* Thumb-1 block(shift), defined just above:
 *   - lsls puts r1 << shift in r2;
 *   - blo jumps to block_skip_<shift>, skipping whatever sits between the
 *     branch and the label (not visible in this excerpt);
 *   - adcs r3, r3 shifts the carry flag into the low bit of the quotient in r3.
 * The compare that feeds blo and adcs is not visible here. */
209 /* TODO: if current location counter is not word aligned, we don't
210 need the .p2align and nop */
211 /* Label div0block must be word-aligned. First align block 31 */
/* 310 is 2 more than a multiple of 4, so the run of 31 blocks has to start
 * 2 bytes past a word boundary for div0block to end up word-aligned.
 * Presumably the nop supplies that offset after an alignment directive that
 * is not visible in this excerpt -- confirm. */
213 nop /* Padding to align div0block as 31 blocks = 310 bytes */
/* Second block(shift) definition, for the non-Thumb-1 case (the preprocessor
 * line separating it from the Thumb-1 definition is not visible here).
 * It compares r0 with r1 << shift; on unsigned higher-or-same it adds
 * (1 << shift) to the quotient in r3 and subtracts r1 << shift from r0. */
216 #define block(shift) \
217 cmp r0, r1, lsl IMM shift; \
219 WIDE(addhs) r3, r3, IMM (1 << shift); \
220 WIDE(subhs) r0, r0, r1, lsl IMM shift
/* Base of the computed jump: every adr above takes its address from this
 * label. */
254 LOCAL_LABEL(div0block):
259 #endif /* __ARM_ARCH_EXT_IDIV__ */
261 END_COMPILERRT_FUNCTION(__udivsi3)
/* NOTE(review): presumably marks this object as not requiring an executable
 * stack. The macro is defined outside this file -- confirm. */
263 NO_EXEC_STACK_DIRECTIVE