]>
Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | #ifndef __ASM_ARM_DIV64 |
2 | #define __ASM_ARM_DIV64 | |
3 | ||
3927f2e8 | 4 | #include <linux/types.h> |
9f97da78 | 5 | #include <asm/compiler.h> |
1da177e4 LT |
6 | |
7 | /* | |
040b323b | 8 | * The semantics of __div64_32() are: |
1da177e4 | 9 | * |
040b323b | 10 | * uint32_t __div64_32(uint64_t *n, uint32_t base) |
1da177e4 LT |
11 | * { |
12 | * uint32_t remainder = *n % base; | |
13 | * *n = *n / base; | |
14 | * return remainder; | |
15 | * } | |
16 | * | |
17 | * In other words, a 64-bit dividend with a 32-bit divisor producing | |
18 | * a 64-bit result and a 32-bit remainder. To accomplish this optimally | |
040b323b NP |
19 | * we override the generic version in lib/div64.c to call our __do_div64 |
20 | * assembly implementation with a completely non-standard calling convention | |
21 | * for arguments and results (beware). | |
1da177e4 LT |
22 | */ |
23 | ||
24 | #ifdef __ARMEB__ | |
25 | #define __xh "r0" | |
26 | #define __xl "r1" | |
27 | #else | |
28 | #define __xl "r0" | |
29 | #define __xh "r1" | |
30 | #endif | |
31 | ||
040b323b NP |
32 | static inline uint32_t __div64_32(uint64_t *n, uint32_t base) |
33 | { | |
34 | register unsigned int __base asm("r4") = base; | |
35 | register unsigned long long __n asm("r0") = *n; | |
36 | register unsigned long long __res asm("r2"); | |
37 | register unsigned int __rem asm(__xh); | |
38 | asm( __asmeq("%0", __xh) | |
39 | __asmeq("%1", "r2") | |
40 | __asmeq("%2", "r0") | |
41 | __asmeq("%3", "r4") | |
42 | "bl __do_div64" | |
43 | : "=r" (__rem), "=r" (__res) | |
44 | : "r" (__n), "r" (__base) | |
45 | : "ip", "lr", "cc"); | |
46 | *n = __res; | |
47 | return __rem; | |
48 | } | |
49 | #define __div64_32 __div64_32 | |
50 | ||
51 | #if !defined(CONFIG_AEABI) | |
fa4adc61 NP |
52 | |
53 | /* | |
040b323b NP |
54 | * In OABI configurations, some uses of the do_div function |
55 | * cause gcc to run out of registers. To work around that, | |
56 | * we can force the use of the out-of-line version for | |
57 | * configurations that build an OABI kernel. | |
fa4adc61 | 58 | */ |
040b323b | 59 | #define do_div(n, base) __div64_32(&(n), base) |
fa4adc61 | 60 | |
040b323b | 61 | #else |
fa4adc61 NP |
62 | |
63 | /* | |
040b323b NP |
64 | * gcc versions earlier than 4.0 are simply too problematic for the |
65 | * __div64_const32() code in asm-generic/div64.h. First there is | |
66 | * gcc PR 15089 that tends to trigger on more complex constructs, spurious | |
67 | * .global __udivsi3 are inserted even if none of those symbols are | |
68 | * referenced in the generated code, and those gcc versions are not able | |
69 | * to do constant propagation on long long values anyway. | |
fa4adc61 | 70 | */ |
040b323b NP |
71 | |
72 | #define __div64_const32_is_OK (__GNUC__ >= 4) | |
73 | ||
74 | static inline uint64_t __arch_xprod_64(uint64_t m, uint64_t n, bool bias) | |
75 | { | |
76 | unsigned long long res; | |
77 | unsigned int tmp = 0; | |
78 | ||
79 | if (!bias) { | |
80 | asm ( "umull %Q0, %R0, %Q1, %Q2\n\t" | |
81 | "mov %Q0, #0" | |
82 | : "=&r" (res) | |
83 | : "r" (m), "r" (n) | |
84 | : "cc"); | |
85 | } else if (!(m & ((1ULL << 63) | (1ULL << 31)))) { | |
86 | res = m; | |
87 | asm ( "umlal %Q0, %R0, %Q1, %Q2\n\t" | |
88 | "mov %Q0, #0" | |
89 | : "+&r" (res) | |
90 | : "r" (m), "r" (n) | |
91 | : "cc"); | |
92 | } else { | |
93 | asm ( "umull %Q0, %R0, %Q1, %Q2\n\t" | |
94 | "cmn %Q0, %Q1\n\t" | |
95 | "adcs %R0, %R0, %R1\n\t" | |
96 | "adc %Q0, %3, #0" | |
97 | : "=&r" (res) | |
98 | : "r" (m), "r" (n), "r" (tmp) | |
99 | : "cc"); | |
100 | } | |
101 | ||
102 | if (!(m & ((1ULL << 63) | (1ULL << 31)))) { | |
103 | asm ( "umlal %R0, %Q0, %R1, %Q2\n\t" | |
104 | "umlal %R0, %Q0, %Q1, %R2\n\t" | |
105 | "mov %R0, #0\n\t" | |
106 | "umlal %Q0, %R0, %R1, %R2" | |
107 | : "+&r" (res) | |
108 | : "r" (m), "r" (n) | |
109 | : "cc"); | |
110 | } else { | |
111 | asm ( "umlal %R0, %Q0, %R2, %Q3\n\t" | |
112 | "umlal %R0, %1, %Q2, %R3\n\t" | |
113 | "mov %R0, #0\n\t" | |
114 | "adds %Q0, %1, %Q0\n\t" | |
115 | "adc %R0, %R0, #0\n\t" | |
116 | "umlal %Q0, %R0, %R2, %R3" | |
117 | : "+&r" (res), "+&r" (tmp) | |
118 | : "r" (m), "r" (n) | |
119 | : "cc"); | |
120 | } | |
121 | ||
122 | return res; | |
123 | } | |
124 | #define __arch_xprod_64 __arch_xprod_64 | |
125 | ||
126 | #include <asm-generic/div64.h> | |
fa4adc61 NP |
127 | |
128 | #endif | |
129 | ||
1da177e4 | 130 | #endif |