]>
Commit | Line | Data |
---|---|---|
320054e8 DG |
1 | /* Copyright (C) 2011 by Valentin Ochs |
2 | * | |
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy | |
4 | * of this software and associated documentation files (the "Software"), to | |
5 | * deal in the Software without restriction, including without limitation the | |
6 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or | |
7 | * sell copies of the Software, and to permit persons to whom the Software is | |
8 | * furnished to do so, subject to the following conditions: | |
9 | * | |
10 | * The above copyright notice and this permission notice shall be included in | |
11 | * all copies or substantial portions of the Software. | |
12 | * | |
13 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
14 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
15 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
16 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
17 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
18 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
19 | * IN THE SOFTWARE. | |
20 | */ | |
21 | ||
22 | /* Minor changes by Rich Felker for integration in musl, 2011-04-27. */ | |
23 | ||
24 | /* Smoothsort, an adaptive variant of Heapsort. Memory usage: O(1). | |
25 | Run time: Worst case O(n log n), close to O(n) in the mostly-sorted case. */ | |
26 | ||
5d8a1409 | 27 | #define _BSD_SOURCE |
320054e8 DG |
28 | #include <stdint.h> |
29 | #include <stdlib.h> | |
30 | #include <string.h> | |
31 | ||
32 | #include "atomic.h" | |
33 | #define ntz(x) a_ctz_l((x)) | |
34 | ||
5d8a1409 | 35 | typedef int (*cmpfun)(const void *, const void *, void *); |
320054e8 DG |
36 | |
37 | static inline int pntz(size_t p[2]) { | |
38 | int r = ntz(p[0] - 1); | |
39 | if(r != 0 || (r = 8*sizeof(size_t) + ntz(p[1])) != 8*sizeof(size_t)) { | |
40 | return r; | |
41 | } | |
42 | return 0; | |
43 | } | |
44 | ||
45 | static void cycle(size_t width, unsigned char* ar[], int n) | |
46 | { | |
47 | unsigned char tmp[256]; | |
48 | size_t l; | |
49 | int i; | |
50 | ||
51 | if(n < 2) { | |
52 | return; | |
53 | } | |
54 | ||
55 | ar[n] = tmp; | |
56 | while(width) { | |
57 | l = sizeof(tmp) < width ? sizeof(tmp) : width; | |
58 | memcpy(ar[n], ar[0], l); | |
59 | for(i = 0; i < n; i++) { | |
60 | memcpy(ar[i], ar[i + 1], l); | |
61 | ar[i] += l; | |
62 | } | |
63 | width -= l; | |
64 | } | |
65 | } | |
66 | ||
67 | /* shl() and shr() need n > 0 */ | |
68 | static inline void shl(size_t p[2], int n) | |
69 | { | |
70 | if(n >= 8 * sizeof(size_t)) { | |
71 | n -= 8 * sizeof(size_t); | |
72 | p[1] = p[0]; | |
73 | p[0] = 0; | |
74 | } | |
75 | p[1] <<= n; | |
76 | p[1] |= p[0] >> (sizeof(size_t) * 8 - n); | |
77 | p[0] <<= n; | |
78 | } | |
79 | ||
80 | static inline void shr(size_t p[2], int n) | |
81 | { | |
82 | if(n >= 8 * sizeof(size_t)) { | |
83 | n -= 8 * sizeof(size_t); | |
84 | p[0] = p[1]; | |
85 | p[1] = 0; | |
86 | } | |
87 | p[0] >>= n; | |
88 | p[0] |= p[1] << (sizeof(size_t) * 8 - n); | |
89 | p[1] >>= n; | |
90 | } | |
91 | ||
5d8a1409 | 92 | static void sift(unsigned char *head, size_t width, cmpfun cmp, void *arg, int pshift, size_t lp[]) |
320054e8 DG |
93 | { |
94 | unsigned char *rt, *lf; | |
95 | unsigned char *ar[14 * sizeof(size_t) + 1]; | |
96 | int i = 1; | |
97 | ||
98 | ar[0] = head; | |
99 | while(pshift > 1) { | |
100 | rt = head - width; | |
101 | lf = head - width - lp[pshift - 2]; | |
102 | ||
5d8a1409 | 103 | if(cmp(ar[0], lf, arg) >= 0 && cmp(ar[0], rt, arg) >= 0) { |
320054e8 DG |
104 | break; |
105 | } | |
5d8a1409 | 106 | if(cmp(lf, rt, arg) >= 0) { |
320054e8 DG |
107 | ar[i++] = lf; |
108 | head = lf; | |
109 | pshift -= 1; | |
110 | } else { | |
111 | ar[i++] = rt; | |
112 | head = rt; | |
113 | pshift -= 2; | |
114 | } | |
115 | } | |
116 | cycle(width, ar, i); | |
117 | } | |
118 | ||
5d8a1409 | 119 | static void trinkle(unsigned char *head, size_t width, cmpfun cmp, void *arg, size_t pp[2], int pshift, int trusty, size_t lp[]) |
320054e8 DG |
120 | { |
121 | unsigned char *stepson, | |
122 | *rt, *lf; | |
123 | size_t p[2]; | |
124 | unsigned char *ar[14 * sizeof(size_t) + 1]; | |
125 | int i = 1; | |
126 | int trail; | |
127 | ||
128 | p[0] = pp[0]; | |
129 | p[1] = pp[1]; | |
130 | ||
131 | ar[0] = head; | |
132 | while(p[0] != 1 || p[1] != 0) { | |
133 | stepson = head - lp[pshift]; | |
5d8a1409 | 134 | if(cmp(stepson, ar[0], arg) <= 0) { |
320054e8 DG |
135 | break; |
136 | } | |
137 | if(!trusty && pshift > 1) { | |
138 | rt = head - width; | |
139 | lf = head - width - lp[pshift - 2]; | |
5d8a1409 | 140 | if(cmp(rt, stepson, arg) >= 0 || cmp(lf, stepson, arg) >= 0) { |
320054e8 DG |
141 | break; |
142 | } | |
143 | } | |
144 | ||
145 | ar[i++] = stepson; | |
146 | head = stepson; | |
147 | trail = pntz(p); | |
148 | shr(p, trail); | |
149 | pshift += trail; | |
150 | trusty = 0; | |
151 | } | |
152 | if(!trusty) { | |
153 | cycle(width, ar, i); | |
5d8a1409 | 154 | sift(head, width, cmp, arg, pshift, lp); |
320054e8 DG |
155 | } |
156 | } | |
157 | ||
5d8a1409 | 158 | void __qsort_r(void *base, size_t nel, size_t width, cmpfun cmp, void *arg) |
320054e8 DG |
159 | { |
160 | size_t lp[12*sizeof(size_t)]; | |
161 | size_t i, size = width * nel; | |
162 | unsigned char *head, *high; | |
163 | size_t p[2] = {1, 0}; | |
164 | int pshift = 1; | |
165 | int trail; | |
166 | ||
167 | if (!size) return; | |
168 | ||
169 | head = base; | |
170 | high = head + size - width; | |
171 | ||
172 | /* Precompute Leonardo numbers, scaled by element width */ | |
173 | for(lp[0]=lp[1]=width, i=2; (lp[i]=lp[i-2]+lp[i-1]+width) < size; i++); | |
174 | ||
175 | while(head < high) { | |
176 | if((p[0] & 3) == 3) { | |
5d8a1409 | 177 | sift(head, width, cmp, arg, pshift, lp); |
320054e8 DG |
178 | shr(p, 2); |
179 | pshift += 2; | |
180 | } else { | |
181 | if(lp[pshift - 1] >= high - head) { | |
5d8a1409 | 182 | trinkle(head, width, cmp, arg, p, pshift, 0, lp); |
320054e8 | 183 | } else { |
5d8a1409 | 184 | sift(head, width, cmp, arg, pshift, lp); |
320054e8 | 185 | } |
5d8a1409 | 186 | |
320054e8 DG |
187 | if(pshift == 1) { |
188 | shl(p, 1); | |
189 | pshift = 0; | |
190 | } else { | |
191 | shl(p, pshift - 1); | |
192 | pshift = 1; | |
193 | } | |
194 | } | |
5d8a1409 | 195 | |
320054e8 DG |
196 | p[0] |= 1; |
197 | head += width; | |
198 | } | |
199 | ||
5d8a1409 | 200 | trinkle(head, width, cmp, arg, p, pshift, 0, lp); |
320054e8 DG |
201 | |
202 | while(pshift != 1 || p[0] != 1 || p[1] != 0) { | |
203 | if(pshift <= 1) { | |
204 | trail = pntz(p); | |
205 | shr(p, trail); | |
206 | pshift += trail; | |
207 | } else { | |
208 | shl(p, 2); | |
209 | pshift -= 2; | |
210 | p[0] ^= 7; | |
211 | shr(p, 1); | |
5d8a1409 | 212 | trinkle(head - lp[pshift] - width, width, cmp, arg, p, pshift + 1, 1, lp); |
320054e8 DG |
213 | shl(p, 1); |
214 | p[0] |= 1; | |
5d8a1409 | 215 | trinkle(head - width, width, cmp, arg, p, pshift, 1, lp); |
320054e8 DG |
216 | } |
217 | head -= width; | |
218 | } | |
219 | } | |
5d8a1409 DG |
220 | |
221 | weak_alias(__qsort_r, qsort_r); |