]> git.proxmox.com Git - libgit2.git/blob - src/tsort.c
Merge pull request #397 from lambourg/development
[libgit2.git] / src / tsort.c
1 /*
2 * Copyright (C) 2009-2011 the libgit2 contributors
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7
8 #include "common.h"
9
10 /**
11 * An array-of-pointers implementation of Python's Timsort
12 * Based on code by Christopher Swenson under the MIT license
13 *
14 * Copyright (c) 2010 Christopher Swenson
15 * Copyright (c) 2011 Vicent Marti
16 */
17
18 #ifndef MAX
19 # define MAX(x,y) (((x) > (y) ? (x) : (y)))
20 #endif
21
22 #ifndef MIN
23 # define MIN(x,y) (((x) < (y) ? (x) : (y)))
24 #endif
25
26 typedef int (*cmp_ptr_t)(const void *, const void *);
27
28 static int binsearch(void **dst, const void *x, size_t size, cmp_ptr_t cmp)
29 {
30 int l, c, r;
31 void *lx, *cx;
32
33 l = 0;
34 r = size - 1;
35 c = r >> 1;
36 lx = dst[l];
37
38 /* check for beginning conditions */
39 if (cmp(x, lx) < 0)
40 return 0;
41
42 else if (cmp(x, lx) == 0) {
43 int i = 1;
44 while (cmp(x, dst[i]) == 0)
45 i++;
46 return i;
47 }
48
49 /* guaranteed not to be >= rx */
50 cx = dst[c];
51 while (1) {
52 const int val = cmp(x, cx);
53 if (val < 0) {
54 if (c - l <= 1) return c;
55 r = c;
56 } else if (val > 0) {
57 if (r - c <= 1) return c + 1;
58 l = c;
59 lx = cx;
60 } else {
61 do {
62 cx = dst[++c];
63 } while (cmp(x, cx) == 0);
64 return c;
65 }
66 c = l + ((r - l) >> 1);
67 cx = dst[c];
68 }
69 }
70
71 /* Binary insertion sort, but knowing that the first "start" entries are sorted. Used in timsort. */
72 static void bisort(void **dst, size_t start, size_t size, cmp_ptr_t cmp)
73 {
74 size_t i;
75 void *x;
76 int location;
77
78 for (i = start; i < size; i++) {
79 int j;
80 /* If this entry is already correct, just move along */
81 if (cmp(dst[i - 1], dst[i]) <= 0)
82 continue;
83
84 /* Else we need to find the right place, shift everything over, and squeeze in */
85 x = dst[i];
86 location = binsearch(dst, x, i, cmp);
87 for (j = i - 1; j >= location; j--) {
88 dst[j + 1] = dst[j];
89 }
90 dst[location] = x;
91 }
92 }
93
94
95 /* timsort implementation, based on timsort.txt */
96 struct tsort_run {
97 ssize_t start;
98 ssize_t length;
99 };
100
101 struct tsort_store {
102 size_t alloc;
103 cmp_ptr_t cmp;
104 void **storage;
105 };
106
107 static void reverse_elements(void **dst, int start, int end)
108 {
109 while (start < end) {
110 void *tmp = dst[start];
111 dst[start] = dst[end];
112 dst[end] = tmp;
113
114 start++;
115 end--;
116 }
117 }
118
119 static int count_run(void **dst, ssize_t start, ssize_t size, struct tsort_store *store)
120 {
121 ssize_t curr = start + 2;
122
123 if (size - start == 1)
124 return 1;
125
126 if (start >= size - 2) {
127 if (store->cmp(dst[size - 2], dst[size - 1]) > 0) {
128 void *tmp = dst[size - 1];
129 dst[size - 1] = dst[size - 2];
130 dst[size - 2] = tmp;
131 }
132
133 return 2;
134 }
135
136 if (store->cmp(dst[start], dst[start + 1]) <= 0) {
137 while (curr < size - 1 && store->cmp(dst[curr - 1], dst[curr]) <= 0)
138 curr++;
139
140 return curr - start;
141 } else {
142 while (curr < size - 1 && store->cmp(dst[curr - 1], dst[curr]) > 0)
143 curr++;
144
145 /* reverse in-place */
146 reverse_elements(dst, start, curr - 1);
147 return curr - start;
148 }
149 }
150
151 static int compute_minrun(size_t n)
152 {
153 int r = 0;
154 while (n >= 64) {
155 r |= n & 1;
156 n >>= 1;
157 }
158 return n + r;
159 }
160
161 static int check_invariant(struct tsort_run *stack, int stack_curr)
162 {
163 if (stack_curr < 2)
164 return 1;
165
166 else if (stack_curr == 2) {
167 const int A = stack[stack_curr - 2].length;
168 const int B = stack[stack_curr - 1].length;
169 return (A > B);
170 } else {
171 const int A = stack[stack_curr - 3].length;
172 const int B = stack[stack_curr - 2].length;
173 const int C = stack[stack_curr - 1].length;
174 return !((A <= B + C) || (B <= C));
175 }
176 }
177
178 static int resize(struct tsort_store *store, size_t new_size)
179 {
180 if (store->alloc < new_size) {
181 void **tempstore = realloc(store->storage, new_size * sizeof(void *));
182
183 /**
184 * Do not propagate on OOM; this will abort the sort and
185 * leave the array unsorted, but no error code will be
186 * raised
187 */
188 if (tempstore == NULL)
189 return -1;
190
191 store->storage = tempstore;
192 store->alloc = new_size;
193 }
194
195 return 0;
196 }
197
198 static void merge(void **dst, const struct tsort_run *stack, int stack_curr, struct tsort_store *store)
199 {
200 const ssize_t A = stack[stack_curr - 2].length;
201 const ssize_t B = stack[stack_curr - 1].length;
202 const ssize_t curr = stack[stack_curr - 2].start;
203
204 void **storage;
205 ssize_t i, j, k;
206
207 if (resize(store, MIN(A, B)) < 0)
208 return;
209
210 storage = store->storage;
211
212 /* left merge */
213 if (A < B) {
214 memcpy(storage, &dst[curr], A * sizeof(void *));
215 i = 0;
216 j = curr + A;
217
218 for (k = curr; k < curr + A + B; k++) {
219 if ((i < A) && (j < curr + A + B)) {
220 if (store->cmp(storage[i], dst[j]) <= 0)
221 dst[k] = storage[i++];
222 else
223 dst[k] = dst[j++];
224 } else if (i < A) {
225 dst[k] = storage[i++];
226 } else
227 dst[k] = dst[j++];
228 }
229 } else {
230 memcpy(storage, &dst[curr + A], B * sizeof(void *));
231 i = B - 1;
232 j = curr + A - 1;
233
234 for (k = curr + A + B - 1; k >= curr; k--) {
235 if ((i >= 0) && (j >= curr)) {
236 if (store->cmp(dst[j], storage[i]) > 0)
237 dst[k] = dst[j--];
238 else
239 dst[k] = storage[i--];
240 } else if (i >= 0)
241 dst[k] = storage[i--];
242 else
243 dst[k] = dst[j--];
244 }
245 }
246 }
247
248 static ssize_t collapse(void **dst, struct tsort_run *stack, ssize_t stack_curr, struct tsort_store *store, ssize_t size)
249 {
250 ssize_t A, B, C;
251
252 while (1) {
253 /* if the stack only has one thing on it, we are done with the collapse */
254 if (stack_curr <= 1)
255 break;
256
257 /* if this is the last merge, just do it */
258 if ((stack_curr == 2) && (stack[0].length + stack[1].length == size)) {
259 merge(dst, stack, stack_curr, store);
260 stack[0].length += stack[1].length;
261 stack_curr--;
262 break;
263 }
264
265 /* check if the invariant is off for a stack of 2 elements */
266 else if ((stack_curr == 2) && (stack[0].length <= stack[1].length)) {
267 merge(dst, stack, stack_curr, store);
268 stack[0].length += stack[1].length;
269 stack_curr--;
270 break;
271 }
272 else if (stack_curr == 2)
273 break;
274
275 A = stack[stack_curr - 3].length;
276 B = stack[stack_curr - 2].length;
277 C = stack[stack_curr - 1].length;
278
279 /* check first invariant */
280 if (A <= B + C) {
281 if (A < C) {
282 merge(dst, stack, stack_curr - 1, store);
283 stack[stack_curr - 3].length += stack[stack_curr - 2].length;
284 stack[stack_curr - 2] = stack[stack_curr - 1];
285 stack_curr--;
286 } else {
287 merge(dst, stack, stack_curr, store);
288 stack[stack_curr - 2].length += stack[stack_curr - 1].length;
289 stack_curr--;
290 }
291 } else if (B <= C) {
292 merge(dst, stack, stack_curr, store);
293 stack[stack_curr - 2].length += stack[stack_curr - 1].length;
294 stack_curr--;
295 } else
296 break;
297 }
298
299 return stack_curr;
300 }
301
302 #define PUSH_NEXT() do {\
303 len = count_run(dst, curr, size, store);\
304 run = minrun;\
305 if (run < minrun) run = minrun;\
306 if (run > (ssize_t)size - curr) run = size - curr;\
307 if (run > len) {\
308 bisort(&dst[curr], len, run, cmp);\
309 len = run;\
310 }\
311 run_stack[stack_curr].start = curr;\
312 run_stack[stack_curr++].length = len;\
313 curr += len;\
314 if (curr == (ssize_t)size) {\
315 /* finish up */ \
316 while (stack_curr > 1) { \
317 merge(dst, run_stack, stack_curr, store); \
318 run_stack[stack_curr - 2].length += run_stack[stack_curr - 1].length; \
319 stack_curr--; \
320 } \
321 if (store->storage != NULL) {\
322 free(store->storage);\
323 store->storage = NULL;\
324 }\
325 return;\
326 }\
327 }\
328 while (0)
329
330 void git__tsort(void **dst, size_t size, cmp_ptr_t cmp)
331 {
332 struct tsort_store _store, *store = &_store;
333 struct tsort_run run_stack[128];
334
335 ssize_t stack_curr = 0;
336 ssize_t len, run;
337 ssize_t curr = 0;
338 ssize_t minrun;
339
340 if (size < 64) {
341 bisort(dst, 1, size, cmp);
342 return;
343 }
344
345 /* compute the minimum run length */
346 minrun = compute_minrun(size);
347
348 /* temporary storage for merges */
349 store->alloc = 0;
350 store->storage = NULL;
351 store->cmp = cmp;
352
353 PUSH_NEXT();
354 PUSH_NEXT();
355 PUSH_NEXT();
356
357 while (1) {
358 if (!check_invariant(run_stack, stack_curr)) {
359 stack_curr = collapse(dst, run_stack, stack_curr, store, size);
360 continue;
361 }
362
363 PUSH_NEXT();
364 }
365 }