1 /* Drop in replacement for heapq.py
3 C implementation derived directly from heapq.py in Py2.3
4 which was written by Kevin O'Connor, augmented by Tim Peters,
5 annotated by François Pinard, and converted to C by Raymond Hettinger.
11 /* Older implementations of heapq used Py_LE for comparisons. Now, it uses
12 Py_LT so it will match min(), sorted(), and bisect(). Unfortunately, some
13 client code (Twisted for example) relied on Py_LE, so this little function
14 restores compatibility by trying both.
17 cmp_lt(PyObject
*x
, PyObject
*y
)
20 static PyObject
*lt
= NULL
;
23 lt
= PyString_FromString("__lt__");
27 if (PyObject_HasAttr(x
, lt
))
28 return PyObject_RichCompareBool(x
, y
, Py_LT
);
29 cmp
= PyObject_RichCompareBool(y
, x
, Py_LE
);
36 _siftdown(PyListObject
*heap
, Py_ssize_t startpos
, Py_ssize_t pos
)
38 PyObject
*newitem
, *parent
;
39 Py_ssize_t parentpos
, size
;
42 assert(PyList_Check(heap
));
43 size
= PyList_GET_SIZE(heap
);
45 PyErr_SetString(PyExc_IndexError
, "index out of range");
49 /* Follow the path to the root, moving parents down until finding
50 a place newitem fits. */
51 newitem
= PyList_GET_ITEM(heap
, pos
);
52 while (pos
> startpos
) {
53 parentpos
= (pos
- 1) >> 1;
54 parent
= PyList_GET_ITEM(heap
, parentpos
);
55 cmp
= cmp_lt(newitem
, parent
);
58 if (size
!= PyList_GET_SIZE(heap
)) {
59 PyErr_SetString(PyExc_RuntimeError
,
60 "list changed size during iteration");
65 parent
= PyList_GET_ITEM(heap
, parentpos
);
66 newitem
= PyList_GET_ITEM(heap
, pos
);
67 PyList_SET_ITEM(heap
, parentpos
, newitem
);
68 PyList_SET_ITEM(heap
, pos
, parent
);
75 _siftup(PyListObject
*heap
, Py_ssize_t pos
)
77 Py_ssize_t startpos
, endpos
, childpos
, rightpos
, limit
;
78 PyObject
*tmp1
, *tmp2
;
81 assert(PyList_Check(heap
));
82 endpos
= PyList_GET_SIZE(heap
);
85 PyErr_SetString(PyExc_IndexError
, "index out of range");
89 /* Bubble up the smaller child until hitting a leaf. */
90 limit
= endpos
/ 2; /* smallest pos that has no child */
92 /* Set childpos to index of smaller child. */
93 childpos
= 2*pos
+ 1; /* leftmost child position */
94 rightpos
= childpos
+ 1;
95 if (rightpos
< endpos
) {
97 PyList_GET_ITEM(heap
, childpos
),
98 PyList_GET_ITEM(heap
, rightpos
));
103 if (endpos
!= PyList_GET_SIZE(heap
)) {
104 PyErr_SetString(PyExc_RuntimeError
,
105 "list changed size during iteration");
109 /* Move the smaller child up. */
110 tmp1
= PyList_GET_ITEM(heap
, childpos
);
111 tmp2
= PyList_GET_ITEM(heap
, pos
);
112 PyList_SET_ITEM(heap
, childpos
, tmp2
);
113 PyList_SET_ITEM(heap
, pos
, tmp1
);
116 /* Bubble it up to its final resting place (by sifting its parents down). */
117 return _siftdown(heap
, startpos
, pos
);
121 heappush(PyObject
*self
, PyObject
*args
)
123 PyObject
*heap
, *item
;
125 if (!PyArg_UnpackTuple(args
, "heappush", 2, 2, &heap
, &item
))
128 if (!PyList_Check(heap
)) {
129 PyErr_SetString(PyExc_TypeError
, "heap argument must be a list");
133 if (PyList_Append(heap
, item
) == -1)
136 if (_siftdown((PyListObject
*)heap
, 0, PyList_GET_SIZE(heap
)-1) == -1)
142 PyDoc_STRVAR(heappush_doc
,
143 "heappush(heap, item) -> None. Push item onto heap, maintaining the heap invariant.");
146 heappop(PyObject
*self
, PyObject
*heap
)
148 PyObject
*lastelt
, *returnitem
;
151 if (!PyList_Check(heap
)) {
152 PyErr_SetString(PyExc_TypeError
, "heap argument must be a list");
156 /* # raises appropriate IndexError if heap is empty */
157 n
= PyList_GET_SIZE(heap
);
159 PyErr_SetString(PyExc_IndexError
, "index out of range");
163 lastelt
= PyList_GET_ITEM(heap
, n
-1) ;
165 PyList_SetSlice(heap
, n
-1, n
, NULL
);
170 returnitem
= PyList_GET_ITEM(heap
, 0);
171 PyList_SET_ITEM(heap
, 0, lastelt
);
172 if (_siftup((PyListObject
*)heap
, 0) == -1) {
173 Py_DECREF(returnitem
);
179 PyDoc_STRVAR(heappop_doc
,
180 "Pop the smallest item off the heap, maintaining the heap invariant.");
183 heapreplace(PyObject
*self
, PyObject
*args
)
185 PyObject
*heap
, *item
, *returnitem
;
187 if (!PyArg_UnpackTuple(args
, "heapreplace", 2, 2, &heap
, &item
))
190 if (!PyList_Check(heap
)) {
191 PyErr_SetString(PyExc_TypeError
, "heap argument must be a list");
195 if (PyList_GET_SIZE(heap
) < 1) {
196 PyErr_SetString(PyExc_IndexError
, "index out of range");
200 returnitem
= PyList_GET_ITEM(heap
, 0);
202 PyList_SET_ITEM(heap
, 0, item
);
203 if (_siftup((PyListObject
*)heap
, 0) == -1) {
204 Py_DECREF(returnitem
);
210 PyDoc_STRVAR(heapreplace_doc
,
211 "heapreplace(heap, item) -> value. Pop and return the current smallest value, and add the new item.\n\
213 This is more efficient than heappop() followed by heappush(), and can be\n\
214 more appropriate when using a fixed-size heap. Note that the value\n\
215 returned may be larger than item! That constrains reasonable uses of\n\
216 this routine unless written as part of a conditional replacement:\n\n\
217 if item > heap[0]:\n\
218 item = heapreplace(heap, item)\n");
221 heappushpop(PyObject
*self
, PyObject
*args
)
223 PyObject
*heap
, *item
, *returnitem
;
226 if (!PyArg_UnpackTuple(args
, "heappushpop", 2, 2, &heap
, &item
))
229 if (!PyList_Check(heap
)) {
230 PyErr_SetString(PyExc_TypeError
, "heap argument must be a list");
234 if (PyList_GET_SIZE(heap
) < 1) {
239 cmp
= cmp_lt(PyList_GET_ITEM(heap
, 0), item
);
247 returnitem
= PyList_GET_ITEM(heap
, 0);
249 PyList_SET_ITEM(heap
, 0, item
);
250 if (_siftup((PyListObject
*)heap
, 0) == -1) {
251 Py_DECREF(returnitem
);
257 PyDoc_STRVAR(heappushpop_doc
,
258 "heappushpop(heap, item) -> value. Push item on the heap, then pop and return the smallest item\n\
259 from the heap. The combined action runs more efficiently than\n\
260 heappush() followed by a separate call to heappop().");
263 heapify(PyObject
*self
, PyObject
*heap
)
267 if (!PyList_Check(heap
)) {
268 PyErr_SetString(PyExc_TypeError
, "heap argument must be a list");
272 n
= PyList_GET_SIZE(heap
);
273 /* Transform bottom-up. The largest index there's any point to
274 looking at is the largest with a child index in-range, so must
275 have 2*i + 1 < n, or i < (n-1)/2. If n is even = 2*j, this is
276 (2*j-1)/2 = j-1/2 so j-1 is the largest, which is n//2 - 1. If
277 n is odd = 2*j+1, this is (2*j+1-1)/2 = j so j-1 is the largest,
278 and that's again n//2-1.
280 for (i
=n
/2-1 ; i
>=0 ; i
--)
281 if(_siftup((PyListObject
*)heap
, i
) == -1)
287 PyDoc_STRVAR(heapify_doc
,
288 "Transform list into a heap, in-place, in O(len(heap)) time.");
291 nlargest(PyObject
*self
, PyObject
*args
)
293 PyObject
*heap
=NULL
, *elem
, *iterable
, *sol
, *it
, *oldelem
;
297 if (!PyArg_ParseTuple(args
, "nO:nlargest", &n
, &iterable
))
300 it
= PyObject_GetIter(iterable
);
304 heap
= PyList_New(0);
308 for (i
=0 ; i
<n
; i
++ ){
309 elem
= PyIter_Next(it
);
311 if (PyErr_Occurred())
316 if (PyList_Append(heap
, elem
) == -1) {
322 if (PyList_GET_SIZE(heap
) == 0)
325 for (i
=n
/2-1 ; i
>=0 ; i
--)
326 if(_siftup((PyListObject
*)heap
, i
) == -1)
329 sol
= PyList_GET_ITEM(heap
, 0);
331 elem
= PyIter_Next(it
);
333 if (PyErr_Occurred())
338 cmp
= cmp_lt(sol
, elem
);
347 oldelem
= PyList_GET_ITEM(heap
, 0);
348 PyList_SET_ITEM(heap
, 0, elem
);
350 if (_siftup((PyListObject
*)heap
, 0) == -1)
352 sol
= PyList_GET_ITEM(heap
, 0);
355 if (PyList_Sort(heap
) == -1)
357 if (PyList_Reverse(heap
) == -1)
368 PyDoc_STRVAR(nlargest_doc
,
369 "Find the n largest elements in a dataset.\n\
371 Equivalent to: sorted(iterable, reverse=True)[:n]\n");
374 _siftdownmax(PyListObject
*heap
, Py_ssize_t startpos
, Py_ssize_t pos
)
376 PyObject
*newitem
, *parent
;
378 Py_ssize_t parentpos
;
380 assert(PyList_Check(heap
));
381 if (pos
>= PyList_GET_SIZE(heap
)) {
382 PyErr_SetString(PyExc_IndexError
, "index out of range");
386 newitem
= PyList_GET_ITEM(heap
, pos
);
388 /* Follow the path to the root, moving parents down until finding
389 a place newitem fits. */
390 while (pos
> startpos
){
391 parentpos
= (pos
- 1) >> 1;
392 parent
= PyList_GET_ITEM(heap
, parentpos
);
393 cmp
= cmp_lt(parent
, newitem
);
401 Py_DECREF(PyList_GET_ITEM(heap
, pos
));
402 PyList_SET_ITEM(heap
, pos
, parent
);
405 Py_DECREF(PyList_GET_ITEM(heap
, pos
));
406 PyList_SET_ITEM(heap
, pos
, newitem
);
411 _siftupmax(PyListObject
*heap
, Py_ssize_t pos
)
413 Py_ssize_t startpos
, endpos
, childpos
, rightpos
, limit
;
415 PyObject
*newitem
, *tmp
;
417 assert(PyList_Check(heap
));
418 endpos
= PyList_GET_SIZE(heap
);
421 PyErr_SetString(PyExc_IndexError
, "index out of range");
424 newitem
= PyList_GET_ITEM(heap
, pos
);
427 /* Bubble up the smaller child until hitting a leaf. */
428 limit
= endpos
/ 2; /* smallest pos that has no child */
429 while (pos
< limit
) {
430 /* Set childpos to index of smaller child. */
431 childpos
= 2*pos
+ 1; /* leftmost child position */
432 rightpos
= childpos
+ 1;
433 if (rightpos
< endpos
) {
435 PyList_GET_ITEM(heap
, rightpos
),
436 PyList_GET_ITEM(heap
, childpos
));
444 /* Move the smaller child up. */
445 tmp
= PyList_GET_ITEM(heap
, childpos
);
447 Py_DECREF(PyList_GET_ITEM(heap
, pos
));
448 PyList_SET_ITEM(heap
, pos
, tmp
);
452 /* The leaf at pos is empty now. Put newitem there, and bubble
453 it up to its final resting place (by sifting its parents down). */
454 Py_DECREF(PyList_GET_ITEM(heap
, pos
));
455 PyList_SET_ITEM(heap
, pos
, newitem
);
456 return _siftdownmax(heap
, startpos
, pos
);
460 nsmallest(PyObject
*self
, PyObject
*args
)
462 PyObject
*heap
=NULL
, *elem
, *iterable
, *los
, *it
, *oldelem
;
466 if (!PyArg_ParseTuple(args
, "nO:nsmallest", &n
, &iterable
))
469 it
= PyObject_GetIter(iterable
);
473 heap
= PyList_New(0);
477 for (i
=0 ; i
<n
; i
++ ){
478 elem
= PyIter_Next(it
);
480 if (PyErr_Occurred())
485 if (PyList_Append(heap
, elem
) == -1) {
491 n
= PyList_GET_SIZE(heap
);
495 for (i
=n
/2-1 ; i
>=0 ; i
--)
496 if(_siftupmax((PyListObject
*)heap
, i
) == -1)
499 los
= PyList_GET_ITEM(heap
, 0);
501 elem
= PyIter_Next(it
);
503 if (PyErr_Occurred())
508 cmp
= cmp_lt(elem
, los
);
518 oldelem
= PyList_GET_ITEM(heap
, 0);
519 PyList_SET_ITEM(heap
, 0, elem
);
521 if (_siftupmax((PyListObject
*)heap
, 0) == -1)
523 los
= PyList_GET_ITEM(heap
, 0);
527 if (PyList_Sort(heap
) == -1)
538 PyDoc_STRVAR(nsmallest_doc
,
539 "Find the n smallest elements in a dataset.\n\
541 Equivalent to: sorted(iterable)[:n]\n");
543 static PyMethodDef heapq_methods
[] = {
544 {"heappush", (PyCFunction
)heappush
,
545 METH_VARARGS
, heappush_doc
},
546 {"heappushpop", (PyCFunction
)heappushpop
,
547 METH_VARARGS
, heappushpop_doc
},
548 {"heappop", (PyCFunction
)heappop
,
549 METH_O
, heappop_doc
},
550 {"heapreplace", (PyCFunction
)heapreplace
,
551 METH_VARARGS
, heapreplace_doc
},
552 {"heapify", (PyCFunction
)heapify
,
553 METH_O
, heapify_doc
},
554 {"nlargest", (PyCFunction
)nlargest
,
555 METH_VARARGS
, nlargest_doc
},
556 {"nsmallest", (PyCFunction
)nsmallest
,
557 METH_VARARGS
, nsmallest_doc
},
558 {NULL
, NULL
} /* sentinel */
561 PyDoc_STRVAR(module_doc
,
562 "Heap queue algorithm (a.k.a. priority queue).\n\
564 Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for\n\
565 all k, counting elements from 0. For the sake of comparison,\n\
566 non-existing elements are considered to be infinite. The interesting\n\
567 property of a heap is that a[0] is always its smallest element.\n\
571 heap = [] # creates an empty heap\n\
572 heappush(heap, item) # pushes a new item on the heap\n\
573 item = heappop(heap) # pops the smallest item from the heap\n\
574 item = heap[0] # smallest item on the heap without popping it\n\
575 heapify(x) # transforms list into a heap, in-place, in linear time\n\
576 item = heapreplace(heap, item) # pops and returns smallest item, and adds\n\
577 # new item; the heap size is unchanged\n\
579 Our API differs from textbook heap algorithms as follows:\n\
581 - We use 0-based indexing. This makes the relationship between the\n\
582 index for a node and the indexes for its children slightly less\n\
583 obvious, but is more suitable since Python uses 0-based indexing.\n\
585 - Our heappop() method returns the smallest item, not the largest.\n\
587 These two make it possible to view the heap as a regular Python list\n\
588 without surprises: heap[0] is the smallest item, and heap.sort()\n\
589 maintains the heap invariant!\n");
592 PyDoc_STRVAR(__about__
,
595 [explanation by François Pinard]\n\
597 Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for\n\
598 all k, counting elements from 0. For the sake of comparison,\n\
599 non-existing elements are considered to be infinite. The interesting\n\
600 property of a heap is that a[0] is always its smallest element.\n"
602 The strange invariant above is meant to be an efficient memory\n\
603 representation for a tournament. The numbers below are `k', not a[k]:\n\
611 7 8 9 10 11 12 13 14\n\
613 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30\n\
616 In the tree above, each cell `k' is topping `2*k+1' and `2*k+2'. In\n\
617 an usual binary tournament we see in sports, each cell is the winner\n\
618 over the two cells it tops, and we can trace the winner down the tree\n\
619 to see all opponents s/he had. However, in many computer applications\n\
620 of such tournaments, we do not need to trace the history of a winner.\n\
621 To be more memory efficient, when a winner is promoted, we try to\n\
622 replace it by something else at a lower level, and the rule becomes\n\
623 that a cell and the two cells it tops contain three different items,\n\
624 but the top cell \"wins\" over the two topped cells.\n"
626 If this heap invariant is protected at all time, index 0 is clearly\n\
627 the overall winner. The simplest algorithmic way to remove it and\n\
628 find the \"next\" winner is to move some loser (let's say cell 30 in the\n\
629 diagram above) into the 0 position, and then percolate this new 0 down\n\
630 the tree, exchanging values, until the invariant is re-established.\n\
631 This is clearly logarithmic on the total number of items in the tree.\n\
632 By iterating over all items, you get an O(n ln n) sort.\n"
634 A nice feature of this sort is that you can efficiently insert new\n\
635 items while the sort is going on, provided that the inserted items are\n\
636 not \"better\" than the last 0'th element you extracted. This is\n\
637 especially useful in simulation contexts, where the tree holds all\n\
638 incoming events, and the \"win\" condition means the smallest scheduled\n\
639 time. When an event schedule other events for execution, they are\n\
640 scheduled into the future, so they can easily go into the heap. So, a\n\
641 heap is a good structure for implementing schedulers (this is what I\n\
642 used for my MIDI sequencer :-).\n"
644 Various structures for implementing schedulers have been extensively\n\
645 studied, and heaps are good for this, as they are reasonably speedy,\n\
646 the speed is almost constant, and the worst case is not much different\n\
647 than the average case. However, there are other representations which\n\
648 are more efficient overall, yet the worst cases might be terrible.\n"
650 Heaps are also very useful in big disk sorts. You most probably all\n\
651 know that a big sort implies producing \"runs\" (which are pre-sorted\n\
652 sequences, which size is usually related to the amount of CPU memory),\n\
653 followed by a merging passes for these runs, which merging is often\n\
654 very cleverly organised[1]. It is very important that the initial\n\
655 sort produces the longest runs possible. Tournaments are a good way\n\
656 to that. If, using all the memory available to hold a tournament, you\n\
657 replace and percolate items that happen to fit the current run, you'll\n\
658 produce runs which are twice the size of the memory for random input,\n\
659 and much better for input fuzzily ordered.\n"
661 Moreover, if you output the 0'th item on disk and get an input which\n\
662 may not fit in the current tournament (because the value \"wins\" over\n\
663 the last output value), it cannot fit in the heap, so the size of the\n\
664 heap decreases. The freed memory could be cleverly reused immediately\n\
665 for progressively building a second heap, which grows at exactly the\n\
666 same rate the first heap is melting. When the first heap completely\n\
667 vanishes, you switch heaps and start a new run. Clever and quite\n\
670 In a word, heaps are useful memory structures to know. I use them in\n\
671 a few applications, and I think it is good to keep a `heap' module\n\
674 --------------------\n\
675 [1] The disk balancing algorithms which are current, nowadays, are\n\
676 more annoying than clever, and this is a consequence of the seeking\n\
677 capabilities of the disks. On devices which cannot seek, like big\n\
678 tape drives, the story was quite different, and one had to be very\n\
679 clever to ensure (far in advance) that each tape movement will be the\n\
680 most effective possible (that is, will best participate at\n\
681 \"progressing\" the merge). Some tapes were even able to read\n\
682 backwards, and this was also used to avoid the rewinding time.\n\
683 Believe me, real good tape sorts were quite spectacular to watch!\n\
684 From all times, sorting has always been a Great Art! :-)\n");
691 m
= Py_InitModule3("_heapq", heapq_methods
, module_doc
);
694 PyModule_AddObject(m
, "__about__", PyString_FromString(__about__
));