]>
Commit | Line | Data |
---|---|---|
7c673cae | 1 | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
11fdf7f2 TL |
2 | // This source code is licensed under both the GPLv2 (found in the |
3 | // COPYING file in the root directory) and Apache 2.0 License | |
4 | // (found in the LICENSE.Apache file in the root directory). | |
7c673cae FG |
5 | |
6 | #pragma once | |
7 | ||
8 | #include <algorithm> | |
9 | #include <cstdint> | |
10 | #include <functional> | |
11 | #include "port/port.h" | |
12 | #include "util/autovector.h" | |
13 | ||
14 | namespace rocksdb { | |
15 | ||
16 | // Binary heap implementation optimized for use in multi-way merge sort. | |
17 | // Comparison to std::priority_queue: | |
18 | // - In libstdc++, std::priority_queue::pop() usually performs just over logN | |
19 | // comparisons but never fewer. | |
20 | // - std::priority_queue does not have a replace-top operation, requiring a | |
21 | // pop+push. If the replacement element is the new top, this requires | |
22 | // around 2logN comparisons. | |
23 | // - This heap's pop() uses a "schoolbook" downheap which requires up to ~2logN | |
24 | // comparisons. | |
25 | // - This heap provides a replace_top() operation which requires [1, 2logN] | |
26 | // comparisons. When the replacement element is also the new top, this | |
27 | // takes just 1 or 2 comparisons. | |
28 | // | |
29 | // The last property can yield an order-of-magnitude performance improvement | |
30 | // when merge-sorting real-world non-random data. If the merge operation is | |
31 | // likely to take chunks of elements from the same input stream, only 1 | |
32 | // comparison per element is needed. In RocksDB-land, this happens when | |
33 | // compacting a database where keys are not randomly distributed across L0 | |
34 | // files but nearby keys are likely to be in the same L0 file. | |
35 | // | |
36 | // The container uses the same counterintuitive ordering as | |
37 | // std::priority_queue: the comparison operator is expected to provide the | |
38 | // less-than relation, but top() will return the maximum. | |
39 | ||
40 | template<typename T, typename Compare = std::less<T>> | |
41 | class BinaryHeap { | |
42 | public: | |
43 | BinaryHeap() { } | |
44 | explicit BinaryHeap(Compare cmp) : cmp_(std::move(cmp)) { } | |
45 | ||
46 | void push(const T& value) { | |
47 | data_.push_back(value); | |
48 | upheap(data_.size() - 1); | |
49 | } | |
50 | ||
51 | void push(T&& value) { | |
52 | data_.push_back(std::move(value)); | |
53 | upheap(data_.size() - 1); | |
54 | } | |
55 | ||
56 | const T& top() const { | |
57 | assert(!empty()); | |
58 | return data_.front(); | |
59 | } | |
60 | ||
61 | void replace_top(const T& value) { | |
62 | assert(!empty()); | |
63 | data_.front() = value; | |
64 | downheap(get_root()); | |
65 | } | |
66 | ||
67 | void replace_top(T&& value) { | |
68 | assert(!empty()); | |
69 | data_.front() = std::move(value); | |
70 | downheap(get_root()); | |
71 | } | |
72 | ||
73 | void pop() { | |
74 | assert(!empty()); | |
75 | data_.front() = std::move(data_.back()); | |
76 | data_.pop_back(); | |
77 | if (!empty()) { | |
78 | downheap(get_root()); | |
79 | } else { | |
80 | reset_root_cmp_cache(); | |
81 | } | |
82 | } | |
83 | ||
84 | void swap(BinaryHeap &other) { | |
85 | std::swap(cmp_, other.cmp_); | |
86 | data_.swap(other.data_); | |
87 | std::swap(root_cmp_cache_, other.root_cmp_cache_); | |
88 | } | |
89 | ||
90 | void clear() { | |
91 | data_.clear(); | |
92 | reset_root_cmp_cache(); | |
93 | } | |
94 | ||
95 | bool empty() const { | |
96 | return data_.empty(); | |
97 | } | |
98 | ||
99 | void reset_root_cmp_cache() { root_cmp_cache_ = port::kMaxSizet; } | |
100 | ||
101 | private: | |
102 | static inline size_t get_root() { return 0; } | |
103 | static inline size_t get_parent(size_t index) { return (index - 1) / 2; } | |
104 | static inline size_t get_left(size_t index) { return 2 * index + 1; } | |
105 | static inline size_t get_right(size_t index) { return 2 * index + 2; } | |
106 | ||
107 | void upheap(size_t index) { | |
108 | T v = std::move(data_[index]); | |
109 | while (index > get_root()) { | |
110 | const size_t parent = get_parent(index); | |
111 | if (!cmp_(data_[parent], v)) { | |
112 | break; | |
113 | } | |
114 | data_[index] = std::move(data_[parent]); | |
115 | index = parent; | |
116 | } | |
117 | data_[index] = std::move(v); | |
118 | reset_root_cmp_cache(); | |
119 | } | |
120 | ||
121 | void downheap(size_t index) { | |
122 | T v = std::move(data_[index]); | |
123 | ||
124 | size_t picked_child = port::kMaxSizet; | |
125 | while (1) { | |
126 | const size_t left_child = get_left(index); | |
127 | if (get_left(index) >= data_.size()) { | |
128 | break; | |
129 | } | |
130 | const size_t right_child = left_child + 1; | |
131 | assert(right_child == get_right(index)); | |
132 | picked_child = left_child; | |
133 | if (index == 0 && root_cmp_cache_ < data_.size()) { | |
134 | picked_child = root_cmp_cache_; | |
135 | } else if (right_child < data_.size() && | |
136 | cmp_(data_[left_child], data_[right_child])) { | |
137 | picked_child = right_child; | |
138 | } | |
139 | if (!cmp_(v, data_[picked_child])) { | |
140 | break; | |
141 | } | |
142 | data_[index] = std::move(data_[picked_child]); | |
143 | index = picked_child; | |
144 | } | |
145 | ||
146 | if (index == 0) { | |
147 | // We did not change anything in the tree except for the value | |
148 | // of the root node, left and right child did not change, we can | |
149 | // cache that `picked_child` is the smallest child | |
150 | // so next time we compare againist it directly | |
151 | root_cmp_cache_ = picked_child; | |
152 | } else { | |
153 | // the tree changed, reset cache | |
154 | reset_root_cmp_cache(); | |
155 | } | |
156 | ||
157 | data_[index] = std::move(v); | |
158 | } | |
159 | ||
160 | Compare cmp_; | |
161 | autovector<T> data_; | |
162 | // Used to reduce number of cmp_ calls in downheap() | |
163 | size_t root_cmp_cache_ = port::kMaxSizet; | |
164 | }; | |
165 | ||
166 | } // namespace rocksdb |