]>
Commit | Line | Data |
---|---|---|
b32b8144 FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2017 Red Hat, Inc | |
7 | * | |
8 | * Author: Casey Bodley <cbodley@redhat.com> | |
9 | * | |
10 | * This is free software; you can redistribute it and/or | |
11 | * modify it under the terms of the GNU Lesser General Public | |
12 | * License version 2.1, as published by the Free Software | |
13 | * Foundation. See file COPYING. | |
14 | * | |
15 | */ | |
16 | ||
17 | #ifndef BOUNDED_KEY_COUNTER_H | |
18 | #define BOUNDED_KEY_COUNTER_H | |
19 | ||
20 | #include <algorithm> | |
21 | #include <map> | |
22 | #include <tuple> | |
23 | #include <vector> | |
24 | ||
25 | #include "include/assert.h" | |
26 | ||
27 | /** | |
28 | * BoundedKeyCounter | |
29 | * | |
30 | * A data structure that counts the number of times a given key is inserted, | |
31 | * and can return the keys with the highest counters. The number of unique keys | |
32 | * is bounded by the given constructor argument, meaning that new keys will be | |
33 | * rejected if they would exceed this bound. | |
34 | * | |
35 | * It is optimized for use where insertion is frequent, but sorted listings are | |
36 | * both infrequent and tend to request a small subset of the available keys. | |
37 | */ | |
38 | template <typename Key, typename Count> | |
39 | class BoundedKeyCounter { | |
40 | /// map type to associate keys with their counter values | |
41 | using map_type = std::map<Key, Count>; | |
42 | using value_type = typename map_type::value_type; | |
43 | ||
44 | /// view type used for sorting key-value pairs by their counter value | |
45 | using view_type = std::vector<const value_type*>; | |
46 | ||
47 | /// maximum number of counters to store at once | |
48 | const size_t bound; | |
49 | ||
50 | /// map of counters, with a maximum size given by 'bound' | |
51 | map_type counters; | |
52 | ||
53 | /// storage for sorted key-value pairs | |
54 | view_type sorted; | |
55 | ||
56 | /// remembers how much of the range is actually sorted | |
57 | typename view_type::iterator sorted_position; | |
58 | ||
59 | /// invalidate view of sorted entries | |
60 | void invalidate_sorted() | |
61 | { | |
62 | sorted_position = sorted.begin(); | |
63 | sorted.clear(); | |
64 | } | |
65 | ||
66 | /// value_type comparison function for sorting in descending order | |
67 | static bool value_greater(const value_type *lhs, const value_type *rhs) | |
68 | { | |
69 | return lhs->second > rhs->second; | |
70 | } | |
71 | ||
72 | /// map iterator that adapts value_type to value_type* | |
73 | struct const_pointer_iterator : public map_type::const_iterator { | |
74 | const_pointer_iterator(typename map_type::const_iterator i) | |
75 | : map_type::const_iterator(i) {} | |
28e407b8 AA |
76 | |
77 | using value_type = typename map_type::const_iterator::value_type*; | |
78 | using reference = const typename map_type::const_iterator::value_type*; | |
79 | ||
80 | reference operator*() const { | |
b32b8144 FG |
81 | return &map_type::const_iterator::operator*(); |
82 | } | |
83 | }; | |
84 | ||
85 | protected: | |
86 | /// return the number of sorted entries. marked protected for unit testing | |
87 | size_t get_num_sorted() const | |
88 | { | |
89 | using const_iterator = typename view_type::const_iterator; | |
90 | return std::distance<const_iterator>(sorted.begin(), sorted_position); | |
91 | } | |
92 | ||
93 | public: | |
94 | BoundedKeyCounter(size_t bound) | |
95 | : bound(bound) | |
96 | { | |
97 | sorted.reserve(bound); | |
98 | sorted_position = sorted.begin(); | |
99 | } | |
100 | ||
101 | /// return the number of keys stored | |
102 | size_t size() const noexcept { return counters.size(); } | |
103 | ||
104 | /// return the maximum number of keys | |
105 | size_t capacity() const noexcept { return bound; } | |
106 | ||
107 | /// increment a counter for the given key and return its value. if the key was | |
108 | /// not present, insert it. if the map is full, return 0 | |
109 | Count insert(const Key& key, Count n = 1) | |
110 | { | |
111 | typename map_type::iterator i; | |
112 | ||
113 | if (counters.size() < bound) { | |
114 | // insert new entries at count=0 | |
115 | bool inserted; | |
116 | std::tie(i, inserted) = counters.emplace(key, 0); | |
117 | if (inserted) { | |
118 | sorted.push_back(&*i); | |
119 | } | |
120 | } else { | |
121 | // when full, refuse to insert new entries | |
122 | i = counters.find(key); | |
123 | if (i == counters.end()) { | |
124 | return 0; | |
125 | } | |
126 | } | |
127 | ||
128 | i->second += n; // add to the counter | |
129 | ||
130 | // update sorted position if necessary. use a binary search for the last | |
131 | // element in the sorted range that's greater than this counter | |
132 | sorted_position = std::lower_bound(sorted.begin(), sorted_position, | |
133 | &*i, &value_greater); | |
134 | ||
135 | return i->second; | |
136 | } | |
137 | ||
138 | /// remove the given key from the map of counters | |
139 | void erase(const Key& key) | |
140 | { | |
141 | auto i = counters.find(key); | |
142 | if (i == counters.end()) { | |
143 | return; | |
144 | } | |
145 | // removing the sorted entry would require linear search; invalidate instead | |
146 | invalidate_sorted(); | |
147 | ||
148 | counters.erase(i); | |
149 | } | |
150 | ||
151 | /// query the highest N key-value pairs sorted by counter value, passing each | |
152 | /// in order to the given callback with arguments (Key, Count) | |
153 | template <typename Callback> | |
154 | void get_highest(size_t count, Callback&& cb) | |
155 | { | |
156 | if (sorted.empty()) { | |
157 | // initialize the vector with pointers to all key-value pairs | |
158 | sorted.assign(const_pointer_iterator{counters.cbegin()}, | |
159 | const_pointer_iterator{counters.cend()}); | |
160 | // entire range is unsorted | |
161 | assert(sorted_position == sorted.begin()); | |
162 | } | |
163 | ||
164 | const size_t sorted_count = get_num_sorted(); | |
165 | if (sorted_count < count) { | |
166 | // move sorted_position to cover the requested number of entries | |
167 | sorted_position = sorted.begin() + std::min(count, sorted.size()); | |
168 | ||
169 | // sort all entries in descending order up to the given position | |
170 | std::partial_sort(sorted.begin(), sorted_position, sorted.end(), | |
171 | &value_greater); | |
172 | } | |
173 | ||
174 | // return the requested range via callback | |
175 | for (const auto& pair : sorted) { | |
176 | if (count-- == 0) { | |
177 | return; | |
178 | } | |
179 | cb(pair->first, pair->second); | |
180 | } | |
181 | } | |
182 | ||
183 | /// remove all keys and counters and invalidate the sorted range | |
184 | void clear() | |
185 | { | |
186 | invalidate_sorted(); | |
187 | counters.clear(); | |
188 | } | |
189 | }; | |
190 | ||
191 | #endif // BOUNDED_KEY_COUNTER_H |