]> git.proxmox.com Git - ceph.git/blob - ceph/src/common/bounded_key_counter.h
update sources to v12.2.3
[ceph.git] / ceph / src / common / bounded_key_counter.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2017 Red Hat, Inc
7 *
8 * Author: Casey Bodley <cbodley@redhat.com>
9 *
10 * This is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License version 2.1, as published by the Free Software
13 * Foundation. See file COPYING.
14 *
15 */
16
17 #ifndef BOUNDED_KEY_COUNTER_H
18 #define BOUNDED_KEY_COUNTER_H
19
20 #include <algorithm>
21 #include <map>
22 #include <tuple>
23 #include <vector>
24
25 #include "include/assert.h"
26
27 /**
28 * BoundedKeyCounter
29 *
30 * A data structure that counts the number of times a given key is inserted,
31 * and can return the keys with the highest counters. The number of unique keys
32 * is bounded by the given constructor argument, meaning that new keys will be
33 * rejected if they would exceed this bound.
34 *
35 * It is optimized for use where insertion is frequent, but sorted listings are
36 * both infrequent and tend to request a small subset of the available keys.
37 */
38 template <typename Key, typename Count>
39 class BoundedKeyCounter {
40 /// map type to associate keys with their counter values
41 using map_type = std::map<Key, Count>;
42 using value_type = typename map_type::value_type;
43
44 /// view type used for sorting key-value pairs by their counter value
45 using view_type = std::vector<const value_type*>;
46
47 /// maximum number of counters to store at once
48 const size_t bound;
49
50 /// map of counters, with a maximum size given by 'bound'
51 map_type counters;
52
53 /// storage for sorted key-value pairs
54 view_type sorted;
55
56 /// remembers how much of the range is actually sorted
57 typename view_type::iterator sorted_position;
58
59 /// invalidate view of sorted entries
60 void invalidate_sorted()
61 {
62 sorted_position = sorted.begin();
63 sorted.clear();
64 }
65
66 /// value_type comparison function for sorting in descending order
67 static bool value_greater(const value_type *lhs, const value_type *rhs)
68 {
69 return lhs->second > rhs->second;
70 }
71
72 /// map iterator that adapts value_type to value_type*
73 struct const_pointer_iterator : public map_type::const_iterator {
74 const_pointer_iterator(typename map_type::const_iterator i)
75 : map_type::const_iterator(i) {}
76 const value_type* operator*() const {
77 return &map_type::const_iterator::operator*();
78 }
79 };
80
81 protected:
82 /// return the number of sorted entries. marked protected for unit testing
83 size_t get_num_sorted() const
84 {
85 using const_iterator = typename view_type::const_iterator;
86 return std::distance<const_iterator>(sorted.begin(), sorted_position);
87 }
88
89 public:
90 BoundedKeyCounter(size_t bound)
91 : bound(bound)
92 {
93 sorted.reserve(bound);
94 sorted_position = sorted.begin();
95 }
96
97 /// return the number of keys stored
98 size_t size() const noexcept { return counters.size(); }
99
100 /// return the maximum number of keys
101 size_t capacity() const noexcept { return bound; }
102
103 /// increment a counter for the given key and return its value. if the key was
104 /// not present, insert it. if the map is full, return 0
105 Count insert(const Key& key, Count n = 1)
106 {
107 typename map_type::iterator i;
108
109 if (counters.size() < bound) {
110 // insert new entries at count=0
111 bool inserted;
112 std::tie(i, inserted) = counters.emplace(key, 0);
113 if (inserted) {
114 sorted.push_back(&*i);
115 }
116 } else {
117 // when full, refuse to insert new entries
118 i = counters.find(key);
119 if (i == counters.end()) {
120 return 0;
121 }
122 }
123
124 i->second += n; // add to the counter
125
126 // update sorted position if necessary. use a binary search for the last
127 // element in the sorted range that's greater than this counter
128 sorted_position = std::lower_bound(sorted.begin(), sorted_position,
129 &*i, &value_greater);
130
131 return i->second;
132 }
133
134 /// remove the given key from the map of counters
135 void erase(const Key& key)
136 {
137 auto i = counters.find(key);
138 if (i == counters.end()) {
139 return;
140 }
141 // removing the sorted entry would require linear search; invalidate instead
142 invalidate_sorted();
143
144 counters.erase(i);
145 }
146
147 /// query the highest N key-value pairs sorted by counter value, passing each
148 /// in order to the given callback with arguments (Key, Count)
149 template <typename Callback>
150 void get_highest(size_t count, Callback&& cb)
151 {
152 if (sorted.empty()) {
153 // initialize the vector with pointers to all key-value pairs
154 sorted.assign(const_pointer_iterator{counters.cbegin()},
155 const_pointer_iterator{counters.cend()});
156 // entire range is unsorted
157 assert(sorted_position == sorted.begin());
158 }
159
160 const size_t sorted_count = get_num_sorted();
161 if (sorted_count < count) {
162 // move sorted_position to cover the requested number of entries
163 sorted_position = sorted.begin() + std::min(count, sorted.size());
164
165 // sort all entries in descending order up to the given position
166 std::partial_sort(sorted.begin(), sorted_position, sorted.end(),
167 &value_greater);
168 }
169
170 // return the requested range via callback
171 for (const auto& pair : sorted) {
172 if (count-- == 0) {
173 return;
174 }
175 cb(pair->first, pair->second);
176 }
177 }
178
179 /// remove all keys and counters and invalidate the sorted range
180 void clear()
181 {
182 invalidate_sorted();
183 counters.clear();
184 }
185 };
186
187 #endif // BOUNDED_KEY_COUNTER_H