]> git.proxmox.com Git - ceph.git/blob - ceph/src/include/interval_set.h
f1a21e5f96e1ae290e077386b16692bdf80e038a
[ceph.git] / ceph / src / include / interval_set.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15
16 #ifndef CEPH_INTERVAL_SET_H
17 #define CEPH_INTERVAL_SET_H
18
19 #include <iterator>
20 #include <map>
21 #include <ostream>
22
23 #include "encoding.h"
24
25 /*
26 * *** NOTE ***
27 *
28 * This class is written to work with a variety of map-like containers,
29 * *include* ones that invalidate iterators when they are modified (e.g.,
30 * flat_map and btree_map).
31 */
32
33 template<typename T, template<typename, typename, typename ...> class C = std::map>
34 class interval_set {
35 public:
36 using Map = C<T, T>;
37 using value_type = typename Map::value_type;
38 using offset_type = T;
39 using length_type = T;
40 using reference = value_type&;
41 using const_reference = const value_type&;
42 using size_type = typename Map::size_type;
43
44 class const_iterator;
45
46 class iterator
47 {
48 public:
49 using difference_type = ssize_t;
50 using value_type = typename Map::value_type;
51 using pointer = typename Map::value_type*;
52 using reference = typename Map::value_type&;
53 using iterator_category = std::forward_iterator_tag;
54
55 explicit iterator(typename Map::iterator iter)
56 : _iter(iter)
57 { }
58
59 // For the copy constructor and assignment operator, the compiler-generated functions, which
60 // perform simple bitwise copying, should be fine.
61
62 bool operator==(const iterator& rhs) const {
63 return (_iter == rhs._iter);
64 }
65
66 bool operator!=(const iterator& rhs) const {
67 return (_iter != rhs._iter);
68 }
69
70 // Dereference this iterator to get a pair.
71 reference operator*() const {
72 return *_iter;
73 }
74
75 // Return the interval start.
76 offset_type get_start() const {
77 return _iter->first;
78 }
79
80 // Return the interval length.
81 length_type get_len() const {
82 return _iter->second;
83 }
84
85 offset_type get_end() const {
86 return _iter->first + _iter->second;
87 }
88
89 // Set the interval length.
90 void set_len(const length_type& len) {
91 _iter->second = len;
92 }
93
94 // Preincrement
95 iterator& operator++()
96 {
97 ++_iter;
98 return *this;
99 }
100
101 // Postincrement
102 iterator operator++(int)
103 {
104 iterator prev(_iter);
105 ++_iter;
106 return prev;
107 }
108
109 friend class interval_set::const_iterator;
110
111 protected:
112 typename Map::iterator _iter;
113 friend class interval_set;
114 };
115
116 class const_iterator
117 {
118 public:
119 using difference_type = ssize_t;
120 using value_type = const typename Map::value_type;
121 using pointer = const typename Map::value_type*;
122 using reference = const typename Map::value_type&;
123 using iterator_category = std::forward_iterator_tag;
124
125 explicit const_iterator(typename Map::const_iterator iter)
126 : _iter(iter)
127 { }
128
129 const_iterator(const iterator &i)
130 : _iter(i._iter)
131 { }
132
133 // For the copy constructor and assignment operator, the compiler-generated functions, which
134 // perform simple bitwise copying, should be fine.
135
136 bool operator==(const const_iterator& rhs) const {
137 return (_iter == rhs._iter);
138 }
139
140 bool operator!=(const const_iterator& rhs) const {
141 return (_iter != rhs._iter);
142 }
143
144 // Dereference this iterator to get a pair.
145 reference operator*() const {
146 return *_iter;
147 }
148
149 // Return the interval start.
150 offset_type get_start() const {
151 return _iter->first;
152 }
153 offset_type get_end() const {
154 return _iter->first + _iter->second;
155 }
156
157 // Return the interval length.
158 length_type get_len() const {
159 return _iter->second;
160 }
161
162 // Preincrement
163 const_iterator& operator++()
164 {
165 ++_iter;
166 return *this;
167 }
168
169 // Postincrement
170 const_iterator operator++(int)
171 {
172 const_iterator prev(_iter);
173 ++_iter;
174 return prev;
175 }
176
177 protected:
178 typename Map::const_iterator _iter;
179 };
180
181 interval_set() = default;
182 interval_set(Map&& other) {
183 m.swap(other);
184 for (const auto& p : m) {
185 _size += p.second;
186 }
187 }
188
189 size_type num_intervals() const
190 {
191 return m.size();
192 }
193
194 iterator begin() {
195 return iterator(m.begin());
196 }
197
198 iterator lower_bound(T start) {
199 return iterator(find_inc_m(start));
200 }
201
202 iterator end() {
203 return iterator(m.end());
204 }
205
206 const_iterator begin() const {
207 return const_iterator(m.begin());
208 }
209
210 const_iterator lower_bound(T start) const {
211 return const_iterator(find_inc(start));
212 }
213
214 const_iterator end() const {
215 return const_iterator(m.end());
216 }
217
218 // helpers
219 private:
220 auto find_inc(T start) const {
221 auto p = m.lower_bound(start); // p->first >= start
222 if (p != m.begin() &&
223 (p == m.end() || p->first > start)) {
224 --p; // might overlap?
225 if (p->first + p->second <= start)
226 ++p; // it doesn't.
227 }
228 return p;
229 }
230
231 auto find_inc_m(T start) {
232 auto p = m.lower_bound(start);
233 if (p != m.begin() &&
234 (p == m.end() || p->first > start)) {
235 --p; // might overlap?
236 if (p->first + p->second <= start)
237 ++p; // it doesn't.
238 }
239 return p;
240 }
241
242 auto find_adj(T start) const {
243 auto p = m.lower_bound(start);
244 if (p != m.begin() &&
245 (p == m.end() || p->first > start)) {
246 --p; // might touch?
247 if (p->first + p->second < start)
248 ++p; // it doesn't.
249 }
250 return p;
251 }
252
253 auto find_adj_m(T start) {
254 auto p = m.lower_bound(start);
255 if (p != m.begin() &&
256 (p == m.end() || p->first > start)) {
257 --p; // might touch?
258 if (p->first + p->second < start)
259 ++p; // it doesn't.
260 }
261 return p;
262 }
263
264 void intersection_size_asym(const interval_set &s, const interval_set &l) {
265 auto ps = s.m.begin();
266 ceph_assert(ps != s.m.end());
267 auto offset = ps->first;
268 bool first = true;
269 auto mi = m.begin();
270
271 while (1) {
272 if (first)
273 first = false;
274 auto pl = l.find_inc(offset);
275 if (pl == l.m.end())
276 break;
277 while (ps != s.m.end() && ps->first + ps->second <= pl->first)
278 ++ps;
279 if (ps == s.m.end())
280 break;
281 offset = pl->first + pl->second;
282 if (offset <= ps->first) {
283 offset = ps->first;
284 continue;
285 }
286
287 if (*ps == *pl) {
288 do {
289 mi = m.insert(mi, *ps);
290 _size += ps->second;
291 ++ps;
292 ++pl;
293 } while (ps != s.m.end() && pl != l.m.end() && *ps == *pl);
294 if (ps == s.m.end())
295 break;
296 offset = ps->first;
297 continue;
298 }
299
300 auto start = std::max<T>(ps->first, pl->first);
301 auto en = std::min<T>(ps->first + ps->second, offset);
302 ceph_assert(en > start);
303 mi = m.emplace_hint(mi, start, en - start);
304 _size += mi->second;
305 if (ps->first + ps->second <= offset) {
306 ++ps;
307 if (ps == s.m.end())
308 break;
309 offset = ps->first;
310 }
311 }
312 }
313
314 bool subset_size_sym(const interval_set &b) const {
315 auto pa = m.begin(), pb = b.m.begin();
316 const auto a_end = m.end(), b_end = b.m.end();
317
318 while (pa != a_end && pb != b_end) {
319 while (pb->first + pb->second <= pa->first) {
320 ++pb;
321 if (pb == b_end)
322 return false;
323 }
324
325 if (*pa == *pb) {
326 do {
327 ++pa;
328 ++pb;
329 } while (pa != a_end && pb != b_end && *pa == *pb);
330 continue;
331 }
332
333 // interval begins before other
334 if (pa->first < pb->first)
335 return false;
336 // interval is longer than other
337 if (pa->first + pa->second > pb->first + pb->second)
338 return false;
339
340 ++pa;
341 }
342
343 return pa == a_end;
344 }
345
346 public:
347 bool operator==(const interval_set& other) const {
348 return _size == other._size && m == other.m;
349 }
350
351 uint64_t size() const {
352 return _size;
353 }
354
355 void bound_encode(size_t& p) const {
356 denc_traits<Map>::bound_encode(m, p);
357 }
358 void encode(ceph::buffer::list::contiguous_appender& p) const {
359 denc(m, p);
360 }
361 void decode(ceph::buffer::ptr::const_iterator& p) {
362 denc(m, p);
363 _size = 0;
364 for (const auto& p : m) {
365 _size += p.second;
366 }
367 }
368 void decode(ceph::buffer::list::iterator& p) {
369 denc(m, p);
370 _size = 0;
371 for (const auto& p : m) {
372 _size += p.second;
373 }
374 }
375
376 void encode_nohead(ceph::buffer::list::contiguous_appender& p) const {
377 denc_traits<Map>::encode_nohead(m, p);
378 }
379 void decode_nohead(int n, ceph::buffer::ptr::const_iterator& p) {
380 denc_traits<Map>::decode_nohead(n, m, p);
381 _size = 0;
382 for (const auto& p : m) {
383 _size += p.second;
384 }
385 }
386
387 void clear() {
388 m.clear();
389 _size = 0;
390 }
391
392 bool contains(T i, T *pstart=0, T *plen=0) const {
393 auto p = find_inc(i);
394 if (p == m.end()) return false;
395 if (p->first > i) return false;
396 if (p->first+p->second <= i) return false;
397 ceph_assert(p->first <= i && p->first+p->second > i);
398 if (pstart)
399 *pstart = p->first;
400 if (plen)
401 *plen = p->second;
402 return true;
403 }
404 bool contains(T start, T len) const {
405 auto p = find_inc(start);
406 if (p == m.end()) return false;
407 if (p->first > start) return false;
408 if (p->first+p->second <= start) return false;
409 ceph_assert(p->first <= start && p->first+p->second > start);
410 if (p->first+p->second < start+len) return false;
411 return true;
412 }
413 bool intersects(T start, T len) const {
414 interval_set a;
415 a.insert(start, len);
416 interval_set i;
417 i.intersection_of( *this, a );
418 if (i.empty()) return false;
419 return true;
420 }
421
422 // outer range of set
423 bool empty() const {
424 return m.empty();
425 }
426 offset_type range_start() const {
427 ceph_assert(!empty());
428 auto p = m.begin();
429 return p->first;
430 }
431 offset_type range_end() const {
432 ceph_assert(!empty());
433 auto p = m.rbegin();
434 return p->first + p->second;
435 }
436
437 // interval start after p (where p not in set)
438 bool starts_after(T i) const {
439 ceph_assert(!contains(i));
440 auto p = find_inc(i);
441 if (p == m.end()) return false;
442 return true;
443 }
444 offset_type start_after(T i) const {
445 ceph_assert(!contains(i));
446 auto p = find_inc(i);
447 return p->first;
448 }
449
450 // interval end that contains start
451 offset_type end_after(T start) const {
452 ceph_assert(contains(start));
453 auto p = find_inc(start);
454 return p->first+p->second;
455 }
456
457 void insert(T val) {
458 insert(val, 1);
459 }
460
461 void insert(T start, T len, T *pstart=0, T *plen=0) {
462 //cout << "insert " << start << "~" << len << endl;
463 ceph_assert(len > 0);
464 _size += len;
465 auto p = find_adj_m(start);
466 if (p == m.end()) {
467 m[start] = len; // new interval
468 if (pstart)
469 *pstart = start;
470 if (plen)
471 *plen = len;
472 } else {
473 if (p->first < start) {
474
475 if (p->first + p->second != start) {
476 //cout << "p is " << p->first << "~" << p->second << ", start is " << start << ", len is " << len << endl;
477 ceph_abort();
478 }
479
480 p->second += len; // append to end
481
482 auto n = p;
483 ++n;
484 if (pstart)
485 *pstart = p->first;
486 if (n != m.end() &&
487 start+len == n->first) { // combine with next, too!
488 p->second += n->second;
489 if (plen)
490 *plen = p->second;
491 m.erase(n);
492 } else {
493 if (plen)
494 *plen = p->second;
495 }
496 } else {
497 if (start+len == p->first) {
498 if (pstart)
499 *pstart = start;
500 if (plen)
501 *plen = len + p->second;
502 T psecond = p->second;
503 m.erase(p);
504 m[start] = len + psecond; // append to front
505 } else {
506 ceph_assert(p->first > start+len);
507 if (pstart)
508 *pstart = start;
509 if (plen)
510 *plen = len;
511 m[start] = len; // new interval
512 }
513 }
514 }
515 }
516
517 void swap(interval_set& other) {
518 m.swap(other.m);
519 std::swap(_size, other._size);
520 }
521
522 void erase(const iterator &i) {
523 _size -= i.get_len();
524 m.erase(i._iter);
525 }
526
527 void erase(T val) {
528 erase(val, 1);
529 }
530
531 void erase(T start, T len,
532 std::function<bool(T, T)> claim = {}) {
533 auto p = find_inc_m(start);
534
535 _size -= len;
536
537 ceph_assert(p != m.end());
538 ceph_assert(p->first <= start);
539
540 T before = start - p->first;
541 ceph_assert(p->second >= before+len);
542 T after = p->second - before - len;
543 if (before) {
544 if (claim && claim(p->first, before)) {
545 _size -= before;
546 m.erase(p);
547 } else {
548 p->second = before; // shorten bit before
549 }
550 } else {
551 m.erase(p);
552 }
553 if (after) {
554 if (claim && claim(start + len, after)) {
555 _size -= after;
556 } else {
557 m[start + len] = after;
558 }
559 }
560 }
561
562 void subtract(const interval_set &a) {
563 for (const auto& [start, len] : a.m) {
564 erase(start, len);
565 }
566 }
567
568 void insert(const interval_set &a) {
569 for (const auto& [start, len] : a.m) {
570 insert(start, len);
571 }
572 }
573
574
575 void intersection_of(const interval_set &a, const interval_set &b) {
576 ceph_assert(&a != this);
577 ceph_assert(&b != this);
578 clear();
579
580 const interval_set *s, *l;
581
582 if (a.size() < b.size()) {
583 s = &a;
584 l = &b;
585 } else {
586 s = &b;
587 l = &a;
588 }
589
590 if (!s->size())
591 return;
592
593 /*
594 * Use the lower_bound algorithm for larger size ratios
595 * where it performs better, but not for smaller size
596 * ratios where sequential search performs better.
597 */
598 if (l->size() / s->size() >= 10) {
599 intersection_size_asym(*s, *l);
600 return;
601 }
602
603 auto pa = a.m.begin();
604 auto pb = b.m.begin();
605 auto mi = m.begin();
606
607 while (pa != a.m.end() && pb != b.m.end()) {
608 // passing?
609 if (pa->first + pa->second <= pb->first)
610 { pa++; continue; }
611 if (pb->first + pb->second <= pa->first)
612 { pb++; continue; }
613
614 if (*pa == *pb) {
615 do {
616 mi = m.insert(mi, *pa);
617 _size += pa->second;
618 ++pa;
619 ++pb;
620 } while (pa != a.m.end() && pb != b.m.end() && *pa == *pb);
621 continue;
622 }
623
624 T start = std::max(pa->first, pb->first);
625 T en = std::min(pa->first+pa->second, pb->first+pb->second);
626 ceph_assert(en > start);
627 mi = m.emplace_hint(mi, start, en - start);
628 _size += mi->second;
629 if (pa->first+pa->second > pb->first+pb->second)
630 pb++;
631 else
632 pa++;
633 }
634 }
635 void intersection_of(const interval_set& b) {
636 interval_set a;
637 swap(a);
638 intersection_of(a, b);
639 }
640
641 void union_of(const interval_set &a, const interval_set &b) {
642 ceph_assert(&a != this);
643 ceph_assert(&b != this);
644 clear();
645
646 //cout << "union_of" << endl;
647
648 // a
649 m = a.m;
650 _size = a._size;
651
652 // - (a*b)
653 interval_set ab;
654 ab.intersection_of(a, b);
655 subtract(ab);
656
657 // + b
658 insert(b);
659 return;
660 }
661 void union_of(const interval_set &b) {
662 interval_set a;
663 swap(a);
664 union_of(a, b);
665 }
666 void union_insert(T off, T len) {
667 interval_set a;
668 a.insert(off, len);
669 union_of(a);
670 }
671
672 bool subset_of(const interval_set &big) const {
673 if (!size())
674 return true;
675 if (size() > big.size())
676 return false;
677 if (range_end() > big.range_end())
678 return false;
679
680 /*
681 * Use the lower_bound algorithm for larger size ratios
682 * where it performs better, but not for smaller size
683 * ratios where sequential search performs better.
684 */
685 if (big.size() / size() < 10)
686 return subset_size_sym(big);
687
688 for (const auto& [start, len] : m) {
689 if (!big.contains(start, len)) return false;
690 }
691 return true;
692 }
693
694 /*
695 * build a subset of @other, starting at or after @start, and including
696 * @len worth of values, skipping holes. e.g.,
697 * span_of([5~10,20~5], 8, 5) -> [8~2,20~3]
698 */
699 void span_of(const interval_set &other, T start, T len) {
700 clear();
701 auto p = other.find_inc(start);
702 if (p == other.m.end())
703 return;
704 if (p->first < start) {
705 if (p->first + p->second < start)
706 return;
707 if (p->first + p->second < start + len) {
708 T howmuch = p->second - (start - p->first);
709 insert(start, howmuch);
710 len -= howmuch;
711 p++;
712 } else {
713 insert(start, len);
714 return;
715 }
716 }
717 while (p != other.m.end() && len > 0) {
718 if (p->second < len) {
719 insert(p->first, p->second);
720 len -= p->second;
721 p++;
722 } else {
723 insert(p->first, len);
724 return;
725 }
726 }
727 }
728
729 /*
730 * Move contents of m into another Map. Use that instead of
731 * encoding interval_set into bufferlist then decoding it back into Map.
732 */
733 Map detach() && {
734 return std::move(m);
735 }
736
737 private:
738 // data
739 uint64_t _size = 0;
740 Map m; // map start -> len
741 };
742
743 // declare traits explicitly because (1) it's templatized, and (2) we
744 // want to include _nohead variants.
745 template<typename T, template<typename, typename, typename ...> class C>
746 struct denc_traits<interval_set<T, C>> {
747 private:
748 using container_t = interval_set<T, C>;
749 public:
750 static constexpr bool supported = true;
751 static constexpr bool bounded = false;
752 static constexpr bool featured = false;
753 static constexpr bool need_contiguous = denc_traits<T, C<T,T>>::need_contiguous;
754 static void bound_encode(const container_t& v, size_t& p) {
755 v.bound_encode(p);
756 }
757 static void encode(const container_t& v,
758 ceph::buffer::list::contiguous_appender& p) {
759 v.encode(p);
760 }
761 static void decode(container_t& v, ceph::buffer::ptr::const_iterator& p) {
762 v.decode(p);
763 }
764 template<typename U=T>
765 static typename std::enable_if<sizeof(U) && !need_contiguous>::type
766 decode(container_t& v, ceph::buffer::list::iterator& p) {
767 v.decode(p);
768 }
769 static void encode_nohead(const container_t& v,
770 ceph::buffer::list::contiguous_appender& p) {
771 v.encode_nohead(p);
772 }
773 static void decode_nohead(size_t n, container_t& v,
774 ceph::buffer::ptr::const_iterator& p) {
775 v.decode_nohead(n, p);
776 }
777 };
778
779
780 template<typename T, template<typename, typename, typename ...> class C>
781 inline std::ostream& operator<<(std::ostream& out, const interval_set<T,C> &s) {
782 out << "[";
783 bool first = true;
784 for (const auto& [start, len] : s) {
785 if (!first) out << ",";
786 out << start << "~" << len;
787 first = false;
788 }
789 out << "]";
790 return out;
791 }
792
793
794 #endif