]> git.proxmox.com Git - ceph.git/blob - ceph/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.h
bump version to 18.2.2-pve1
[ceph.git] / ceph / src / crimson / os / seastore / lba_manager / btree / btree_lba_manager.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #pragma once
5
6 #include <iostream>
7
8 #include <boost/intrusive_ptr.hpp>
9 #include <boost/smart_ptr/intrusive_ref_counter.hpp>
10 #include <seastar/core/future.hh>
11
12 #include "include/ceph_assert.h"
13 #include "include/buffer_fwd.h"
14 #include "include/interval_set.h"
15 #include "common/interval_map.h"
16 #include "crimson/osd/exceptions.h"
17
18 #include "crimson/os/seastore/btree/fixed_kv_btree.h"
19 #include "crimson/os/seastore/seastore_types.h"
20 #include "crimson/os/seastore/lba_manager.h"
21 #include "crimson/os/seastore/cache.h"
22
23 #include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h"
24 #include "crimson/os/seastore/btree/btree_range_pin.h"
25
26 namespace crimson::os::seastore::lba_manager::btree {
27
28 class BtreeLBAMapping : public BtreeNodeMapping<laddr_t, paddr_t> {
29 // To support cloning, there are two kinds of lba mappings:
30 // 1. physical lba mapping: the pladdr in the value of which is the paddr of
31 // the corresponding extent;
32 // 2. indirect lba mapping: the pladdr in the value of which is an laddr pointing
33 // to the physical lba mapping that's pointing to the actual paddr of the
34 // extent being searched;
35 //
36 // Accordingly, BtreeLBAMapping may also work under two modes: indirect or direct
37 // 1. BtreeLBAMappings that come from quering an indirect lba mapping in the lba tree
38 // are indirect;
39 // 2. BtreeLBAMappings that come from quering a physical lba mapping in the lba tree
40 // are direct.
41 //
42 // For direct BtreeLBAMappings, there are two important fields:
43 // 1. key: the laddr of the lba mapping being queried;
44 // 2. paddr: the paddr recorded in the value of the lba mapping being queried.
45 // For indirect BtreeLBAMappings, BtreeLBAMapping has three important fields:
46 // 1. key: the laddr key of the lba entry being queried;
47 // 2. intermediate_key: the laddr within the scope of the physical lba mapping
48 // that the current indirect lba mapping points to; although an indirect mapping
49 // points to the start of the physical lba mapping, it may change to other
50 // laddr after remap
51 // 3. intermediate_base: the laddr key of the physical lba mapping, intermediate_key
52 // and intermediate_base should be the same when doing cloning
53 // 4. intermediate_offset: intermediate_key - intermediate_base
54 // 5. paddr: the paddr recorded in the physical lba mapping pointed to by the
55 // indirect lba mapping being queried;
56 //
57 // NOTE THAT, for direct BtreeLBAMappings, their intermediate_keys are the same as
58 // their keys.
59 public:
60 BtreeLBAMapping(op_context_t<laddr_t> ctx)
61 : BtreeNodeMapping(ctx) {}
62 BtreeLBAMapping(
63 op_context_t<laddr_t> c,
64 CachedExtentRef parent,
65 uint16_t pos,
66 lba_map_val_t &val,
67 lba_node_meta_t meta)
68 : BtreeNodeMapping(
69 c,
70 parent,
71 pos,
72 val.pladdr.is_paddr() ? val.pladdr.get_paddr() : P_ADDR_NULL,
73 val.len,
74 meta),
75 key(meta.begin),
76 indirect(val.pladdr.is_laddr() ? true : false),
77 intermediate_key(indirect ? val.pladdr.get_laddr() : L_ADDR_NULL),
78 intermediate_length(indirect ? val.len : 0),
79 raw_val(val.pladdr),
80 map_val(val)
81 {}
82
83 lba_map_val_t get_map_val() const {
84 return map_val;
85 }
86
87 bool is_indirect() const final {
88 return indirect;
89 }
90
91 void set_key_for_indirect(
92 laddr_t new_key,
93 extent_len_t length,
94 laddr_t interkey = L_ADDR_NULL)
95 {
96 turn_indirect(interkey);
97 key = new_key;
98 intermediate_length = len;
99 len = length;
100 }
101
102 laddr_t get_key() const final {
103 return key;
104 }
105
106 pladdr_t get_raw_val() const {
107 return raw_val;
108 }
109
110 void set_paddr(paddr_t addr) {
111 value = addr;
112 }
113
114 laddr_t get_intermediate_key() const final {
115 assert(is_indirect());
116 assert(intermediate_key != L_ADDR_NULL);
117 return intermediate_key;
118 }
119
120 laddr_t get_intermediate_base() const final {
121 assert(is_indirect());
122 assert(intermediate_base != L_ADDR_NULL);
123 return intermediate_base;
124 }
125
126 extent_len_t get_intermediate_offset() const final {
127 assert(intermediate_key >= intermediate_base);
128 assert((intermediate_key == L_ADDR_NULL)
129 == (intermediate_base == L_ADDR_NULL));
130 return intermediate_key - intermediate_base;
131 }
132
133 extent_len_t get_intermediate_length() const final {
134 assert(is_indirect());
135 assert(intermediate_length);
136 return intermediate_length;
137 }
138
139 void set_intermediate_base(laddr_t base) {
140 intermediate_base = base;
141 }
142
143 protected:
144 std::unique_ptr<BtreeNodeMapping<laddr_t, paddr_t>> _duplicate(
145 op_context_t<laddr_t> ctx) const final {
146 auto pin = std::unique_ptr<BtreeLBAMapping>(new BtreeLBAMapping(ctx));
147 pin->key = key;
148 pin->intermediate_base = intermediate_base;
149 pin->intermediate_key = intermediate_key;
150 pin->indirect = indirect;
151 pin->raw_val = raw_val;
152 pin->map_val = map_val;
153 return pin;
154 }
155 private:
156 void turn_indirect(laddr_t interkey) {
157 assert(value.is_paddr());
158 intermediate_base = key;
159 intermediate_key = (interkey == L_ADDR_NULL ? key : interkey);
160 indirect = true;
161 }
162 laddr_t key = L_ADDR_NULL;
163 bool indirect = false;
164 laddr_t intermediate_key = L_ADDR_NULL;
165 laddr_t intermediate_base = L_ADDR_NULL;
166 extent_len_t intermediate_length = 0;
167 pladdr_t raw_val;
168 lba_map_val_t map_val;
169 };
170
171 using BtreeLBAMappingRef = std::unique_ptr<BtreeLBAMapping>;
172
173 using LBABtree = FixedKVBtree<
174 laddr_t, lba_map_val_t, LBAInternalNode,
175 LBALeafNode, BtreeLBAMapping, LBA_BLOCK_SIZE, true>;
176
177 /**
178 * BtreeLBAManager
179 *
180 * Uses a wandering btree to track two things:
181 * 1) lba state including laddr_t -> paddr_t mapping
182 * 2) reverse paddr_t -> laddr_t mapping for gc (TODO)
183 *
184 * Generally, any transaction will involve
185 * 1) deltas against lba tree nodes
186 * 2) new lba tree nodes
187 * - Note, there must necessarily be a delta linking
188 * these new nodes into the tree -- might be a
189 * bootstrap_state_t delta if new root
190 *
191 * get_mappings, alloc_extent_*, etc populate a Transaction
192 * which then gets submitted
193 */
194 class BtreeLBAManager : public LBAManager {
195 public:
196 BtreeLBAManager(Cache &cache)
197 : cache(cache)
198 {
199 register_metrics();
200 }
201
202 mkfs_ret mkfs(
203 Transaction &t) final;
204
205 get_mappings_ret get_mappings(
206 Transaction &t,
207 laddr_t offset, extent_len_t length) final;
208
209 get_mappings_ret get_mappings(
210 Transaction &t,
211 laddr_list_t &&list) final;
212
213 get_mapping_ret get_mapping(
214 Transaction &t,
215 laddr_t offset) final;
216
217 alloc_extent_ret reserve_region(
218 Transaction &t,
219 laddr_t hint,
220 extent_len_t len)
221 {
222 return _alloc_extent(
223 t,
224 hint,
225 len,
226 P_ADDR_ZERO,
227 P_ADDR_NULL,
228 L_ADDR_NULL,
229 nullptr);
230 }
231
232 alloc_extent_ret clone_extent(
233 Transaction &t,
234 laddr_t hint,
235 extent_len_t len,
236 laddr_t intermediate_key,
237 paddr_t actual_addr,
238 laddr_t intermediate_base)
239 {
240 return _alloc_extent(
241 t,
242 hint,
243 len,
244 intermediate_key,
245 actual_addr,
246 intermediate_base,
247 nullptr);
248 }
249
250 alloc_extent_ret alloc_extent(
251 Transaction &t,
252 laddr_t hint,
253 extent_len_t len,
254 paddr_t addr,
255 LogicalCachedExtent &ext) final
256 {
257 return _alloc_extent(
258 t,
259 hint,
260 len,
261 addr,
262 P_ADDR_NULL,
263 L_ADDR_NULL,
264 &ext);
265 }
266
267 ref_ret decref_extent(
268 Transaction &t,
269 laddr_t addr,
270 bool cascade_remove) final {
271 return update_refcount(t, addr, -1, cascade_remove);
272 }
273
274 ref_ret incref_extent(
275 Transaction &t,
276 laddr_t addr) final {
277 return update_refcount(t, addr, 1, false);
278 }
279
280 ref_ret incref_extent(
281 Transaction &t,
282 laddr_t addr,
283 int delta) final {
284 ceph_assert(delta > 0);
285 return update_refcount(t, addr, delta, false);
286 }
287
288 /**
289 * init_cached_extent
290 *
291 * Checks whether e is live (reachable from lba tree) and drops or initializes
292 * accordingly.
293 *
294 * Returns if e is live.
295 */
296 init_cached_extent_ret init_cached_extent(
297 Transaction &t,
298 CachedExtentRef e) final;
299
300 check_child_trackers_ret check_child_trackers(Transaction &t) final;
301
302 scan_mappings_ret scan_mappings(
303 Transaction &t,
304 laddr_t begin,
305 laddr_t end,
306 scan_mappings_func_t &&f) final;
307
308 rewrite_extent_ret rewrite_extent(
309 Transaction &t,
310 CachedExtentRef extent) final;
311
312 update_mapping_ret update_mapping(
313 Transaction& t,
314 laddr_t laddr,
315 paddr_t prev_addr,
316 paddr_t paddr,
317 LogicalCachedExtent*) final;
318
319 get_physical_extent_if_live_ret get_physical_extent_if_live(
320 Transaction &t,
321 extent_types_t type,
322 paddr_t addr,
323 laddr_t laddr,
324 extent_len_t len) final;
325 private:
326 Cache &cache;
327
328
329 struct {
330 uint64_t num_alloc_extents = 0;
331 uint64_t num_alloc_extents_iter_nexts = 0;
332 } stats;
333
334 op_context_t<laddr_t> get_context(Transaction &t) {
335 return op_context_t<laddr_t>{cache, t};
336 }
337
338 seastar::metrics::metric_group metrics;
339 void register_metrics();
340
341 /**
342 * update_refcount
343 *
344 * Updates refcount, returns resulting refcount
345 */
346 using update_refcount_ret = ref_ret;
347 update_refcount_ret update_refcount(
348 Transaction &t,
349 laddr_t addr,
350 int delta,
351 bool cascade_remove);
352
353 /**
354 * _update_mapping
355 *
356 * Updates mapping, removes if f returns nullopt
357 */
358 using _update_mapping_iertr = ref_iertr;
359 using _update_mapping_ret = ref_iertr::future<lba_map_val_t>;
360 using update_func_t = std::function<
361 lba_map_val_t(const lba_map_val_t &v)
362 >;
363 _update_mapping_ret _update_mapping(
364 Transaction &t,
365 laddr_t addr,
366 update_func_t &&f,
367 LogicalCachedExtent*);
368
369 alloc_extent_ret _alloc_extent(
370 Transaction &t,
371 laddr_t hint,
372 extent_len_t len,
373 pladdr_t addr,
374 paddr_t actual_addr,
375 laddr_t intermediate_base,
376 LogicalCachedExtent*);
377
378 using _get_mapping_ret = get_mapping_iertr::future<BtreeLBAMappingRef>;
379 _get_mapping_ret _get_mapping(
380 Transaction &t,
381 laddr_t offset);
382
383 using _get_original_mappings_ret = get_mappings_ret;
384 _get_original_mappings_ret _get_original_mappings(
385 op_context_t<laddr_t> c,
386 std::list<BtreeLBAMappingRef> &pin_list);
387
388 ref_iertr::future<std::optional<std::pair<paddr_t, extent_len_t>>>
389 _decref_intermediate(
390 Transaction &t,
391 laddr_t addr,
392 extent_len_t len);
393 };
394 using BtreeLBAManagerRef = std::unique_ptr<BtreeLBAManager>;
395
396 }