]>
Commit | Line | Data |
---|---|---|
f67539c2 TL |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | #include "crimson/os/seastore/cache.h" | |
20effc67 TL |
5 | |
6 | #include <sstream> | |
7 | #include <string_view> | |
8 | ||
1e59de90 TL |
9 | #include <seastar/core/metrics.hh> |
10 | ||
20effc67 TL |
11 | #include "crimson/os/seastore/logging.h" |
12 | #include "crimson/common/config_proxy.h" | |
1e59de90 | 13 | #include "crimson/os/seastore/async_cleaner.h" |
f67539c2 TL |
14 | |
15 | // included for get_extent_by_type | |
20effc67 TL |
16 | #include "crimson/os/seastore/collection_manager/collection_flat_node.h" |
17 | #include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h" | |
18 | #include "crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.h" | |
19 | #include "crimson/os/seastore/object_data_handler.h" | |
20 | #include "crimson/os/seastore/collection_manager/collection_flat_node.h" | |
f67539c2 | 21 | #include "crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/seastore.h" |
1e59de90 | 22 | #include "crimson/os/seastore/backref/backref_tree_node.h" |
f67539c2 TL |
23 | #include "test/crimson/seastore/test_block.h" |
24 | ||
20effc67 TL |
25 | using std::string_view; |
26 | ||
27 | SET_SUBSYS(seastore_cache); | |
f67539c2 TL |
28 | |
29 | namespace crimson::os::seastore { | |
30 | ||
1e59de90 TL |
31 | std::ostream &operator<<(std::ostream &out, const backref_entry_t &ent) { |
32 | return out << "backref_entry_t{" | |
33 | << ent.paddr << "~" << ent.len << ", " | |
34 | << "laddr: " << ent.laddr << ", " | |
35 | << "type: " << ent.type << ", " | |
36 | << "seq: " << ent.seq << ", " | |
37 | << "}"; | |
38 | } | |
39 | ||
20effc67 | 40 | Cache::Cache( |
1e59de90 TL |
41 | ExtentPlacementManager &epm) |
42 | : epm(epm), | |
20effc67 TL |
43 | lru(crimson::common::get_conf<Option::size_t>( |
44 | "seastore_cache_lru_size")) | |
45 | { | |
1e59de90 TL |
46 | LOG_PREFIX(Cache::Cache); |
47 | INFO("created, lru_size={}", lru.get_capacity()); | |
20effc67 | 48 | register_metrics(); |
1e59de90 | 49 | segment_providers_by_device_id.resize(DEVICE_ID_MAX, nullptr); |
20effc67 | 50 | } |
f67539c2 TL |
51 | |
52 | Cache::~Cache() | |
53 | { | |
20effc67 | 54 | LOG_PREFIX(Cache::~Cache); |
f67539c2 | 55 | for (auto &i: extents) { |
1e59de90 | 56 | ERROR("extent is still alive -- {}", i); |
f67539c2 TL |
57 | } |
58 | ceph_assert(extents.empty()); | |
59 | } | |
60 | ||
20effc67 TL |
61 | Cache::retire_extent_ret Cache::retire_extent_addr( |
62 | Transaction &t, paddr_t addr, extent_len_t length) | |
f67539c2 | 63 | { |
1e59de90 TL |
64 | LOG_PREFIX(Cache::retire_extent_addr); |
65 | TRACET("retire {}~{}", t, addr, length); | |
66 | ||
20effc67 TL |
67 | assert(addr.is_real() && !addr.is_block_relative()); |
68 | ||
20effc67 TL |
69 | CachedExtentRef ext; |
70 | auto result = t.get_extent(addr, &ext); | |
71 | if (result == Transaction::get_extent_ret::PRESENT) { | |
1e59de90 | 72 | DEBUGT("retire {}~{} on t -- {}", t, addr, length, *ext); |
f67539c2 | 73 | t.add_to_retired_set(CachedExtentRef(&*ext)); |
20effc67 TL |
74 | return retire_extent_iertr::now(); |
75 | } else if (result == Transaction::get_extent_ret::RETIRED) { | |
1e59de90 | 76 | ERRORT("retire {}~{} failed, already retired -- {}", t, addr, length, *ext); |
20effc67 TL |
77 | ceph_abort(); |
78 | } | |
79 | ||
80 | // any relative addr must have been on the transaction | |
81 | assert(!addr.is_relative()); | |
82 | ||
83 | // absent from transaction | |
84 | // retiring is not included by the cache hit metrics | |
85 | ext = query_cache(addr, nullptr); | |
86 | if (ext) { | |
1e59de90 | 87 | DEBUGT("retire {}~{} in cache -- {}", t, addr, length, *ext); |
f67539c2 | 88 | } else { |
20effc67 TL |
89 | // add a new placeholder to Cache |
90 | ext = CachedExtent::make_cached_extent_ref< | |
91 | RetiredExtentPlaceholder>(length); | |
1e59de90 TL |
92 | ext->init(CachedExtent::extent_state_t::CLEAN, |
93 | addr, | |
94 | PLACEMENT_HINT_NULL, | |
95 | NULL_GENERATION, | |
96 | TRANS_ID_NULL); | |
97 | DEBUGT("retire {}~{} as placeholder, add extent -- {}", | |
98 | t, addr, length, *ext); | |
99 | const auto t_src = t.get_src(); | |
100 | add_extent(ext, &t_src); | |
20effc67 | 101 | } |
20effc67 TL |
102 | t.add_to_read_set(ext); |
103 | t.add_to_retired_set(ext); | |
104 | return retire_extent_iertr::now(); | |
105 | } | |
106 | ||
107 | void Cache::dump_contents() | |
108 | { | |
109 | LOG_PREFIX(Cache::dump_contents); | |
110 | DEBUG("enter"); | |
111 | for (auto &&i: extents) { | |
112 | DEBUG("live {}", i); | |
113 | } | |
114 | DEBUG("exit"); | |
115 | } | |
116 | ||
117 | void Cache::register_metrics() | |
118 | { | |
1e59de90 TL |
119 | LOG_PREFIX(Cache::register_metrics); |
120 | DEBUG(""); | |
121 | ||
20effc67 TL |
122 | stats = {}; |
123 | ||
124 | namespace sm = seastar::metrics; | |
125 | using src_t = Transaction::src_t; | |
126 | ||
20effc67 | 127 | std::map<src_t, sm::label_instance> labels_by_src { |
1e59de90 TL |
128 | {src_t::MUTATE, sm::label_instance("src", "MUTATE")}, |
129 | {src_t::READ, sm::label_instance("src", "READ")}, | |
130 | {src_t::TRIM_DIRTY, sm::label_instance("src", "TRIM_DIRTY")}, | |
131 | {src_t::TRIM_ALLOC, sm::label_instance("src", "TRIM_ALLOC")}, | |
132 | {src_t::CLEANER_MAIN, sm::label_instance("src", "CLEANER_MAIN")}, | |
133 | {src_t::CLEANER_COLD, sm::label_instance("src", "CLEANER_COLD")}, | |
20effc67 | 134 | }; |
1e59de90 | 135 | assert(labels_by_src.size() == (std::size_t)src_t::MAX); |
20effc67 | 136 | |
20effc67 | 137 | std::map<extent_types_t, sm::label_instance> labels_by_ext { |
1e59de90 TL |
138 | {extent_types_t::ROOT, sm::label_instance("ext", "ROOT")}, |
139 | {extent_types_t::LADDR_INTERNAL, sm::label_instance("ext", "LADDR_INTERNAL")}, | |
140 | {extent_types_t::LADDR_LEAF, sm::label_instance("ext", "LADDR_LEAF")}, | |
141 | {extent_types_t::DINK_LADDR_LEAF, sm::label_instance("ext", "DINK_LADDR_LEAF")}, | |
142 | {extent_types_t::OMAP_INNER, sm::label_instance("ext", "OMAP_INNER")}, | |
143 | {extent_types_t::OMAP_LEAF, sm::label_instance("ext", "OMAP_LEAF")}, | |
144 | {extent_types_t::ONODE_BLOCK_STAGED, sm::label_instance("ext", "ONODE_BLOCK_STAGED")}, | |
145 | {extent_types_t::COLL_BLOCK, sm::label_instance("ext", "COLL_BLOCK")}, | |
146 | {extent_types_t::OBJECT_DATA_BLOCK, sm::label_instance("ext", "OBJECT_DATA_BLOCK")}, | |
147 | {extent_types_t::RETIRED_PLACEHOLDER, sm::label_instance("ext", "RETIRED_PLACEHOLDER")}, | |
148 | {extent_types_t::ALLOC_INFO, sm::label_instance("ext", "ALLOC_INFO")}, | |
149 | {extent_types_t::JOURNAL_TAIL, sm::label_instance("ext", "JOURNAL_TAIL")}, | |
150 | {extent_types_t::TEST_BLOCK, sm::label_instance("ext", "TEST_BLOCK")}, | |
151 | {extent_types_t::TEST_BLOCK_PHYSICAL, sm::label_instance("ext", "TEST_BLOCK_PHYSICAL")}, | |
152 | {extent_types_t::BACKREF_INTERNAL, sm::label_instance("ext", "BACKREF_INTERNAL")}, | |
153 | {extent_types_t::BACKREF_LEAF, sm::label_instance("ext", "BACKREF_LEAF")} | |
20effc67 | 154 | }; |
1e59de90 | 155 | assert(labels_by_ext.size() == (std::size_t)extent_types_t::NONE); |
20effc67 TL |
156 | |
157 | /* | |
158 | * trans_created | |
159 | */ | |
160 | for (auto& [src, src_label] : labels_by_src) { | |
161 | metrics.add_group( | |
162 | "cache", | |
163 | { | |
164 | sm::make_counter( | |
165 | "trans_created", | |
166 | get_by_src(stats.trans_created_by_src, src), | |
167 | sm::description("total number of transaction created"), | |
168 | {src_label} | |
169 | ), | |
170 | } | |
171 | ); | |
172 | } | |
173 | ||
174 | /* | |
175 | * cache_query: cache_access and cache_hit | |
176 | */ | |
177 | for (auto& [src, src_label] : labels_by_src) { | |
178 | metrics.add_group( | |
179 | "cache", | |
180 | { | |
181 | sm::make_counter( | |
182 | "cache_access", | |
183 | get_by_src(stats.cache_query_by_src, src).access, | |
184 | sm::description("total number of cache accesses"), | |
185 | {src_label} | |
186 | ), | |
187 | sm::make_counter( | |
188 | "cache_hit", | |
189 | get_by_src(stats.cache_query_by_src, src).hit, | |
190 | sm::description("total number of cache hits"), | |
191 | {src_label} | |
192 | ), | |
193 | } | |
194 | ); | |
195 | } | |
196 | ||
197 | { | |
198 | /* | |
199 | * efforts discarded/committed | |
200 | */ | |
201 | auto effort_label = sm::label("effort"); | |
202 | ||
203 | // invalidated efforts | |
204 | using namespace std::literals::string_view_literals; | |
205 | const string_view invalidated_effort_names[] = { | |
206 | "READ"sv, | |
207 | "MUTATE"sv, | |
208 | "RETIRE"sv, | |
209 | "FRESH"sv, | |
210 | "FRESH_OOL_WRITTEN"sv, | |
211 | }; | |
212 | for (auto& [src, src_label] : labels_by_src) { | |
213 | auto& efforts = get_by_src(stats.invalidated_efforts_by_src, src); | |
214 | for (auto& [ext, ext_label] : labels_by_ext) { | |
215 | auto& counter = get_by_ext(efforts.num_trans_invalidated, ext); | |
216 | metrics.add_group( | |
217 | "cache", | |
218 | { | |
219 | sm::make_counter( | |
aee94f69 | 220 | "trans_invalidated_by_extent", |
20effc67 | 221 | counter, |
aee94f69 | 222 | sm::description("total number of transactions invalidated by extents"), |
20effc67 TL |
223 | {src_label, ext_label} |
224 | ), | |
225 | } | |
226 | ); | |
227 | } | |
228 | ||
229 | if (src == src_t::READ) { | |
230 | // read transaction won't have non-read efforts | |
231 | auto read_effort_label = effort_label("READ"); | |
232 | metrics.add_group( | |
233 | "cache", | |
234 | { | |
235 | sm::make_counter( | |
236 | "invalidated_extents", | |
1e59de90 | 237 | efforts.read.num, |
20effc67 TL |
238 | sm::description("extents of invalidated transactions"), |
239 | {src_label, read_effort_label} | |
240 | ), | |
241 | sm::make_counter( | |
242 | "invalidated_extent_bytes", | |
243 | efforts.read.bytes, | |
244 | sm::description("extent bytes of invalidated transactions"), | |
245 | {src_label, read_effort_label} | |
246 | ), | |
247 | } | |
248 | ); | |
249 | continue; | |
250 | } | |
251 | ||
252 | // non READ invalidated efforts | |
253 | for (auto& effort_name : invalidated_effort_names) { | |
1e59de90 | 254 | auto& effort = [&effort_name, &efforts]() -> io_stat_t& { |
20effc67 TL |
255 | if (effort_name == "READ") { |
256 | return efforts.read; | |
257 | } else if (effort_name == "MUTATE") { | |
258 | return efforts.mutate; | |
259 | } else if (effort_name == "RETIRE") { | |
260 | return efforts.retire; | |
261 | } else if (effort_name == "FRESH") { | |
262 | return efforts.fresh; | |
263 | } else { | |
264 | assert(effort_name == "FRESH_OOL_WRITTEN"); | |
265 | return efforts.fresh_ool_written; | |
266 | } | |
267 | }(); | |
268 | metrics.add_group( | |
269 | "cache", | |
270 | { | |
271 | sm::make_counter( | |
272 | "invalidated_extents", | |
1e59de90 | 273 | effort.num, |
20effc67 TL |
274 | sm::description("extents of invalidated transactions"), |
275 | {src_label, effort_label(effort_name)} | |
276 | ), | |
277 | sm::make_counter( | |
278 | "invalidated_extent_bytes", | |
279 | effort.bytes, | |
280 | sm::description("extent bytes of invalidated transactions"), | |
281 | {src_label, effort_label(effort_name)} | |
282 | ), | |
283 | } | |
284 | ); | |
285 | } // effort_name | |
286 | ||
287 | metrics.add_group( | |
288 | "cache", | |
289 | { | |
aee94f69 TL |
290 | sm::make_counter( |
291 | "trans_invalidated", | |
292 | efforts.total_trans_invalidated, | |
293 | sm::description("total number of transactions invalidated"), | |
294 | {src_label} | |
295 | ), | |
20effc67 TL |
296 | sm::make_counter( |
297 | "invalidated_delta_bytes", | |
298 | efforts.mutate_delta_bytes, | |
299 | sm::description("delta bytes of invalidated transactions"), | |
300 | {src_label} | |
301 | ), | |
302 | sm::make_counter( | |
303 | "invalidated_ool_records", | |
304 | efforts.num_ool_records, | |
305 | sm::description("number of ool-records from invalidated transactions"), | |
306 | {src_label} | |
307 | ), | |
308 | sm::make_counter( | |
309 | "invalidated_ool_record_bytes", | |
310 | efforts.ool_record_bytes, | |
311 | sm::description("bytes of ool-record from invalidated transactions"), | |
312 | {src_label} | |
313 | ), | |
314 | } | |
315 | ); | |
316 | } // src | |
317 | ||
318 | // committed efforts | |
319 | const string_view committed_effort_names[] = { | |
320 | "READ"sv, | |
321 | "MUTATE"sv, | |
322 | "RETIRE"sv, | |
323 | "FRESH_INVALID"sv, | |
324 | "FRESH_INLINE"sv, | |
325 | "FRESH_OOL"sv, | |
326 | }; | |
327 | for (auto& [src, src_label] : labels_by_src) { | |
328 | if (src == src_t::READ) { | |
329 | // READ transaction won't commit | |
330 | continue; | |
331 | } | |
332 | auto& efforts = get_by_src(stats.committed_efforts_by_src, src); | |
333 | metrics.add_group( | |
334 | "cache", | |
335 | { | |
336 | sm::make_counter( | |
337 | "trans_committed", | |
338 | efforts.num_trans, | |
339 | sm::description("total number of transaction committed"), | |
340 | {src_label} | |
341 | ), | |
342 | sm::make_counter( | |
343 | "committed_ool_records", | |
344 | efforts.num_ool_records, | |
345 | sm::description("number of ool-records from committed transactions"), | |
346 | {src_label} | |
347 | ), | |
20effc67 TL |
348 | sm::make_counter( |
349 | "committed_ool_record_metadata_bytes", | |
350 | efforts.ool_record_metadata_bytes, | |
351 | sm::description("bytes of ool-record metadata from committed transactions"), | |
352 | {src_label} | |
353 | ), | |
354 | sm::make_counter( | |
355 | "committed_ool_record_data_bytes", | |
356 | efforts.ool_record_data_bytes, | |
357 | sm::description("bytes of ool-record data from committed transactions"), | |
358 | {src_label} | |
359 | ), | |
360 | sm::make_counter( | |
361 | "committed_inline_record_metadata_bytes", | |
362 | efforts.inline_record_metadata_bytes, | |
363 | sm::description("bytes of inline-record metadata from committed transactions" | |
364 | "(excludes delta buffer)"), | |
365 | {src_label} | |
366 | ), | |
367 | } | |
368 | ); | |
369 | for (auto& effort_name : committed_effort_names) { | |
370 | auto& effort_by_ext = [&efforts, &effort_name]() | |
1e59de90 | 371 | -> counter_by_extent_t<io_stat_t>& { |
20effc67 TL |
372 | if (effort_name == "READ") { |
373 | return efforts.read_by_ext; | |
374 | } else if (effort_name == "MUTATE") { | |
375 | return efforts.mutate_by_ext; | |
376 | } else if (effort_name == "RETIRE") { | |
377 | return efforts.retire_by_ext; | |
378 | } else if (effort_name == "FRESH_INVALID") { | |
379 | return efforts.fresh_invalid_by_ext; | |
380 | } else if (effort_name == "FRESH_INLINE") { | |
381 | return efforts.fresh_inline_by_ext; | |
382 | } else { | |
383 | assert(effort_name == "FRESH_OOL"); | |
384 | return efforts.fresh_ool_by_ext; | |
385 | } | |
386 | }(); | |
387 | for (auto& [ext, ext_label] : labels_by_ext) { | |
388 | auto& effort = get_by_ext(effort_by_ext, ext); | |
389 | metrics.add_group( | |
390 | "cache", | |
391 | { | |
392 | sm::make_counter( | |
393 | "committed_extents", | |
1e59de90 | 394 | effort.num, |
20effc67 TL |
395 | sm::description("extents of committed transactions"), |
396 | {src_label, effort_label(effort_name), ext_label} | |
397 | ), | |
398 | sm::make_counter( | |
399 | "committed_extent_bytes", | |
400 | effort.bytes, | |
401 | sm::description("extent bytes of committed transactions"), | |
402 | {src_label, effort_label(effort_name), ext_label} | |
403 | ), | |
404 | } | |
405 | ); | |
406 | } // ext | |
407 | } // effort_name | |
408 | ||
409 | auto& delta_by_ext = efforts.delta_bytes_by_ext; | |
410 | for (auto& [ext, ext_label] : labels_by_ext) { | |
411 | auto& value = get_by_ext(delta_by_ext, ext); | |
412 | metrics.add_group( | |
413 | "cache", | |
414 | { | |
415 | sm::make_counter( | |
416 | "committed_delta_bytes", | |
417 | value, | |
418 | sm::description("delta bytes of committed transactions"), | |
419 | {src_label, ext_label} | |
420 | ), | |
421 | } | |
422 | ); | |
423 | } // ext | |
424 | } // src | |
425 | ||
426 | // successful read efforts | |
427 | metrics.add_group( | |
428 | "cache", | |
429 | { | |
430 | sm::make_counter( | |
431 | "trans_read_successful", | |
432 | stats.success_read_efforts.num_trans, | |
433 | sm::description("total number of successful read transactions") | |
434 | ), | |
435 | sm::make_counter( | |
436 | "successful_read_extents", | |
1e59de90 | 437 | stats.success_read_efforts.read.num, |
20effc67 TL |
438 | sm::description("extents of successful read transactions") |
439 | ), | |
440 | sm::make_counter( | |
441 | "successful_read_extent_bytes", | |
442 | stats.success_read_efforts.read.bytes, | |
443 | sm::description("extent bytes of successful read transactions") | |
444 | ), | |
445 | } | |
446 | ); | |
447 | } | |
448 | ||
449 | /** | |
450 | * Cached extents (including placeholders) | |
451 | * | |
452 | * Dirty extents | |
453 | */ | |
454 | metrics.add_group( | |
455 | "cache", | |
456 | { | |
457 | sm::make_counter( | |
458 | "cached_extents", | |
459 | [this] { | |
460 | return extents.size(); | |
461 | }, | |
462 | sm::description("total number of cached extents") | |
463 | ), | |
464 | sm::make_counter( | |
465 | "cached_extent_bytes", | |
466 | [this] { | |
467 | return extents.get_bytes(); | |
468 | }, | |
469 | sm::description("total bytes of cached extents") | |
470 | ), | |
471 | sm::make_counter( | |
472 | "dirty_extents", | |
473 | [this] { | |
474 | return dirty.size(); | |
475 | }, | |
476 | sm::description("total number of dirty extents") | |
477 | ), | |
478 | sm::make_counter( | |
479 | "dirty_extent_bytes", | |
480 | stats.dirty_bytes, | |
481 | sm::description("total bytes of dirty extents") | |
482 | ), | |
483 | sm::make_counter( | |
484 | "cache_lru_size_bytes", | |
485 | [this] { | |
486 | return lru.get_current_contents_bytes(); | |
487 | }, | |
488 | sm::description("total bytes pinned by the lru") | |
489 | ), | |
490 | sm::make_counter( | |
491 | "cache_lru_size_extents", | |
492 | [this] { | |
493 | return lru.get_current_contents_extents(); | |
494 | }, | |
495 | sm::description("total extents pinned by the lru") | |
496 | ), | |
497 | } | |
498 | ); | |
499 | ||
500 | /** | |
501 | * tree stats | |
502 | */ | |
503 | auto tree_label = sm::label("tree"); | |
504 | auto onode_label = tree_label("ONODE"); | |
1e59de90 | 505 | auto omap_label = tree_label("OMAP"); |
20effc67 | 506 | auto lba_label = tree_label("LBA"); |
1e59de90 TL |
507 | auto backref_label = tree_label("BACKREF"); |
508 | auto register_tree_metrics = [&labels_by_src, &onode_label, &omap_label, this]( | |
20effc67 TL |
509 | const sm::label_instance& tree_label, |
510 | uint64_t& tree_depth, | |
1e59de90 | 511 | int64_t& tree_extents_num, |
20effc67 TL |
512 | counter_by_src_t<tree_efforts_t>& committed_tree_efforts, |
513 | counter_by_src_t<tree_efforts_t>& invalidated_tree_efforts) { | |
514 | metrics.add_group( | |
515 | "cache", | |
516 | { | |
517 | sm::make_counter( | |
518 | "tree_depth", | |
519 | tree_depth, | |
520 | sm::description("the depth of tree"), | |
521 | {tree_label} | |
522 | ), | |
1e59de90 TL |
523 | sm::make_counter( |
524 | "tree_extents_num", | |
525 | tree_extents_num, | |
526 | sm::description("num of extents of the tree"), | |
527 | {tree_label} | |
528 | ) | |
20effc67 TL |
529 | } |
530 | ); | |
531 | for (auto& [src, src_label] : labels_by_src) { | |
532 | if (src == src_t::READ) { | |
533 | // READ transaction won't contain any tree inserts and erases | |
534 | continue; | |
535 | } | |
1e59de90 TL |
536 | if (is_background_transaction(src) && |
537 | (tree_label == onode_label || | |
538 | tree_label == omap_label)) { | |
539 | // CLEANER transaction won't contain any onode/omap tree operations | |
20effc67 TL |
540 | continue; |
541 | } | |
542 | auto& committed_efforts = get_by_src(committed_tree_efforts, src); | |
543 | auto& invalidated_efforts = get_by_src(invalidated_tree_efforts, src); | |
544 | metrics.add_group( | |
545 | "cache", | |
546 | { | |
547 | sm::make_counter( | |
548 | "tree_inserts_committed", | |
549 | committed_efforts.num_inserts, | |
550 | sm::description("total number of committed insert operations"), | |
551 | {tree_label, src_label} | |
552 | ), | |
553 | sm::make_counter( | |
554 | "tree_erases_committed", | |
555 | committed_efforts.num_erases, | |
556 | sm::description("total number of committed erase operations"), | |
557 | {tree_label, src_label} | |
558 | ), | |
1e59de90 TL |
559 | sm::make_counter( |
560 | "tree_updates_committed", | |
561 | committed_efforts.num_updates, | |
562 | sm::description("total number of committed update operations"), | |
563 | {tree_label, src_label} | |
564 | ), | |
20effc67 TL |
565 | sm::make_counter( |
566 | "tree_inserts_invalidated", | |
567 | invalidated_efforts.num_inserts, | |
568 | sm::description("total number of invalidated insert operations"), | |
569 | {tree_label, src_label} | |
570 | ), | |
571 | sm::make_counter( | |
572 | "tree_erases_invalidated", | |
573 | invalidated_efforts.num_erases, | |
574 | sm::description("total number of invalidated erase operations"), | |
575 | {tree_label, src_label} | |
576 | ), | |
1e59de90 TL |
577 | sm::make_counter( |
578 | "tree_updates_invalidated", | |
579 | invalidated_efforts.num_updates, | |
580 | sm::description("total number of invalidated update operations"), | |
581 | {tree_label, src_label} | |
582 | ), | |
20effc67 TL |
583 | } |
584 | ); | |
585 | } | |
586 | }; | |
587 | register_tree_metrics( | |
588 | onode_label, | |
589 | stats.onode_tree_depth, | |
1e59de90 | 590 | stats.onode_tree_extents_num, |
20effc67 TL |
591 | stats.committed_onode_tree_efforts, |
592 | stats.invalidated_onode_tree_efforts); | |
1e59de90 TL |
593 | register_tree_metrics( |
594 | omap_label, | |
595 | stats.omap_tree_depth, | |
596 | stats.omap_tree_extents_num, | |
597 | stats.committed_omap_tree_efforts, | |
598 | stats.invalidated_omap_tree_efforts); | |
20effc67 TL |
599 | register_tree_metrics( |
600 | lba_label, | |
601 | stats.lba_tree_depth, | |
1e59de90 | 602 | stats.lba_tree_extents_num, |
20effc67 TL |
603 | stats.committed_lba_tree_efforts, |
604 | stats.invalidated_lba_tree_efforts); | |
1e59de90 TL |
605 | register_tree_metrics( |
606 | backref_label, | |
607 | stats.backref_tree_depth, | |
608 | stats.backref_tree_extents_num, | |
609 | stats.committed_backref_tree_efforts, | |
610 | stats.invalidated_backref_tree_efforts); | |
20effc67 TL |
611 | |
612 | /** | |
613 | * conflict combinations | |
614 | */ | |
615 | auto srcs_label = sm::label("srcs"); | |
616 | auto num_srcs = static_cast<std::size_t>(Transaction::src_t::MAX); | |
617 | std::size_t srcs_index = 0; | |
618 | for (uint8_t src2_int = 0; src2_int < num_srcs; ++src2_int) { | |
619 | auto src2 = static_cast<Transaction::src_t>(src2_int); | |
620 | for (uint8_t src1_int = src2_int; src1_int < num_srcs; ++src1_int) { | |
621 | ++srcs_index; | |
622 | auto src1 = static_cast<Transaction::src_t>(src1_int); | |
623 | // impossible combinations | |
624 | // should be consistent with checks in account_conflict() | |
625 | if ((src1 == Transaction::src_t::READ && | |
626 | src2 == Transaction::src_t::READ) || | |
1e59de90 TL |
627 | (src1 == Transaction::src_t::TRIM_DIRTY && |
628 | src2 == Transaction::src_t::TRIM_DIRTY) || | |
629 | (src1 == Transaction::src_t::CLEANER_MAIN && | |
630 | src2 == Transaction::src_t::CLEANER_MAIN) || | |
631 | (src1 == Transaction::src_t::CLEANER_COLD && | |
632 | src2 == Transaction::src_t::CLEANER_COLD) || | |
633 | (src1 == Transaction::src_t::TRIM_ALLOC && | |
634 | src2 == Transaction::src_t::TRIM_ALLOC)) { | |
20effc67 TL |
635 | continue; |
636 | } | |
637 | std::ostringstream oss; | |
638 | oss << src1 << "," << src2; | |
639 | metrics.add_group( | |
640 | "cache", | |
641 | { | |
642 | sm::make_counter( | |
643 | "trans_srcs_invalidated", | |
644 | stats.trans_conflicts_by_srcs[srcs_index - 1], | |
645 | sm::description("total number conflicted transactions by src pair"), | |
646 | {srcs_label(oss.str())} | |
647 | ), | |
648 | } | |
649 | ); | |
650 | } | |
651 | } | |
652 | assert(srcs_index == NUM_SRC_COMB); | |
653 | srcs_index = 0; | |
654 | for (uint8_t src_int = 0; src_int < num_srcs; ++src_int) { | |
655 | ++srcs_index; | |
656 | auto src = static_cast<Transaction::src_t>(src_int); | |
657 | std::ostringstream oss; | |
658 | oss << "UNKNOWN," << src; | |
659 | metrics.add_group( | |
660 | "cache", | |
661 | { | |
662 | sm::make_counter( | |
663 | "trans_srcs_invalidated", | |
664 | stats.trans_conflicts_by_unknown[srcs_index - 1], | |
665 | sm::description("total number conflicted transactions by src pair"), | |
666 | {srcs_label(oss.str())} | |
667 | ), | |
668 | } | |
669 | ); | |
f67539c2 | 670 | } |
1e59de90 TL |
671 | |
672 | /** | |
673 | * rewrite version | |
674 | */ | |
675 | metrics.add_group( | |
676 | "cache", | |
677 | { | |
678 | sm::make_counter( | |
679 | "version_count_dirty", | |
680 | stats.committed_dirty_version.num, | |
681 | sm::description("total number of rewrite-dirty extents") | |
682 | ), | |
683 | sm::make_counter( | |
684 | "version_sum_dirty", | |
685 | stats.committed_dirty_version.version, | |
686 | sm::description("sum of the version from rewrite-dirty extents") | |
687 | ), | |
688 | sm::make_counter( | |
689 | "version_count_reclaim", | |
690 | stats.committed_reclaim_version.num, | |
691 | sm::description("total number of rewrite-reclaim extents") | |
692 | ), | |
693 | sm::make_counter( | |
694 | "version_sum_reclaim", | |
695 | stats.committed_reclaim_version.version, | |
696 | sm::description("sum of the version from rewrite-reclaim extents") | |
697 | ), | |
698 | } | |
699 | ); | |
f67539c2 TL |
700 | } |
701 | ||
1e59de90 TL |
702 | void Cache::add_extent( |
703 | CachedExtentRef ref, | |
704 | const Transaction::src_t* p_src=nullptr) | |
f67539c2 TL |
705 | { |
706 | assert(ref->is_valid()); | |
1e59de90 TL |
707 | assert(ref->user_hint == PLACEMENT_HINT_NULL); |
708 | assert(ref->rewrite_generation == NULL_GENERATION); | |
f67539c2 | 709 | extents.insert(*ref); |
f67539c2 TL |
710 | if (ref->is_dirty()) { |
711 | add_to_dirty(ref); | |
712 | } else { | |
1e59de90 | 713 | touch_extent(*ref, p_src); |
f67539c2 | 714 | } |
f67539c2 TL |
715 | } |
716 | ||
717 | void Cache::mark_dirty(CachedExtentRef ref) | |
718 | { | |
719 | if (ref->is_dirty()) { | |
720 | assert(ref->primary_ref_list_hook.is_linked()); | |
721 | return; | |
722 | } | |
723 | ||
20effc67 | 724 | lru.remove_from_lru(*ref); |
f67539c2 | 725 | ref->state = CachedExtent::extent_state_t::DIRTY; |
1e59de90 | 726 | add_to_dirty(ref); |
f67539c2 TL |
727 | } |
728 | ||
729 | void Cache::add_to_dirty(CachedExtentRef ref) | |
730 | { | |
1e59de90 | 731 | assert(ref->is_dirty()); |
f67539c2 | 732 | assert(!ref->primary_ref_list_hook.is_linked()); |
1e59de90 | 733 | ceph_assert(ref->get_modify_time() != NULL_TIME); |
f67539c2 TL |
734 | intrusive_ptr_add_ref(&*ref); |
735 | dirty.push_back(*ref); | |
20effc67 | 736 | stats.dirty_bytes += ref->get_length(); |
f67539c2 TL |
737 | } |
738 | ||
20effc67 | 739 | void Cache::remove_from_dirty(CachedExtentRef ref) |
f67539c2 | 740 | { |
f67539c2 TL |
741 | if (ref->is_dirty()) { |
742 | ceph_assert(ref->primary_ref_list_hook.is_linked()); | |
20effc67 | 743 | stats.dirty_bytes -= ref->get_length(); |
f67539c2 TL |
744 | dirty.erase(dirty.s_iterator_to(*ref)); |
745 | intrusive_ptr_release(&*ref); | |
746 | } else { | |
747 | ceph_assert(!ref->primary_ref_list_hook.is_linked()); | |
748 | } | |
749 | } | |
750 | ||
20effc67 | 751 | void Cache::remove_extent(CachedExtentRef ref) |
f67539c2 | 752 | { |
20effc67 TL |
753 | assert(ref->is_valid()); |
754 | if (ref->is_dirty()) { | |
755 | remove_from_dirty(ref); | |
1e59de90 | 756 | } else if (!ref->is_placeholder()) { |
20effc67 TL |
757 | lru.remove_from_lru(*ref); |
758 | } | |
759 | extents.erase(*ref); | |
760 | } | |
761 | ||
762 | void Cache::commit_retire_extent( | |
763 | Transaction& t, | |
764 | CachedExtentRef ref) | |
765 | { | |
1e59de90 | 766 | remove_extent(ref); |
20effc67 | 767 | |
1e59de90 | 768 | ref->dirty_from_or_retired_at = JOURNAL_SEQ_NULL; |
20effc67 | 769 | invalidate_extent(t, *ref); |
20effc67 TL |
770 | } |
771 | ||
772 | void Cache::commit_replace_extent( | |
773 | Transaction& t, | |
774 | CachedExtentRef next, | |
775 | CachedExtentRef prev) | |
776 | { | |
1e59de90 | 777 | assert(next->is_dirty()); |
f67539c2 TL |
778 | assert(next->get_paddr() == prev->get_paddr()); |
779 | assert(next->version == prev->version + 1); | |
780 | extents.replace(*next, *prev); | |
781 | ||
20effc67 | 782 | if (prev->get_type() == extent_types_t::ROOT) { |
aee94f69 | 783 | assert(prev->is_stable_clean() |
20effc67 TL |
784 | || prev->primary_ref_list_hook.is_linked()); |
785 | if (prev->is_dirty()) { | |
786 | stats.dirty_bytes -= prev->get_length(); | |
787 | dirty.erase(dirty.s_iterator_to(*prev)); | |
788 | intrusive_ptr_release(&*prev); | |
789 | } | |
790 | add_to_dirty(next); | |
791 | } else if (prev->is_dirty()) { | |
792 | assert(prev->get_dirty_from() == next->get_dirty_from()); | |
793 | assert(prev->primary_ref_list_hook.is_linked()); | |
f67539c2 TL |
794 | auto prev_it = dirty.iterator_to(*prev); |
795 | dirty.insert(prev_it, *next); | |
796 | dirty.erase(prev_it); | |
797 | intrusive_ptr_release(&*prev); | |
798 | intrusive_ptr_add_ref(&*next); | |
799 | } else { | |
20effc67 | 800 | lru.remove_from_lru(*prev); |
f67539c2 TL |
801 | add_to_dirty(next); |
802 | } | |
20effc67 | 803 | |
1e59de90 | 804 | next->on_replace_prior(t); |
20effc67 TL |
805 | invalidate_extent(t, *prev); |
806 | } | |
807 | ||
808 | void Cache::invalidate_extent( | |
809 | Transaction& t, | |
810 | CachedExtent& extent) | |
811 | { | |
1e59de90 TL |
812 | if (!extent.may_conflict()) { |
813 | assert(extent.transactions.empty()); | |
814 | extent.set_invalid(t); | |
815 | return; | |
816 | } | |
817 | ||
818 | LOG_PREFIX(Cache::invalidate_extent); | |
819 | bool do_conflict_log = true; | |
20effc67 TL |
820 | for (auto &&i: extent.transactions) { |
821 | if (!i.t->conflicted) { | |
1e59de90 TL |
822 | if (do_conflict_log) { |
823 | SUBDEBUGT(seastore_t, "conflict begin -- {}", t, extent); | |
824 | do_conflict_log = false; | |
825 | } | |
20effc67 TL |
826 | assert(!i.t->is_weak()); |
827 | account_conflict(t.get_src(), i.t->get_src()); | |
828 | mark_transaction_conflicted(*i.t, extent); | |
829 | } | |
830 | } | |
1e59de90 | 831 | extent.set_invalid(t); |
20effc67 TL |
832 | } |
833 | ||
834 | void Cache::mark_transaction_conflicted( | |
835 | Transaction& t, CachedExtent& conflicting_extent) | |
836 | { | |
837 | LOG_PREFIX(Cache::mark_transaction_conflicted); | |
1e59de90 | 838 | SUBTRACET(seastore_t, "", t); |
20effc67 | 839 | assert(!t.conflicted); |
20effc67 TL |
840 | t.conflicted = true; |
841 | ||
842 | auto& efforts = get_by_src(stats.invalidated_efforts_by_src, | |
843 | t.get_src()); | |
aee94f69 | 844 | ++efforts.total_trans_invalidated; |
20effc67 TL |
845 | |
846 | auto& counter = get_by_ext(efforts.num_trans_invalidated, | |
847 | conflicting_extent.get_type()); | |
848 | ++counter; | |
849 | ||
1e59de90 | 850 | io_stat_t read_stat; |
20effc67 | 851 | for (auto &i: t.read_set) { |
1e59de90 | 852 | read_stat.increment(i.ref->get_length()); |
20effc67 | 853 | } |
1e59de90 | 854 | efforts.read.increment_stat(read_stat); |
20effc67 TL |
855 | |
856 | if (t.get_src() != Transaction::src_t::READ) { | |
1e59de90 | 857 | io_stat_t retire_stat; |
20effc67 | 858 | for (auto &i: t.retired_set) { |
1e59de90 | 859 | retire_stat.increment(i->get_length()); |
20effc67 | 860 | } |
1e59de90 | 861 | efforts.retire.increment_stat(retire_stat); |
20effc67 | 862 | |
1e59de90 TL |
863 | auto& fresh_stat = t.get_fresh_block_stats(); |
864 | efforts.fresh.increment_stat(fresh_stat); | |
20effc67 | 865 | |
1e59de90 | 866 | io_stat_t delta_stat; |
20effc67 TL |
867 | for (auto &i: t.mutated_block_list) { |
868 | if (!i->is_valid()) { | |
869 | continue; | |
870 | } | |
20effc67 | 871 | efforts.mutate.increment(i->get_length()); |
1e59de90 TL |
872 | delta_stat.increment(i->get_delta().length()); |
873 | } | |
874 | efforts.mutate_delta_bytes += delta_stat.bytes; | |
875 | ||
876 | for (auto &i: t.pre_alloc_list) { | |
877 | epm.mark_space_free(i->get_paddr(), i->get_length()); | |
20effc67 TL |
878 | } |
879 | ||
880 | auto& ool_stats = t.get_ool_write_stats(); | |
1e59de90 | 881 | efforts.fresh_ool_written.increment_stat(ool_stats.extents); |
20effc67 | 882 | efforts.num_ool_records += ool_stats.num_records; |
1e59de90 TL |
883 | auto ool_record_bytes = (ool_stats.md_bytes + ool_stats.get_data_bytes()); |
884 | efforts.ool_record_bytes += ool_record_bytes; | |
20effc67 | 885 | |
1e59de90 TL |
886 | if (is_background_transaction(t.get_src())) { |
887 | // CLEANER transaction won't contain any onode/omap tree operations | |
20effc67 | 888 | assert(t.onode_tree_stats.is_clear()); |
1e59de90 | 889 | assert(t.omap_tree_stats.is_clear()); |
20effc67 TL |
890 | } else { |
891 | get_by_src(stats.invalidated_onode_tree_efforts, t.get_src() | |
892 | ).increment(t.onode_tree_stats); | |
1e59de90 TL |
893 | get_by_src(stats.invalidated_omap_tree_efforts, t.get_src() |
894 | ).increment(t.omap_tree_stats); | |
20effc67 TL |
895 | } |
896 | ||
897 | get_by_src(stats.invalidated_lba_tree_efforts, t.get_src() | |
898 | ).increment(t.lba_tree_stats); | |
1e59de90 TL |
899 | get_by_src(stats.invalidated_backref_tree_efforts, t.get_src() |
900 | ).increment(t.backref_tree_stats); | |
901 | ||
902 | SUBDEBUGT(seastore_t, | |
903 | "discard {} read, {} fresh, {} delta, {} retire, {}({}B) ool-records", | |
904 | t, | |
905 | read_stat, | |
906 | fresh_stat, | |
907 | delta_stat, | |
908 | retire_stat, | |
909 | ool_stats.num_records, | |
910 | ool_record_bytes); | |
20effc67 TL |
911 | } else { |
912 | // read transaction won't have non-read efforts | |
913 | assert(t.retired_set.empty()); | |
914 | assert(t.get_fresh_block_stats().is_clear()); | |
915 | assert(t.mutated_block_list.empty()); | |
916 | assert(t.get_ool_write_stats().is_clear()); | |
917 | assert(t.onode_tree_stats.is_clear()); | |
1e59de90 | 918 | assert(t.omap_tree_stats.is_clear()); |
20effc67 | 919 | assert(t.lba_tree_stats.is_clear()); |
1e59de90 TL |
920 | assert(t.backref_tree_stats.is_clear()); |
921 | SUBDEBUGT(seastore_t, "discard {} read", t, read_stat); | |
20effc67 TL |
922 | } |
923 | } | |
924 | ||
925 | void Cache::on_transaction_destruct(Transaction& t) | |
926 | { | |
927 | LOG_PREFIX(Cache::on_transaction_destruct); | |
1e59de90 | 928 | SUBTRACET(seastore_t, "", t); |
20effc67 | 929 | if (t.get_src() == Transaction::src_t::READ && |
1e59de90 TL |
930 | t.conflicted == false) { |
931 | io_stat_t read_stat; | |
20effc67 | 932 | for (auto &i: t.read_set) { |
1e59de90 | 933 | read_stat.increment(i.ref->get_length()); |
20effc67 | 934 | } |
1e59de90 TL |
935 | SUBDEBUGT(seastore_t, "done {} read", t, read_stat); |
936 | ||
937 | if (!t.is_weak()) { | |
938 | // exclude weak transaction as it is impossible to conflict | |
939 | ++stats.success_read_efforts.num_trans; | |
940 | stats.success_read_efforts.read.increment_stat(read_stat); | |
941 | } | |
942 | ||
20effc67 TL |
943 | // read transaction won't have non-read efforts |
944 | assert(t.retired_set.empty()); | |
1e59de90 | 945 | assert(t.get_fresh_block_stats().is_clear()); |
20effc67 TL |
946 | assert(t.mutated_block_list.empty()); |
947 | assert(t.onode_tree_stats.is_clear()); | |
1e59de90 | 948 | assert(t.omap_tree_stats.is_clear()); |
20effc67 | 949 | assert(t.lba_tree_stats.is_clear()); |
1e59de90 | 950 | assert(t.backref_tree_stats.is_clear()); |
20effc67 | 951 | } |
f67539c2 TL |
952 | } |
953 | ||
954 | CachedExtentRef Cache::alloc_new_extent_by_type( | |
1e59de90 TL |
955 | Transaction &t, ///< [in, out] current transaction |
956 | extent_types_t type, ///< [in] type tag | |
957 | extent_len_t length, ///< [in] length | |
958 | placement_hint_t hint, ///< [in] user hint | |
959 | rewrite_gen_t gen ///< [in] rewrite generation | |
f67539c2 TL |
960 | ) |
961 | { | |
1e59de90 TL |
962 | LOG_PREFIX(Cache::alloc_new_extent_by_type); |
963 | SUBDEBUGT(seastore_cache, "allocate {} {}B, hint={}, gen={}", | |
964 | t, type, length, hint, rewrite_gen_printer_t{gen}); | |
f67539c2 TL |
965 | switch (type) { |
966 | case extent_types_t::ROOT: | |
1e59de90 | 967 | ceph_assert(0 == "ROOT is never directly alloc'd"); |
f67539c2 TL |
968 | return CachedExtentRef(); |
969 | case extent_types_t::LADDR_INTERNAL: | |
1e59de90 | 970 | return alloc_new_extent<lba_manager::btree::LBAInternalNode>(t, length, hint, gen); |
f67539c2 | 971 | case extent_types_t::LADDR_LEAF: |
1e59de90 TL |
972 | return alloc_new_extent<lba_manager::btree::LBALeafNode>( |
973 | t, length, hint, gen); | |
20effc67 | 974 | case extent_types_t::ONODE_BLOCK_STAGED: |
1e59de90 | 975 | return alloc_new_extent<onode::SeastoreNodeExtent>(t, length, hint, gen); |
20effc67 | 976 | case extent_types_t::OMAP_INNER: |
1e59de90 | 977 | return alloc_new_extent<omap_manager::OMapInnerNode>(t, length, hint, gen); |
20effc67 | 978 | case extent_types_t::OMAP_LEAF: |
1e59de90 | 979 | return alloc_new_extent<omap_manager::OMapLeafNode>(t, length, hint, gen); |
20effc67 | 980 | case extent_types_t::COLL_BLOCK: |
1e59de90 | 981 | return alloc_new_extent<collection_manager::CollectionNode>(t, length, hint, gen); |
20effc67 | 982 | case extent_types_t::OBJECT_DATA_BLOCK: |
1e59de90 | 983 | return alloc_new_extent<ObjectDataBlock>(t, length, hint, gen); |
20effc67 TL |
984 | case extent_types_t::RETIRED_PLACEHOLDER: |
985 | ceph_assert(0 == "impossible"); | |
986 | return CachedExtentRef(); | |
f67539c2 | 987 | case extent_types_t::TEST_BLOCK: |
1e59de90 | 988 | return alloc_new_extent<TestBlock>(t, length, hint, gen); |
f67539c2 | 989 | case extent_types_t::TEST_BLOCK_PHYSICAL: |
1e59de90 | 990 | return alloc_new_extent<TestBlockPhysical>(t, length, hint, gen); |
f67539c2 TL |
991 | case extent_types_t::NONE: { |
992 | ceph_assert(0 == "NONE is an invalid extent type"); | |
993 | return CachedExtentRef(); | |
994 | } | |
995 | default: | |
996 | ceph_assert(0 == "impossible"); | |
997 | return CachedExtentRef(); | |
998 | } | |
999 | } | |
1000 | ||
1001 | CachedExtentRef Cache::duplicate_for_write( | |
1002 | Transaction &t, | |
1003 | CachedExtentRef i) { | |
20effc67 | 1004 | LOG_PREFIX(Cache::duplicate_for_write); |
aee94f69 TL |
1005 | assert(i->is_fully_loaded()); |
1006 | ||
1e59de90 TL |
1007 | if (i->is_mutable()) |
1008 | return i; | |
1009 | ||
1010 | if (i->is_exist_clean()) { | |
1011 | i->version++; | |
1012 | i->state = CachedExtent::extent_state_t::EXIST_MUTATION_PENDING; | |
1013 | i->last_committed_crc = i->get_crc32c(); | |
aee94f69 TL |
1014 | // deepcopy the buffer of exist clean extent beacuse it shares |
1015 | // buffer with original clean extent. | |
1016 | auto bp = i->get_bptr(); | |
1017 | auto nbp = ceph::bufferptr(bp.c_str(), bp.length()); | |
1018 | i->set_bptr(std::move(nbp)); | |
1019 | ||
1e59de90 TL |
1020 | t.add_mutated_extent(i); |
1021 | DEBUGT("duplicate existing extent {}", t, *i); | |
f67539c2 | 1022 | return i; |
1e59de90 | 1023 | } |
f67539c2 | 1024 | |
1e59de90 TL |
1025 | auto ret = i->duplicate_for_write(t); |
1026 | ret->pending_for_transaction = t.get_trans_id(); | |
20effc67 | 1027 | ret->prior_instance = i; |
1e59de90 TL |
1028 | // duplicate_for_write won't occur after ool write finished |
1029 | assert(!i->prior_poffset); | |
1030 | auto [iter, inserted] = i->mutation_pendings.insert(*ret); | |
1031 | ceph_assert(inserted); | |
20effc67 | 1032 | t.add_mutated_extent(ret); |
f67539c2 | 1033 | if (ret->get_type() == extent_types_t::ROOT) { |
f67539c2 TL |
1034 | t.root = ret->cast<RootBlock>(); |
1035 | } else { | |
1036 | ret->last_committed_crc = i->last_committed_crc; | |
f67539c2 TL |
1037 | } |
1038 | ||
1039 | ret->version++; | |
1040 | ret->state = CachedExtent::extent_state_t::MUTATION_PENDING; | |
20effc67 | 1041 | DEBUGT("{} -> {}", t, *i, *ret); |
f67539c2 TL |
1042 | return ret; |
1043 | } | |
1044 | ||
1e59de90 TL |
1045 | record_t Cache::prepare_record( |
1046 | Transaction &t, | |
1047 | const journal_seq_t &journal_head, | |
1048 | const journal_seq_t &journal_dirty_tail) | |
f67539c2 | 1049 | { |
20effc67 | 1050 | LOG_PREFIX(Cache::prepare_record); |
1e59de90 | 1051 | SUBTRACET(seastore_t, "enter", t); |
20effc67 TL |
1052 | |
1053 | auto trans_src = t.get_src(); | |
1054 | assert(!t.is_weak()); | |
1055 | assert(trans_src != Transaction::src_t::READ); | |
1056 | ||
1057 | auto& efforts = get_by_src(stats.committed_efforts_by_src, | |
1058 | trans_src); | |
1059 | ||
1060 | // Should be valid due to interruptible future | |
1e59de90 | 1061 | io_stat_t read_stat; |
f67539c2 | 1062 | for (auto &i: t.read_set) { |
20effc67 | 1063 | if (!i.ref->is_valid()) { |
1e59de90 TL |
1064 | SUBERRORT(seastore_t, |
1065 | "read_set got invalid extent, aborting -- {}", t, *i.ref); | |
20effc67 TL |
1066 | ceph_abort("no invalid extent allowed in transactions' read_set"); |
1067 | } | |
1068 | get_by_ext(efforts.read_by_ext, | |
1069 | i.ref->get_type()).increment(i.ref->get_length()); | |
1e59de90 | 1070 | read_stat.increment(i.ref->get_length()); |
f67539c2 | 1071 | } |
20effc67 | 1072 | t.read_set.clear(); |
f67539c2 TL |
1073 | t.write_set.clear(); |
1074 | ||
1e59de90 TL |
1075 | record_t record(trans_src); |
1076 | auto commit_time = seastar::lowres_system_clock::now(); | |
1077 | ||
f67539c2 TL |
1078 | // Add new copy of mutated blocks, set_io_wait to block until written |
1079 | record.deltas.reserve(t.mutated_block_list.size()); | |
1e59de90 | 1080 | io_stat_t delta_stat; |
f67539c2 TL |
1081 | for (auto &i: t.mutated_block_list) { |
1082 | if (!i->is_valid()) { | |
1e59de90 | 1083 | DEBUGT("invalid mutated extent -- {}", t, *i); |
f67539c2 TL |
1084 | continue; |
1085 | } | |
1e59de90 TL |
1086 | assert(i->is_exist_mutation_pending() || |
1087 | i->prior_instance); | |
20effc67 TL |
1088 | get_by_ext(efforts.mutate_by_ext, |
1089 | i->get_type()).increment(i->get_length()); | |
f67539c2 | 1090 | |
1e59de90 TL |
1091 | auto delta_bl = i->get_delta(); |
1092 | auto delta_length = delta_bl.length(); | |
1093 | i->set_modify_time(commit_time); | |
1094 | DEBUGT("mutated extent with {}B delta -- {}", | |
1095 | t, delta_length, *i); | |
1096 | if (!i->is_exist_mutation_pending()) { | |
1097 | DEBUGT("commit replace extent ... -- {}, prior={}", | |
1098 | t, *i, *i->prior_instance); | |
1099 | // extent with EXIST_MUTATION_PENDING doesn't have | |
1100 | // prior_instance field so skip these extents. | |
1101 | // the existing extents should be added into Cache | |
1102 | // during complete_commit to sync with gc transaction. | |
1103 | commit_replace_extent(t, i, i->prior_instance); | |
1104 | } | |
f67539c2 TL |
1105 | |
1106 | i->prepare_write(); | |
1107 | i->set_io_wait(); | |
aee94f69 | 1108 | i->prepare_commit(); |
f67539c2 TL |
1109 | |
1110 | assert(i->get_version() > 0); | |
1111 | auto final_crc = i->get_crc32c(); | |
20effc67 | 1112 | if (i->get_type() == extent_types_t::ROOT) { |
1e59de90 TL |
1113 | SUBTRACET(seastore_t, "writing out root delta {}B -- {}", |
1114 | t, delta_length, *i); | |
1115 | assert(t.root == i); | |
20effc67 | 1116 | root = t.root; |
20effc67 TL |
1117 | record.push_back( |
1118 | delta_info_t{ | |
1119 | extent_types_t::ROOT, | |
1e59de90 | 1120 | P_ADDR_NULL, |
20effc67 TL |
1121 | L_ADDR_NULL, |
1122 | 0, | |
1123 | 0, | |
1124 | 0, | |
1125 | t.root->get_version() - 1, | |
1e59de90 TL |
1126 | MAX_SEG_SEQ, |
1127 | segment_type_t::NULL_SEG, | |
1128 | std::move(delta_bl) | |
20effc67 TL |
1129 | }); |
1130 | } else { | |
1e59de90 TL |
1131 | auto sseq = NULL_SEG_SEQ; |
1132 | auto stype = segment_type_t::NULL_SEG; | |
1133 | ||
1134 | // FIXME: This is specific to the segmented implementation | |
1135 | if (i->get_paddr().get_addr_type() == paddr_types_t::SEGMENT) { | |
1136 | auto sid = i->get_paddr().as_seg_paddr().get_segment_id(); | |
1137 | auto sinfo = get_segment_info(sid); | |
1138 | if (sinfo) { | |
1139 | sseq = sinfo->seq; | |
1140 | stype = sinfo->type; | |
1141 | } | |
1142 | } | |
1143 | ||
20effc67 TL |
1144 | record.push_back( |
1145 | delta_info_t{ | |
1146 | i->get_type(), | |
1147 | i->get_paddr(), | |
1148 | (i->is_logical() | |
1149 | ? i->cast<LogicalCachedExtent>()->get_laddr() | |
1150 | : L_ADDR_NULL), | |
1151 | i->last_committed_crc, | |
1152 | final_crc, | |
1e59de90 | 1153 | i->get_length(), |
20effc67 | 1154 | i->get_version() - 1, |
1e59de90 TL |
1155 | sseq, |
1156 | stype, | |
1157 | std::move(delta_bl) | |
20effc67 TL |
1158 | }); |
1159 | i->last_committed_crc = final_crc; | |
1160 | } | |
20effc67 TL |
1161 | assert(delta_length); |
1162 | get_by_ext(efforts.delta_bytes_by_ext, | |
1163 | i->get_type()) += delta_length; | |
1e59de90 | 1164 | delta_stat.increment(delta_length); |
f67539c2 TL |
1165 | } |
1166 | ||
1167 | // Transaction is now a go, set up in-memory cache state | |
1168 | // invalidate now invalid blocks | |
1e59de90 TL |
1169 | io_stat_t retire_stat; |
1170 | std::vector<alloc_delta_t> alloc_deltas; | |
1171 | alloc_delta_t rel_delta; | |
1172 | rel_delta.op = alloc_delta_t::op_types_t::CLEAR; | |
f67539c2 | 1173 | for (auto &i: t.retired_set) { |
20effc67 TL |
1174 | get_by_ext(efforts.retire_by_ext, |
1175 | i->get_type()).increment(i->get_length()); | |
1e59de90 TL |
1176 | retire_stat.increment(i->get_length()); |
1177 | DEBUGT("retired and remove extent -- {}", t, *i); | |
20effc67 | 1178 | commit_retire_extent(t, i); |
1e59de90 TL |
1179 | if (is_backref_mapped_extent_node(i) |
1180 | || is_retired_placeholder(i->get_type())) { | |
1181 | rel_delta.alloc_blk_ranges.emplace_back( | |
1182 | i->get_paddr(), | |
1183 | L_ADDR_NULL, | |
1184 | i->get_length(), | |
1185 | i->get_type()); | |
20effc67 | 1186 | } |
f67539c2 | 1187 | } |
1e59de90 | 1188 | alloc_deltas.emplace_back(std::move(rel_delta)); |
f67539c2 | 1189 | |
20effc67 | 1190 | record.extents.reserve(t.inline_block_list.size()); |
1e59de90 TL |
1191 | io_stat_t fresh_stat; |
1192 | io_stat_t fresh_invalid_stat; | |
1193 | alloc_delta_t alloc_delta; | |
1194 | alloc_delta.op = alloc_delta_t::op_types_t::SET; | |
20effc67 TL |
1195 | for (auto &i: t.inline_block_list) { |
1196 | if (!i->is_valid()) { | |
1e59de90 TL |
1197 | DEBUGT("invalid fresh inline extent -- {}", t, *i); |
1198 | fresh_invalid_stat.increment(i->get_length()); | |
20effc67 TL |
1199 | get_by_ext(efforts.fresh_invalid_by_ext, |
1200 | i->get_type()).increment(i->get_length()); | |
1201 | } else { | |
1e59de90 | 1202 | TRACET("fresh inline extent -- {}", t, *i); |
20effc67 | 1203 | } |
1e59de90 | 1204 | fresh_stat.increment(i->get_length()); |
20effc67 TL |
1205 | get_by_ext(efforts.fresh_inline_by_ext, |
1206 | i->get_type()).increment(i->get_length()); | |
1e59de90 | 1207 | assert(i->is_inline() || i->get_paddr().is_fake()); |
20effc67 | 1208 | |
f67539c2 TL |
1209 | bufferlist bl; |
1210 | i->prepare_write(); | |
aee94f69 | 1211 | i->prepare_commit(); |
f67539c2 TL |
1212 | bl.append(i->get_bptr()); |
1213 | if (i->get_type() == extent_types_t::ROOT) { | |
1e59de90 | 1214 | ceph_assert(0 == "ROOT never gets written as a fresh block"); |
f67539c2 TL |
1215 | } |
1216 | ||
1217 | assert(bl.length() == i->get_length()); | |
1e59de90 TL |
1218 | auto modify_time = i->get_modify_time(); |
1219 | if (modify_time == NULL_TIME) { | |
1220 | modify_time = commit_time; | |
1221 | } | |
20effc67 | 1222 | record.push_back(extent_t{ |
f67539c2 TL |
1223 | i->get_type(), |
1224 | i->is_logical() | |
1225 | ? i->cast<LogicalCachedExtent>()->get_laddr() | |
20effc67 TL |
1226 | : (is_lba_node(i->get_type()) |
1227 | ? i->cast<lba_manager::btree::LBANode>()->get_node_meta().begin | |
1228 | : L_ADDR_NULL), | |
f67539c2 | 1229 | std::move(bl) |
1e59de90 TL |
1230 | }, |
1231 | modify_time); | |
1232 | if (i->is_valid() | |
1233 | && is_backref_mapped_extent_node(i)) { | |
1234 | alloc_delta.alloc_blk_ranges.emplace_back( | |
1235 | i->get_paddr(), | |
1236 | i->is_logical() | |
1237 | ? i->cast<LogicalCachedExtent>()->get_laddr() | |
1238 | : (is_lba_node(i->get_type()) | |
1239 | ? i->cast<lba_manager::btree::LBANode>()->get_node_meta().begin | |
1240 | : L_ADDR_NULL), | |
1241 | i->get_length(), | |
1242 | i->get_type()); | |
1243 | } | |
f67539c2 TL |
1244 | } |
1245 | ||
1e59de90 TL |
1246 | for (auto &i: t.written_ool_block_list) { |
1247 | TRACET("fresh ool extent -- {}", t, *i); | |
1248 | ceph_assert(i->is_valid()); | |
1249 | assert(!i->is_inline()); | |
1250 | get_by_ext(efforts.fresh_ool_by_ext, | |
1251 | i->get_type()).increment(i->get_length()); | |
aee94f69 | 1252 | i->prepare_commit(); |
1e59de90 TL |
1253 | if (is_backref_mapped_extent_node(i)) { |
1254 | alloc_delta.alloc_blk_ranges.emplace_back( | |
1255 | i->get_paddr(), | |
1256 | i->is_logical() | |
1257 | ? i->cast<LogicalCachedExtent>()->get_laddr() | |
1258 | : i->cast<lba_manager::btree::LBANode>()->get_node_meta().begin, | |
1259 | i->get_length(), | |
1260 | i->get_type()); | |
1261 | } | |
1262 | } | |
1263 | ||
1264 | for (auto &i: t.existing_block_list) { | |
1265 | if (i->is_valid()) { | |
1266 | alloc_delta.alloc_blk_ranges.emplace_back( | |
1267 | i->get_paddr(), | |
1268 | i->cast<LogicalCachedExtent>()->get_laddr(), | |
1269 | i->get_length(), | |
1270 | i->get_type()); | |
1271 | } | |
1272 | } | |
1273 | alloc_deltas.emplace_back(std::move(alloc_delta)); | |
1274 | ||
1275 | for (auto b : alloc_deltas) { | |
20effc67 TL |
1276 | bufferlist bl; |
1277 | encode(b, bl); | |
1278 | delta_info_t delta; | |
1e59de90 | 1279 | delta.type = extent_types_t::ALLOC_INFO; |
20effc67 TL |
1280 | delta.bl = bl; |
1281 | record.push_back(std::move(delta)); | |
1282 | } | |
1283 | ||
1e59de90 TL |
1284 | if (is_background_transaction(trans_src)) { |
1285 | assert(journal_head != JOURNAL_SEQ_NULL); | |
1286 | assert(journal_dirty_tail != JOURNAL_SEQ_NULL); | |
1287 | journal_seq_t dirty_tail; | |
1288 | auto maybe_dirty_tail = get_oldest_dirty_from(); | |
1289 | if (!maybe_dirty_tail.has_value()) { | |
1290 | dirty_tail = journal_head; | |
1291 | SUBINFOT(seastore_t, "dirty_tail all trimmed, set to head {}, src={}", | |
1292 | t, dirty_tail, trans_src); | |
1293 | } else if (*maybe_dirty_tail == JOURNAL_SEQ_NULL) { | |
1294 | dirty_tail = journal_dirty_tail; | |
1295 | SUBINFOT(seastore_t, "dirty_tail is pending, set to {}, src={}", | |
1296 | t, dirty_tail, trans_src); | |
1297 | } else { | |
1298 | dirty_tail = *maybe_dirty_tail; | |
1299 | } | |
1300 | ceph_assert(dirty_tail != JOURNAL_SEQ_NULL); | |
1301 | journal_seq_t alloc_tail; | |
1302 | auto maybe_alloc_tail = get_oldest_backref_dirty_from(); | |
1303 | if (!maybe_alloc_tail.has_value()) { | |
1304 | // FIXME: the replay point of the allocations requires to be accurate. | |
1305 | // Setting the alloc_tail to get_journal_head() cannot skip replaying the | |
1306 | // last unnecessary record. | |
1307 | alloc_tail = journal_head; | |
1308 | SUBINFOT(seastore_t, "alloc_tail all trimmed, set to head {}, src={}", | |
1309 | t, alloc_tail, trans_src); | |
1310 | } else if (*maybe_alloc_tail == JOURNAL_SEQ_NULL) { | |
1311 | ceph_abort("impossible"); | |
1312 | } else { | |
1313 | alloc_tail = *maybe_alloc_tail; | |
1314 | } | |
1315 | ceph_assert(alloc_tail != JOURNAL_SEQ_NULL); | |
1316 | auto tails = journal_tail_delta_t{alloc_tail, dirty_tail}; | |
1317 | SUBDEBUGT(seastore_t, "update tails as delta {}", t, tails); | |
1318 | bufferlist bl; | |
1319 | encode(tails, bl); | |
1320 | delta_info_t delta; | |
1321 | delta.type = extent_types_t::JOURNAL_TAIL; | |
1322 | delta.bl = bl; | |
1323 | record.push_back(std::move(delta)); | |
20effc67 TL |
1324 | } |
1325 | ||
1326 | ceph_assert(t.get_fresh_block_stats().num == | |
1327 | t.inline_block_list.size() + | |
1e59de90 TL |
1328 | t.written_ool_block_list.size() + |
1329 | t.num_delayed_invalid_extents + | |
1330 | t.num_allocated_invalid_extents); | |
20effc67 TL |
1331 | |
1332 | auto& ool_stats = t.get_ool_write_stats(); | |
1e59de90 | 1333 | ceph_assert(ool_stats.extents.num == t.written_ool_block_list.size()); |
20effc67 TL |
1334 | |
1335 | if (record.is_empty()) { | |
1e59de90 TL |
1336 | SUBINFOT(seastore_t, |
1337 | "record to submit is empty, src={}", t, trans_src); | |
20effc67 | 1338 | assert(t.onode_tree_stats.is_clear()); |
1e59de90 | 1339 | assert(t.omap_tree_stats.is_clear()); |
20effc67 | 1340 | assert(t.lba_tree_stats.is_clear()); |
1e59de90 | 1341 | assert(t.backref_tree_stats.is_clear()); |
20effc67 TL |
1342 | assert(ool_stats.is_clear()); |
1343 | } | |
1344 | ||
1e59de90 TL |
1345 | if (record.modify_time == NULL_TIME) { |
1346 | record.modify_time = commit_time; | |
1347 | } | |
1348 | ||
1349 | SUBDEBUGT(seastore_t, | |
1350 | "commit H{} dirty_from={}, alloc_from={}, " | |
1351 | "{} read, {} fresh with {} invalid, " | |
1352 | "{} delta, {} retire, {}(md={}B, data={}B) ool-records, " | |
1353 | "{}B md, {}B data, modify_time={}", | |
1354 | t, (void*)&t.get_handle(), | |
1355 | get_oldest_dirty_from().value_or(JOURNAL_SEQ_NULL), | |
1356 | get_oldest_backref_dirty_from().value_or(JOURNAL_SEQ_NULL), | |
1357 | read_stat, | |
1358 | fresh_stat, | |
1359 | fresh_invalid_stat, | |
1360 | delta_stat, | |
1361 | retire_stat, | |
1362 | ool_stats.num_records, | |
1363 | ool_stats.md_bytes, | |
1364 | ool_stats.get_data_bytes(), | |
1365 | record.size.get_raw_mdlength(), | |
1366 | record.size.dlength, | |
1367 | sea_time_point_printer_t{record.modify_time}); | |
1368 | if (is_background_transaction(trans_src)) { | |
1369 | // background transaction won't contain any onode tree operations | |
20effc67 | 1370 | assert(t.onode_tree_stats.is_clear()); |
1e59de90 | 1371 | assert(t.omap_tree_stats.is_clear()); |
20effc67 TL |
1372 | } else { |
1373 | if (t.onode_tree_stats.depth) { | |
1374 | stats.onode_tree_depth = t.onode_tree_stats.depth; | |
1375 | } | |
1e59de90 TL |
1376 | if (t.omap_tree_stats.depth) { |
1377 | stats.omap_tree_depth = t.omap_tree_stats.depth; | |
1378 | } | |
1379 | stats.onode_tree_extents_num += t.onode_tree_stats.extents_num_delta; | |
1380 | ceph_assert(stats.onode_tree_extents_num >= 0); | |
20effc67 TL |
1381 | get_by_src(stats.committed_onode_tree_efforts, trans_src |
1382 | ).increment(t.onode_tree_stats); | |
1e59de90 TL |
1383 | stats.omap_tree_extents_num += t.omap_tree_stats.extents_num_delta; |
1384 | ceph_assert(stats.omap_tree_extents_num >= 0); | |
1385 | get_by_src(stats.committed_omap_tree_efforts, trans_src | |
1386 | ).increment(t.omap_tree_stats); | |
20effc67 TL |
1387 | } |
1388 | ||
1389 | if (t.lba_tree_stats.depth) { | |
1390 | stats.lba_tree_depth = t.lba_tree_stats.depth; | |
1391 | } | |
1e59de90 TL |
1392 | stats.lba_tree_extents_num += t.lba_tree_stats.extents_num_delta; |
1393 | ceph_assert(stats.lba_tree_extents_num >= 0); | |
20effc67 TL |
1394 | get_by_src(stats.committed_lba_tree_efforts, trans_src |
1395 | ).increment(t.lba_tree_stats); | |
1e59de90 TL |
1396 | if (t.backref_tree_stats.depth) { |
1397 | stats.backref_tree_depth = t.backref_tree_stats.depth; | |
1398 | } | |
1399 | stats.backref_tree_extents_num += t.backref_tree_stats.extents_num_delta; | |
1400 | ceph_assert(stats.backref_tree_extents_num >= 0); | |
1401 | get_by_src(stats.committed_backref_tree_efforts, trans_src | |
1402 | ).increment(t.backref_tree_stats); | |
20effc67 TL |
1403 | |
1404 | ++(efforts.num_trans); | |
1405 | efforts.num_ool_records += ool_stats.num_records; | |
1e59de90 TL |
1406 | efforts.ool_record_metadata_bytes += ool_stats.md_bytes; |
1407 | efforts.ool_record_data_bytes += ool_stats.get_data_bytes(); | |
20effc67 TL |
1408 | efforts.inline_record_metadata_bytes += |
1409 | (record.size.get_raw_mdlength() - record.get_delta_size()); | |
1410 | ||
1e59de90 TL |
1411 | auto &rewrite_version_stats = t.get_rewrite_version_stats(); |
1412 | if (trans_src == Transaction::src_t::TRIM_DIRTY) { | |
1413 | stats.committed_dirty_version.increment_stat(rewrite_version_stats); | |
1414 | } else if (trans_src == Transaction::src_t::CLEANER_MAIN || | |
1415 | trans_src == Transaction::src_t::CLEANER_COLD) { | |
1416 | stats.committed_reclaim_version.increment_stat(rewrite_version_stats); | |
1417 | } else { | |
1418 | assert(rewrite_version_stats.is_clear()); | |
1419 | } | |
1420 | ||
20effc67 | 1421 | return record; |
f67539c2 TL |
1422 | } |
1423 | ||
1e59de90 TL |
1424 | void Cache::backref_batch_update( |
1425 | std::vector<backref_entry_ref> &&list, | |
1426 | const journal_seq_t &seq) | |
1427 | { | |
1428 | LOG_PREFIX(Cache::backref_batch_update); | |
1429 | DEBUG("inserting {} entries at {}", list.size(), seq); | |
1430 | ceph_assert(seq != JOURNAL_SEQ_NULL); | |
1431 | ||
1432 | for (auto &ent : list) { | |
1433 | backref_entry_mset.insert(*ent); | |
1434 | } | |
1435 | ||
1436 | auto iter = backref_entryrefs_by_seq.find(seq); | |
1437 | if (iter == backref_entryrefs_by_seq.end()) { | |
1438 | backref_entryrefs_by_seq.emplace(seq, std::move(list)); | |
1439 | } else { | |
1440 | iter->second.insert( | |
1441 | iter->second.end(), | |
1442 | std::make_move_iterator(list.begin()), | |
1443 | std::make_move_iterator(list.end())); | |
1444 | } | |
1445 | } | |
1446 | ||
f67539c2 TL |
1447 | void Cache::complete_commit( |
1448 | Transaction &t, | |
1449 | paddr_t final_block_start, | |
1e59de90 | 1450 | journal_seq_t start_seq) |
f67539c2 | 1451 | { |
20effc67 | 1452 | LOG_PREFIX(Cache::complete_commit); |
1e59de90 TL |
1453 | SUBTRACET(seastore_t, "final_block_start={}, start_seq={}", |
1454 | t, final_block_start, start_seq); | |
1455 | ||
1456 | std::vector<backref_entry_ref> backref_list; | |
1457 | t.for_each_fresh_block([&](const CachedExtentRef &i) { | |
1458 | if (!i->is_valid()) { | |
1459 | return; | |
1460 | } | |
f67539c2 | 1461 | |
1e59de90 | 1462 | bool is_inline = false; |
20effc67 | 1463 | if (i->is_inline()) { |
1e59de90 | 1464 | is_inline = true; |
20effc67 TL |
1465 | i->set_paddr(final_block_start.add_relative(i->get_paddr())); |
1466 | } | |
f67539c2 | 1467 | i->last_committed_crc = i->get_crc32c(); |
1e59de90 | 1468 | i->pending_for_transaction = TRANS_ID_NULL; |
f67539c2 TL |
1469 | i->on_initial_write(); |
1470 | ||
1e59de90 TL |
1471 | i->state = CachedExtent::extent_state_t::CLEAN; |
1472 | DEBUGT("add extent as fresh, inline={} -- {}", | |
1473 | t, is_inline, *i); | |
1474 | const auto t_src = t.get_src(); | |
1475 | i->invalidate_hints(); | |
1476 | add_extent(i, &t_src); | |
1477 | epm.commit_space_used(i->get_paddr(), i->get_length()); | |
1478 | if (is_backref_mapped_extent_node(i)) { | |
1479 | DEBUGT("backref_list new {} len {}", | |
1480 | t, | |
1481 | i->get_paddr(), | |
1482 | i->get_length()); | |
1483 | backref_list.emplace_back( | |
1484 | std::make_unique<backref_entry_t>( | |
20effc67 | 1485 | i->get_paddr(), |
1e59de90 TL |
1486 | i->is_logical() |
1487 | ? i->cast<LogicalCachedExtent>()->get_laddr() | |
1488 | : (is_lba_node(i->get_type()) | |
1489 | ? i->cast<lba_manager::btree::LBANode>()->get_node_meta().begin | |
1490 | : L_ADDR_NULL), | |
1491 | i->get_length(), | |
1492 | i->get_type(), | |
1493 | start_seq)); | |
1494 | } else if (is_backref_node(i->get_type())) { | |
1495 | add_backref_extent( | |
1496 | i->get_paddr(), | |
1497 | i->cast<backref::BackrefNode>()->get_node_meta().begin, | |
1498 | i->get_type()); | |
1499 | } else { | |
1500 | ERRORT("{}", t, *i); | |
1501 | ceph_abort("not possible"); | |
f67539c2 | 1502 | } |
20effc67 | 1503 | }); |
f67539c2 TL |
1504 | |
1505 | // Add new copy of mutated blocks, set_io_wait to block until written | |
1506 | for (auto &i: t.mutated_block_list) { | |
f67539c2 | 1507 | if (!i->is_valid()) { |
f67539c2 TL |
1508 | continue; |
1509 | } | |
1e59de90 TL |
1510 | assert(i->is_exist_mutation_pending() || |
1511 | i->prior_instance); | |
20effc67 | 1512 | i->on_delta_write(final_block_start); |
1e59de90 | 1513 | i->pending_for_transaction = TRANS_ID_NULL; |
20effc67 | 1514 | i->prior_instance = CachedExtentRef(); |
f67539c2 | 1515 | i->state = CachedExtent::extent_state_t::DIRTY; |
1e59de90 | 1516 | assert(i->version > 0); |
20effc67 | 1517 | if (i->version == 1 || i->get_type() == extent_types_t::ROOT) { |
1e59de90 TL |
1518 | i->dirty_from_or_retired_at = start_seq; |
1519 | DEBUGT("commit extent done, become dirty -- {}", t, *i); | |
1520 | } else { | |
1521 | DEBUGT("commit extent done -- {}", t, *i); | |
f67539c2 TL |
1522 | } |
1523 | } | |
1524 | ||
1e59de90 TL |
1525 | for (auto &i: t.retired_set) { |
1526 | epm.mark_space_free(i->get_paddr(), i->get_length()); | |
1527 | } | |
1528 | for (auto &i: t.existing_block_list) { | |
1529 | if (i->is_valid()) { | |
1530 | epm.mark_space_used(i->get_paddr(), i->get_length()); | |
f67539c2 TL |
1531 | } |
1532 | } | |
1533 | ||
1534 | for (auto &i: t.mutated_block_list) { | |
20effc67 TL |
1535 | if (!i->is_valid()) { |
1536 | continue; | |
1537 | } | |
f67539c2 TL |
1538 | i->complete_io(); |
1539 | } | |
20effc67 | 1540 | |
1e59de90 | 1541 | last_commit = start_seq; |
20effc67 | 1542 | for (auto &i: t.retired_set) { |
1e59de90 TL |
1543 | i->dirty_from_or_retired_at = start_seq; |
1544 | if (is_backref_mapped_extent_node(i) | |
1545 | || is_retired_placeholder(i->get_type())) { | |
1546 | DEBUGT("backref_list free {} len {}", | |
1547 | t, | |
1548 | i->get_paddr(), | |
1549 | i->get_length()); | |
1550 | backref_list.emplace_back( | |
1551 | std::make_unique<backref_entry_t>( | |
1552 | i->get_paddr(), | |
1553 | L_ADDR_NULL, | |
1554 | i->get_length(), | |
1555 | i->get_type(), | |
1556 | start_seq)); | |
1557 | } else if (is_backref_node(i->get_type())) { | |
1558 | remove_backref_extent(i->get_paddr()); | |
1559 | } else { | |
1560 | ERRORT("{}", t, *i); | |
1561 | ceph_abort("not possible"); | |
1562 | } | |
1563 | } | |
1564 | ||
1565 | auto existing_stats = t.get_existing_block_stats(); | |
1566 | DEBUGT("total existing blocks num: {}, exist clean num: {}, " | |
1567 | "exist mutation pending num: {}", | |
1568 | t, | |
1569 | existing_stats.valid_num, | |
1570 | existing_stats.clean_num, | |
1571 | existing_stats.mutated_num); | |
1572 | for (auto &i: t.existing_block_list) { | |
1573 | if (i->is_valid()) { | |
1574 | if (i->is_exist_clean()) { | |
1575 | i->state = CachedExtent::extent_state_t::CLEAN; | |
1576 | } else { | |
1577 | assert(i->state == CachedExtent::extent_state_t::DIRTY); | |
1578 | } | |
1579 | DEBUGT("backref_list new existing {} len {}", | |
1580 | t, | |
1581 | i->get_paddr(), | |
1582 | i->get_length()); | |
1583 | backref_list.emplace_back( | |
1584 | std::make_unique<backref_entry_t>( | |
1585 | i->get_paddr(), | |
1586 | i->cast<LogicalCachedExtent>()->get_laddr(), | |
1587 | i->get_length(), | |
1588 | i->get_type(), | |
1589 | start_seq)); | |
1590 | const auto t_src = t.get_src(); | |
1591 | add_extent(i, &t_src); | |
1592 | } | |
1593 | } | |
1594 | if (!backref_list.empty()) { | |
1595 | backref_batch_update(std::move(backref_list), start_seq); | |
1596 | } | |
1597 | ||
1598 | for (auto &i: t.pre_alloc_list) { | |
1599 | if (!i->is_valid()) { | |
1600 | epm.mark_space_free(i->get_paddr(), i->get_length()); | |
1601 | } | |
20effc67 | 1602 | } |
f67539c2 TL |
1603 | } |
1604 | ||
1e59de90 TL |
1605 | void Cache::init() |
1606 | { | |
1607 | LOG_PREFIX(Cache::init); | |
f67539c2 TL |
1608 | if (root) { |
1609 | // initial creation will do mkfs followed by mount each of which calls init | |
1e59de90 | 1610 | DEBUG("remove extent -- prv_root={}", *root); |
f67539c2 TL |
1611 | remove_extent(root); |
1612 | root = nullptr; | |
1613 | } | |
1614 | root = new RootBlock(); | |
1e59de90 TL |
1615 | root->init(CachedExtent::extent_state_t::CLEAN, |
1616 | P_ADDR_ROOT, | |
1617 | PLACEMENT_HINT_NULL, | |
1618 | NULL_GENERATION, | |
1619 | TRANS_ID_NULL); | |
1620 | INFO("init root -- {}", *root); | |
20effc67 | 1621 | extents.insert(*root); |
f67539c2 TL |
1622 | } |
1623 | ||
20effc67 | 1624 | Cache::mkfs_iertr::future<> Cache::mkfs(Transaction &t) |
f67539c2 | 1625 | { |
1e59de90 TL |
1626 | LOG_PREFIX(Cache::mkfs); |
1627 | INFOT("create root", t); | |
20effc67 | 1628 | return get_root(t).si_then([this, &t](auto croot) { |
f67539c2 | 1629 | duplicate_for_write(t, croot); |
20effc67 TL |
1630 | return mkfs_iertr::now(); |
1631 | }).handle_error_interruptible( | |
1632 | mkfs_iertr::pass_further{}, | |
1633 | crimson::ct_error::assert_all{ | |
1634 | "Invalid error in Cache::mkfs" | |
1635 | } | |
1636 | ); | |
f67539c2 TL |
1637 | } |
1638 | ||
1639 | Cache::close_ertr::future<> Cache::close() | |
1640 | { | |
1e59de90 TL |
1641 | LOG_PREFIX(Cache::close); |
1642 | INFO("close with {}({}B) dirty, dirty_from={}, alloc_from={}, " | |
1643 | "{}({}B) lru, totally {}({}B) indexed extents", | |
1644 | dirty.size(), | |
1645 | stats.dirty_bytes, | |
1646 | get_oldest_dirty_from().value_or(JOURNAL_SEQ_NULL), | |
1647 | get_oldest_backref_dirty_from().value_or(JOURNAL_SEQ_NULL), | |
1648 | lru.get_current_contents_extents(), | |
1649 | lru.get_current_contents_bytes(), | |
1650 | extents.size(), | |
1651 | extents.get_bytes()); | |
f67539c2 TL |
1652 | root.reset(); |
1653 | for (auto i = dirty.begin(); i != dirty.end(); ) { | |
1654 | auto ptr = &*i; | |
20effc67 | 1655 | stats.dirty_bytes -= ptr->get_length(); |
f67539c2 TL |
1656 | dirty.erase(i++); |
1657 | intrusive_ptr_release(ptr); | |
1658 | } | |
1e59de90 TL |
1659 | backref_extents.clear(); |
1660 | backref_entryrefs_by_seq.clear(); | |
20effc67 | 1661 | assert(stats.dirty_bytes == 0); |
1e59de90 | 1662 | lru.clear(); |
f67539c2 TL |
1663 | return close_ertr::now(); |
1664 | } | |
1665 | ||
1666 | Cache::replay_delta_ret | |
1667 | Cache::replay_delta( | |
1668 | journal_seq_t journal_seq, | |
1669 | paddr_t record_base, | |
1e59de90 TL |
1670 | const delta_info_t &delta, |
1671 | const journal_seq_t &dirty_tail, | |
1672 | const journal_seq_t &alloc_tail, | |
1673 | sea_time_point modify_time) | |
f67539c2 | 1674 | { |
20effc67 | 1675 | LOG_PREFIX(Cache::replay_delta); |
1e59de90 TL |
1676 | assert(dirty_tail != JOURNAL_SEQ_NULL); |
1677 | assert(alloc_tail != JOURNAL_SEQ_NULL); | |
1678 | ceph_assert(modify_time != NULL_TIME); | |
1679 | ||
1680 | // FIXME: This is specific to the segmented implementation | |
1681 | /* The journal may validly contain deltas for extents in | |
1682 | * since released segments. We can detect those cases by | |
1683 | * checking whether the segment in question currently has a | |
1684 | * sequence number > the current journal segment seq. We can | |
1685 | * safetly skip these deltas because the extent must already | |
1686 | * have been rewritten. | |
1687 | */ | |
1688 | if (delta.paddr != P_ADDR_NULL && | |
1689 | delta.paddr.get_addr_type() == paddr_types_t::SEGMENT) { | |
1690 | auto& seg_addr = delta.paddr.as_seg_paddr(); | |
1691 | auto seg_info = get_segment_info(seg_addr.get_segment_id()); | |
1692 | if (seg_info) { | |
1693 | auto delta_paddr_segment_seq = seg_info->seq; | |
1694 | auto delta_paddr_segment_type = seg_info->type; | |
1695 | if (delta_paddr_segment_seq != delta.ext_seq || | |
1696 | delta_paddr_segment_type != delta.seg_type) { | |
1697 | DEBUG("delta is obsolete, delta_paddr_segment_seq={}," | |
1698 | " delta_paddr_segment_type={} -- {}", | |
1699 | segment_seq_printer_t{delta_paddr_segment_seq}, | |
1700 | delta_paddr_segment_type, | |
1701 | delta); | |
1702 | return replay_delta_ertr::make_ready_future<bool>(false); | |
1703 | } | |
1704 | } | |
1705 | } | |
1706 | ||
1707 | if (delta.type == extent_types_t::JOURNAL_TAIL) { | |
1708 | // this delta should have been dealt with during segment cleaner mounting | |
1709 | return replay_delta_ertr::make_ready_future<bool>(false); | |
1710 | } | |
1711 | ||
1712 | // replay alloc | |
1713 | if (delta.type == extent_types_t::ALLOC_INFO) { | |
1714 | if (journal_seq < alloc_tail) { | |
1715 | DEBUG("journal_seq {} < alloc_tail {}, don't replay {}", | |
1716 | journal_seq, alloc_tail, delta); | |
1717 | return replay_delta_ertr::make_ready_future<bool>(false); | |
1718 | } | |
1719 | ||
1720 | alloc_delta_t alloc_delta; | |
1721 | decode(alloc_delta, delta.bl); | |
1722 | std::vector<backref_entry_ref> backref_list; | |
1723 | for (auto &alloc_blk : alloc_delta.alloc_blk_ranges) { | |
1724 | if (alloc_blk.paddr.is_relative()) { | |
1725 | assert(alloc_blk.paddr.is_record_relative()); | |
1726 | alloc_blk.paddr = record_base.add_relative(alloc_blk.paddr); | |
1727 | } | |
1728 | DEBUG("replay alloc_blk {}~{} {}, journal_seq: {}", | |
1729 | alloc_blk.paddr, alloc_blk.len, alloc_blk.laddr, journal_seq); | |
1730 | backref_list.emplace_back( | |
1731 | std::make_unique<backref_entry_t>( | |
1732 | alloc_blk.paddr, | |
1733 | alloc_blk.laddr, | |
1734 | alloc_blk.len, | |
1735 | alloc_blk.type, | |
1736 | journal_seq)); | |
1737 | } | |
1738 | if (!backref_list.empty()) { | |
1739 | backref_batch_update(std::move(backref_list), journal_seq); | |
1740 | } | |
1741 | return replay_delta_ertr::make_ready_future<bool>(true); | |
1742 | } | |
1743 | ||
1744 | // replay dirty | |
1745 | if (journal_seq < dirty_tail) { | |
1746 | DEBUG("journal_seq {} < dirty_tail {}, don't replay {}", | |
1747 | journal_seq, dirty_tail, delta); | |
1748 | return replay_delta_ertr::make_ready_future<bool>(false); | |
1749 | } | |
1750 | ||
f67539c2 | 1751 | if (delta.type == extent_types_t::ROOT) { |
1e59de90 TL |
1752 | TRACE("replay root delta at {} {}, remove extent ... -- {}, prv_root={}", |
1753 | journal_seq, record_base, delta, *root); | |
20effc67 | 1754 | remove_extent(root); |
f67539c2 | 1755 | root->apply_delta_and_adjust_crc(record_base, delta.bl); |
20effc67 TL |
1756 | root->dirty_from_or_retired_at = journal_seq; |
1757 | root->state = CachedExtent::extent_state_t::DIRTY; | |
1e59de90 TL |
1758 | DEBUG("replayed root delta at {} {}, add extent -- {}, root={}", |
1759 | journal_seq, record_base, delta, *root); | |
1760 | root->set_modify_time(modify_time); | |
20effc67 | 1761 | add_extent(root); |
1e59de90 | 1762 | return replay_delta_ertr::make_ready_future<bool>(true); |
f67539c2 | 1763 | } else { |
20effc67 TL |
1764 | auto _get_extent_if_cached = [this](paddr_t addr) |
1765 | -> get_extent_ertr::future<CachedExtentRef> { | |
1766 | // replay is not included by the cache hit metrics | |
1767 | auto ret = query_cache(addr, nullptr); | |
1768 | if (ret) { | |
1769 | // no retired-placeholder should be exist yet because no transaction | |
1770 | // has been created. | |
1771 | assert(ret->get_type() != extent_types_t::RETIRED_PLACEHOLDER); | |
1772 | return ret->wait_io().then([ret] { | |
1773 | return ret; | |
1774 | }); | |
f67539c2 | 1775 | } else { |
20effc67 | 1776 | return seastar::make_ready_future<CachedExtentRef>(); |
f67539c2 TL |
1777 | } |
1778 | }; | |
20effc67 TL |
1779 | auto extent_fut = (delta.pversion == 0 ? |
1780 | // replay is not included by the cache hit metrics | |
1781 | _get_extent_by_type( | |
1782 | delta.type, | |
1783 | delta.paddr, | |
1784 | delta.laddr, | |
1785 | delta.length, | |
1786 | nullptr, | |
1e59de90 | 1787 | [](CachedExtent &) {}, |
20effc67 TL |
1788 | [](CachedExtent &) {}) : |
1789 | _get_extent_if_cached( | |
1790 | delta.paddr) | |
1791 | ).handle_error( | |
1792 | replay_delta_ertr::pass_further{}, | |
1793 | crimson::ct_error::assert_all{ | |
1794 | "Invalid error in Cache::replay_delta" | |
1795 | } | |
1796 | ); | |
1e59de90 | 1797 | return extent_fut.safe_then([=, this, &delta](auto extent) { |
f67539c2 | 1798 | if (!extent) { |
1e59de90 TL |
1799 | DEBUG("replay extent is not present, so delta is obsolete at {} {} -- {}", |
1800 | journal_seq, record_base, delta); | |
f67539c2 | 1801 | assert(delta.pversion > 0); |
1e59de90 | 1802 | return replay_delta_ertr::make_ready_future<bool>(true); |
f67539c2 TL |
1803 | } |
1804 | ||
1e59de90 TL |
1805 | DEBUG("replay extent delta at {} {} ... -- {}, prv_extent={}", |
1806 | journal_seq, record_base, delta, *extent); | |
f67539c2 TL |
1807 | |
1808 | assert(extent->last_committed_crc == delta.prev_crc); | |
1e59de90 | 1809 | assert(extent->version == delta.pversion); |
f67539c2 | 1810 | extent->apply_delta_and_adjust_crc(record_base, delta.bl); |
1e59de90 | 1811 | extent->set_modify_time(modify_time); |
f67539c2 TL |
1812 | assert(extent->last_committed_crc == delta.final_crc); |
1813 | ||
1e59de90 TL |
1814 | extent->version++; |
1815 | if (extent->version == 1) { | |
20effc67 | 1816 | extent->dirty_from_or_retired_at = journal_seq; |
1e59de90 TL |
1817 | DEBUG("replayed extent delta at {} {}, become dirty -- {}, extent={}" , |
1818 | journal_seq, record_base, delta, *extent); | |
1819 | } else { | |
1820 | DEBUG("replayed extent delta at {} {} -- {}, extent={}" , | |
1821 | journal_seq, record_base, delta, *extent); | |
f67539c2 | 1822 | } |
f67539c2 | 1823 | mark_dirty(extent); |
1e59de90 | 1824 | return replay_delta_ertr::make_ready_future<bool>(true); |
f67539c2 TL |
1825 | }); |
1826 | } | |
1827 | } | |
1828 | ||
1829 | Cache::get_next_dirty_extents_ret Cache::get_next_dirty_extents( | |
20effc67 TL |
1830 | Transaction &t, |
1831 | journal_seq_t seq, | |
1832 | size_t max_bytes) | |
f67539c2 | 1833 | { |
20effc67 | 1834 | LOG_PREFIX(Cache::get_next_dirty_extents); |
1e59de90 TL |
1835 | if (dirty.empty()) { |
1836 | DEBUGT("max_bytes={}B, seq={}, dirty is empty", | |
1837 | t, max_bytes, seq); | |
1838 | } else { | |
1839 | DEBUGT("max_bytes={}B, seq={}, dirty_from={}", | |
1840 | t, max_bytes, seq, dirty.begin()->get_dirty_from()); | |
1841 | } | |
20effc67 TL |
1842 | std::vector<CachedExtentRef> cand; |
1843 | size_t bytes_so_far = 0; | |
1844 | for (auto i = dirty.begin(); | |
1845 | i != dirty.end() && bytes_so_far < max_bytes; | |
1846 | ++i) { | |
1e59de90 | 1847 | auto dirty_from = i->get_dirty_from(); |
aee94f69 TL |
1848 | //dirty extents must be fully loaded |
1849 | assert(i->is_fully_loaded()); | |
1e59de90 TL |
1850 | if (unlikely(dirty_from == JOURNAL_SEQ_NULL)) { |
1851 | ERRORT("got dirty extent with JOURNAL_SEQ_NULL -- {}", t, *i); | |
1852 | ceph_abort(); | |
1853 | } | |
1854 | if (dirty_from < seq) { | |
1855 | TRACET("next extent -- {}", t, *i); | |
1856 | if (!cand.empty() && cand.back()->get_dirty_from() > dirty_from) { | |
1857 | ERRORT("dirty extents are not ordered by dirty_from -- last={}, next={}", | |
1858 | t, *cand.back(), *i); | |
1859 | ceph_abort(); | |
20effc67 | 1860 | } |
20effc67 TL |
1861 | bytes_so_far += i->get_length(); |
1862 | cand.push_back(&*i); | |
f67539c2 TL |
1863 | } else { |
1864 | break; | |
1865 | } | |
1866 | } | |
1867 | return seastar::do_with( | |
20effc67 TL |
1868 | std::move(cand), |
1869 | decltype(cand)(), | |
1870 | [FNAME, this, &t](auto &cand, auto &ret) { | |
1871 | return trans_intr::do_for_each( | |
1872 | cand, | |
1873 | [FNAME, this, &t, &ret](auto &ext) { | |
1e59de90 | 1874 | TRACET("waiting on extent -- {}", t, *ext); |
20effc67 TL |
1875 | return trans_intr::make_interruptible( |
1876 | ext->wait_io() | |
1877 | ).then_interruptible([FNAME, this, ext, &t, &ret] { | |
1878 | if (!ext->is_valid()) { | |
1879 | ++(get_by_src(stats.trans_conflicts_by_unknown, t.get_src())); | |
1880 | mark_transaction_conflicted(t, *ext); | |
1881 | return; | |
1882 | } | |
1883 | ||
1884 | CachedExtentRef on_transaction; | |
1885 | auto result = t.get_extent(ext->get_paddr(), &on_transaction); | |
1886 | if (result == Transaction::get_extent_ret::ABSENT) { | |
1e59de90 | 1887 | DEBUGT("extent is absent on t -- {}", t, *ext); |
20effc67 TL |
1888 | t.add_to_read_set(ext); |
1889 | if (ext->get_type() == extent_types_t::ROOT) { | |
1890 | if (t.root) { | |
1891 | assert(&*t.root == &*ext); | |
1e59de90 | 1892 | ceph_assert(0 == "t.root would have to already be in the read set"); |
20effc67 TL |
1893 | } else { |
1894 | assert(&*ext == &*root); | |
1895 | t.root = root; | |
1896 | } | |
1897 | } | |
1898 | ret.push_back(ext); | |
1899 | } else if (result == Transaction::get_extent_ret::PRESENT) { | |
1e59de90 | 1900 | DEBUGT("extent is present on t -- {}, on t {}", t, *ext, *on_transaction); |
20effc67 TL |
1901 | ret.push_back(on_transaction); |
1902 | } else { | |
1903 | assert(result == Transaction::get_extent_ret::RETIRED); | |
1e59de90 | 1904 | DEBUGT("extent is retired on t -- {}", t, *ext); |
20effc67 TL |
1905 | } |
1906 | }); | |
1907 | }).then_interruptible([&ret] { | |
1908 | return std::move(ret); | |
f67539c2 TL |
1909 | }); |
1910 | }); | |
1911 | } | |
1912 | ||
1913 | Cache::get_root_ret Cache::get_root(Transaction &t) | |
1914 | { | |
20effc67 | 1915 | LOG_PREFIX(Cache::get_root); |
f67539c2 | 1916 | if (t.root) { |
1e59de90 TL |
1917 | TRACET("root already on t -- {}", t, *t.root); |
1918 | return t.root->wait_io().then([&t] { | |
1919 | return get_root_iertr::make_ready_future<RootBlockRef>( | |
1920 | t.root); | |
1921 | }); | |
f67539c2 | 1922 | } else { |
1e59de90 TL |
1923 | DEBUGT("root not on t -- {}", t, *root); |
1924 | t.root = root; | |
1925 | t.add_to_read_set(root); | |
1926 | return root->wait_io().then([root=root] { | |
1927 | return get_root_iertr::make_ready_future<RootBlockRef>( | |
1928 | root); | |
f67539c2 TL |
1929 | }); |
1930 | } | |
1931 | } | |
1932 | ||
20effc67 | 1933 | Cache::get_extent_ertr::future<CachedExtentRef> Cache::_get_extent_by_type( |
f67539c2 TL |
1934 | extent_types_t type, |
1935 | paddr_t offset, | |
1936 | laddr_t laddr, | |
1e59de90 | 1937 | extent_len_t length, |
20effc67 | 1938 | const Transaction::src_t* p_src, |
1e59de90 TL |
1939 | extent_init_func_t &&extent_init_func, |
1940 | extent_init_func_t &&on_cache) | |
f67539c2 | 1941 | { |
1e59de90 | 1942 | return [=, this, extent_init_func=std::move(extent_init_func)]() mutable { |
20effc67 TL |
1943 | src_ext_t* p_metric_key = nullptr; |
1944 | src_ext_t metric_key; | |
1945 | if (p_src) { | |
1946 | metric_key = std::make_pair(*p_src, type); | |
1947 | p_metric_key = &metric_key; | |
1948 | } | |
1949 | ||
f67539c2 TL |
1950 | switch (type) { |
1951 | case extent_types_t::ROOT: | |
1e59de90 | 1952 | ceph_assert(0 == "ROOT is never directly read"); |
f67539c2 | 1953 | return get_extent_ertr::make_ready_future<CachedExtentRef>(); |
1e59de90 TL |
1954 | case extent_types_t::BACKREF_INTERNAL: |
1955 | return get_extent<backref::BackrefInternalNode>( | |
1956 | offset, length, p_metric_key, std::move(extent_init_func), std::move(on_cache) | |
1957 | ).safe_then([](auto extent) { | |
1958 | return CachedExtentRef(extent.detach(), false /* add_ref */); | |
1959 | }); | |
1960 | case extent_types_t::BACKREF_LEAF: | |
1961 | return get_extent<backref::BackrefLeafNode>( | |
1962 | offset, length, p_metric_key, std::move(extent_init_func), std::move(on_cache) | |
1963 | ).safe_then([](auto extent) { | |
1964 | return CachedExtentRef(extent.detach(), false /* add_ref */); | |
1965 | }); | |
f67539c2 | 1966 | case extent_types_t::LADDR_INTERNAL: |
20effc67 | 1967 | return get_extent<lba_manager::btree::LBAInternalNode>( |
1e59de90 | 1968 | offset, length, p_metric_key, std::move(extent_init_func), std::move(on_cache) |
f67539c2 TL |
1969 | ).safe_then([](auto extent) { |
1970 | return CachedExtentRef(extent.detach(), false /* add_ref */); | |
1971 | }); | |
1972 | case extent_types_t::LADDR_LEAF: | |
20effc67 | 1973 | return get_extent<lba_manager::btree::LBALeafNode>( |
1e59de90 | 1974 | offset, length, p_metric_key, std::move(extent_init_func), std::move(on_cache) |
f67539c2 TL |
1975 | ).safe_then([](auto extent) { |
1976 | return CachedExtentRef(extent.detach(), false /* add_ref */); | |
1977 | }); | |
20effc67 TL |
1978 | case extent_types_t::OMAP_INNER: |
1979 | return get_extent<omap_manager::OMapInnerNode>( | |
1e59de90 | 1980 | offset, length, p_metric_key, std::move(extent_init_func), std::move(on_cache) |
f67539c2 TL |
1981 | ).safe_then([](auto extent) { |
1982 | return CachedExtentRef(extent.detach(), false /* add_ref */); | |
1983 | }); | |
20effc67 TL |
1984 | case extent_types_t::OMAP_LEAF: |
1985 | return get_extent<omap_manager::OMapLeafNode>( | |
1e59de90 | 1986 | offset, length, p_metric_key, std::move(extent_init_func), std::move(on_cache) |
f67539c2 TL |
1987 | ).safe_then([](auto extent) { |
1988 | return CachedExtentRef(extent.detach(), false /* add_ref */); | |
1989 | }); | |
20effc67 TL |
1990 | case extent_types_t::COLL_BLOCK: |
1991 | return get_extent<collection_manager::CollectionNode>( | |
1e59de90 | 1992 | offset, length, p_metric_key, std::move(extent_init_func), std::move(on_cache) |
f67539c2 | 1993 | ).safe_then([](auto extent) { |
20effc67 | 1994 | return CachedExtentRef(extent.detach(), false /* add_ref */); |
f67539c2 TL |
1995 | }); |
1996 | case extent_types_t::ONODE_BLOCK_STAGED: | |
20effc67 | 1997 | return get_extent<onode::SeastoreNodeExtent>( |
1e59de90 | 1998 | offset, length, p_metric_key, std::move(extent_init_func), std::move(on_cache) |
20effc67 TL |
1999 | ).safe_then([](auto extent) { |
2000 | return CachedExtentRef(extent.detach(), false /* add_ref */); | |
2001 | }); | |
2002 | case extent_types_t::OBJECT_DATA_BLOCK: | |
2003 | return get_extent<ObjectDataBlock>( | |
1e59de90 | 2004 | offset, length, p_metric_key, std::move(extent_init_func), std::move(on_cache) |
f67539c2 TL |
2005 | ).safe_then([](auto extent) { |
2006 | return CachedExtentRef(extent.detach(), false /* add_ref */); | |
2007 | }); | |
20effc67 TL |
2008 | case extent_types_t::RETIRED_PLACEHOLDER: |
2009 | ceph_assert(0 == "impossible"); | |
2010 | return get_extent_ertr::make_ready_future<CachedExtentRef>(); | |
f67539c2 | 2011 | case extent_types_t::TEST_BLOCK: |
20effc67 | 2012 | return get_extent<TestBlock>( |
1e59de90 | 2013 | offset, length, p_metric_key, std::move(extent_init_func), std::move(on_cache) |
f67539c2 TL |
2014 | ).safe_then([](auto extent) { |
2015 | return CachedExtentRef(extent.detach(), false /* add_ref */); | |
2016 | }); | |
2017 | case extent_types_t::TEST_BLOCK_PHYSICAL: | |
20effc67 | 2018 | return get_extent<TestBlockPhysical>( |
1e59de90 | 2019 | offset, length, p_metric_key, std::move(extent_init_func), std::move(on_cache) |
f67539c2 TL |
2020 | ).safe_then([](auto extent) { |
2021 | return CachedExtentRef(extent.detach(), false /* add_ref */); | |
2022 | }); | |
2023 | case extent_types_t::NONE: { | |
2024 | ceph_assert(0 == "NONE is an invalid extent type"); | |
2025 | return get_extent_ertr::make_ready_future<CachedExtentRef>(); | |
2026 | } | |
2027 | default: | |
2028 | ceph_assert(0 == "impossible"); | |
2029 | return get_extent_ertr::make_ready_future<CachedExtentRef>(); | |
2030 | } | |
2031 | }().safe_then([laddr](CachedExtentRef e) { | |
2032 | assert(e->is_logical() == (laddr != L_ADDR_NULL)); | |
2033 | if (e->is_logical()) { | |
2034 | e->cast<LogicalCachedExtent>()->set_laddr(laddr); | |
2035 | } | |
2036 | return get_extent_ertr::make_ready_future<CachedExtentRef>(e); | |
2037 | }); | |
2038 | } | |
2039 | ||
2040 | } |