]> git.proxmox.com Git - ceph.git/blame - ceph/src/crimson/os/seastore/transaction_manager.h
import quincy beta 17.1.0
[ceph.git] / ceph / src / crimson / os / seastore / transaction_manager.h
CommitLineData
f67539c2
TL
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3
4#pragma once
5
6#include <iostream>
7#include <optional>
8#include <vector>
9#include <utility>
10#include <functional>
11
12#include <boost/intrusive_ptr.hpp>
20effc67 13#include <boost/iterator/counting_iterator.hpp>
f67539c2
TL
14#include <boost/smart_ptr/intrusive_ref_counter.hpp>
15
16#include <seastar/core/future.hh>
17
18#include "include/ceph_assert.h"
19#include "include/buffer.h"
20
21#include "crimson/osd/exceptions.h"
22
20effc67 23#include "crimson/os/seastore/logging.h"
f67539c2
TL
24#include "crimson/os/seastore/segment_cleaner.h"
25#include "crimson/os/seastore/seastore_types.h"
26#include "crimson/os/seastore/cache.h"
27#include "crimson/os/seastore/segment_manager.h"
28#include "crimson/os/seastore/lba_manager.h"
29#include "crimson/os/seastore/journal.h"
20effc67 30#include "crimson/os/seastore/extent_placement_manager.h"
f67539c2
TL
31
32namespace crimson::os::seastore {
33class Journal;
34
20effc67
TL
35template <typename F>
36auto repeat_eagain(F &&f) {
37 LOG_PREFIX("repeat_eagain");
38 return seastar::do_with(
39 std::forward<F>(f),
40 [FNAME](auto &f) {
41 return crimson::repeat(
42 [FNAME, &f] {
43 return std::invoke(f
44 ).safe_then([] {
45 return seastar::stop_iteration::yes;
46 }).handle_error(
47 [FNAME](const crimson::ct_error::eagain &e) {
48 SUBDEBUG(seastore_tm, "hit eagain, restarting");
49 return seastar::stop_iteration::no;
50 },
51 crimson::ct_error::pass_further_all{}
52 );
53 });
54 });
55}
56
f67539c2
TL
57/**
58 * TransactionManager
59 *
60 * Abstraction hiding reading and writing to persistence.
61 * Exposes transaction based interface with read isolation.
62 */
63class TransactionManager : public SegmentCleaner::ExtentCallbackInterface {
64public:
20effc67
TL
65 using base_ertr = Cache::base_ertr;
66 using base_iertr = Cache::base_iertr;
67
f67539c2
TL
68 TransactionManager(
69 SegmentManager &segment_manager,
20effc67
TL
70 SegmentCleanerRef segment_cleaner,
71 JournalRef journal,
72 CacheRef cache,
73 LBAManagerRef lba_manager,
74 ExtentPlacementManagerRef&& epm,
75 ExtentReader& scanner);
f67539c2
TL
76
77 /// Writes initial metadata to disk
20effc67 78 using mkfs_ertr = base_ertr;
f67539c2
TL
79 mkfs_ertr::future<> mkfs();
80
81 /// Reads initial metadata from disk
20effc67 82 using mount_ertr = base_ertr;
f67539c2
TL
83 mount_ertr::future<> mount();
84
85 /// Closes transaction_manager
20effc67 86 using close_ertr = base_ertr;
f67539c2
TL
87 close_ertr::future<> close();
88
89 /// Creates empty transaction
20effc67
TL
90 TransactionRef create_transaction(
91 Transaction::src_t src,
92 const char* name) final {
93 return cache->create_transaction(src, name, false);
94 }
95
96 /// Creates empty weak transaction
97 TransactionRef create_weak_transaction(
98 Transaction::src_t src,
99 const char* name) {
100 return cache->create_transaction(src, name, true);
101 }
102
103 /// Resets transaction
104 void reset_transaction_preserve_handle(Transaction &t) {
105 return cache->reset_transaction_preserve_handle(t);
106 }
107
108 /**
109 * get_pin
110 *
111 * Get the logical pin at offset
112 */
113 using get_pin_iertr = LBAManager::get_mapping_iertr;
114 using get_pin_ret = LBAManager::get_mapping_iertr::future<LBAPinRef>;
115 get_pin_ret get_pin(
116 Transaction &t,
117 laddr_t offset) {
118 return lba_manager->get_mapping(t, offset);
119 }
120
121 /**
122 * get_pins
123 *
124 * Get logical pins overlapping offset~length
125 */
126 using get_pins_iertr = LBAManager::get_mappings_iertr;
127 using get_pins_ret = get_pins_iertr::future<lba_pin_list_t>;
128 get_pins_ret get_pins(
129 Transaction &t,
130 laddr_t offset,
131 extent_len_t length) {
132 return lba_manager->get_mappings(
133 t, offset, length);
f67539c2
TL
134 }
135
20effc67
TL
136 /**
137 * pin_to_extent
138 *
139 * Get extent mapped at pin.
140 */
141 using pin_to_extent_iertr = get_pin_iertr::extend_ertr<
142 SegmentManager::read_ertr>;
143 template <typename T>
144 using pin_to_extent_ret = pin_to_extent_iertr::future<
145 TCachedExtentRef<T>>;
146 template <typename T>
147 pin_to_extent_ret<T> pin_to_extent(
148 Transaction &t,
149 LBAPinRef pin) {
150 LOG_PREFIX(TransactionManager::pin_to_extent);
151 using ret = pin_to_extent_ret<T>;
152 SUBDEBUGT(seastore_tm, "getting extent {}", t, *pin);
153 auto &pref = *pin;
154 return cache->get_extent<T>(
155 t,
156 pref.get_paddr(),
157 pref.get_length(),
158 [this, pin=std::move(pin)](T &extent) mutable {
159 assert(!extent.has_pin());
160 assert(!extent.has_been_invalidated());
161 assert(!pin->has_been_invalidated());
162 extent.set_pin(std::move(pin));
163 lba_manager->add_pin(extent.get_pin());
164 }
165 ).si_then([FNAME, &t](auto ref) mutable -> ret {
166 SUBDEBUGT(seastore_tm, "got extent {}", t, *ref);
167 return pin_to_extent_ret<T>(
168 interruptible::ready_future_marker{},
169 std::move(ref));
170 });
f67539c2
TL
171 }
172
173 /**
20effc67
TL
174 * read_extent
175 *
176 * Read extent of type T at offset~length
f67539c2 177 */
20effc67
TL
178 using read_extent_iertr = get_pin_iertr::extend_ertr<
179 SegmentManager::read_ertr>;
f67539c2 180 template <typename T>
20effc67
TL
181 using read_extent_ret = read_extent_iertr::future<
182 TCachedExtentRef<T>>;
f67539c2 183 template <typename T>
20effc67 184 read_extent_ret<T> read_extent(
f67539c2
TL
185 Transaction &t,
186 laddr_t offset,
20effc67
TL
187 extent_len_t length) {
188 LOG_PREFIX(TransactionManager::read_extent);
189 return get_pin(
190 t, offset
191 ).si_then([this, FNAME, &t, offset, length] (auto pin) {
192 if (length != pin->get_length() || !pin->get_paddr().is_real()) {
193 SUBERRORT(seastore_tm,
194 "offset {} len {} got wrong pin {}",
195 t, offset, length, *pin);
196 ceph_assert(0 == "Should be impossible");
197 }
198 return this->pin_to_extent<T>(t, std::move(pin));
199 });
200 }
201
202 /**
203 * read_extent
204 *
205 * Read extent of type T at offset
206 */
207 template <typename T>
208 read_extent_ret<T> read_extent(
209 Transaction &t,
210 laddr_t offset) {
211 LOG_PREFIX(TransactionManager::read_extent);
212 return get_pin(
213 t, offset
214 ).si_then([this, FNAME, &t, offset] (auto pin) {
215 if (!pin->get_paddr().is_real()) {
216 SUBERRORT(seastore_tm,
217 "offset {} got wrong pin {}",
218 t, offset, *pin);
219 ceph_assert(0 == "Should be impossible");
220 }
221 return this->pin_to_extent<T>(t, std::move(pin));
f67539c2
TL
222 });
223 }
224
225 /// Obtain mutable copy of extent
226 LogicalCachedExtentRef get_mutable_extent(Transaction &t, LogicalCachedExtentRef ref) {
20effc67
TL
227 LOG_PREFIX(TransactionManager::get_mutable_extent);
228 auto ret = cache->duplicate_for_write(
f67539c2
TL
229 t,
230 ref)->cast<LogicalCachedExtent>();
20effc67
TL
231 stats.extents_mutated_total++;
232 stats.extents_mutated_bytes += ret->get_length();
f67539c2 233 if (!ret->has_pin()) {
20effc67
TL
234 SUBDEBUGT(seastore_tm,
235 "duplicating {} for write: {}",
236 t,
f67539c2
TL
237 *ref,
238 *ret);
239 ret->set_pin(ref->get_pin().duplicate());
240 } else {
20effc67
TL
241 SUBDEBUGT(seastore_tm,
242 "{} already pending",
243 t,
f67539c2
TL
244 *ref);
245 assert(ref->is_pending());
246 assert(&*ref == &*ret);
247 }
248 return ret;
249 }
250
251
20effc67
TL
252 using ref_iertr = LBAManager::ref_iertr;
253 using ref_ret = ref_iertr::future<unsigned>;
f67539c2
TL
254
255 /// Add refcount for ref
256 ref_ret inc_ref(
257 Transaction &t,
258 LogicalCachedExtentRef &ref);
259
260 /// Add refcount for offset
261 ref_ret inc_ref(
262 Transaction &t,
263 laddr_t offset);
264
265 /// Remove refcount for ref
266 ref_ret dec_ref(
267 Transaction &t,
268 LogicalCachedExtentRef &ref);
269
270 /// Remove refcount for offset
271 ref_ret dec_ref(
272 Transaction &t,
273 laddr_t offset);
274
20effc67
TL
275 /// remove refcount for list of offset
276 using refs_ret = ref_iertr::future<std::vector<unsigned>>;
277 refs_ret dec_ref(
278 Transaction &t,
279 std::vector<laddr_t> offsets);
280
f67539c2
TL
281 /**
282 * alloc_extent
283 *
284 * Allocates a new block of type T with the minimum lba range of size len
20effc67 285 * greater than laddr_hint.
f67539c2 286 */
20effc67 287 using alloc_extent_iertr = LBAManager::alloc_extent_iertr;
f67539c2 288 template <typename T>
20effc67 289 using alloc_extent_ret = alloc_extent_iertr::future<TCachedExtentRef<T>>;
f67539c2
TL
290 template <typename T>
291 alloc_extent_ret<T> alloc_extent(
292 Transaction &t,
20effc67 293 laddr_t laddr_hint,
f67539c2 294 extent_len_t len) {
20effc67
TL
295 placement_hint_t placement_hint;
296 if constexpr (T::TYPE == extent_types_t::OBJECT_DATA_BLOCK ||
297 T::TYPE == extent_types_t::COLL_BLOCK) {
298 placement_hint = placement_hint_t::COLD;
299 } else {
300 placement_hint = placement_hint_t::HOT;
301 }
302 auto ext = epm->alloc_new_extent<T>(
f67539c2 303 t,
20effc67
TL
304 len,
305 placement_hint);
306 return lba_manager->alloc_extent(
f67539c2 307 t,
20effc67 308 laddr_hint,
f67539c2
TL
309 len,
310 ext->get_paddr()
20effc67
TL
311 ).si_then([ext=std::move(ext), len, laddr_hint, &t, this](auto &&ref) mutable {
312 LOG_PREFIX(TransactionManager::alloc_extent);
f67539c2 313 ext->set_pin(std::move(ref));
20effc67
TL
314 stats.extents_allocated_total++;
315 stats.extents_allocated_bytes += len;
316 SUBDEBUGT(seastore_tm, "new extent: {}, laddr_hint: {}", t, *ext, laddr_hint);
317 return alloc_extent_iertr::make_ready_future<TCachedExtentRef<T>>(
f67539c2
TL
318 std::move(ext));
319 });
320 }
321
20effc67
TL
322 using reserve_extent_iertr = alloc_extent_iertr;
323 using reserve_extent_ret = reserve_extent_iertr::future<LBAPinRef>;
324 reserve_extent_ret reserve_region(
325 Transaction &t,
326 laddr_t hint,
327 extent_len_t len) {
328 return lba_manager->alloc_extent(
329 t,
330 hint,
331 len,
332 P_ADDR_ZERO);
333 }
334
335 /* alloc_extents
336 *
337 * allocates more than one new blocks of type T.
338 */
339 using alloc_extents_iertr = alloc_extent_iertr;
340 template<class T>
341 alloc_extents_iertr::future<std::vector<TCachedExtentRef<T>>>
342 alloc_extents(
343 Transaction &t,
344 laddr_t hint,
345 extent_len_t len,
346 int num) {
347 return seastar::do_with(std::vector<TCachedExtentRef<T>>(),
348 [this, &t, hint, len, num] (auto &extents) {
349 return trans_intr::do_for_each(
350 boost::make_counting_iterator(0),
351 boost::make_counting_iterator(num),
352 [this, &t, len, hint, &extents] (auto i) {
353 return alloc_extent<T>(t, hint, len).si_then(
354 [&extents](auto &&node) {
355 extents.push_back(node);
356 });
357 }).si_then([&extents] {
358 return alloc_extents_iertr::make_ready_future
359 <std::vector<TCachedExtentRef<T>>>(std::move(extents));
360 });
361 });
362 }
363
f67539c2
TL
364 /**
365 * submit_transaction
366 *
367 * Atomically submits transaction to persistence
368 */
20effc67
TL
369 using submit_transaction_iertr = base_iertr;
370 submit_transaction_iertr::future<> submit_transaction(Transaction &);
f67539c2
TL
371
372 /// SegmentCleaner::ExtentCallbackInterface
20effc67
TL
373 using SegmentCleaner::ExtentCallbackInterface::submit_transaction_direct_ret;
374 submit_transaction_direct_ret submit_transaction_direct(
375 Transaction &t) final;
f67539c2
TL
376
377 using SegmentCleaner::ExtentCallbackInterface::get_next_dirty_extents_ret;
378 get_next_dirty_extents_ret get_next_dirty_extents(
20effc67
TL
379 Transaction &t,
380 journal_seq_t seq,
381 size_t max_bytes) final;
f67539c2
TL
382
383 using SegmentCleaner::ExtentCallbackInterface::rewrite_extent_ret;
384 rewrite_extent_ret rewrite_extent(
385 Transaction &t,
386 CachedExtentRef extent) final;
387
388 using SegmentCleaner::ExtentCallbackInterface::get_extent_if_live_ret;
389 get_extent_if_live_ret get_extent_if_live(
390 Transaction &t,
391 extent_types_t type,
392 paddr_t addr,
393 laddr_t laddr,
394 segment_off_t len) final;
395
f67539c2
TL
396 using release_segment_ret =
397 SegmentCleaner::ExtentCallbackInterface::release_segment_ret;
398 release_segment_ret release_segment(
399 segment_id_t id) final {
400 return segment_manager.release(id);
401 }
402
20effc67
TL
403 /**
404 * read_root_meta
405 *
406 * Read root block meta entry for key.
407 */
408 using read_root_meta_iertr = base_iertr;
409 using read_root_meta_bare = std::optional<std::string>;
410 using read_root_meta_ret = read_root_meta_iertr::future<
411 read_root_meta_bare>;
412 read_root_meta_ret read_root_meta(
413 Transaction &t,
414 const std::string &key) {
415 return cache->get_root(
416 t
417 ).si_then([&key](auto root) {
418 auto meta = root->root.get_meta();
419 auto iter = meta.find(key);
420 if (iter == meta.end()) {
421 return seastar::make_ready_future<read_root_meta_bare>(std::nullopt);
422 } else {
423 return seastar::make_ready_future<read_root_meta_bare>(iter->second);
424 }
425 });
426 }
427
428 /**
429 * update_root_meta
430 *
431 * Update root block meta entry for key to value.
432 */
433 using update_root_meta_iertr = base_iertr;
434 using update_root_meta_ret = update_root_meta_iertr::future<>;
435 update_root_meta_ret update_root_meta(
436 Transaction& t,
437 const std::string& key,
438 const std::string& value) {
439 return cache->get_root(
440 t
441 ).si_then([this, &t, &key, &value](RootBlockRef root) {
442 root = cache->duplicate_for_write(t, root)->cast<RootBlock>();
443
444 auto meta = root->root.get_meta();
445 meta[key] = value;
446
447 root->root.set_meta(meta);
448 return seastar::now();
449 });
450 }
451
f67539c2
TL
452 /**
453 * read_onode_root
454 *
455 * Get onode-tree root logical address
456 */
20effc67
TL
457 using read_onode_root_iertr = base_iertr;
458 using read_onode_root_ret = read_onode_root_iertr::future<laddr_t>;
f67539c2 459 read_onode_root_ret read_onode_root(Transaction &t) {
20effc67
TL
460 return cache->get_root(t).si_then([](auto croot) {
461 laddr_t ret = croot->get_root().onode_root;
462 return ret;
f67539c2
TL
463 });
464 }
465
466 /**
467 * write_onode_root
468 *
469 * Write onode-tree root logical address, must be called after read.
470 */
471 void write_onode_root(Transaction &t, laddr_t addr) {
20effc67
TL
472 auto croot = cache->get_root_fast(t);
473 croot = cache->duplicate_for_write(t, croot)->cast<RootBlock>();
f67539c2
TL
474 croot->get_root().onode_root = addr;
475 }
476
20effc67
TL
477 /**
478 * read_collection_root
479 *
480 * Get collection root addr
481 */
482 using read_collection_root_iertr = base_iertr;
483 using read_collection_root_ret = read_collection_root_iertr::future<
484 coll_root_t>;
485 read_collection_root_ret read_collection_root(Transaction &t) {
486 return cache->get_root(t).si_then([](auto croot) {
487 return croot->get_root().collection_root.get();
488 });
489 }
490
491 /**
492 * write_collection_root
493 *
494 * Update collection root addr
495 */
496 void write_collection_root(Transaction &t, coll_root_t cmroot) {
497 auto croot = cache->get_root_fast(t);
498 croot = cache->duplicate_for_write(t, croot)->cast<RootBlock>();
499 croot->get_root().collection_root.update(cmroot);
500 }
501
502 extent_len_t get_block_size() const {
503 return segment_manager.get_block_size();
504 }
505
506 store_statfs_t store_stat() const {
507 return segment_cleaner->stat();
508 }
509
510 void add_segment_manager(SegmentManager* sm) {
511 LOG_PREFIX(TransactionManager::add_segment_manager);
512 SUBDEBUG(seastore_tm, "adding segment manager {}", sm->get_device_id());
513 scanner.add_segment_manager(sm);
514 epm->add_allocator(
515 device_type_t::SEGMENTED,
516 std::make_unique<SegmentedAllocator>(
517 *segment_cleaner,
518 *sm,
519 *lba_manager,
520 *journal,
521 *cache));
522 }
523
f67539c2
TL
524 ~TransactionManager();
525
526private:
527 friend class Transaction;
528
20effc67
TL
529 // although there might be multiple devices backing seastore,
530 // only one of them are supposed to hold the journal. This
531 // segment manager is that device
f67539c2 532 SegmentManager &segment_manager;
20effc67
TL
533 SegmentCleanerRef segment_cleaner;
534 CacheRef cache;
535 LBAManagerRef lba_manager;
536 JournalRef journal;
537 ExtentPlacementManagerRef epm;
538 ExtentReader& scanner;
539
540 WritePipeline write_pipeline;
541
542 struct {
543 uint64_t extents_retired_total = 0;
544 uint64_t extents_retired_bytes = 0;
545 uint64_t extents_mutated_total = 0;
546 uint64_t extents_mutated_bytes = 0;
547 uint64_t extents_allocated_total = 0;
548 uint64_t extents_allocated_bytes = 0;
549 } stats;
550 seastar::metrics::metric_group metrics;
551 void register_metrics();
552
553 rewrite_extent_ret rewrite_logical_extent(
554 Transaction& t,
555 LogicalCachedExtentRef extent);
556public:
557 // Testing interfaces
558 auto get_segment_cleaner() {
559 return segment_cleaner.get();
560 }
561
562 auto get_lba_manager() {
563 return lba_manager.get();
564 }
f67539c2
TL
565};
566using TransactionManagerRef = std::unique_ptr<TransactionManager>;
567
568}