]> git.proxmox.com Git - ceph.git/blob - ceph/src/crimson/os/seastore/journal/circular_bounded_journal.h
update ceph source to reef 18.2.1
[ceph.git] / ceph / src / crimson / os / seastore / journal / circular_bounded_journal.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #pragma once
5
6 #include "crimson/common/log.h"
7
8 #include <boost/intrusive_ptr.hpp>
9
10 #include <seastar/core/future.hh>
11
12 #include "include/ceph_assert.h"
13 #include "include/buffer.h"
14 #include "include/denc.h"
15
16 #include "crimson/osd/exceptions.h"
17 #include "crimson/os/seastore/journal.h"
18 #include "include/uuid.h"
19 #include "crimson/os/seastore/random_block_manager.h"
20 #include "crimson/os/seastore/random_block_manager/rbm_device.h"
21 #include <list>
22 #include "crimson/os/seastore/journal/record_submitter.h"
23 #include "crimson/os/seastore/journal/circular_journal_space.h"
24 #include "crimson/os/seastore/record_scanner.h"
25
26 namespace crimson::os::seastore::journal {
27
28 using RBMDevice = random_block_device::RBMDevice;
29
/**
 * CircularBoundedJournal
 *
 *
 * CircularBoundedJournal (CBJournal) is a journal that works like a circular
 * queue. With CBJournal, Seastore appends records to the journal when the
 * record is small (most likely metadata), at which point the head
 * (written_to) is moved forward. Eventually, Seastore applies the records
 * in CBJournal to RBM (TODO).
 *
 * - Commit time
 * After submit_record is done, written_to (an in-memory value) is increased.
 * written_to represents where the next record will be appended. Note that
 * applied_to is not changed here.
 *
 * - Replay time
 * At replay time, CBJournal replays its records by reading them starting
 * from dirty_tail. CBJournal examines the records one by one to check
 * whether each is valid, and written_to is recovered as valid records are
 * found. Note that applied_to is stored permanently once the apply work
 * (applying the records in CBJournal to RBM) is done by CBJournal (TODO).
 *
 * TODO: apply records from CircularBoundedJournal to RandomBlockManager
 *
 */
56
// Default block size (4096; presumably bytes -- confirm against users).
// `inline` gives every translation unit that includes this header the same
// single definition instead of a private per-TU copy.
inline constexpr uint64_t DEFAULT_BLOCK_SIZE = 4096;
58
59 class CircularBoundedJournal : public Journal, RecordScanner {
60 public:
61 CircularBoundedJournal(
62 JournalTrimmer &trimmer, RBMDevice* device, const std::string &path);
63 ~CircularBoundedJournal() {}
64
65 JournalTrimmer &get_trimmer() final {
66 return trimmer;
67 }
68
69 open_for_mkfs_ret open_for_mkfs() final;
70
71 open_for_mount_ret open_for_mount() final;
72
73 close_ertr::future<> close() final;
74
75 journal_type_t get_type() final {
76 return journal_type_t::RANDOM_BLOCK;
77 }
78
79 submit_record_ret submit_record(
80 record_t &&record,
81 OrderingHandle &handle
82 ) final;
83
84 seastar::future<> flush(
85 OrderingHandle &handle
86 ) final {
87 // TODO
88 return seastar::now();
89 }
90
91 replay_ret replay(delta_handler_t &&delta_handler) final;
92
93 rbm_abs_addr get_rbm_addr(journal_seq_t seq) const {
94 return convert_paddr_to_abs_addr(seq.offset);
95 }
96
97 /**
98 *
99 * CircularBoundedJournal write
100 *
101 * NVMe will support a large block write (< 512KB) with atomic write unit command.
102 * With this command, we expect that the most of incoming data can be stored
103 * as a single write call, which has lower overhead than existing
104 * way that uses a combination of system calls such as write() and sync().
105 *
106 */
107
108 seastar::future<> update_journal_tail(
109 journal_seq_t dirty,
110 journal_seq_t alloc) {
111 return cjs.update_journal_tail(dirty, alloc);
112 }
113 journal_seq_t get_dirty_tail() const {
114 return cjs.get_dirty_tail();
115 }
116 journal_seq_t get_alloc_tail() const {
117 return cjs.get_alloc_tail();
118 }
119
120 void set_write_pipeline(WritePipeline *_write_pipeline) final {
121 write_pipeline = _write_pipeline;
122 }
123
124 device_id_t get_device_id() const {
125 return cjs.get_device_id();
126 }
127 extent_len_t get_block_size() const {
128 return cjs.get_block_size();
129 }
130
131 rbm_abs_addr get_journal_end() const {
132 return cjs.get_journal_end();
133 }
134
135 void set_written_to(journal_seq_t seq) {
136 cjs.set_written_to(seq);
137 }
138
139 journal_seq_t get_written_to() {
140 return cjs.get_written_to();
141 }
142
143 rbm_abs_addr get_records_start() const {
144 return cjs.get_records_start();
145 }
146
147 seastar::future<> finish_commit(transaction_type_t type) final;
148
149 using cbj_delta_handler_t = std::function<
150 replay_ertr::future<bool>(
151 const record_locator_t&,
152 const delta_info_t&,
153 sea_time_point modify_time)>;
154
155 Journal::replay_ret scan_valid_record_delta(
156 cbj_delta_handler_t &&delta_handler,
157 journal_seq_t tail);
158
159 submit_record_ret do_submit_record(record_t &&record, OrderingHandle &handle);
160
161 void try_read_rolled_header(scan_valid_records_cursor &cursor) {
162 paddr_t addr = convert_abs_addr_to_paddr(
163 get_records_start(),
164 get_device_id());
165 cursor.seq.offset = addr;
166 cursor.seq.segment_seq += 1;
167 }
168
169 void initialize_cursor(scan_valid_records_cursor& cursor) final {
170 cursor.block_size = get_block_size();
171 };
172
173 Journal::replay_ret replay_segment(
174 cbj_delta_handler_t &handler, scan_valid_records_cursor& cursor);
175
176 read_ret read(paddr_t start, size_t len) final;
177
178 bool is_record_segment_seq_invalid(scan_valid_records_cursor &cursor,
179 record_group_header_t &h) final;
180
181 int64_t get_segment_end_offset(paddr_t addr) final {
182 return get_journal_end();
183 }
184
185 // Test interfaces
186
187 CircularJournalSpace& get_cjs() {
188 return cjs;
189 }
190
191 read_validate_record_metadata_ret test_read_validate_record_metadata(
192 scan_valid_records_cursor &cursor,
193 segment_nonce_t nonce)
194 {
195 return read_validate_record_metadata(cursor, nonce);
196 }
197
198 void test_initialize_cursor(scan_valid_records_cursor &cursor)
199 {
200 initialize_cursor(cursor);
201 }
202
203 private:
204 JournalTrimmer &trimmer;
205 std::string path;
206 WritePipeline *write_pipeline = nullptr;
207 /**
208 * initialized
209 *
210 * true after open_device_read_header, set to false in close().
211 * Indicates that device is open and in-memory header is valid.
212 */
213 bool initialized = false;
214
215 // start address where the newest record will be written
216 // should be in range [get_records_start(), get_journal_end())
217 // written_to.segment_seq is circulation seq to track
218 // the sequence to written records
219 CircularJournalSpace cjs;
220 RecordSubmitter record_submitter;
221 };
222
223 }
224