]> git.proxmox.com Git - ceph.git/blob - ceph/src/osd/ECTransaction.h
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / src / osd / ECTransaction.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2013 Inktank Storage, Inc.
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15 #ifndef ECTRANSACTION_H
16 #define ECTRANSACTION_H
17
18 #include "OSD.h"
19 #include "PGBackend.h"
20 #include "ECUtil.h"
21 #include "erasure-code/ErasureCodeInterface.h"
22 #include "PGTransaction.h"
23 #include "ExtentCache.h"
24
25 namespace ECTransaction {
26 struct WritePlan {
27 PGTransactionUPtr t;
28 bool invalidates_cache = false; // Yes, both are possible
29 map<hobject_t,extent_set> to_read;
30 map<hobject_t,extent_set> will_write; // superset of to_read
31
32 map<hobject_t,ECUtil::HashInfoRef> hash_infos;
33 };
34
35 bool requires_overwrite(
36 uint64_t prev_size,
37 const PGTransaction::ObjectOperation &op);
38
39 template <typename F>
40 WritePlan get_write_plan(
41 const ECUtil::stripe_info_t &sinfo,
42 PGTransactionUPtr &&t,
43 F &&get_hinfo,
44 DoutPrefixProvider *dpp) {
45 WritePlan plan;
46 t->safe_create_traverse(
47 [&](pair<const hobject_t, PGTransaction::ObjectOperation> &i) {
48 ECUtil::HashInfoRef hinfo = get_hinfo(i.first);
49 plan.hash_infos[i.first] = hinfo;
50
51 uint64_t projected_size =
52 hinfo->get_projected_total_logical_size(sinfo);
53
54 if (i.second.has_source()) {
55 plan.invalidates_cache = true;
56 }
57
58 if (i.second.deletes_first()) {
59 ldpp_dout(dpp, 20) << __func__ << ": delete, setting projected size"
60 << " to 0" << dendl;
61 projected_size = 0;
62 }
63
64 hobject_t source;
65 if (i.second.has_source(&source)) {
66 ECUtil::HashInfoRef shinfo = get_hinfo(source);
67 projected_size = shinfo->get_projected_total_logical_size(sinfo);
68 plan.hash_infos[source] = shinfo;
69 }
70
71 auto &will_write = plan.will_write[i.first];
72 if (i.second.truncate &&
73 i.second.truncate->first < projected_size) {
74 if (!(sinfo.logical_offset_is_stripe_aligned(
75 i.second.truncate->first))) {
76 plan.to_read[i.first].union_insert(
77 sinfo.logical_to_prev_stripe_offset(i.second.truncate->first),
78 sinfo.get_stripe_width());
79
80 ldpp_dout(dpp, 20) << __func__ << ": unaligned truncate" << dendl;
81
82 will_write.union_insert(
83 sinfo.logical_to_prev_stripe_offset(i.second.truncate->first),
84 sinfo.get_stripe_width());
85 }
86 projected_size = sinfo.logical_to_next_stripe_offset(
87 i.second.truncate->first);
88 }
89
90 extent_set raw_write_set;
91 for (auto &&extent: i.second.buffer_updates) {
92 using BufferUpdate = PGTransaction::ObjectOperation::BufferUpdate;
93 if (boost::get<BufferUpdate::CloneRange>(&(extent.get_val()))) {
94 assert(
95 0 ==
96 "CloneRange is not allowed, do_op should have returned ENOTSUPP");
97 }
98 raw_write_set.insert(extent.get_off(), extent.get_len());
99 }
100
101 for (auto extent = raw_write_set.begin();
102 extent != raw_write_set.end();
103 ++extent) {
104 uint64_t head_start =
105 sinfo.logical_to_prev_stripe_offset(extent.get_start());
106 uint64_t head_finish =
107 sinfo.logical_to_next_stripe_offset(extent.get_start());
108 if (head_start > projected_size) {
109 head_start = projected_size;
110 }
111 if (head_start != head_finish &&
112 head_start < projected_size) {
113 assert(head_finish <= projected_size);
114 assert(head_finish - head_start == sinfo.get_stripe_width());
115 plan.to_read[i.first].union_insert(
116 head_start, sinfo.get_stripe_width());
117 }
118
119 uint64_t tail_start =
120 sinfo.logical_to_prev_stripe_offset(
121 extent.get_start() + extent.get_len());
122 uint64_t tail_finish =
123 sinfo.logical_to_next_stripe_offset(
124 extent.get_start() + extent.get_len());
125 if (tail_start != tail_finish &&
126 (head_start == head_finish || tail_start != head_start) &&
127 tail_start < projected_size) {
128 assert(tail_finish <= projected_size);
129 assert(tail_finish - tail_start == sinfo.get_stripe_width());
130 plan.to_read[i.first].union_insert(
131 tail_start, sinfo.get_stripe_width());
132 }
133
134 if (head_start != tail_finish) {
135 assert(
136 sinfo.logical_offset_is_stripe_aligned(
137 tail_finish - head_start)
138 );
139 will_write.union_insert(
140 head_start, tail_finish - head_start);
141 if (tail_finish > projected_size)
142 projected_size = tail_finish;
143 } else {
144 assert(tail_finish <= projected_size);
145 }
146 }
147
148 if (i.second.truncate &&
149 i.second.truncate->second > projected_size) {
150 uint64_t truncating_to =
151 sinfo.logical_to_next_stripe_offset(i.second.truncate->second);
152 ldpp_dout(dpp, 20) << __func__ << ": truncating out to "
153 << truncating_to
154 << dendl;
155 will_write.union_insert(projected_size, truncating_to - projected_size);
156 projected_size = truncating_to;
157 }
158
159 ldpp_dout(dpp, 20) << __func__ << ": " << i.first
160 << " projected size "
161 << projected_size
162 << dendl;
163 hinfo->set_projected_total_logical_size(
164 sinfo,
165 projected_size);
166
167 /* validate post conditions:
168 * to_read should have an entry for i.first iff it isn't empty
169 * and if we are reading from i.first, we can't be renaming or
170 * cloning it */
171 assert(plan.to_read.count(i.first) == 0 ||
172 (!plan.to_read.at(i.first).empty() &&
173 !i.second.has_source()));
174 });
175 plan.t = std::move(t);
176 return plan;
177 }
178
179 void generate_transactions(
180 WritePlan &plan,
181 ErasureCodeInterfaceRef &ecimpl,
182 pg_t pgid,
183 bool legacy_log_entries,
184 const ECUtil::stripe_info_t &sinfo,
185 const map<hobject_t,extent_map> &partial_extents,
186 vector<pg_log_entry_t> &entries,
187 map<hobject_t,extent_map> *written,
188 map<shard_id_t, ObjectStore::Transaction> *transactions,
189 set<hobject_t> *temp_added,
190 set<hobject_t> *temp_removed,
191 DoutPrefixProvider *dpp);
192 };
193
194 #endif