]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2013 Inktank Storage, Inc. | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | #ifndef ECTRANSACTION_H | |
16 | #define ECTRANSACTION_H | |
17 | ||
18 | #include "OSD.h" | |
19 | #include "PGBackend.h" | |
20 | #include "ECUtil.h" | |
21 | #include "erasure-code/ErasureCodeInterface.h" | |
22 | #include "PGTransaction.h" | |
23 | #include "ExtentCache.h" | |
24 | ||
25 | namespace ECTransaction { | |
26 | struct WritePlan { | |
27 | PGTransactionUPtr t; | |
28 | bool invalidates_cache = false; // Yes, both are possible | |
29 | map<hobject_t,extent_set> to_read; | |
30 | map<hobject_t,extent_set> will_write; // superset of to_read | |
31 | ||
32 | map<hobject_t,ECUtil::HashInfoRef> hash_infos; | |
33 | }; | |
34 | ||
35 | bool requires_overwrite( | |
36 | uint64_t prev_size, | |
37 | const PGTransaction::ObjectOperation &op); | |
38 | ||
39 | template <typename F> | |
40 | WritePlan get_write_plan( | |
41 | const ECUtil::stripe_info_t &sinfo, | |
42 | PGTransactionUPtr &&t, | |
43 | F &&get_hinfo, | |
44 | DoutPrefixProvider *dpp) { | |
45 | WritePlan plan; | |
46 | t->safe_create_traverse( | |
47 | [&](pair<const hobject_t, PGTransaction::ObjectOperation> &i) { | |
48 | ECUtil::HashInfoRef hinfo = get_hinfo(i.first); | |
49 | plan.hash_infos[i.first] = hinfo; | |
50 | ||
51 | uint64_t projected_size = | |
52 | hinfo->get_projected_total_logical_size(sinfo); | |
53 | ||
7c673cae FG |
54 | if (i.second.deletes_first()) { |
55 | ldpp_dout(dpp, 20) << __func__ << ": delete, setting projected size" | |
56 | << " to 0" << dendl; | |
57 | projected_size = 0; | |
58 | } | |
59 | ||
60 | hobject_t source; | |
61 | if (i.second.has_source(&source)) { | |
224ce89b WB |
62 | plan.invalidates_cache = true; |
63 | ||
7c673cae FG |
64 | ECUtil::HashInfoRef shinfo = get_hinfo(source); |
65 | projected_size = shinfo->get_projected_total_logical_size(sinfo); | |
66 | plan.hash_infos[source] = shinfo; | |
67 | } | |
68 | ||
69 | auto &will_write = plan.will_write[i.first]; | |
70 | if (i.second.truncate && | |
71 | i.second.truncate->first < projected_size) { | |
72 | if (!(sinfo.logical_offset_is_stripe_aligned( | |
73 | i.second.truncate->first))) { | |
74 | plan.to_read[i.first].union_insert( | |
75 | sinfo.logical_to_prev_stripe_offset(i.second.truncate->first), | |
76 | sinfo.get_stripe_width()); | |
77 | ||
78 | ldpp_dout(dpp, 20) << __func__ << ": unaligned truncate" << dendl; | |
79 | ||
80 | will_write.union_insert( | |
81 | sinfo.logical_to_prev_stripe_offset(i.second.truncate->first), | |
82 | sinfo.get_stripe_width()); | |
83 | } | |
84 | projected_size = sinfo.logical_to_next_stripe_offset( | |
85 | i.second.truncate->first); | |
86 | } | |
87 | ||
88 | extent_set raw_write_set; | |
89 | for (auto &&extent: i.second.buffer_updates) { | |
90 | using BufferUpdate = PGTransaction::ObjectOperation::BufferUpdate; | |
91 | if (boost::get<BufferUpdate::CloneRange>(&(extent.get_val()))) { | |
92 | assert( | |
93 | 0 == | |
94 | "CloneRange is not allowed, do_op should have returned ENOTSUPP"); | |
95 | } | |
96 | raw_write_set.insert(extent.get_off(), extent.get_len()); | |
97 | } | |
98 | ||
31f18b77 | 99 | auto orig_size = projected_size; |
7c673cae FG |
100 | for (auto extent = raw_write_set.begin(); |
101 | extent != raw_write_set.end(); | |
102 | ++extent) { | |
103 | uint64_t head_start = | |
104 | sinfo.logical_to_prev_stripe_offset(extent.get_start()); | |
105 | uint64_t head_finish = | |
106 | sinfo.logical_to_next_stripe_offset(extent.get_start()); | |
107 | if (head_start > projected_size) { | |
108 | head_start = projected_size; | |
109 | } | |
110 | if (head_start != head_finish && | |
31f18b77 FG |
111 | head_start < orig_size) { |
112 | assert(head_finish <= orig_size); | |
7c673cae | 113 | assert(head_finish - head_start == sinfo.get_stripe_width()); |
31f18b77 FG |
114 | ldpp_dout(dpp, 20) << __func__ << ": reading partial head stripe " |
115 | << head_start << "~" << sinfo.get_stripe_width() | |
116 | << dendl; | |
7c673cae FG |
117 | plan.to_read[i.first].union_insert( |
118 | head_start, sinfo.get_stripe_width()); | |
119 | } | |
120 | ||
121 | uint64_t tail_start = | |
122 | sinfo.logical_to_prev_stripe_offset( | |
123 | extent.get_start() + extent.get_len()); | |
124 | uint64_t tail_finish = | |
125 | sinfo.logical_to_next_stripe_offset( | |
126 | extent.get_start() + extent.get_len()); | |
127 | if (tail_start != tail_finish && | |
128 | (head_start == head_finish || tail_start != head_start) && | |
31f18b77 FG |
129 | tail_start < orig_size) { |
130 | assert(tail_finish <= orig_size); | |
7c673cae | 131 | assert(tail_finish - tail_start == sinfo.get_stripe_width()); |
31f18b77 FG |
132 | ldpp_dout(dpp, 20) << __func__ << ": reading partial tail stripe " |
133 | << tail_start << "~" << sinfo.get_stripe_width() | |
134 | << dendl; | |
7c673cae FG |
135 | plan.to_read[i.first].union_insert( |
136 | tail_start, sinfo.get_stripe_width()); | |
137 | } | |
138 | ||
139 | if (head_start != tail_finish) { | |
140 | assert( | |
141 | sinfo.logical_offset_is_stripe_aligned( | |
142 | tail_finish - head_start) | |
143 | ); | |
144 | will_write.union_insert( | |
145 | head_start, tail_finish - head_start); | |
146 | if (tail_finish > projected_size) | |
147 | projected_size = tail_finish; | |
148 | } else { | |
149 | assert(tail_finish <= projected_size); | |
150 | } | |
151 | } | |
152 | ||
153 | if (i.second.truncate && | |
154 | i.second.truncate->second > projected_size) { | |
155 | uint64_t truncating_to = | |
156 | sinfo.logical_to_next_stripe_offset(i.second.truncate->second); | |
157 | ldpp_dout(dpp, 20) << __func__ << ": truncating out to " | |
158 | << truncating_to | |
159 | << dendl; | |
31f18b77 FG |
160 | will_write.union_insert(projected_size, |
161 | truncating_to - projected_size); | |
7c673cae FG |
162 | projected_size = truncating_to; |
163 | } | |
164 | ||
165 | ldpp_dout(dpp, 20) << __func__ << ": " << i.first | |
166 | << " projected size " | |
167 | << projected_size | |
168 | << dendl; | |
169 | hinfo->set_projected_total_logical_size( | |
170 | sinfo, | |
171 | projected_size); | |
172 | ||
173 | /* validate post conditions: | |
174 | * to_read should have an entry for i.first iff it isn't empty | |
175 | * and if we are reading from i.first, we can't be renaming or | |
176 | * cloning it */ | |
177 | assert(plan.to_read.count(i.first) == 0 || | |
178 | (!plan.to_read.at(i.first).empty() && | |
179 | !i.second.has_source())); | |
180 | }); | |
181 | plan.t = std::move(t); | |
182 | return plan; | |
183 | } | |
184 | ||
185 | void generate_transactions( | |
186 | WritePlan &plan, | |
187 | ErasureCodeInterfaceRef &ecimpl, | |
188 | pg_t pgid, | |
189 | bool legacy_log_entries, | |
190 | const ECUtil::stripe_info_t &sinfo, | |
191 | const map<hobject_t,extent_map> &partial_extents, | |
192 | vector<pg_log_entry_t> &entries, | |
193 | map<hobject_t,extent_map> *written, | |
194 | map<shard_id_t, ObjectStore::Transaction> *transactions, | |
195 | set<hobject_t> *temp_added, | |
196 | set<hobject_t> *temp_removed, | |
197 | DoutPrefixProvider *dpp); | |
198 | }; | |
199 | ||
200 | #endif |