// ceph.git: ceph/src/librbd/io/CopyupRequest.cc (sources updated to 12.2.10)
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
#include "librbd/io/CopyupRequest.h"
#include "common/ceph_context.h"
#include "common/dout.h"
#include "common/errno.h"
#include "common/Mutex.h"

#include "librbd/AsyncObjectThrottle.h"
#include "librbd/ExclusiveLock.h"
#include "librbd/ImageCtx.h"
#include "librbd/ObjectMap.h"
#include "librbd/Utils.h"
#include "librbd/io/AioCompletion.h"
#include "librbd/io/ImageRequest.h"
#include "librbd/io/ObjectRequest.h"
#include "librbd/io/ReadResult.h"

#include <boost/bind.hpp>
#include <boost/lambda/bind.hpp>
#include <boost/lambda/construct.hpp>

#include <utility>
23
24 #define dout_subsys ceph_subsys_rbd
25 #undef dout_prefix
26 #define dout_prefix *_dout << "librbd::io::CopyupRequest: " << this \
27 << " " << __func__ << ": "
28
29 namespace librbd {
30 namespace io {
31
32 namespace {
33
// Per-snapshot object map updater executed under an AsyncObjectThrottle.
// Each instance updates the object map for the single snap id at index
// snap_id_idx of *snap_ids; the snap id vector and trace are owned by the
// caller (CopyupRequest) and must outlive this context.
class UpdateObjectMap : public C_AsyncObjectThrottle<> {
public:
  UpdateObjectMap(AsyncObjectThrottle<> &throttle, ImageCtx *image_ctx,
                  uint64_t object_no, const std::vector<uint64_t> *snap_ids,
                  const ZTracer::Trace &trace, size_t snap_id_idx)
    : C_AsyncObjectThrottle(throttle, *image_ctx), m_object_no(object_no),
      m_snap_ids(*snap_ids), m_trace(trace), m_snap_id_idx(snap_id_idx)
  {
  }

  // Issue the object map update for this context's snap id.
  // Returns 0 when an async update was dispatched (this Context is the
  // completion) or 1 when there is nothing to do for this snap id.
  int send() override {
    uint64_t snap_id = m_snap_ids[m_snap_id_idx];
    if (snap_id == CEPH_NOSNAP) {
      // HEAD update: we must be the exclusive lock owner and hold the
      // object map write lock.  aio_update may return false when no
      // state change is required, in which case the op finishes inline.
      RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
      RWLock::WLocker object_map_locker(m_image_ctx.object_map_lock);
      assert(m_image_ctx.exclusive_lock->is_lock_owner());
      assert(m_image_ctx.object_map != nullptr);
      bool sent = m_image_ctx.object_map->aio_update<Context>(
        CEPH_NOSNAP, m_object_no, OBJECT_EXISTS, {}, m_trace, false, this);
      return (sent ? 0 : 1);
    }

    uint8_t state = OBJECT_EXISTS;
    if (m_image_ctx.test_features(RBD_FEATURE_FAST_DIFF) &&
        m_snap_id_idx + 1 < m_snap_ids.size()) {
      // fast-diff: every snapshot except the most recent one in the list
      // is recorded as EXISTS_CLEAN (unchanged since the prior snap)
      state = OBJECT_EXISTS_CLEAN;
    }

    RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
    RWLock::RLocker object_map_locker(m_image_ctx.object_map_lock);
    if (m_image_ctx.object_map == nullptr) {
      // object map feature disabled in the interim -- nothing to update
      return 1;
    }

    // NOTE(review): the trailing bool differs between the HEAD (false)
    // and snapshot (true) calls -- presumably an ignore-missing flag;
    // confirm against ObjectMap::aio_update
    bool sent = m_image_ctx.object_map->aio_update<Context>(
      snap_id, m_object_no, state, {}, m_trace, true, this);
    assert(sent);
    return 0;
  }

private:
  uint64_t m_object_no;
  const std::vector<uint64_t> &m_snap_ids;
  const ZTracer::Trace &m_trace;
  size_t m_snap_id_idx;
};
80
81 } // anonymous namespace
82
83 template <typename I>
84 CopyupRequest<I>::CopyupRequest(I *ictx, const std::string &oid,
85 uint64_t objectno, Extents &&image_extents,
86 const ZTracer::Trace &parent_trace)
87 : m_ictx(util::get_image_ctx(ictx)), m_oid(oid), m_object_no(objectno),
88 m_image_extents(image_extents),
89 m_trace(util::create_trace(*m_ictx, "copy-up", parent_trace)),
90 m_state(STATE_READ_FROM_PARENT), m_lock("CopyupRequest", false, false)
91 {
92 m_async_op.start_op(*m_ictx);
93 }
94
template <typename I>
CopyupRequest<I>::~CopyupRequest() {
  // every queued write request must already have been completed and
  // detached via complete_requests() before destruction
  assert(m_pending_requests.empty());
  m_async_op.finish_op();
}
100
// Queue a write request that is blocked on this copyup; it will be
// completed via handle_copyup() once the copyup finishes or fails.
template <typename I>
void CopyupRequest<I>::append_request(AbstractObjectWriteRequest<I> *req) {
  ldout(m_ictx->cct, 20) << req << dendl;
  m_pending_requests.push_back(req);
}
106
107 template <typename I>
108 void CopyupRequest<I>::complete_requests(int r) {
109 while (!m_pending_requests.empty()) {
110 auto it = m_pending_requests.begin();
111 auto req = *it;
112 ldout(m_ictx->cct, 20) << "completing request " << req << dendl;
113 req->handle_copyup(r);
114 m_pending_requests.erase(it);
115 }
116 }
117
// Issue the RADOS copyup operation(s) for this object.  Depending on the
// trigger (copy-on-read vs. copy-on-write) and the snapshot context, up
// to two object write operations are dispatched; m_pending_copyups
// counts the completions to expect.  Always returns false: the request
// stays alive until the COPYUP state finishes in should_complete().
template <typename I>
bool CopyupRequest<I>::send_copyup() {
  // no queued write requests implies this copyup was triggered by a read
  bool copy_on_read = m_pending_requests.empty();
  bool add_copyup_op = !m_copyup_data.is_zero();
  if (!add_copyup_op) {
    // parent data read back as all zeroes -- drop the payload (an empty
    // buffer is still shipped if a copyup op is issued below)
    m_copyup_data.clear();
  }

  ldout(m_ictx->cct, 20) << "oid " << m_oid << dendl;
  m_state = STATE_COPYUP;

  m_ictx->snap_lock.get_read();
  ::SnapContext snapc = m_ictx->snapc;
  m_ictx->snap_lock.put_read();

  std::vector<librados::snap_t> snaps;

  // guards m_pending_copyups against concurrently arriving completions
  Mutex::Locker locker(m_lock);
  int r;
  if (copy_on_read || (!snapc.snaps.empty() && add_copyup_op)) {

    librados::ObjectWriteOperation copyup_op;
    copyup_op.exec("rbd", "copyup", m_copyup_data);
    m_copyup_data.clear();

    ObjectRequest<I>::add_write_hint(*m_ictx, &copyup_op);

    // send only the copyup request with a blank snapshot context so that
    // all snapshots are detected from the parent for this object. If
    // this is a CoW request, a second request will be created for the
    // actual modification.
    m_pending_copyups++;

    ldout(m_ictx->cct, 20) << "copyup with empty snapshot context" << dendl;
    librados::AioCompletion *comp = util::create_rados_callback(this);

    // duplicate the IoCtx (m_data_ctx) so this op can carry a snapshot
    // context independent of the image's shared data context
    librados::Rados rados(m_ictx->data_ctx);
    r = rados.ioctx_create2(m_ictx->data_ctx.get_id(), m_data_ctx);
    assert(r == 0);

    r = m_data_ctx.aio_operate(
      m_oid, comp, &copyup_op, 0, snaps,
      (m_trace.valid() ? m_trace.get_info() : nullptr));
    assert(r == 0);
    comp->release();
  }

  if (!copy_on_read) {
    // CoW path: issue a copyup exec (payload is empty if it was already
    // shipped by the op above, otherwise it still holds the parent data)
    // plus all pending write ops, under the current snapshot context
    librados::ObjectWriteOperation write_op;
    write_op.exec("rbd", "copyup", m_copyup_data);

    // merge all pending write ops into this single RADOS op
    ObjectRequest<I>::add_write_hint(*m_ictx, &write_op);
    for (auto req : m_pending_requests) {
      ldout(m_ictx->cct, 20) << "add_copyup_ops " << req << dendl;
      req->add_copyup_ops(&write_op);
    }

    m_pending_copyups++;

    snaps.insert(snaps.end(), snapc.snaps.begin(), snapc.snaps.end());
    librados::AioCompletion *comp = util::create_rados_callback(this);
    r = m_ictx->data_ctx.aio_operate(
      m_oid, comp, &write_op, snapc.seq, snaps,
      (m_trace.valid() ? m_trace.get_info() : nullptr));
    assert(r == 0);
    comp->release();
  }
  return false;
}
188
189 template <typename I>
190 bool CopyupRequest<I>::is_copyup_required() {
191 bool copy_on_read = m_pending_requests.empty();
192 if (copy_on_read) {
193 // always force a copyup if CoR enabled
194 return true;
195 }
196
197 if (!m_copyup_data.is_zero()) {
198 return true;
199 }
200
201 for (auto req : m_pending_requests) {
202 if (!req->is_empty_write_op()) {
203 return true;
204 }
205 }
206 return false;
207 }
208
// Kick off the copyup state machine by reading this object's backing
// extents from the parent image; completion re-enters via complete().
template <typename I>
void CopyupRequest<I>::send()
{
  m_state = STATE_READ_FROM_PARENT;
  AioCompletion *comp = AioCompletion::create_and_start(
    this, m_ictx, AIO_TYPE_READ);

  ldout(m_ictx->cct, 20) << "completion " << comp
                         << ", oid " << m_oid
                         << ", extents " << m_image_extents
                         << dendl;
  // NOTE(review): ImageRequest<> uses its default template argument
  // (ImageCtx) rather than I -- fine for the instantiation at the bottom
  // of this file, but would break for any other template parameter
  ImageRequest<>::aio_read(m_ictx->parent, comp, std::move(m_image_extents),
                           ReadResult{&m_copyup_data}, 0, m_trace);
}
223
224 template <typename I>
225 void CopyupRequest<I>::complete(int r)
226 {
227 if (should_complete(r)) {
228 complete_requests(r);
229 delete this;
230 }
231 }
232
// State-machine driver: advances through READ_FROM_PARENT ->
// OBJECT_MAP_HEAD -> OBJECT_MAP -> COPYUP.  Returns true when the
// request has fully finished (success or error) and may be destroyed.
template <typename I>
bool CopyupRequest<I>::should_complete(int r)
{
  CephContext *cct = m_ictx->cct;
  ldout(cct, 20) << "oid " << m_oid
                 << ", r " << r << dendl;

  uint64_t pending_copyups;
  switch (m_state) {
  case STATE_READ_FROM_PARENT:
    ldout(cct, 20) << "READ_FROM_PARENT" << dendl;
    // deregister from the image's copyup table before finishing up
    remove_from_list();
    if (r >= 0 || r == -ENOENT) {
      // -ENOENT (object absent in the parent) is not fatal: queued
      // writes / object map updates may still require a copyup
      if (!is_copyup_required()) {
        ldout(cct, 20) << "nop, skipping" << dendl;
        return true;
      }

      return send_object_map_head();
    }
    break;

  case STATE_OBJECT_MAP_HEAD:
    ldout(cct, 20) << "OBJECT_MAP_HEAD" << dendl;
    assert(r == 0);
    return send_object_map();

  case STATE_OBJECT_MAP:
    ldout(cct, 20) << "OBJECT_MAP" << dendl;
    assert(r == 0);
    return send_copyup();

  case STATE_COPYUP:
    // send_copyup() may have dispatched two RADOS ops; only the final
    // completion may finish the request
    {
      Mutex::Locker locker(m_lock);
      assert(m_pending_copyups > 0);
      pending_copyups = --m_pending_copyups;
    }
    ldout(cct, 20) << "COPYUP (" << pending_copyups << " pending)"
                   << dendl;
    if (r == -ENOENT) {
      // hide the -ENOENT error if this is the last op
      if (pending_copyups == 0) {
        complete_requests(0);
      }
    } else if (r < 0) {
      complete_requests(r);
    }
    return (pending_copyups == 0);

  default:
    lderr(cct) << "invalid state: " << m_state << dendl;
    assert(false);
    break;
  }
  // READ_FROM_PARENT fall-through: finished only if the read failed
  return (r < 0);
}
290
291 template <typename I>
292 void CopyupRequest<I>::remove_from_list()
293 {
294 Mutex::Locker l(m_ictx->copyup_list_lock);
295
296 auto it = m_ictx->copyup_list.find(m_object_no);
297 assert(it != m_ictx->copyup_list.end());
298 m_ictx->copyup_list.erase(it);
299 }
300
// Update the HEAD object map entry (when an object map is enabled)
// before issuing the copyup.  Returns false when an async object map
// update was dispatched (its completion re-enters should_complete);
// otherwise chains directly into send_object_map().
template <typename I>
bool CopyupRequest<I>::send_object_map_head() {
  CephContext *cct = m_ictx->cct;
  ldout(cct, 20) << dendl;

  m_state = STATE_OBJECT_MAP_HEAD;

  {
    RWLock::RLocker owner_locker(m_ictx->owner_lock);
    RWLock::RLocker snap_locker(m_ictx->snap_lock);
    if (m_ictx->object_map != nullptr) {
      bool copy_on_read = m_pending_requests.empty();
      // an enabled object map implies we hold the exclusive lock
      assert(m_ictx->exclusive_lock->is_lock_owner());

      RWLock::WLocker object_map_locker(m_ictx->object_map_lock);
      if (!m_ictx->snaps.empty()) {
        // snapshot object maps will be handled by send_object_map()
        m_snap_ids.insert(m_snap_ids.end(), m_ictx->snaps.begin(),
                          m_ictx->snaps.end());
      }
      if (copy_on_read &&
          (*m_ictx->object_map)[m_object_no] != OBJECT_EXISTS) {
        // CoR: delegate the HEAD update to send_object_map() by
        // prepending CEPH_NOSNAP.  Explicitly drop the locks (reverse
        // acquisition order) to avoid re-entrant lock attempts there.
        m_snap_ids.insert(m_snap_ids.begin(), CEPH_NOSNAP);
        object_map_locker.unlock();
        snap_locker.unlock();
        owner_locker.unlock();
        return send_object_map();
      }

      bool may_update = false;
      uint8_t new_state;  // only read when may_update is set below
      uint8_t current_state = (*m_ictx->object_map)[m_object_no];

      // the most recently queued write determines the target state
      auto r_it = m_pending_requests.rbegin();
      if (r_it != m_pending_requests.rend()) {
        auto req = *r_it;
        new_state = req->get_pre_write_object_map_state();

        ldout(cct, 20) << req->get_op_type() << " object no "
                       << m_object_no << " current state "
                       << stringify(static_cast<uint32_t>(current_state))
                       << " new state " << stringify(static_cast<uint32_t>(new_state))
                       << dendl;
        may_update = true;
      }

      // aio_update returns true when an update was dispatched; its
      // completion advances the state machine asynchronously
      if (may_update && (new_state != current_state) &&
          m_ictx->object_map->aio_update<CopyupRequest>(
            CEPH_NOSNAP, m_object_no, new_state, current_state, m_trace,
            false, this)) {
        return false;
      }
    }
  }

  return send_object_map();
}
357
// Update the object maps for every snap id collected in m_snap_ids
// (including HEAD when CEPH_NOSNAP was prepended), then proceed to the
// copyup.  Returns false when async updates were dispatched; otherwise
// chains into send_copyup().
template <typename I>
bool CopyupRequest<I>::send_object_map() {
  // avoid possible recursive lock attempts
  if (m_snap_ids.empty()) {
    // no object map update required
    return send_copyup();
  } else {
    // update object maps for HEAD and all existing snapshots
    ldout(m_ictx->cct, 20) << "oid " << m_oid << dendl;
    m_state = STATE_OBJECT_MAP;

    RWLock::RLocker owner_locker(m_ictx->owner_lock);
    // one UpdateObjectMap context per snap id, bounded by the configured
    // number of concurrent management ops; the throttle invokes our
    // context callback (-> should_complete) when all updates finish
    AsyncObjectThrottle<>::ContextFactory context_factory(
      boost::lambda::bind(boost::lambda::new_ptr<UpdateObjectMap>(),
      boost::lambda::_1, m_ictx, m_object_no, &m_snap_ids, m_trace,
      boost::lambda::_2));
    AsyncObjectThrottle<> *throttle = new AsyncObjectThrottle<>(
      NULL, *m_ictx, context_factory, util::create_context_callback(this),
      NULL, 0, m_snap_ids.size());
    throttle->start_ops(m_ictx->concurrent_management_ops);
  }
  return false;
}
381
382 } // namespace io
383 } // namespace librbd
384
// explicit instantiation for the production image context type
template class librbd::io::CopyupRequest<librbd::ImageCtx>;