]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | #include "librbd/io/CopyupRequest.h" | |
5 | #include "common/ceph_context.h" | |
6 | #include "common/dout.h" | |
7 | #include "common/errno.h" | |
8 | #include "common/Mutex.h" | |
9 | ||
10 | #include "librbd/AsyncObjectThrottle.h" | |
11 | #include "librbd/ExclusiveLock.h" | |
12 | #include "librbd/ImageCtx.h" | |
13 | #include "librbd/ObjectMap.h" | |
14 | #include "librbd/Utils.h" | |
15 | #include "librbd/io/AioCompletion.h" | |
16 | #include "librbd/io/ImageRequest.h" | |
17 | #include "librbd/io/ObjectRequest.h" | |
18 | #include "librbd/io/ReadResult.h" | |
19 | ||
20 | #include <boost/bind.hpp> | |
21 | #include <boost/lambda/bind.hpp> | |
22 | #include <boost/lambda/construct.hpp> | |
23 | ||
24 | #define dout_subsys ceph_subsys_rbd | |
25 | #undef dout_prefix | |
26 | #define dout_prefix *_dout << "librbd::io::CopyupRequest: " << this \ | |
27 | << " " << __func__ << ": " | |
28 | ||
29 | namespace librbd { | |
30 | namespace io { | |
31 | ||
32 | namespace { | |
33 | ||
34 | class UpdateObjectMap : public C_AsyncObjectThrottle<> { | |
35 | public: | |
36 | UpdateObjectMap(AsyncObjectThrottle<> &throttle, ImageCtx *image_ctx, | |
37 | uint64_t object_no, const std::vector<uint64_t> *snap_ids, | |
31f18b77 FG |
38 | const ZTracer::Trace &trace, size_t snap_id_idx) |
39 | : C_AsyncObjectThrottle(throttle, *image_ctx), m_object_no(object_no), | |
40 | m_snap_ids(*snap_ids), m_trace(trace), m_snap_id_idx(snap_id_idx) | |
7c673cae FG |
41 | { |
42 | } | |
43 | ||
44 | int send() override { | |
45 | uint64_t snap_id = m_snap_ids[m_snap_id_idx]; | |
46 | if (snap_id == CEPH_NOSNAP) { | |
47 | RWLock::RLocker snap_locker(m_image_ctx.snap_lock); | |
48 | RWLock::WLocker object_map_locker(m_image_ctx.object_map_lock); | |
49 | assert(m_image_ctx.exclusive_lock->is_lock_owner()); | |
50 | assert(m_image_ctx.object_map != nullptr); | |
51 | bool sent = m_image_ctx.object_map->aio_update<Context>( | |
91327a77 | 52 | CEPH_NOSNAP, m_object_no, OBJECT_EXISTS, {}, m_trace, false, this); |
7c673cae FG |
53 | return (sent ? 0 : 1); |
54 | } | |
55 | ||
56 | uint8_t state = OBJECT_EXISTS; | |
57 | if (m_image_ctx.test_features(RBD_FEATURE_FAST_DIFF) && | |
58 | m_snap_id_idx + 1 < m_snap_ids.size()) { | |
59 | state = OBJECT_EXISTS_CLEAN; | |
60 | } | |
61 | ||
62 | RWLock::RLocker snap_locker(m_image_ctx.snap_lock); | |
63 | RWLock::RLocker object_map_locker(m_image_ctx.object_map_lock); | |
64 | if (m_image_ctx.object_map == nullptr) { | |
65 | return 1; | |
66 | } | |
67 | ||
68 | bool sent = m_image_ctx.object_map->aio_update<Context>( | |
91327a77 | 69 | snap_id, m_object_no, state, {}, m_trace, true, this); |
7c673cae FG |
70 | assert(sent); |
71 | return 0; | |
72 | } | |
73 | ||
74 | private: | |
75 | uint64_t m_object_no; | |
76 | const std::vector<uint64_t> &m_snap_ids; | |
31f18b77 | 77 | const ZTracer::Trace &m_trace; |
7c673cae FG |
78 | size_t m_snap_id_idx; |
79 | }; | |
80 | ||
81 | } // anonymous namespace | |
82 | ||
b32b8144 FG |
83 | template <typename I> |
84 | CopyupRequest<I>::CopyupRequest(I *ictx, const std::string &oid, | |
85 | uint64_t objectno, Extents &&image_extents, | |
86 | const ZTracer::Trace &parent_trace) | |
87 | : m_ictx(util::get_image_ctx(ictx)), m_oid(oid), m_object_no(objectno), | |
31f18b77 FG |
88 | m_image_extents(image_extents), |
89 | m_trace(util::create_trace(*m_ictx, "copy-up", parent_trace)), | |
b32b8144 | 90 | m_state(STATE_READ_FROM_PARENT), m_lock("CopyupRequest", false, false) |
7c673cae FG |
91 | { |
92 | m_async_op.start_op(*m_ictx); | |
93 | } | |
94 | ||
b32b8144 FG |
95 | template <typename I> |
96 | CopyupRequest<I>::~CopyupRequest() { | |
7c673cae FG |
97 | assert(m_pending_requests.empty()); |
98 | m_async_op.finish_op(); | |
99 | } | |
100 | ||
b32b8144 FG |
101 | template <typename I> |
102 | void CopyupRequest<I>::append_request(AbstractObjectWriteRequest<I> *req) { | |
7c673cae FG |
103 | ldout(m_ictx->cct, 20) << req << dendl; |
104 | m_pending_requests.push_back(req); | |
105 | } | |
106 | ||
b32b8144 FG |
107 | template <typename I> |
108 | void CopyupRequest<I>::complete_requests(int r) { | |
7c673cae | 109 | while (!m_pending_requests.empty()) { |
b32b8144 FG |
110 | auto it = m_pending_requests.begin(); |
111 | auto req = *it; | |
7c673cae | 112 | ldout(m_ictx->cct, 20) << "completing request " << req << dendl; |
b32b8144 | 113 | req->handle_copyup(r); |
7c673cae FG |
114 | m_pending_requests.erase(it); |
115 | } | |
116 | } | |
117 | ||
b32b8144 FG |
118 | template <typename I> |
119 | bool CopyupRequest<I>::send_copyup() { | |
7c673cae | 120 | bool copy_on_read = m_pending_requests.empty(); |
b32b8144 FG |
121 | bool add_copyup_op = !m_copyup_data.is_zero(); |
122 | if (!add_copyup_op) { | |
7c673cae | 123 | m_copyup_data.clear(); |
7c673cae FG |
124 | } |
125 | ||
126 | ldout(m_ictx->cct, 20) << "oid " << m_oid << dendl; | |
127 | m_state = STATE_COPYUP; | |
128 | ||
129 | m_ictx->snap_lock.get_read(); | |
130 | ::SnapContext snapc = m_ictx->snapc; | |
131 | m_ictx->snap_lock.put_read(); | |
132 | ||
133 | std::vector<librados::snap_t> snaps; | |
134 | ||
b32b8144 | 135 | Mutex::Locker locker(m_lock); |
7c673cae FG |
136 | int r; |
137 | if (copy_on_read || (!snapc.snaps.empty() && add_copyup_op)) { | |
7c673cae FG |
138 | |
139 | librados::ObjectWriteOperation copyup_op; | |
140 | copyup_op.exec("rbd", "copyup", m_copyup_data); | |
b32b8144 FG |
141 | m_copyup_data.clear(); |
142 | ||
143 | ObjectRequest<I>::add_write_hint(*m_ictx, ©up_op); | |
7c673cae FG |
144 | |
145 | // send only the copyup request with a blank snapshot context so that | |
146 | // all snapshots are detected from the parent for this object. If | |
147 | // this is a CoW request, a second request will be created for the | |
148 | // actual modification. | |
149 | m_pending_copyups++; | |
150 | ||
151 | ldout(m_ictx->cct, 20) << "copyup with empty snapshot context" << dendl; | |
152 | librados::AioCompletion *comp = util::create_rados_callback(this); | |
153 | ||
154 | librados::Rados rados(m_ictx->data_ctx); | |
155 | r = rados.ioctx_create2(m_ictx->data_ctx.get_id(), m_data_ctx); | |
156 | assert(r == 0); | |
157 | ||
31f18b77 FG |
158 | r = m_data_ctx.aio_operate( |
159 | m_oid, comp, ©up_op, 0, snaps, | |
160 | (m_trace.valid() ? m_trace.get_info() : nullptr)); | |
7c673cae FG |
161 | assert(r == 0); |
162 | comp->release(); | |
163 | } | |
164 | ||
165 | if (!copy_on_read) { | |
166 | librados::ObjectWriteOperation write_op; | |
b32b8144 | 167 | write_op.exec("rbd", "copyup", m_copyup_data); |
7c673cae FG |
168 | |
169 | // merge all pending write ops into this single RADOS op | |
b32b8144 FG |
170 | ObjectRequest<I>::add_write_hint(*m_ictx, &write_op); |
171 | for (auto req : m_pending_requests) { | |
7c673cae | 172 | ldout(m_ictx->cct, 20) << "add_copyup_ops " << req << dendl; |
b32b8144 | 173 | req->add_copyup_ops(&write_op); |
7c673cae | 174 | } |
b32b8144 FG |
175 | |
176 | m_pending_copyups++; | |
7c673cae FG |
177 | |
178 | snaps.insert(snaps.end(), snapc.snaps.begin(), snapc.snaps.end()); | |
179 | librados::AioCompletion *comp = util::create_rados_callback(this); | |
31f18b77 FG |
180 | r = m_ictx->data_ctx.aio_operate( |
181 | m_oid, comp, &write_op, snapc.seq, snaps, | |
182 | (m_trace.valid() ? m_trace.get_info() : nullptr)); | |
7c673cae FG |
183 | assert(r == 0); |
184 | comp->release(); | |
185 | } | |
186 | return false; | |
187 | } | |
188 | ||
b32b8144 FG |
189 | template <typename I> |
190 | bool CopyupRequest<I>::is_copyup_required() { | |
191 | bool copy_on_read = m_pending_requests.empty(); | |
192 | if (copy_on_read) { | |
193 | // always force a copyup if CoR enabled | |
194 | return true; | |
195 | } | |
196 | ||
197 | if (!m_copyup_data.is_zero()) { | |
198 | return true; | |
7c673cae FG |
199 | } |
200 | ||
b32b8144 FG |
201 | for (auto req : m_pending_requests) { |
202 | if (!req->is_empty_write_op()) { | |
203 | return true; | |
204 | } | |
205 | } | |
206 | return false; | |
7c673cae FG |
207 | } |
208 | ||
b32b8144 FG |
209 | template <typename I> |
210 | void CopyupRequest<I>::send() | |
7c673cae FG |
211 | { |
212 | m_state = STATE_READ_FROM_PARENT; | |
213 | AioCompletion *comp = AioCompletion::create_and_start( | |
214 | this, m_ictx, AIO_TYPE_READ); | |
215 | ||
216 | ldout(m_ictx->cct, 20) << "completion " << comp | |
217 | << ", oid " << m_oid | |
218 | << ", extents " << m_image_extents | |
219 | << dendl; | |
220 | ImageRequest<>::aio_read(m_ictx->parent, comp, std::move(m_image_extents), | |
31f18b77 | 221 | ReadResult{&m_copyup_data}, 0, m_trace); |
7c673cae FG |
222 | } |
223 | ||
b32b8144 FG |
224 | template <typename I> |
225 | void CopyupRequest<I>::complete(int r) | |
7c673cae FG |
226 | { |
227 | if (should_complete(r)) { | |
228 | complete_requests(r); | |
229 | delete this; | |
230 | } | |
231 | } | |
232 | ||
b32b8144 FG |
233 | template <typename I> |
234 | bool CopyupRequest<I>::should_complete(int r) | |
7c673cae FG |
235 | { |
236 | CephContext *cct = m_ictx->cct; | |
237 | ldout(cct, 20) << "oid " << m_oid | |
238 | << ", r " << r << dendl; | |
239 | ||
240 | uint64_t pending_copyups; | |
241 | switch (m_state) { | |
242 | case STATE_READ_FROM_PARENT: | |
243 | ldout(cct, 20) << "READ_FROM_PARENT" << dendl; | |
244 | remove_from_list(); | |
245 | if (r >= 0 || r == -ENOENT) { | |
b32b8144 | 246 | if (!is_copyup_required()) { |
7c673cae FG |
247 | ldout(cct, 20) << "nop, skipping" << dendl; |
248 | return true; | |
249 | } | |
250 | ||
251 | return send_object_map_head(); | |
252 | } | |
253 | break; | |
254 | ||
255 | case STATE_OBJECT_MAP_HEAD: | |
256 | ldout(cct, 20) << "OBJECT_MAP_HEAD" << dendl; | |
257 | assert(r == 0); | |
258 | return send_object_map(); | |
259 | ||
260 | case STATE_OBJECT_MAP: | |
261 | ldout(cct, 20) << "OBJECT_MAP" << dendl; | |
262 | assert(r == 0); | |
263 | return send_copyup(); | |
264 | ||
265 | case STATE_COPYUP: | |
b32b8144 FG |
266 | { |
267 | Mutex::Locker locker(m_lock); | |
268 | assert(m_pending_copyups > 0); | |
269 | pending_copyups = --m_pending_copyups; | |
270 | } | |
7c673cae FG |
271 | ldout(cct, 20) << "COPYUP (" << pending_copyups << " pending)" |
272 | << dendl; | |
273 | if (r == -ENOENT) { | |
274 | // hide the -ENOENT error if this is the last op | |
275 | if (pending_copyups == 0) { | |
276 | complete_requests(0); | |
277 | } | |
278 | } else if (r < 0) { | |
279 | complete_requests(r); | |
280 | } | |
281 | return (pending_copyups == 0); | |
282 | ||
283 | default: | |
284 | lderr(cct) << "invalid state: " << m_state << dendl; | |
285 | assert(false); | |
286 | break; | |
287 | } | |
288 | return (r < 0); | |
289 | } | |
290 | ||
b32b8144 FG |
291 | template <typename I> |
292 | void CopyupRequest<I>::remove_from_list() | |
7c673cae FG |
293 | { |
294 | Mutex::Locker l(m_ictx->copyup_list_lock); | |
295 | ||
b32b8144 | 296 | auto it = m_ictx->copyup_list.find(m_object_no); |
7c673cae FG |
297 | assert(it != m_ictx->copyup_list.end()); |
298 | m_ictx->copyup_list.erase(it); | |
299 | } | |
300 | ||
b32b8144 FG |
301 | template <typename I> |
302 | bool CopyupRequest<I>::send_object_map_head() { | |
7c673cae FG |
303 | CephContext *cct = m_ictx->cct; |
304 | ldout(cct, 20) << dendl; | |
305 | ||
306 | m_state = STATE_OBJECT_MAP_HEAD; | |
307 | ||
308 | { | |
309 | RWLock::RLocker owner_locker(m_ictx->owner_lock); | |
310 | RWLock::RLocker snap_locker(m_ictx->snap_lock); | |
311 | if (m_ictx->object_map != nullptr) { | |
312 | bool copy_on_read = m_pending_requests.empty(); | |
313 | assert(m_ictx->exclusive_lock->is_lock_owner()); | |
314 | ||
315 | RWLock::WLocker object_map_locker(m_ictx->object_map_lock); | |
316 | if (!m_ictx->snaps.empty()) { | |
317 | m_snap_ids.insert(m_snap_ids.end(), m_ictx->snaps.begin(), | |
318 | m_ictx->snaps.end()); | |
319 | } | |
320 | if (copy_on_read && | |
321 | (*m_ictx->object_map)[m_object_no] != OBJECT_EXISTS) { | |
322 | m_snap_ids.insert(m_snap_ids.begin(), CEPH_NOSNAP); | |
323 | object_map_locker.unlock(); | |
324 | snap_locker.unlock(); | |
325 | owner_locker.unlock(); | |
326 | return send_object_map(); | |
327 | } | |
328 | ||
329 | bool may_update = false; | |
b32b8144 FG |
330 | uint8_t new_state; |
331 | uint8_t current_state = (*m_ictx->object_map)[m_object_no]; | |
7c673cae | 332 | |
b32b8144 FG |
333 | auto r_it = m_pending_requests.rbegin(); |
334 | if (r_it != m_pending_requests.rend()) { | |
335 | auto req = *r_it; | |
336 | new_state = req->get_pre_write_object_map_state(); | |
7c673cae | 337 | |
7c673cae FG |
338 | ldout(cct, 20) << req->get_op_type() << " object no " |
339 | << m_object_no << " current state " | |
340 | << stringify(static_cast<uint32_t>(current_state)) | |
341 | << " new state " << stringify(static_cast<uint32_t>(new_state)) | |
342 | << dendl; | |
343 | may_update = true; | |
7c673cae FG |
344 | } |
345 | ||
346 | if (may_update && (new_state != current_state) && | |
347 | m_ictx->object_map->aio_update<CopyupRequest>( | |
31f18b77 | 348 | CEPH_NOSNAP, m_object_no, new_state, current_state, m_trace, |
91327a77 | 349 | false, this)) { |
7c673cae FG |
350 | return false; |
351 | } | |
352 | } | |
353 | } | |
354 | ||
355 | return send_object_map(); | |
356 | } | |
357 | ||
b32b8144 FG |
358 | template <typename I> |
359 | bool CopyupRequest<I>::send_object_map() { | |
7c673cae FG |
360 | // avoid possible recursive lock attempts |
361 | if (m_snap_ids.empty()) { | |
362 | // no object map update required | |
363 | return send_copyup(); | |
364 | } else { | |
365 | // update object maps for HEAD and all existing snapshots | |
366 | ldout(m_ictx->cct, 20) << "oid " << m_oid << dendl; | |
367 | m_state = STATE_OBJECT_MAP; | |
368 | ||
369 | RWLock::RLocker owner_locker(m_ictx->owner_lock); | |
370 | AsyncObjectThrottle<>::ContextFactory context_factory( | |
371 | boost::lambda::bind(boost::lambda::new_ptr<UpdateObjectMap>(), | |
31f18b77 | 372 | boost::lambda::_1, m_ictx, m_object_no, &m_snap_ids, m_trace, |
7c673cae FG |
373 | boost::lambda::_2)); |
374 | AsyncObjectThrottle<> *throttle = new AsyncObjectThrottle<>( | |
375 | NULL, *m_ictx, context_factory, util::create_context_callback(this), | |
376 | NULL, 0, m_snap_ids.size()); | |
377 | throttle->start_ops(m_ictx->concurrent_management_ops); | |
378 | } | |
379 | return false; | |
380 | } | |
381 | ||
382 | } // namespace io | |
383 | } // namespace librbd | |
b32b8144 FG |
384 | |
385 | template class librbd::io::CopyupRequest<librbd::ImageCtx>; |