]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | #include "SnapServer.h" | |
16 | #include "MDSRank.h" | |
17 | #include "osd/OSDMap.h" | |
18 | #include "osdc/Objecter.h" | |
19 | #include "mon/MonClient.h" | |
20 | ||
21 | #include "include/types.h" | |
22 | #include "messages/MMDSTableRequest.h" | |
23 | #include "messages/MRemoveSnaps.h" | |
24 | ||
25 | #include "msg/Messenger.h" | |
26 | ||
27 | #include "common/config.h" | |
11fdf7f2 | 28 | #include "include/ceph_assert.h" |
7c673cae FG |
29 | |
// Logging configuration consumed by the dout()/derr statements in this file:
// the context and subsystem the messages are attributed to.
#define dout_context g_ceph_context
#define dout_subsys ceph_subsys_mds
#undef dout_prefix
// Each message is prefixed with "mds.<rank>.snap "; an identifier named
// `rank` must therefore be resolvable wherever a log statement is written.
#define dout_prefix *_dout << "mds." << rank << ".snap "
34 | ||
35 | ||
36 | void SnapServer::reset_state() | |
37 | { | |
38 | last_snap = 1; /* snapid 1 reserved for initial root snaprealm */ | |
39 | snaps.clear(); | |
40 | need_to_purge.clear(); | |
11fdf7f2 TL |
41 | pending_update.clear(); |
42 | pending_destroy.clear(); | |
43 | pending_noop.clear(); | |
7c673cae FG |
44 | |
45 | // find any removed snapshot in data pools | |
46 | if (mds) { // only if I'm running in a live MDS | |
47 | snapid_t first_free = 0; | |
48 | mds->objecter->with_osdmap([&](const OSDMap& o) { | |
49 | for (const auto p : mds->mdsmap->get_data_pools()) { | |
50 | const pg_pool_t *pi = o.get_pg_pool(p); | |
51 | if (!pi) { | |
52 | // If pool isn't in OSDMap yet then can't have any snaps | |
53 | // needing removal, skip. | |
54 | continue; | |
55 | } | |
11fdf7f2 TL |
56 | if (pi->snap_seq > first_free) { |
57 | first_free = pi->snap_seq; | |
58 | } | |
7c673cae FG |
59 | } |
60 | }); | |
61 | if (first_free > last_snap) | |
62 | last_snap = first_free; | |
63 | } | |
11fdf7f2 TL |
64 | last_created = last_snap; |
65 | last_destroyed = last_snap; | |
66 | snaprealm_v2_since = last_snap + 1; | |
67 | ||
68 | MDSTableServer::reset_state(); | |
7c673cae FG |
69 | } |
70 | ||
71 | ||
72 | // SERVER | |
73 | ||
11fdf7f2 | 74 | void SnapServer::_prepare(const bufferlist& bl, uint64_t reqid, mds_rank_t bymds, bufferlist& out) |
7c673cae | 75 | { |
11fdf7f2 TL |
76 | using ceph::decode; |
77 | using ceph::encode; | |
78 | auto p = bl.cbegin(); | |
7c673cae | 79 | __u32 op; |
11fdf7f2 | 80 | decode(op, p); |
7c673cae FG |
81 | |
82 | switch (op) { | |
83 | case TABLE_OP_CREATE: | |
84 | { | |
7c673cae | 85 | SnapInfo info; |
11fdf7f2 | 86 | decode(info.ino, p); |
7c673cae | 87 | if (!p.end()) { |
11fdf7f2 TL |
88 | decode(info.name, p); |
89 | decode(info.stamp, p); | |
7c673cae | 90 | info.snapid = ++last_snap; |
7c673cae FG |
91 | pending_update[version] = info; |
92 | dout(10) << "prepare v" << version << " create " << info << dendl; | |
93 | } else { | |
94 | pending_noop.insert(version); | |
95 | dout(10) << "prepare v" << version << " noop" << dendl; | |
96 | } | |
11fdf7f2 TL |
97 | |
98 | encode(last_snap, out); | |
7c673cae FG |
99 | } |
100 | break; | |
101 | ||
102 | case TABLE_OP_DESTROY: | |
103 | { | |
104 | inodeno_t ino; | |
105 | snapid_t snapid; | |
11fdf7f2 TL |
106 | decode(ino, p); // not used, currently. |
107 | decode(snapid, p); | |
7c673cae FG |
108 | |
109 | // bump last_snap... we use it as a version value on the snaprealm. | |
110 | ++last_snap; | |
111 | ||
112 | pending_destroy[version] = pair<snapid_t,snapid_t>(snapid, last_snap); | |
113 | dout(10) << "prepare v" << version << " destroy " << snapid << " seq " << last_snap << dendl; | |
114 | ||
11fdf7f2 | 115 | encode(last_snap, out); |
7c673cae FG |
116 | } |
117 | break; | |
118 | ||
119 | case TABLE_OP_UPDATE: | |
120 | { | |
121 | SnapInfo info; | |
11fdf7f2 TL |
122 | decode(info.ino, p); |
123 | decode(info.snapid, p); | |
124 | decode(info.name, p); | |
125 | decode(info.stamp, p); | |
7c673cae | 126 | |
7c673cae FG |
127 | pending_update[version] = info; |
128 | dout(10) << "prepare v" << version << " update " << info << dendl; | |
7c673cae FG |
129 | } |
130 | break; | |
131 | ||
132 | default: | |
133 | ceph_abort(); | |
134 | } | |
135 | //dump(); | |
136 | } | |
137 | ||
11fdf7f2 | 138 | void SnapServer::_get_reply_buffer(version_t tid, bufferlist *pbl) const |
7c673cae | 139 | { |
11fdf7f2 TL |
140 | using ceph::encode; |
141 | auto p = pending_update.find(tid); | |
142 | if (p != pending_update.end()) { | |
143 | if (pbl && !snaps.count(p->second.snapid)) // create | |
144 | encode(p->second.snapid, *pbl); | |
145 | return; | |
146 | } | |
147 | auto q = pending_destroy.find(tid); | |
148 | if (q != pending_destroy.end()) { | |
149 | if (pbl) | |
150 | encode(q->second.second, *pbl); | |
151 | return; | |
152 | } | |
153 | auto r = pending_noop.find(tid); | |
154 | if (r != pending_noop.end()) { | |
155 | if (pbl) | |
156 | encode(last_snap, *pbl); | |
157 | return; | |
158 | } | |
159 | assert (0 == "tid not found"); | |
7c673cae FG |
160 | } |
161 | ||
9f95a23c | 162 | void SnapServer::_commit(version_t tid, cref_t<MMDSTableRequest> req) |
7c673cae FG |
163 | { |
164 | if (pending_update.count(tid)) { | |
165 | SnapInfo &info = pending_update[tid]; | |
166 | string opname; | |
11fdf7f2 TL |
167 | if (snaps.count(info.snapid)) { |
168 | opname = "update"; | |
169 | if (info.stamp == utime_t()) | |
170 | info.stamp = snaps[info.snapid].stamp; | |
171 | } else { | |
7c673cae | 172 | opname = "create"; |
11fdf7f2 TL |
173 | if (info.snapid > last_created) |
174 | last_created = info.snapid; | |
175 | } | |
7c673cae FG |
176 | dout(7) << "commit " << tid << " " << opname << " " << info << dendl; |
177 | snaps[info.snapid] = info; | |
178 | pending_update.erase(tid); | |
179 | } | |
180 | ||
181 | else if (pending_destroy.count(tid)) { | |
182 | snapid_t sn = pending_destroy[tid].first; | |
183 | snapid_t seq = pending_destroy[tid].second; | |
184 | dout(7) << "commit " << tid << " destroy " << sn << " seq " << seq << dendl; | |
185 | snaps.erase(sn); | |
11fdf7f2 TL |
186 | if (seq > last_destroyed) |
187 | last_destroyed = seq; | |
7c673cae | 188 | |
31f18b77 FG |
189 | for (const auto p : mds->mdsmap->get_data_pools()) { |
190 | need_to_purge[p].insert(sn); | |
191 | need_to_purge[p].insert(seq); | |
7c673cae FG |
192 | } |
193 | ||
194 | pending_destroy.erase(tid); | |
195 | } | |
196 | else if (pending_noop.count(tid)) { | |
197 | dout(7) << "commit " << tid << " noop" << dendl; | |
198 | pending_noop.erase(tid); | |
199 | } | |
200 | else | |
201 | ceph_abort(); | |
202 | ||
7c673cae | 203 | //dump(); |
7c673cae FG |
204 | } |
205 | ||
206 | void SnapServer::_rollback(version_t tid) | |
207 | { | |
208 | if (pending_update.count(tid)) { | |
209 | SnapInfo &info = pending_update[tid]; | |
210 | string opname; | |
11fdf7f2 TL |
211 | if (snaps.count(info.snapid)) |
212 | opname = "update"; | |
7c673cae | 213 | else |
11fdf7f2 | 214 | opname = "create"; |
7c673cae FG |
215 | dout(7) << "rollback " << tid << " " << opname << " " << info << dendl; |
216 | pending_update.erase(tid); | |
217 | } | |
218 | ||
219 | else if (pending_destroy.count(tid)) { | |
220 | dout(7) << "rollback " << tid << " destroy " << pending_destroy[tid] << dendl; | |
221 | pending_destroy.erase(tid); | |
222 | } | |
223 | ||
224 | else if (pending_noop.count(tid)) { | |
225 | dout(7) << "rollback " << tid << " noop" << dendl; | |
226 | pending_noop.erase(tid); | |
227 | } | |
228 | ||
229 | else | |
230 | ceph_abort(); | |
231 | ||
7c673cae FG |
232 | //dump(); |
233 | } | |
234 | ||
235 | void SnapServer::_server_update(bufferlist& bl) | |
236 | { | |
11fdf7f2 TL |
237 | using ceph::decode; |
238 | auto p = bl.cbegin(); | |
7c673cae | 239 | map<int, vector<snapid_t> > purge; |
11fdf7f2 | 240 | decode(purge, p); |
7c673cae FG |
241 | |
242 | dout(7) << "_server_update purged " << purge << dendl; | |
243 | for (map<int, vector<snapid_t> >::iterator p = purge.begin(); | |
244 | p != purge.end(); | |
245 | ++p) { | |
246 | for (vector<snapid_t>::iterator q = p->second.begin(); | |
247 | q != p->second.end(); | |
248 | ++q) | |
249 | need_to_purge[p->first].erase(*q); | |
250 | if (need_to_purge[p->first].empty()) | |
251 | need_to_purge.erase(p->first); | |
252 | } | |
7c673cae FG |
253 | } |
254 | ||
11fdf7f2 | 255 | bool SnapServer::_notify_prep(version_t tid) |
7c673cae | 256 | { |
11fdf7f2 TL |
257 | using ceph::encode; |
258 | bufferlist bl; | |
259 | char type = 'F'; | |
260 | encode(type, bl); | |
261 | encode(snaps, bl); | |
262 | encode(pending_update, bl); | |
263 | encode(pending_destroy, bl); | |
264 | encode(last_created, bl); | |
265 | encode(last_destroyed, bl); | |
266 | ceph_assert(version == tid); | |
267 | ||
268 | for (auto &p : active_clients) { | |
9f95a23c | 269 | auto m = make_message<MMDSTableRequest>(table, TABLESERVER_OP_NOTIFY_PREP, 0, version); |
11fdf7f2 TL |
270 | m->bl = bl; |
271 | mds->send_message_mds(m, p); | |
272 | } | |
273 | return true; | |
7c673cae FG |
274 | } |
275 | ||
9f95a23c | 276 | void SnapServer::handle_query(const cref_t<MMDSTableRequest> &req) |
11fdf7f2 TL |
277 | { |
278 | using ceph::encode; | |
279 | using ceph::decode; | |
280 | char op; | |
281 | auto p = req->bl.cbegin(); | |
282 | decode(op, p); | |
7c673cae | 283 | |
9f95a23c | 284 | auto reply = make_message<MMDSTableRequest>(table, TABLESERVER_OP_QUERY_REPLY, req->reqid, version); |
11fdf7f2 TL |
285 | |
286 | switch (op) { | |
287 | case 'F': // full | |
288 | version_t have_version; | |
289 | decode(have_version, p); | |
290 | ceph_assert(have_version <= version); | |
291 | if (have_version == version) { | |
292 | char type = 'U'; | |
293 | encode(type, reply->bl); | |
294 | } else { | |
295 | char type = 'F'; | |
296 | encode(type, reply->bl); | |
297 | encode(snaps, reply->bl); | |
298 | encode(pending_update, reply->bl); | |
299 | encode(pending_destroy, reply->bl); | |
300 | encode(last_created, reply->bl); | |
301 | encode(last_destroyed, reply->bl); | |
302 | } | |
303 | // FIXME: implement incremental change | |
304 | break; | |
305 | default: | |
306 | ceph_abort(); | |
307 | }; | |
308 | ||
309 | mds->send_message(reply, req->get_connection()); | |
310 | } | |
7c673cae FG |
311 | |
312 | void SnapServer::check_osd_map(bool force) | |
313 | { | |
314 | if (!force && version == last_checked_osdmap) { | |
315 | dout(10) << "check_osd_map - version unchanged" << dendl; | |
316 | return; | |
317 | } | |
318 | dout(10) << "check_osd_map need_to_purge=" << need_to_purge << dendl; | |
319 | ||
9f95a23c TL |
320 | map<int32_t, vector<snapid_t> > all_purge; |
321 | map<int32_t, vector<snapid_t> > all_purged; | |
7c673cae | 322 | |
9f95a23c TL |
323 | // NOTE: this is only needed for support during upgrades from pre-octopus, |
324 | // since starting with octopus we now get an explicit ack after we remove a | |
325 | // snap. | |
7c673cae FG |
326 | mds->objecter->with_osdmap( |
327 | [this, &all_purged, &all_purge](const OSDMap& osdmap) { | |
328 | for (const auto& p : need_to_purge) { | |
329 | int id = p.first; | |
330 | const pg_pool_t *pi = osdmap.get_pg_pool(id); | |
331 | if (pi == NULL) { | |
332 | // The pool is gone. So are the snapshots. | |
333 | all_purged[id] = std::vector<snapid_t>(p.second.begin(), | |
334 | p.second.end()); | |
335 | continue; | |
336 | } | |
337 | ||
338 | for (const auto& q : p.second) { | |
339 | if (pi->is_removed_snap(q)) { | |
340 | dout(10) << " osdmap marks " << q << " as removed" << dendl; | |
341 | all_purged[id].push_back(q); | |
342 | } else { | |
343 | all_purge[id].push_back(q); | |
344 | } | |
345 | } | |
346 | } | |
347 | }); | |
348 | ||
349 | if (!all_purged.empty()) { | |
350 | // prepare to remove from need_to_purge list | |
351 | bufferlist bl; | |
11fdf7f2 TL |
352 | using ceph::encode; |
353 | encode(all_purged, bl); | |
7c673cae FG |
354 | do_server_update(bl); |
355 | } | |
356 | ||
357 | if (!all_purge.empty()) { | |
358 | dout(10) << "requesting removal of " << all_purge << dendl; | |
9f95a23c | 359 | auto m = make_message<MRemoveSnaps>(all_purge); |
11fdf7f2 | 360 | mon_client->send_mon_message(m.detach()); |
7c673cae FG |
361 | } |
362 | ||
363 | last_checked_osdmap = version; | |
364 | } | |
365 | ||
9f95a23c TL |
366 | void SnapServer::handle_remove_snaps(const cref_t<MRemoveSnaps> &m) |
367 | { | |
368 | dout(10) << __func__ << " " << *m << dendl; | |
369 | ||
370 | map<int32_t, vector<snapid_t> > all_purged; | |
371 | int num = 0; | |
372 | ||
373 | for (const auto& [id, snaps] : need_to_purge) { | |
374 | auto i = m->snaps.find(id); | |
375 | if (i == m->snaps.end()) { | |
376 | continue; | |
377 | } | |
378 | for (const auto& q : snaps) { | |
379 | if (std::find(i->second.begin(), i->second.end(), q) != i->second.end()) { | |
380 | dout(10) << " mon reports " << q << " is removed" << dendl; | |
381 | all_purged[id].push_back(q); | |
382 | ++num; | |
383 | } | |
384 | } | |
385 | } | |
386 | ||
387 | dout(10) << __func__ << " " << num << " now removed" << dendl; | |
388 | if (num) { | |
389 | bufferlist bl; | |
390 | using ceph::encode; | |
391 | encode(all_purged, bl); | |
392 | do_server_update(bl); | |
393 | } | |
394 | } | |
395 | ||
7c673cae FG |
396 | |
397 | void SnapServer::dump(Formatter *f) const | |
398 | { | |
399 | f->open_object_section("snapserver"); | |
400 | ||
11fdf7f2 TL |
401 | f->dump_int("last_snap", last_snap); |
402 | f->dump_int("last_created", last_created); | |
403 | f->dump_int("last_destroyed", last_destroyed); | |
7c673cae FG |
404 | |
405 | f->open_array_section("pending_noop"); | |
406 | for(set<version_t>::const_iterator i = pending_noop.begin(); i != pending_noop.end(); ++i) { | |
407 | f->dump_unsigned("version", *i); | |
408 | } | |
409 | f->close_section(); | |
410 | ||
411 | f->open_array_section("snaps"); | |
412 | for (map<snapid_t, SnapInfo>::const_iterator i = snaps.begin(); i != snaps.end(); ++i) { | |
413 | f->open_object_section("snap"); | |
414 | i->second.dump(f); | |
415 | f->close_section(); | |
416 | } | |
417 | f->close_section(); | |
418 | ||
419 | f->open_object_section("need_to_purge"); | |
420 | for (map<int, set<snapid_t> >::const_iterator i = need_to_purge.begin(); i != need_to_purge.end(); ++i) { | |
f67539c2 TL |
421 | CachedStackStringStream css; |
422 | *css << i->first; | |
423 | f->open_array_section(css->strv()); | |
7c673cae FG |
424 | for (set<snapid_t>::const_iterator s = i->second.begin(); s != i->second.end(); ++s) { |
425 | f->dump_unsigned("snapid", s->val); | |
426 | } | |
427 | f->close_section(); | |
428 | } | |
429 | f->close_section(); | |
430 | ||
431 | f->open_array_section("pending_update"); | |
432 | for(map<version_t, SnapInfo>::const_iterator i = pending_update.begin(); i != pending_update.end(); ++i) { | |
433 | f->open_object_section("snap"); | |
434 | f->dump_unsigned("version", i->first); | |
435 | f->open_object_section("snapinfo"); | |
436 | i->second.dump(f); | |
437 | f->close_section(); | |
438 | f->close_section(); | |
439 | } | |
440 | f->close_section(); | |
441 | ||
442 | f->open_array_section("pending_destroy"); | |
443 | for(map<version_t, pair<snapid_t, snapid_t> >::const_iterator i = pending_destroy.begin(); i != pending_destroy.end(); ++i) { | |
444 | f->open_object_section("snap"); | |
445 | f->dump_unsigned("version", i->first); | |
446 | f->dump_unsigned("removed_snap", i->second.first); | |
447 | f->dump_unsigned("seq", i->second.second); | |
448 | f->close_section(); | |
449 | } | |
450 | f->close_section(); | |
451 | ||
452 | f->close_section(); | |
453 | } | |
454 | ||
9f95a23c | 455 | void SnapServer::generate_test_instances(std::list<SnapServer*>& ls) |
7c673cae FG |
456 | { |
457 | list<SnapInfo*> snapinfo_instances; | |
458 | SnapInfo::generate_test_instances(snapinfo_instances); | |
459 | SnapInfo populated_snapinfo = *(snapinfo_instances.back()); | |
9f95a23c TL |
460 | for (auto& info : snapinfo_instances) { |
461 | delete info; | |
462 | info = nullptr; | |
7c673cae FG |
463 | } |
464 | ||
465 | SnapServer *blank = new SnapServer(); | |
466 | ls.push_back(blank); | |
467 | SnapServer *populated = new SnapServer(); | |
468 | populated->last_snap = 123; | |
469 | populated->snaps[456] = populated_snapinfo; | |
470 | populated->need_to_purge[2].insert(012); | |
471 | populated->pending_update[234] = populated_snapinfo; | |
472 | populated->pending_destroy[345].first = 567; | |
473 | populated->pending_destroy[345].second = 768; | |
474 | populated->pending_noop.insert(890); | |
475 | ||
476 | ls.push_back(populated); | |
11fdf7f2 TL |
477 | } |
478 | ||
479 | bool SnapServer::force_update(snapid_t last, snapid_t v2_since, | |
480 | map<snapid_t, SnapInfo>& _snaps) | |
481 | { | |
482 | bool modified = false; | |
483 | if (last > last_snap) { | |
484 | derr << " updating last_snap " << last_snap << " -> " << last << dendl; | |
485 | last_snap = last; | |
486 | last_created = last; | |
487 | last_destroyed = last; | |
488 | modified = true; | |
489 | } | |
490 | if (v2_since > snaprealm_v2_since) { | |
491 | derr << " updating snaprealm_v2_since " << snaprealm_v2_since | |
492 | << " -> " << v2_since << dendl; | |
493 | snaprealm_v2_since = v2_since; | |
494 | modified = true; | |
495 | } | |
496 | if (snaps != _snaps) { | |
497 | derr << " updating snaps {" << snaps << "} -> {" << _snaps << "}" << dendl; | |
498 | snaps = _snaps; | |
499 | modified = true; | |
500 | } | |
7c673cae | 501 | |
11fdf7f2 TL |
502 | if (modified) { |
503 | need_to_purge.clear(); | |
504 | pending_update.clear(); | |
505 | pending_destroy.clear(); | |
506 | pending_noop.clear(); | |
507 | MDSTableServer::reset_state(); | |
508 | } | |
509 | return modified; | |
7c673cae | 510 | } |