]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | #include "SnapServer.h" | |
16 | #include "MDSRank.h" | |
17 | #include "osd/OSDMap.h" | |
18 | #include "osdc/Objecter.h" | |
19 | #include "mon/MonClient.h" | |
20 | ||
21 | #include "include/types.h" | |
22 | #include "messages/MMDSTableRequest.h" | |
23 | #include "messages/MRemoveSnaps.h" | |
24 | ||
25 | #include "msg/Messenger.h" | |
26 | ||
27 | #include "common/config.h" | |
11fdf7f2 | 28 | #include "include/ceph_assert.h" |
7c673cae FG |
29 | |
30 | #define dout_context g_ceph_context | |
31 | #define dout_subsys ceph_subsys_mds | |
32 | #undef dout_prefix | |
33 | #define dout_prefix *_dout << "mds." << rank << ".snap " | |
34 | ||
20effc67 | 35 | using namespace std; |
7c673cae FG |
36 | |
37 | void SnapServer::reset_state() | |
38 | { | |
39 | last_snap = 1; /* snapid 1 reserved for initial root snaprealm */ | |
40 | snaps.clear(); | |
41 | need_to_purge.clear(); | |
11fdf7f2 TL |
42 | pending_update.clear(); |
43 | pending_destroy.clear(); | |
44 | pending_noop.clear(); | |
7c673cae FG |
45 | |
46 | // find any removed snapshot in data pools | |
47 | if (mds) { // only if I'm running in a live MDS | |
48 | snapid_t first_free = 0; | |
49 | mds->objecter->with_osdmap([&](const OSDMap& o) { | |
50 | for (const auto p : mds->mdsmap->get_data_pools()) { | |
51 | const pg_pool_t *pi = o.get_pg_pool(p); | |
52 | if (!pi) { | |
53 | // If pool isn't in OSDMap yet then can't have any snaps | |
54 | // needing removal, skip. | |
55 | continue; | |
56 | } | |
11fdf7f2 TL |
57 | if (pi->snap_seq > first_free) { |
58 | first_free = pi->snap_seq; | |
59 | } | |
7c673cae FG |
60 | } |
61 | }); | |
62 | if (first_free > last_snap) | |
63 | last_snap = first_free; | |
64 | } | |
11fdf7f2 TL |
65 | last_created = last_snap; |
66 | last_destroyed = last_snap; | |
67 | snaprealm_v2_since = last_snap + 1; | |
68 | ||
69 | MDSTableServer::reset_state(); | |
7c673cae FG |
70 | } |
71 | ||
72 | ||
73 | // SERVER | |
74 | ||
11fdf7f2 | 75 | void SnapServer::_prepare(const bufferlist& bl, uint64_t reqid, mds_rank_t bymds, bufferlist& out) |
7c673cae | 76 | { |
11fdf7f2 TL |
77 | using ceph::decode; |
78 | using ceph::encode; | |
79 | auto p = bl.cbegin(); | |
7c673cae | 80 | __u32 op; |
11fdf7f2 | 81 | decode(op, p); |
7c673cae FG |
82 | |
83 | switch (op) { | |
84 | case TABLE_OP_CREATE: | |
85 | { | |
7c673cae | 86 | SnapInfo info; |
11fdf7f2 | 87 | decode(info.ino, p); |
7c673cae | 88 | if (!p.end()) { |
11fdf7f2 TL |
89 | decode(info.name, p); |
90 | decode(info.stamp, p); | |
7c673cae | 91 | info.snapid = ++last_snap; |
7c673cae FG |
92 | pending_update[version] = info; |
93 | dout(10) << "prepare v" << version << " create " << info << dendl; | |
94 | } else { | |
95 | pending_noop.insert(version); | |
96 | dout(10) << "prepare v" << version << " noop" << dendl; | |
97 | } | |
11fdf7f2 TL |
98 | |
99 | encode(last_snap, out); | |
7c673cae FG |
100 | } |
101 | break; | |
102 | ||
103 | case TABLE_OP_DESTROY: | |
104 | { | |
105 | inodeno_t ino; | |
106 | snapid_t snapid; | |
11fdf7f2 TL |
107 | decode(ino, p); // not used, currently. |
108 | decode(snapid, p); | |
7c673cae FG |
109 | |
110 | // bump last_snap... we use it as a version value on the snaprealm. | |
111 | ++last_snap; | |
112 | ||
20effc67 | 113 | pending_destroy[version] = {snapid, last_snap}; |
7c673cae FG |
114 | dout(10) << "prepare v" << version << " destroy " << snapid << " seq " << last_snap << dendl; |
115 | ||
11fdf7f2 | 116 | encode(last_snap, out); |
7c673cae FG |
117 | } |
118 | break; | |
119 | ||
120 | case TABLE_OP_UPDATE: | |
121 | { | |
122 | SnapInfo info; | |
11fdf7f2 TL |
123 | decode(info.ino, p); |
124 | decode(info.snapid, p); | |
125 | decode(info.name, p); | |
126 | decode(info.stamp, p); | |
7c673cae | 127 | |
7c673cae FG |
128 | pending_update[version] = info; |
129 | dout(10) << "prepare v" << version << " update " << info << dendl; | |
7c673cae FG |
130 | } |
131 | break; | |
132 | ||
133 | default: | |
134 | ceph_abort(); | |
135 | } | |
136 | //dump(); | |
137 | } | |
138 | ||
11fdf7f2 | 139 | void SnapServer::_get_reply_buffer(version_t tid, bufferlist *pbl) const |
7c673cae | 140 | { |
11fdf7f2 TL |
141 | using ceph::encode; |
142 | auto p = pending_update.find(tid); | |
143 | if (p != pending_update.end()) { | |
144 | if (pbl && !snaps.count(p->second.snapid)) // create | |
145 | encode(p->second.snapid, *pbl); | |
146 | return; | |
147 | } | |
148 | auto q = pending_destroy.find(tid); | |
149 | if (q != pending_destroy.end()) { | |
150 | if (pbl) | |
151 | encode(q->second.second, *pbl); | |
152 | return; | |
153 | } | |
154 | auto r = pending_noop.find(tid); | |
155 | if (r != pending_noop.end()) { | |
156 | if (pbl) | |
157 | encode(last_snap, *pbl); | |
158 | return; | |
159 | } | |
160 | assert (0 == "tid not found"); | |
7c673cae FG |
161 | } |
162 | ||
9f95a23c | 163 | void SnapServer::_commit(version_t tid, cref_t<MMDSTableRequest> req) |
7c673cae FG |
164 | { |
165 | if (pending_update.count(tid)) { | |
166 | SnapInfo &info = pending_update[tid]; | |
167 | string opname; | |
11fdf7f2 TL |
168 | if (snaps.count(info.snapid)) { |
169 | opname = "update"; | |
170 | if (info.stamp == utime_t()) | |
171 | info.stamp = snaps[info.snapid].stamp; | |
172 | } else { | |
7c673cae | 173 | opname = "create"; |
11fdf7f2 TL |
174 | if (info.snapid > last_created) |
175 | last_created = info.snapid; | |
176 | } | |
7c673cae FG |
177 | dout(7) << "commit " << tid << " " << opname << " " << info << dendl; |
178 | snaps[info.snapid] = info; | |
179 | pending_update.erase(tid); | |
180 | } | |
181 | ||
182 | else if (pending_destroy.count(tid)) { | |
183 | snapid_t sn = pending_destroy[tid].first; | |
184 | snapid_t seq = pending_destroy[tid].second; | |
185 | dout(7) << "commit " << tid << " destroy " << sn << " seq " << seq << dendl; | |
186 | snaps.erase(sn); | |
11fdf7f2 TL |
187 | if (seq > last_destroyed) |
188 | last_destroyed = seq; | |
7c673cae | 189 | |
31f18b77 FG |
190 | for (const auto p : mds->mdsmap->get_data_pools()) { |
191 | need_to_purge[p].insert(sn); | |
192 | need_to_purge[p].insert(seq); | |
7c673cae FG |
193 | } |
194 | ||
195 | pending_destroy.erase(tid); | |
196 | } | |
197 | else if (pending_noop.count(tid)) { | |
198 | dout(7) << "commit " << tid << " noop" << dendl; | |
199 | pending_noop.erase(tid); | |
200 | } | |
201 | else | |
202 | ceph_abort(); | |
203 | ||
7c673cae | 204 | //dump(); |
7c673cae FG |
205 | } |
206 | ||
207 | void SnapServer::_rollback(version_t tid) | |
208 | { | |
209 | if (pending_update.count(tid)) { | |
210 | SnapInfo &info = pending_update[tid]; | |
211 | string opname; | |
11fdf7f2 TL |
212 | if (snaps.count(info.snapid)) |
213 | opname = "update"; | |
7c673cae | 214 | else |
11fdf7f2 | 215 | opname = "create"; |
7c673cae FG |
216 | dout(7) << "rollback " << tid << " " << opname << " " << info << dendl; |
217 | pending_update.erase(tid); | |
218 | } | |
219 | ||
220 | else if (pending_destroy.count(tid)) { | |
221 | dout(7) << "rollback " << tid << " destroy " << pending_destroy[tid] << dendl; | |
222 | pending_destroy.erase(tid); | |
223 | } | |
224 | ||
225 | else if (pending_noop.count(tid)) { | |
226 | dout(7) << "rollback " << tid << " noop" << dendl; | |
227 | pending_noop.erase(tid); | |
228 | } | |
229 | ||
230 | else | |
231 | ceph_abort(); | |
232 | ||
7c673cae FG |
233 | //dump(); |
234 | } | |
235 | ||
236 | void SnapServer::_server_update(bufferlist& bl) | |
237 | { | |
11fdf7f2 TL |
238 | using ceph::decode; |
239 | auto p = bl.cbegin(); | |
7c673cae | 240 | map<int, vector<snapid_t> > purge; |
11fdf7f2 | 241 | decode(purge, p); |
7c673cae FG |
242 | |
243 | dout(7) << "_server_update purged " << purge << dendl; | |
244 | for (map<int, vector<snapid_t> >::iterator p = purge.begin(); | |
245 | p != purge.end(); | |
246 | ++p) { | |
247 | for (vector<snapid_t>::iterator q = p->second.begin(); | |
248 | q != p->second.end(); | |
249 | ++q) | |
250 | need_to_purge[p->first].erase(*q); | |
251 | if (need_to_purge[p->first].empty()) | |
252 | need_to_purge.erase(p->first); | |
253 | } | |
7c673cae FG |
254 | } |
255 | ||
11fdf7f2 | 256 | bool SnapServer::_notify_prep(version_t tid) |
7c673cae | 257 | { |
11fdf7f2 TL |
258 | using ceph::encode; |
259 | bufferlist bl; | |
260 | char type = 'F'; | |
261 | encode(type, bl); | |
262 | encode(snaps, bl); | |
263 | encode(pending_update, bl); | |
264 | encode(pending_destroy, bl); | |
265 | encode(last_created, bl); | |
266 | encode(last_destroyed, bl); | |
267 | ceph_assert(version == tid); | |
268 | ||
269 | for (auto &p : active_clients) { | |
9f95a23c | 270 | auto m = make_message<MMDSTableRequest>(table, TABLESERVER_OP_NOTIFY_PREP, 0, version); |
11fdf7f2 TL |
271 | m->bl = bl; |
272 | mds->send_message_mds(m, p); | |
273 | } | |
274 | return true; | |
7c673cae FG |
275 | } |
276 | ||
9f95a23c | 277 | void SnapServer::handle_query(const cref_t<MMDSTableRequest> &req) |
11fdf7f2 TL |
278 | { |
279 | using ceph::encode; | |
280 | using ceph::decode; | |
281 | char op; | |
282 | auto p = req->bl.cbegin(); | |
283 | decode(op, p); | |
7c673cae | 284 | |
9f95a23c | 285 | auto reply = make_message<MMDSTableRequest>(table, TABLESERVER_OP_QUERY_REPLY, req->reqid, version); |
11fdf7f2 TL |
286 | |
287 | switch (op) { | |
288 | case 'F': // full | |
289 | version_t have_version; | |
290 | decode(have_version, p); | |
291 | ceph_assert(have_version <= version); | |
292 | if (have_version == version) { | |
293 | char type = 'U'; | |
294 | encode(type, reply->bl); | |
295 | } else { | |
296 | char type = 'F'; | |
297 | encode(type, reply->bl); | |
298 | encode(snaps, reply->bl); | |
299 | encode(pending_update, reply->bl); | |
300 | encode(pending_destroy, reply->bl); | |
301 | encode(last_created, reply->bl); | |
302 | encode(last_destroyed, reply->bl); | |
303 | } | |
304 | // FIXME: implement incremental change | |
305 | break; | |
306 | default: | |
307 | ceph_abort(); | |
308 | }; | |
309 | ||
310 | mds->send_message(reply, req->get_connection()); | |
311 | } | |
7c673cae FG |
312 | |
313 | void SnapServer::check_osd_map(bool force) | |
314 | { | |
315 | if (!force && version == last_checked_osdmap) { | |
316 | dout(10) << "check_osd_map - version unchanged" << dendl; | |
317 | return; | |
318 | } | |
319 | dout(10) << "check_osd_map need_to_purge=" << need_to_purge << dendl; | |
320 | ||
9f95a23c TL |
321 | map<int32_t, vector<snapid_t> > all_purge; |
322 | map<int32_t, vector<snapid_t> > all_purged; | |
7c673cae | 323 | |
9f95a23c TL |
324 | // NOTE: this is only needed for support during upgrades from pre-octopus, |
325 | // since starting with octopus we now get an explicit ack after we remove a | |
326 | // snap. | |
7c673cae FG |
327 | mds->objecter->with_osdmap( |
328 | [this, &all_purged, &all_purge](const OSDMap& osdmap) { | |
329 | for (const auto& p : need_to_purge) { | |
330 | int id = p.first; | |
331 | const pg_pool_t *pi = osdmap.get_pg_pool(id); | |
332 | if (pi == NULL) { | |
333 | // The pool is gone. So are the snapshots. | |
334 | all_purged[id] = std::vector<snapid_t>(p.second.begin(), | |
335 | p.second.end()); | |
336 | continue; | |
337 | } | |
338 | ||
339 | for (const auto& q : p.second) { | |
340 | if (pi->is_removed_snap(q)) { | |
341 | dout(10) << " osdmap marks " << q << " as removed" << dendl; | |
342 | all_purged[id].push_back(q); | |
343 | } else { | |
344 | all_purge[id].push_back(q); | |
345 | } | |
346 | } | |
347 | } | |
348 | }); | |
349 | ||
350 | if (!all_purged.empty()) { | |
351 | // prepare to remove from need_to_purge list | |
352 | bufferlist bl; | |
11fdf7f2 TL |
353 | using ceph::encode; |
354 | encode(all_purged, bl); | |
7c673cae FG |
355 | do_server_update(bl); |
356 | } | |
357 | ||
358 | if (!all_purge.empty()) { | |
359 | dout(10) << "requesting removal of " << all_purge << dendl; | |
9f95a23c | 360 | auto m = make_message<MRemoveSnaps>(all_purge); |
11fdf7f2 | 361 | mon_client->send_mon_message(m.detach()); |
7c673cae FG |
362 | } |
363 | ||
364 | last_checked_osdmap = version; | |
365 | } | |
366 | ||
9f95a23c TL |
367 | void SnapServer::handle_remove_snaps(const cref_t<MRemoveSnaps> &m) |
368 | { | |
369 | dout(10) << __func__ << " " << *m << dendl; | |
370 | ||
371 | map<int32_t, vector<snapid_t> > all_purged; | |
372 | int num = 0; | |
373 | ||
374 | for (const auto& [id, snaps] : need_to_purge) { | |
375 | auto i = m->snaps.find(id); | |
376 | if (i == m->snaps.end()) { | |
377 | continue; | |
378 | } | |
379 | for (const auto& q : snaps) { | |
380 | if (std::find(i->second.begin(), i->second.end(), q) != i->second.end()) { | |
381 | dout(10) << " mon reports " << q << " is removed" << dendl; | |
382 | all_purged[id].push_back(q); | |
383 | ++num; | |
384 | } | |
385 | } | |
386 | } | |
387 | ||
388 | dout(10) << __func__ << " " << num << " now removed" << dendl; | |
389 | if (num) { | |
390 | bufferlist bl; | |
391 | using ceph::encode; | |
392 | encode(all_purged, bl); | |
393 | do_server_update(bl); | |
394 | } | |
395 | } | |
396 | ||
7c673cae FG |
397 | |
398 | void SnapServer::dump(Formatter *f) const | |
399 | { | |
400 | f->open_object_section("snapserver"); | |
401 | ||
11fdf7f2 TL |
402 | f->dump_int("last_snap", last_snap); |
403 | f->dump_int("last_created", last_created); | |
404 | f->dump_int("last_destroyed", last_destroyed); | |
7c673cae FG |
405 | |
406 | f->open_array_section("pending_noop"); | |
407 | for(set<version_t>::const_iterator i = pending_noop.begin(); i != pending_noop.end(); ++i) { | |
408 | f->dump_unsigned("version", *i); | |
409 | } | |
410 | f->close_section(); | |
411 | ||
412 | f->open_array_section("snaps"); | |
413 | for (map<snapid_t, SnapInfo>::const_iterator i = snaps.begin(); i != snaps.end(); ++i) { | |
414 | f->open_object_section("snap"); | |
415 | i->second.dump(f); | |
416 | f->close_section(); | |
417 | } | |
418 | f->close_section(); | |
419 | ||
420 | f->open_object_section("need_to_purge"); | |
421 | for (map<int, set<snapid_t> >::const_iterator i = need_to_purge.begin(); i != need_to_purge.end(); ++i) { | |
f67539c2 TL |
422 | CachedStackStringStream css; |
423 | *css << i->first; | |
424 | f->open_array_section(css->strv()); | |
7c673cae FG |
425 | for (set<snapid_t>::const_iterator s = i->second.begin(); s != i->second.end(); ++s) { |
426 | f->dump_unsigned("snapid", s->val); | |
427 | } | |
428 | f->close_section(); | |
429 | } | |
430 | f->close_section(); | |
431 | ||
432 | f->open_array_section("pending_update"); | |
433 | for(map<version_t, SnapInfo>::const_iterator i = pending_update.begin(); i != pending_update.end(); ++i) { | |
434 | f->open_object_section("snap"); | |
435 | f->dump_unsigned("version", i->first); | |
436 | f->open_object_section("snapinfo"); | |
437 | i->second.dump(f); | |
438 | f->close_section(); | |
439 | f->close_section(); | |
440 | } | |
441 | f->close_section(); | |
442 | ||
443 | f->open_array_section("pending_destroy"); | |
444 | for(map<version_t, pair<snapid_t, snapid_t> >::const_iterator i = pending_destroy.begin(); i != pending_destroy.end(); ++i) { | |
445 | f->open_object_section("snap"); | |
446 | f->dump_unsigned("version", i->first); | |
447 | f->dump_unsigned("removed_snap", i->second.first); | |
448 | f->dump_unsigned("seq", i->second.second); | |
449 | f->close_section(); | |
450 | } | |
451 | f->close_section(); | |
452 | ||
453 | f->close_section(); | |
454 | } | |
455 | ||
9f95a23c | 456 | void SnapServer::generate_test_instances(std::list<SnapServer*>& ls) |
7c673cae FG |
457 | { |
458 | list<SnapInfo*> snapinfo_instances; | |
459 | SnapInfo::generate_test_instances(snapinfo_instances); | |
460 | SnapInfo populated_snapinfo = *(snapinfo_instances.back()); | |
9f95a23c TL |
461 | for (auto& info : snapinfo_instances) { |
462 | delete info; | |
463 | info = nullptr; | |
7c673cae FG |
464 | } |
465 | ||
466 | SnapServer *blank = new SnapServer(); | |
467 | ls.push_back(blank); | |
468 | SnapServer *populated = new SnapServer(); | |
469 | populated->last_snap = 123; | |
470 | populated->snaps[456] = populated_snapinfo; | |
471 | populated->need_to_purge[2].insert(012); | |
472 | populated->pending_update[234] = populated_snapinfo; | |
473 | populated->pending_destroy[345].first = 567; | |
474 | populated->pending_destroy[345].second = 768; | |
475 | populated->pending_noop.insert(890); | |
476 | ||
477 | ls.push_back(populated); | |
11fdf7f2 TL |
478 | } |
479 | ||
480 | bool SnapServer::force_update(snapid_t last, snapid_t v2_since, | |
481 | map<snapid_t, SnapInfo>& _snaps) | |
482 | { | |
483 | bool modified = false; | |
484 | if (last > last_snap) { | |
485 | derr << " updating last_snap " << last_snap << " -> " << last << dendl; | |
486 | last_snap = last; | |
487 | last_created = last; | |
488 | last_destroyed = last; | |
489 | modified = true; | |
490 | } | |
491 | if (v2_since > snaprealm_v2_since) { | |
492 | derr << " updating snaprealm_v2_since " << snaprealm_v2_since | |
493 | << " -> " << v2_since << dendl; | |
494 | snaprealm_v2_since = v2_since; | |
495 | modified = true; | |
496 | } | |
497 | if (snaps != _snaps) { | |
498 | derr << " updating snaps {" << snaps << "} -> {" << _snaps << "}" << dendl; | |
499 | snaps = _snaps; | |
500 | modified = true; | |
501 | } | |
7c673cae | 502 | |
11fdf7f2 TL |
503 | if (modified) { |
504 | need_to_purge.clear(); | |
505 | pending_update.clear(); | |
506 | pending_destroy.clear(); | |
507 | pending_noop.clear(); | |
508 | MDSTableServer::reset_state(); | |
509 | } | |
510 | return modified; | |
7c673cae | 511 | } |