]> git.proxmox.com Git - ceph.git/blame - ceph/src/mds/SnapServer.cc
update source to Ceph Pacific 16.2.2
[ceph.git] / ceph / src / mds / SnapServer.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15#include "SnapServer.h"
16#include "MDSRank.h"
17#include "osd/OSDMap.h"
18#include "osdc/Objecter.h"
19#include "mon/MonClient.h"
20
21#include "include/types.h"
22#include "messages/MMDSTableRequest.h"
23#include "messages/MRemoveSnaps.h"
24
25#include "msg/Messenger.h"
26
27#include "common/config.h"
11fdf7f2 28#include "include/ceph_assert.h"
7c673cae
FG
29
30#define dout_context g_ceph_context
31#define dout_subsys ceph_subsys_mds
32#undef dout_prefix
33#define dout_prefix *_dout << "mds." << rank << ".snap "
34
35
36void SnapServer::reset_state()
37{
38 last_snap = 1; /* snapid 1 reserved for initial root snaprealm */
39 snaps.clear();
40 need_to_purge.clear();
11fdf7f2
TL
41 pending_update.clear();
42 pending_destroy.clear();
43 pending_noop.clear();
7c673cae
FG
44
45 // find any removed snapshot in data pools
46 if (mds) { // only if I'm running in a live MDS
47 snapid_t first_free = 0;
48 mds->objecter->with_osdmap([&](const OSDMap& o) {
49 for (const auto p : mds->mdsmap->get_data_pools()) {
50 const pg_pool_t *pi = o.get_pg_pool(p);
51 if (!pi) {
52 // If pool isn't in OSDMap yet then can't have any snaps
53 // needing removal, skip.
54 continue;
55 }
11fdf7f2
TL
56 if (pi->snap_seq > first_free) {
57 first_free = pi->snap_seq;
58 }
7c673cae
FG
59 }
60 });
61 if (first_free > last_snap)
62 last_snap = first_free;
63 }
11fdf7f2
TL
64 last_created = last_snap;
65 last_destroyed = last_snap;
66 snaprealm_v2_since = last_snap + 1;
67
68 MDSTableServer::reset_state();
7c673cae
FG
69}
70
71
72// SERVER
73
11fdf7f2 74void SnapServer::_prepare(const bufferlist& bl, uint64_t reqid, mds_rank_t bymds, bufferlist& out)
7c673cae 75{
11fdf7f2
TL
76 using ceph::decode;
77 using ceph::encode;
78 auto p = bl.cbegin();
7c673cae 79 __u32 op;
11fdf7f2 80 decode(op, p);
7c673cae
FG
81
82 switch (op) {
83 case TABLE_OP_CREATE:
84 {
7c673cae 85 SnapInfo info;
11fdf7f2 86 decode(info.ino, p);
7c673cae 87 if (!p.end()) {
11fdf7f2
TL
88 decode(info.name, p);
89 decode(info.stamp, p);
7c673cae 90 info.snapid = ++last_snap;
7c673cae
FG
91 pending_update[version] = info;
92 dout(10) << "prepare v" << version << " create " << info << dendl;
93 } else {
94 pending_noop.insert(version);
95 dout(10) << "prepare v" << version << " noop" << dendl;
96 }
11fdf7f2
TL
97
98 encode(last_snap, out);
7c673cae
FG
99 }
100 break;
101
102 case TABLE_OP_DESTROY:
103 {
104 inodeno_t ino;
105 snapid_t snapid;
11fdf7f2
TL
106 decode(ino, p); // not used, currently.
107 decode(snapid, p);
7c673cae
FG
108
109 // bump last_snap... we use it as a version value on the snaprealm.
110 ++last_snap;
111
112 pending_destroy[version] = pair<snapid_t,snapid_t>(snapid, last_snap);
113 dout(10) << "prepare v" << version << " destroy " << snapid << " seq " << last_snap << dendl;
114
11fdf7f2 115 encode(last_snap, out);
7c673cae
FG
116 }
117 break;
118
119 case TABLE_OP_UPDATE:
120 {
121 SnapInfo info;
11fdf7f2
TL
122 decode(info.ino, p);
123 decode(info.snapid, p);
124 decode(info.name, p);
125 decode(info.stamp, p);
7c673cae 126
7c673cae
FG
127 pending_update[version] = info;
128 dout(10) << "prepare v" << version << " update " << info << dendl;
7c673cae
FG
129 }
130 break;
131
132 default:
133 ceph_abort();
134 }
135 //dump();
136}
137
11fdf7f2 138void SnapServer::_get_reply_buffer(version_t tid, bufferlist *pbl) const
7c673cae 139{
11fdf7f2
TL
140 using ceph::encode;
141 auto p = pending_update.find(tid);
142 if (p != pending_update.end()) {
143 if (pbl && !snaps.count(p->second.snapid)) // create
144 encode(p->second.snapid, *pbl);
145 return;
146 }
147 auto q = pending_destroy.find(tid);
148 if (q != pending_destroy.end()) {
149 if (pbl)
150 encode(q->second.second, *pbl);
151 return;
152 }
153 auto r = pending_noop.find(tid);
154 if (r != pending_noop.end()) {
155 if (pbl)
156 encode(last_snap, *pbl);
157 return;
158 }
159 assert (0 == "tid not found");
7c673cae
FG
160}
161
9f95a23c 162void SnapServer::_commit(version_t tid, cref_t<MMDSTableRequest> req)
7c673cae
FG
163{
164 if (pending_update.count(tid)) {
165 SnapInfo &info = pending_update[tid];
166 string opname;
11fdf7f2
TL
167 if (snaps.count(info.snapid)) {
168 opname = "update";
169 if (info.stamp == utime_t())
170 info.stamp = snaps[info.snapid].stamp;
171 } else {
7c673cae 172 opname = "create";
11fdf7f2
TL
173 if (info.snapid > last_created)
174 last_created = info.snapid;
175 }
7c673cae
FG
176 dout(7) << "commit " << tid << " " << opname << " " << info << dendl;
177 snaps[info.snapid] = info;
178 pending_update.erase(tid);
179 }
180
181 else if (pending_destroy.count(tid)) {
182 snapid_t sn = pending_destroy[tid].first;
183 snapid_t seq = pending_destroy[tid].second;
184 dout(7) << "commit " << tid << " destroy " << sn << " seq " << seq << dendl;
185 snaps.erase(sn);
11fdf7f2
TL
186 if (seq > last_destroyed)
187 last_destroyed = seq;
7c673cae 188
31f18b77
FG
189 for (const auto p : mds->mdsmap->get_data_pools()) {
190 need_to_purge[p].insert(sn);
191 need_to_purge[p].insert(seq);
7c673cae
FG
192 }
193
194 pending_destroy.erase(tid);
195 }
196 else if (pending_noop.count(tid)) {
197 dout(7) << "commit " << tid << " noop" << dendl;
198 pending_noop.erase(tid);
199 }
200 else
201 ceph_abort();
202
7c673cae 203 //dump();
7c673cae
FG
204}
205
206void SnapServer::_rollback(version_t tid)
207{
208 if (pending_update.count(tid)) {
209 SnapInfo &info = pending_update[tid];
210 string opname;
11fdf7f2
TL
211 if (snaps.count(info.snapid))
212 opname = "update";
7c673cae 213 else
11fdf7f2 214 opname = "create";
7c673cae
FG
215 dout(7) << "rollback " << tid << " " << opname << " " << info << dendl;
216 pending_update.erase(tid);
217 }
218
219 else if (pending_destroy.count(tid)) {
220 dout(7) << "rollback " << tid << " destroy " << pending_destroy[tid] << dendl;
221 pending_destroy.erase(tid);
222 }
223
224 else if (pending_noop.count(tid)) {
225 dout(7) << "rollback " << tid << " noop" << dendl;
226 pending_noop.erase(tid);
227 }
228
229 else
230 ceph_abort();
231
7c673cae
FG
232 //dump();
233}
234
235void SnapServer::_server_update(bufferlist& bl)
236{
11fdf7f2
TL
237 using ceph::decode;
238 auto p = bl.cbegin();
7c673cae 239 map<int, vector<snapid_t> > purge;
11fdf7f2 240 decode(purge, p);
7c673cae
FG
241
242 dout(7) << "_server_update purged " << purge << dendl;
243 for (map<int, vector<snapid_t> >::iterator p = purge.begin();
244 p != purge.end();
245 ++p) {
246 for (vector<snapid_t>::iterator q = p->second.begin();
247 q != p->second.end();
248 ++q)
249 need_to_purge[p->first].erase(*q);
250 if (need_to_purge[p->first].empty())
251 need_to_purge.erase(p->first);
252 }
7c673cae
FG
253}
254
11fdf7f2 255bool SnapServer::_notify_prep(version_t tid)
7c673cae 256{
11fdf7f2
TL
257 using ceph::encode;
258 bufferlist bl;
259 char type = 'F';
260 encode(type, bl);
261 encode(snaps, bl);
262 encode(pending_update, bl);
263 encode(pending_destroy, bl);
264 encode(last_created, bl);
265 encode(last_destroyed, bl);
266 ceph_assert(version == tid);
267
268 for (auto &p : active_clients) {
9f95a23c 269 auto m = make_message<MMDSTableRequest>(table, TABLESERVER_OP_NOTIFY_PREP, 0, version);
11fdf7f2
TL
270 m->bl = bl;
271 mds->send_message_mds(m, p);
272 }
273 return true;
7c673cae
FG
274}
275
9f95a23c 276void SnapServer::handle_query(const cref_t<MMDSTableRequest> &req)
11fdf7f2
TL
277{
278 using ceph::encode;
279 using ceph::decode;
280 char op;
281 auto p = req->bl.cbegin();
282 decode(op, p);
7c673cae 283
9f95a23c 284 auto reply = make_message<MMDSTableRequest>(table, TABLESERVER_OP_QUERY_REPLY, req->reqid, version);
11fdf7f2
TL
285
286 switch (op) {
287 case 'F': // full
288 version_t have_version;
289 decode(have_version, p);
290 ceph_assert(have_version <= version);
291 if (have_version == version) {
292 char type = 'U';
293 encode(type, reply->bl);
294 } else {
295 char type = 'F';
296 encode(type, reply->bl);
297 encode(snaps, reply->bl);
298 encode(pending_update, reply->bl);
299 encode(pending_destroy, reply->bl);
300 encode(last_created, reply->bl);
301 encode(last_destroyed, reply->bl);
302 }
303 // FIXME: implement incremental change
304 break;
305 default:
306 ceph_abort();
307 };
308
309 mds->send_message(reply, req->get_connection());
310}
7c673cae
FG
311
312void SnapServer::check_osd_map(bool force)
313{
314 if (!force && version == last_checked_osdmap) {
315 dout(10) << "check_osd_map - version unchanged" << dendl;
316 return;
317 }
318 dout(10) << "check_osd_map need_to_purge=" << need_to_purge << dendl;
319
9f95a23c
TL
320 map<int32_t, vector<snapid_t> > all_purge;
321 map<int32_t, vector<snapid_t> > all_purged;
7c673cae 322
9f95a23c
TL
323 // NOTE: this is only needed for support during upgrades from pre-octopus,
324 // since starting with octopus we now get an explicit ack after we remove a
325 // snap.
7c673cae
FG
326 mds->objecter->with_osdmap(
327 [this, &all_purged, &all_purge](const OSDMap& osdmap) {
328 for (const auto& p : need_to_purge) {
329 int id = p.first;
330 const pg_pool_t *pi = osdmap.get_pg_pool(id);
331 if (pi == NULL) {
332 // The pool is gone. So are the snapshots.
333 all_purged[id] = std::vector<snapid_t>(p.second.begin(),
334 p.second.end());
335 continue;
336 }
337
338 for (const auto& q : p.second) {
339 if (pi->is_removed_snap(q)) {
340 dout(10) << " osdmap marks " << q << " as removed" << dendl;
341 all_purged[id].push_back(q);
342 } else {
343 all_purge[id].push_back(q);
344 }
345 }
346 }
347 });
348
349 if (!all_purged.empty()) {
350 // prepare to remove from need_to_purge list
351 bufferlist bl;
11fdf7f2
TL
352 using ceph::encode;
353 encode(all_purged, bl);
7c673cae
FG
354 do_server_update(bl);
355 }
356
357 if (!all_purge.empty()) {
358 dout(10) << "requesting removal of " << all_purge << dendl;
9f95a23c 359 auto m = make_message<MRemoveSnaps>(all_purge);
11fdf7f2 360 mon_client->send_mon_message(m.detach());
7c673cae
FG
361 }
362
363 last_checked_osdmap = version;
364}
365
9f95a23c
TL
366void SnapServer::handle_remove_snaps(const cref_t<MRemoveSnaps> &m)
367{
368 dout(10) << __func__ << " " << *m << dendl;
369
370 map<int32_t, vector<snapid_t> > all_purged;
371 int num = 0;
372
373 for (const auto& [id, snaps] : need_to_purge) {
374 auto i = m->snaps.find(id);
375 if (i == m->snaps.end()) {
376 continue;
377 }
378 for (const auto& q : snaps) {
379 if (std::find(i->second.begin(), i->second.end(), q) != i->second.end()) {
380 dout(10) << " mon reports " << q << " is removed" << dendl;
381 all_purged[id].push_back(q);
382 ++num;
383 }
384 }
385 }
386
387 dout(10) << __func__ << " " << num << " now removed" << dendl;
388 if (num) {
389 bufferlist bl;
390 using ceph::encode;
391 encode(all_purged, bl);
392 do_server_update(bl);
393 }
394}
395
7c673cae
FG
396
397void SnapServer::dump(Formatter *f) const
398{
399 f->open_object_section("snapserver");
400
11fdf7f2
TL
401 f->dump_int("last_snap", last_snap);
402 f->dump_int("last_created", last_created);
403 f->dump_int("last_destroyed", last_destroyed);
7c673cae
FG
404
405 f->open_array_section("pending_noop");
406 for(set<version_t>::const_iterator i = pending_noop.begin(); i != pending_noop.end(); ++i) {
407 f->dump_unsigned("version", *i);
408 }
409 f->close_section();
410
411 f->open_array_section("snaps");
412 for (map<snapid_t, SnapInfo>::const_iterator i = snaps.begin(); i != snaps.end(); ++i) {
413 f->open_object_section("snap");
414 i->second.dump(f);
415 f->close_section();
416 }
417 f->close_section();
418
419 f->open_object_section("need_to_purge");
420 for (map<int, set<snapid_t> >::const_iterator i = need_to_purge.begin(); i != need_to_purge.end(); ++i) {
f67539c2
TL
421 CachedStackStringStream css;
422 *css << i->first;
423 f->open_array_section(css->strv());
7c673cae
FG
424 for (set<snapid_t>::const_iterator s = i->second.begin(); s != i->second.end(); ++s) {
425 f->dump_unsigned("snapid", s->val);
426 }
427 f->close_section();
428 }
429 f->close_section();
430
431 f->open_array_section("pending_update");
432 for(map<version_t, SnapInfo>::const_iterator i = pending_update.begin(); i != pending_update.end(); ++i) {
433 f->open_object_section("snap");
434 f->dump_unsigned("version", i->first);
435 f->open_object_section("snapinfo");
436 i->second.dump(f);
437 f->close_section();
438 f->close_section();
439 }
440 f->close_section();
441
442 f->open_array_section("pending_destroy");
443 for(map<version_t, pair<snapid_t, snapid_t> >::const_iterator i = pending_destroy.begin(); i != pending_destroy.end(); ++i) {
444 f->open_object_section("snap");
445 f->dump_unsigned("version", i->first);
446 f->dump_unsigned("removed_snap", i->second.first);
447 f->dump_unsigned("seq", i->second.second);
448 f->close_section();
449 }
450 f->close_section();
451
452 f->close_section();
453}
454
9f95a23c 455void SnapServer::generate_test_instances(std::list<SnapServer*>& ls)
7c673cae
FG
456{
457 list<SnapInfo*> snapinfo_instances;
458 SnapInfo::generate_test_instances(snapinfo_instances);
459 SnapInfo populated_snapinfo = *(snapinfo_instances.back());
9f95a23c
TL
460 for (auto& info : snapinfo_instances) {
461 delete info;
462 info = nullptr;
7c673cae
FG
463 }
464
465 SnapServer *blank = new SnapServer();
466 ls.push_back(blank);
467 SnapServer *populated = new SnapServer();
468 populated->last_snap = 123;
469 populated->snaps[456] = populated_snapinfo;
470 populated->need_to_purge[2].insert(012);
471 populated->pending_update[234] = populated_snapinfo;
472 populated->pending_destroy[345].first = 567;
473 populated->pending_destroy[345].second = 768;
474 populated->pending_noop.insert(890);
475
476 ls.push_back(populated);
11fdf7f2
TL
477}
478
479bool SnapServer::force_update(snapid_t last, snapid_t v2_since,
480 map<snapid_t, SnapInfo>& _snaps)
481{
482 bool modified = false;
483 if (last > last_snap) {
484 derr << " updating last_snap " << last_snap << " -> " << last << dendl;
485 last_snap = last;
486 last_created = last;
487 last_destroyed = last;
488 modified = true;
489 }
490 if (v2_since > snaprealm_v2_since) {
491 derr << " updating snaprealm_v2_since " << snaprealm_v2_since
492 << " -> " << v2_since << dendl;
493 snaprealm_v2_since = v2_since;
494 modified = true;
495 }
496 if (snaps != _snaps) {
497 derr << " updating snaps {" << snaps << "} -> {" << _snaps << "}" << dendl;
498 snaps = _snaps;
499 modified = true;
500 }
7c673cae 501
11fdf7f2
TL
502 if (modified) {
503 need_to_purge.clear();
504 pending_update.clear();
505 pending_destroy.clear();
506 pending_noop.clear();
507 MDSTableServer::reset_state();
508 }
509 return modified;
7c673cae 510}