1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
15 #include "SnapServer.h"
17 #include "osd/OSDMap.h"
18 #include "osdc/Objecter.h"
19 #include "mon/MonClient.h"
21 #include "include/types.h"
22 #include "messages/MMDSTableRequest.h"
23 #include "messages/MRemoveSnaps.h"
25 #include "msg/Messenger.h"
27 #include "common/config.h"
28 #include "include/ceph_assert.h"
// Debug-log setup for this file: messages go through g_ceph_context under the
// ceph_subsys_mds subsystem, and every dout line starts with
// "mds.<rank>.snap ".
30 #define dout_context g_ceph_context
31 #define dout_subsys ceph_subsys_mds
33 #define dout_prefix *_dout << "mds." << rank << ".snap "
// Reset the snap table to its initial state.
//
// From the statements visible here: last_snap restarts at 1, the
// need_to_purge / pending_update / pending_destroy containers are cleared,
// and -- only when `mds` is set -- last_snap is raised to the largest
// pg_pool_t::snap_seq found among the MDSMap data pools in the current
// OSDMap.  last_created and last_destroyed are then set to last_snap,
// snaprealm_v2_since to last_snap + 1, and MDSTableServer::reset_state() is
// called last.
// NOTE(review): this listing has lost lines (braces, and possibly further
// statements such as the null-pool guard the comment below refers to);
// confirm against the complete source.
37 void SnapServer::reset_state()
39 last_snap
= 1; /* snapid 1 reserved for initial root snaprealm */
41 need_to_purge
.clear();
42 pending_update
.clear();
43 pending_destroy
.clear();
46 // find any removed snapshot in data pools
47 if (mds
) { // only if I'm running in a live MDS
// first_free accumulates the highest snap_seq seen across the data pools.
48 snapid_t first_free
= 0;
49 mds
->objecter
->with_osdmap([&](const OSDMap
& o
) {
50 for (const auto p
: mds
->mdsmap
->get_data_pools()) {
51 const pg_pool_t
*pi
= o
.get_pg_pool(p
);
53 // If pool isn't in OSDMap yet then can't have any snaps
54 // needing removal, skip.
57 if (pi
->snap_seq
> first_free
) {
58 first_free
= pi
->snap_seq
;
// Never move last_snap backwards: only adopt first_free when it is larger.
62 if (first_free
> last_snap
)
63 last_snap
= first_free
;
65 last_created
= last_snap
;
66 last_destroyed
= last_snap
;
67 snaprealm_v2_since
= last_snap
+ 1;
69 MDSTableServer::reset_state();
// Stage one table operation under the current table `version`.
//
// Paths visible in this listing:
//  - create: the snapshot gets the next id (++last_snap) and is stored in
//    pending_update[version]; an alternative path records `version` in
//    pending_noop instead.  Either way last_snap is encoded into `out`.
//  - TABLE_OP_DESTROY: {snapid, last_snap} is stored in
//    pending_destroy[version] and last_snap is encoded into `out`.
//  - TABLE_OP_UPDATE: snapid, name and stamp are decoded into `info`, which
//    is stored in pending_update[version].
// `reqid` and `bymds` are not referenced by any visible line.
// NOTE(review): the switch header, the first case label, the declarations of
// `p`/`info`/`ino`/`snapid`, the condition choosing create vs. noop and the
// statement that bumps last_snap are not visible here; `p` is presumably a
// decode iterator over `bl` -- confirm against the complete source.
75 void SnapServer::_prepare(const bufferlist
& bl
, uint64_t reqid
, mds_rank_t bymds
, bufferlist
& out
)
90 decode(info
.stamp
, p
);
91 info
.snapid
= ++last_snap
;
92 pending_update
[version
] = info
;
93 dout(10) << "prepare v" << version
<< " create " << info
<< dendl
;
95 pending_noop
.insert(version
);
96 dout(10) << "prepare v" << version
<< " noop" << dendl
;
99 encode(last_snap
, out
);
103 case TABLE_OP_DESTROY
:
107 decode(ino
, p
); // not used, currently.
110 // bump last_snap... we use it as a version value on the snaprealm.
// The pair stored here is (snapshot being destroyed, last_snap at this point).
113 pending_destroy
[version
] = {snapid
, last_snap
};
114 dout(10) << "prepare v" << version
<< " destroy " << snapid
<< " seq " << last_snap
<< dendl
;
116 encode(last_snap
, out
);
120 case TABLE_OP_UPDATE
:
124 decode(info
.snapid
, p
);
125 decode(info
.name
, p
);
126 decode(info
.stamp
, p
);
128 pending_update
[version
] = info
;
129 dout(10) << "prepare v" << version
<< " update " << info
<< dendl
;
// Produce the reply payload for the pending transaction `tid` in *pbl.
//
// The pending containers are searched in this order:
//  - pending_update: the snapid is encoded only when pbl is non-null and the
//    snapid is not yet present in `snaps` (the "create" case);
//  - pending_destroy: the second element of the stored pair is encoded;
//  - pending_noop: last_snap is encoded.
// The final assert carries the message "tid not found".
// NOTE(review): the returns after each match and any null checks on pbl in
// the destroy/noop paths are not visible in this listing.
139 void SnapServer::_get_reply_buffer(version_t tid
, bufferlist
*pbl
) const
142 auto p
= pending_update
.find(tid
);
143 if (p
!= pending_update
.end()) {
144 if (pbl
&& !snaps
.count(p
->second
.snapid
)) // create
145 encode(p
->second
.snapid
, *pbl
);
148 auto q
= pending_destroy
.find(tid
);
149 if (q
!= pending_destroy
.end()) {
151 encode(q
->second
.second
, *pbl
);
154 auto r
= pending_noop
.find(tid
);
155 if (r
!= pending_noop
.end()) {
157 encode(last_snap
, *pbl
);
160 assert (0 == "tid not found");
// Apply the pending transaction `tid` to the table.
//
//  - pending_update: when info.snapid already exists in `snaps` and the new
//    stamp equals a default-constructed utime_t, the existing stamp is
//    carried over.  last_created is raised to info.snapid when that is
//    larger.  The info is then stored in snaps[info.snapid] and the pending
//    entry is erased.
//  - pending_destroy: last_destroyed is raised to the stored seq when larger;
//    for every MDSMap data pool both the destroyed snapid and the seq are
//    added to need_to_purge[pool]; the pending entry is erased.
//  - pending_noop: the entry is erased.
// `req` is not referenced by any visible line.
// NOTE(review): the definition of `opname`, the branch structure around the
// last_created update, and any final else/abort are not visible here.
163 void SnapServer::_commit(version_t tid
, cref_t
<MMDSTableRequest
> req
)
165 if (pending_update
.count(tid
)) {
166 SnapInfo
&info
= pending_update
[tid
];
168 if (snaps
.count(info
.snapid
)) {
// An unset stamp in the pending info keeps the stamp already recorded.
170 if (info
.stamp
== utime_t())
171 info
.stamp
= snaps
[info
.snapid
].stamp
;
174 if (info
.snapid
> last_created
)
175 last_created
= info
.snapid
;
177 dout(7) << "commit " << tid
<< " " << opname
<< " " << info
<< dendl
;
178 snaps
[info
.snapid
] = info
;
179 pending_update
.erase(tid
);
182 else if (pending_destroy
.count(tid
)) {
183 snapid_t sn
= pending_destroy
[tid
].first
;
184 snapid_t seq
= pending_destroy
[tid
].second
;
185 dout(7) << "commit " << tid
<< " destroy " << sn
<< " seq " << seq
<< dendl
;
187 if (seq
> last_destroyed
)
188 last_destroyed
= seq
;
// Both ids are queued for purging in every data pool.
190 for (const auto p
: mds
->mdsmap
->get_data_pools()) {
191 need_to_purge
[p
].insert(sn
);
192 need_to_purge
[p
].insert(seq
);
195 pending_destroy
.erase(tid
);
197 else if (pending_noop
.count(tid
)) {
198 dout(7) << "commit " << tid
<< " noop" << dendl
;
199 pending_noop
.erase(tid
);
// Discard the pending transaction `tid` without applying it.
//
// Whichever of pending_update, pending_destroy or pending_noop holds `tid`
// has that entry logged and erased; `snaps` and the last_* counters are not
// written by any visible line.
// NOTE(review): the body of the `snaps.count(...)` test (which appears to
// feed `opname` for the log line) and any final else branch are not visible
// in this listing.
207 void SnapServer::_rollback(version_t tid
)
209 if (pending_update
.count(tid
)) {
210 SnapInfo
&info
= pending_update
[tid
];
212 if (snaps
.count(info
.snapid
))
216 dout(7) << "rollback " << tid
<< " " << opname
<< " " << info
<< dendl
;
217 pending_update
.erase(tid
);
220 else if (pending_destroy
.count(tid
)) {
221 dout(7) << "rollback " << tid
<< " destroy " << pending_destroy
[tid
] << dendl
;
222 pending_destroy
.erase(tid
);
225 else if (pending_noop
.count(tid
)) {
226 dout(7) << "rollback " << tid
<< " noop" << dendl
;
227 pending_noop
.erase(tid
);
// Remove already-purged snapshots from need_to_purge.
//
// `purge` maps a pool id to a list of snapids.  Each listed snapid is erased
// from need_to_purge[pool], and a pool's entry is erased from need_to_purge
// once its set is empty.
// NOTE(review): the statement that fills `purge` (presumably a decode from
// `bl` through the iterator created below), the loop conditions/increments
// and the closing braces are not visible in this listing.
236 void SnapServer::_server_update(bufferlist
& bl
)
239 auto p
= bl
.cbegin();
240 map
<int, vector
<snapid_t
> > purge
;
243 dout(7) << "_server_update purged " << purge
<< dendl
;
244 for (map
<int, vector
<snapid_t
> >::iterator p
= purge
.begin();
247 for (vector
<snapid_t
>::iterator q
= p
->second
.begin();
248 q
!= p
->second
.end();
250 need_to_purge
[p
->first
].erase(*q
);
// Do not keep empty per-pool sets around.
251 if (need_to_purge
[p
->first
].empty())
252 need_to_purge
.erase(p
->first
);
// Notify every entry of active_clients about the prepared transaction `tid`.
//
// pending_update, pending_destroy, last_created and last_destroyed are
// encoded into `bl`, `version == tid` is asserted, and one
// TABLESERVER_OP_NOTIFY_PREP MMDSTableRequest (reqid 0, current version) is
// sent per active client via mds->send_message_mds().
// NOTE(review): the declaration of `bl`, how it is attached to the message,
// and the return statement are not visible in this listing.
256 bool SnapServer::_notify_prep(version_t tid
)
263 encode(pending_update
, bl
);
264 encode(pending_destroy
, bl
);
265 encode(last_created
, bl
);
266 encode(last_destroyed
, bl
);
267 ceph_assert(version
== tid
);
269 for (auto &p
: active_clients
) {
270 auto m
= make_message
<MMDSTableRequest
>(table
, TABLESERVER_OP_NOTIFY_PREP
, 0, version
);
272 mds
->send_message_mds(m
, p
);
// Answer a table query.
//
// A TABLESERVER_OP_QUERY_REPLY carrying req->reqid and the current version
// is built.  In the path visible here the requester's have_version is
// decoded and must not exceed `version`.  When it equals `version`, only
// `type` is encoded into the reply; in the other visible path `type` is
// followed by snaps, pending_update, pending_destroy, last_created and
// last_destroyed.  The reply is sent on the request's connection.
// NOTE(review): the decoding of the query opcode, the switch/case skeleton
// and the values assigned to `type` are not visible in this listing.
277 void SnapServer::handle_query(const cref_t
<MMDSTableRequest
> &req
)
282 auto p
= req
->bl
.cbegin();
285 auto reply
= make_message
<MMDSTableRequest
>(table
, TABLESERVER_OP_QUERY_REPLY
, req
->reqid
, version
);
289 version_t have_version
;
290 decode(have_version
, p
);
// The requester can never be ahead of the server.
291 ceph_assert(have_version
<= version
);
292 if (have_version
== version
) {
294 encode(type
, reply
->bl
);
297 encode(type
, reply
->bl
);
298 encode(snaps
, reply
->bl
);
299 encode(pending_update
, reply
->bl
);
300 encode(pending_destroy
, reply
->bl
);
301 encode(last_created
, reply
->bl
);
302 encode(last_destroyed
, reply
->bl
);
304 // FIXME: implement incremental change
310 mds
->send_message(reply
, req
->get_connection());
// Reconcile need_to_purge with the current OSDMap.
//
// Unless `force` is set, nothing is done when `version` equals
// last_checked_osdmap.  Otherwise every (pool, snapid) in need_to_purge is
// sorted into one of two maps:
//  - all_purged: the pool no longer exists in the OSDMap, or the OSDMap
//    reports the snap as removed (pg_pool_t::is_removed_snap);
//  - all_purge: the remaining snaps.
// A non-empty all_purged is encoded and handed to do_server_update(); a
// non-empty all_purge is sent to the monitor as an MRemoveSnaps message.
// Finally last_checked_osdmap is set to `version`.
// NOTE(review): the early return, the definition of `id`, the else that
// separates the two push_back calls and the declaration of `bl` are not
// visible in this listing.
313 void SnapServer::check_osd_map(bool force
)
315 if (!force
&& version
== last_checked_osdmap
) {
316 dout(10) << "check_osd_map - version unchanged" << dendl
;
319 dout(10) << "check_osd_map need_to_purge=" << need_to_purge
<< dendl
;
321 map
<int32_t, vector
<snapid_t
> > all_purge
;
322 map
<int32_t, vector
<snapid_t
> > all_purged
;
324 // NOTE: this is only needed for support during upgrades from pre-octopus,
325 // since starting with octopus we now get an explicit ack after we remove a
327 mds
->objecter
->with_osdmap(
328 [this, &all_purged
, &all_purge
](const OSDMap
& osdmap
) {
329 for (const auto& p
: need_to_purge
) {
331 const pg_pool_t
*pi
= osdmap
.get_pg_pool(id
);
333 // The pool is gone. So are the snapshots.
334 all_purged
[id
] = std::vector
<snapid_t
>(p
.second
.begin(),
339 for (const auto& q
: p
.second
) {
340 if (pi
->is_removed_snap(q
)) {
341 dout(10) << " osdmap marks " << q
<< " as removed" << dendl
;
342 all_purged
[id
].push_back(q
);
344 all_purge
[id
].push_back(q
);
350 if (!all_purged
.empty()) {
351 // prepare to remove from need_to_purge list
354 encode(all_purged
, bl
);
355 do_server_update(bl
);
358 if (!all_purge
.empty()) {
359 dout(10) << "requesting removal of " << all_purge
<< dendl
;
360 auto m
= make_message
<MRemoveSnaps
>(all_purge
);
361 mon_client
->send_mon_message(m
.detach());
364 last_checked_osdmap
= version
;
// Process an MRemoveSnaps message.
//
// For each pool in need_to_purge that also appears in m->snaps, every
// pending snapid found in the message's list for that pool is collected in
// all_purged.  The collected map is encoded and passed to
// do_server_update().
// NOTE(review): the declaration and counting of `num`, the handling of pools
// missing from the message (presumably skipped), the condition guarding the
// update and the declaration of `bl` are not visible in this listing.
367 void SnapServer::handle_remove_snaps(const cref_t
<MRemoveSnaps
> &m
)
369 dout(10) << __func__
<< " " << *m
<< dendl
;
371 map
<int32_t, vector
<snapid_t
> > all_purged
;
374 for (const auto& [id
, snaps
] : need_to_purge
) {
375 auto i
= m
->snaps
.find(id
);
376 if (i
== m
->snaps
.end()) {
379 for (const auto& q
: snaps
) {
// Linear search of the message's snap list for this pool.
380 if (std::find(i
->second
.begin(), i
->second
.end(), q
) != i
->second
.end()) {
381 dout(10) << " mon reports " << q
<< " is removed" << dendl
;
382 all_purged
[id
].push_back(q
);
388 dout(10) << __func__
<< " " << num
<< " now removed" << dendl
;
392 encode(all_purged
, bl
);
393 do_server_update(bl
);
// Dump the table state to the Formatter `f`.
//
// Sections opened by the visible lines, in order:
//  - object "snapserver" with ints "last_snap", "last_created",
//    "last_destroyed";
//  - array "pending_noop": one unsigned "version" per entry;
//  - array "snaps": one object "snap" per entry;
//  - object "need_to_purge": one array per pool, named by the contents of a
//    CachedStackStringStream, with an unsigned "snapid" per element;
//  - array "pending_update": objects "snap" holding "version" and a
//    "snapinfo" object;
//  - array "pending_destroy": objects "snap" holding "version",
//    "removed_snap" and "seq".
// NOTE(review): the close_section() calls, the SnapInfo dump calls and the
// statement that writes into `css` are not visible in this listing.
398 void SnapServer::dump(Formatter
*f
) const
400 f
->open_object_section("snapserver");
402 f
->dump_int("last_snap", last_snap
);
403 f
->dump_int("last_created", last_created
);
404 f
->dump_int("last_destroyed", last_destroyed
);
406 f
->open_array_section("pending_noop");
407 for(set
<version_t
>::const_iterator i
= pending_noop
.begin(); i
!= pending_noop
.end(); ++i
) {
408 f
->dump_unsigned("version", *i
);
412 f
->open_array_section("snaps");
413 for (map
<snapid_t
, SnapInfo
>::const_iterator i
= snaps
.begin(); i
!= snaps
.end(); ++i
) {
414 f
->open_object_section("snap");
420 f
->open_object_section("need_to_purge");
421 for (map
<int, set
<snapid_t
> >::const_iterator i
= need_to_purge
.begin(); i
!= need_to_purge
.end(); ++i
) {
422 CachedStackStringStream css
;
424 f
->open_array_section(css
->strv());
425 for (set
<snapid_t
>::const_iterator s
= i
->second
.begin(); s
!= i
->second
.end(); ++s
) {
426 f
->dump_unsigned("snapid", s
->val
);
432 f
->open_array_section("pending_update");
433 for(map
<version_t
, SnapInfo
>::const_iterator i
= pending_update
.begin(); i
!= pending_update
.end(); ++i
) {
434 f
->open_object_section("snap");
435 f
->dump_unsigned("version", i
->first
);
436 f
->open_object_section("snapinfo");
443 f
->open_array_section("pending_destroy");
444 for(map
<version_t
, pair
<snapid_t
, snapid_t
> >::const_iterator i
= pending_destroy
.begin(); i
!= pending_destroy
.end(); ++i
) {
445 f
->open_object_section("snap");
446 f
->dump_unsigned("version", i
->first
);
447 f
->dump_unsigned("removed_snap", i
->second
.first
);
448 f
->dump_unsigned("seq", i
->second
.second
);
// Build sample SnapServer objects and append them to `ls`.
//
// The last SnapInfo produced by SnapInfo::generate_test_instances() is copied
// and reused as the payload for the populated instance.  Two heap-allocated
// servers are created: `blank` (default-constructed) and `populated`, whose
// last_snap, snaps, need_to_purge, pending_update, pending_destroy and
// pending_noop are each given one hard-coded entry.  `populated` is pushed
// onto `ls`; the list holds raw pointers.
// NOTE(review): the body of the loop over snapinfo_instances and the
// push_back of `blank` are not visible in this listing.
456 void SnapServer::generate_test_instances(std::list
<SnapServer
*>& ls
)
458 list
<SnapInfo
*> snapinfo_instances
;
459 SnapInfo::generate_test_instances(snapinfo_instances
);
460 SnapInfo populated_snapinfo
= *(snapinfo_instances
.back());
461 for (auto& info
: snapinfo_instances
) {
466 SnapServer
*blank
= new SnapServer();
468 SnapServer
*populated
= new SnapServer();
469 populated
->last_snap
= 123;
470 populated
->snaps
[456] = populated_snapinfo
;
// NOTE(review): 012 is an octal literal, i.e. decimal 10 -- confirm that is
// the intended value.
471 populated
->need_to_purge
[2].insert(012);
472 populated
->pending_update
[234] = populated_snapinfo
;
473 populated
->pending_destroy
[345].first
= 567;
474 populated
->pending_destroy
[345].second
= 768;
475 populated
->pending_noop
.insert(890);
477 ls
.push_back(populated
);
// Bring the table forward to externally supplied values.
//
// Visible behaviour:
//  - when `last` exceeds last_snap the change is logged via derr and
//    last_destroyed is set to `last`;
//  - when `v2_since` exceeds snaprealm_v2_since the change is logged and
//    snaprealm_v2_since is set to `v2_since`;
//  - when `snaps` differs from `_snaps` the difference is logged;
//  - need_to_purge, pending_update, pending_destroy and pending_noop are
//    cleared and MDSTableServer::reset_state() is called.
// NOTE(review): the assignments to last_snap / last_created / snaps /
// `modified`, the condition guarding the final clears, and the return
// statement are not visible in this listing -- confirm against the complete
// source.
480 bool SnapServer::force_update(snapid_t last
, snapid_t v2_since
,
481 map
<snapid_t
, SnapInfo
>& _snaps
)
483 bool modified
= false;
484 if (last
> last_snap
) {
485 derr
<< " updating last_snap " << last_snap
<< " -> " << last
<< dendl
;
488 last_destroyed
= last
;
491 if (v2_since
> snaprealm_v2_since
) {
492 derr
<< " updating snaprealm_v2_since " << snaprealm_v2_since
493 << " -> " << v2_since
<< dendl
;
494 snaprealm_v2_since
= v2_since
;
497 if (snaps
!= _snaps
) {
498 derr
<< " updating snaps {" << snaps
<< "} -> {" << _snaps
<< "}" << dendl
;
504 need_to_purge
.clear();
505 pending_update
.clear();
506 pending_destroy
.clear();
507 pending_noop
.clear();
508 MDSTableServer::reset_state();