1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
15 #include "SnapServer.h"
17 #include "osd/OSDMap.h"
18 #include "osdc/Objecter.h"
19 #include "mon/MonClient.h"
21 #include "include/types.h"
22 #include "messages/MMDSTableRequest.h"
23 #include "messages/MRemoveSnaps.h"
25 #include "msg/Messenger.h"
27 #include "common/config.h"
28 #include "include/ceph_assert.h"
30 #define dout_context g_ceph_context
31 #define dout_subsys ceph_subsys_mds
33 #define dout_prefix *_dout << "mds." << rank << ".snap "
// Reset the snap table to its initial state.
//
// last_snap restarts at 1 and the need_to_purge / pending_update /
// pending_destroy containers are emptied.  When an MDS is attached (mds is
// non-null), last_snap is then raised to the largest pg_pool_t::snap_seq
// found among the MDSMap's data pools in the current OSDMap.  Afterwards
// last_created and last_destroyed are set to last_snap, snaprealm_v2_since to
// last_snap + 1, and the base-class MDSTableServer::reset_state() is invoked.
36 void SnapServer::reset_state()
38 last_snap
= 1; /* snapid 1 reserved for initial root snaprealm */
40 need_to_purge
.clear();
41 pending_update
.clear();
42 pending_destroy
.clear();
45 // find any removed snapshot in data pools
46 if (mds
) { // only if I'm running in a live MDS
47 snapid_t first_free
= 0;
// The OSDMap is only inspected inside the with_osdmap() callback.
48 mds
->objecter
->with_osdmap([&](const OSDMap
& o
) {
49 for (const auto p
: mds
->mdsmap
->get_data_pools()) {
50 const pg_pool_t
*pi
= o
.get_pg_pool(p
);
52 // If pool isn't in OSDMap yet then can't have any snaps
53 // needing removal, skip.
// Track the highest snap_seq seen across all data pools.
56 if (pi
->snap_seq
> first_free
) {
57 first_free
= pi
->snap_seq
;
// last_snap is only ever raised here, never lowered.
61 if (first_free
> last_snap
)
62 last_snap
= first_free
;
64 last_created
= last_snap
;
65 last_destroyed
= last_snap
;
66 snaprealm_v2_since
= last_snap
+ 1;
68 MDSTableServer::reset_state();
// Stage a table mutation decoded from `bl`, keyed by the current table
// `version`.  The statements visible here only queue work in the pending_*
// containers:
//  - create (per the log text): a fresh snapid is allocated with ++last_snap
//    and the SnapInfo is stored in pending_update[version]; an alternative
//    path records only pending_noop.  last_snap is encoded into `out`.
//  - TABLE_OP_DESTROY: the pair (snapid, last_snap) is stored in
//    pending_destroy[version] and last_snap is encoded into `out`.
//  - TABLE_OP_UPDATE: snapid, name and stamp are decoded into a SnapInfo that
//    is stored in pending_update[version].
// NOTE(review): the dispatch on the operation type and the condition choosing
// between the create and noop paths are not visible in this excerpt.
74 void SnapServer::_prepare(const bufferlist
& bl
, uint64_t reqid
, mds_rank_t bymds
, bufferlist
& out
)
89 decode(info
.stamp
, p
);
// Allocate the next snapid for the new snapshot.
90 info
.snapid
= ++last_snap
;
91 pending_update
[version
] = info
;
92 dout(10) << "prepare v" << version
<< " create " << info
<< dendl
;
94 pending_noop
.insert(version
);
95 dout(10) << "prepare v" << version
<< " noop" << dendl
;
// Reply payload: the current last_snap.
98 encode(last_snap
, out
);
102 case TABLE_OP_DESTROY
:
106 decode(ino
, p
); // not used, currently.
109 // bump last_snap... we use it as a version value on the snaprealm.
// Remember both the snapid being destroyed and the seq (last_snap) for it.
112 pending_destroy
[version
] = pair
<snapid_t
,snapid_t
>(snapid
, last_snap
);
113 dout(10) << "prepare v" << version
<< " destroy " << snapid
<< " seq " << last_snap
<< dendl
;
115 encode(last_snap
, out
);
119 case TABLE_OP_UPDATE
:
123 decode(info
.snapid
, p
);
124 decode(info
.name
, p
);
125 decode(info
.stamp
, p
);
127 pending_update
[version
] = info
;
128 dout(10) << "prepare v" << version
<< " update " << info
<< dendl
;
// Rebuild the reply payload for the pending transaction `tid` into *pbl.
//  - pending_update entry whose snapid is not yet present in `snaps` (i.e. a
//    create): encodes that snapid, provided pbl is non-null.
//  - pending_destroy entry: encodes the stored seq (the pair's second member).
//  - pending_noop entry: encodes the current last_snap.
// The trailing assert reports "tid not found"; presumably it is reached only
// when none of the containers holds `tid` (the early returns are not visible
// in this excerpt).
138 void SnapServer::_get_reply_buffer(version_t tid
, bufferlist
*pbl
) const
141 auto p
= pending_update
.find(tid
);
142 if (p
!= pending_update
.end()) {
// An update of an already-existing snapshot gets no payload here.
143 if (pbl
&& !snaps
.count(p
->second
.snapid
)) // create
144 encode(p
->second
.snapid
, *pbl
);
147 auto q
= pending_destroy
.find(tid
);
148 if (q
!= pending_destroy
.end()) {
150 encode(q
->second
.second
, *pbl
);
153 auto r
= pending_noop
.find(tid
);
154 if (r
!= pending_noop
.end()) {
156 encode(last_snap
, *pbl
);
159 assert (0 == "tid not found");
// Apply the pending transaction `tid` to the table.
//  - pending_update: the SnapInfo is stored into snaps[info.snapid].  If that
//    snapid already exists and the new stamp equals a default-constructed
//    utime_t, the existing entry's stamp is carried over.  last_created is
//    raised when info.snapid exceeds it.
//  - pending_destroy: last_destroyed is raised to the stored seq when larger,
//    and both the destroyed snapid and that seq are added to need_to_purge
//    for every data pool listed in the MDSMap.
//  - pending_noop: nothing but logging.
// In every case the pending entry for `tid` is erased.
// NOTE(review): `req` is not referenced by any statement visible here, and
// the assignment of `opname` is not visible either.
162 void SnapServer::_commit(version_t tid
, cref_t
<MMDSTableRequest
> req
)
164 if (pending_update
.count(tid
)) {
165 SnapInfo
&info
= pending_update
[tid
];
167 if (snaps
.count(info
.snapid
)) {
// A default stamp means "keep the stamp the snapshot already has".
169 if (info
.stamp
== utime_t())
170 info
.stamp
= snaps
[info
.snapid
].stamp
;
173 if (info
.snapid
> last_created
)
174 last_created
= info
.snapid
;
176 dout(7) << "commit " << tid
<< " " << opname
<< " " << info
<< dendl
;
177 snaps
[info
.snapid
] = info
;
178 pending_update
.erase(tid
);
181 else if (pending_destroy
.count(tid
)) {
182 snapid_t sn
= pending_destroy
[tid
].first
;
183 snapid_t seq
= pending_destroy
[tid
].second
;
184 dout(7) << "commit " << tid
<< " destroy " << sn
<< " seq " << seq
<< dendl
;
186 if (seq
> last_destroyed
)
187 last_destroyed
= seq
;
// Queue both ids for purging in every data pool.
189 for (const auto p
: mds
->mdsmap
->get_data_pools()) {
190 need_to_purge
[p
].insert(sn
);
191 need_to_purge
[p
].insert(seq
);
194 pending_destroy
.erase(tid
);
196 else if (pending_noop
.count(tid
)) {
197 dout(7) << "commit " << tid
<< " noop" << dendl
;
198 pending_noop
.erase(tid
);
// Abandon the pending transaction `tid`: log it and erase it from whichever
// of pending_update / pending_destroy / pending_noop holds it.  None of the
// statements visible here modify `snaps`; the snaps.count() test presumably
// only selects the operation name used in the log line (its body is not
// visible in this excerpt).
206 void SnapServer::_rollback(version_t tid
)
208 if (pending_update
.count(tid
)) {
209 SnapInfo
&info
= pending_update
[tid
];
211 if (snaps
.count(info
.snapid
))
215 dout(7) << "rollback " << tid
<< " " << opname
<< " " << info
<< dendl
;
216 pending_update
.erase(tid
);
219 else if (pending_destroy
.count(tid
)) {
220 dout(7) << "rollback " << tid
<< " destroy " << pending_destroy
[tid
] << dendl
;
221 pending_destroy
.erase(tid
);
224 else if (pending_noop
.count(tid
)) {
225 dout(7) << "rollback " << tid
<< " noop" << dendl
;
226 pending_noop
.erase(tid
);
// Apply a server-side update carried in `bl`: a map of pool id -> snapids
// that have been purged.  Every listed snapid is removed from that pool's set
// in need_to_purge, and the pool's entry is erased once its set is empty.
// NOTE(review): the decode that fills `purge` is not visible in this excerpt;
// presumably it reads from the iterator obtained from `bl`.
235 void SnapServer::_server_update(bufferlist
& bl
)
238 auto p
= bl
.cbegin();
239 map
<int, vector
<snapid_t
> > purge
;
242 dout(7) << "_server_update purged " << purge
<< dendl
;
// Outer loop: pools; inner loop: snapids purged in that pool.
243 for (map
<int, vector
<snapid_t
> >::iterator p
= purge
.begin();
246 for (vector
<snapid_t
>::iterator q
= p
->second
.begin();
247 q
!= p
->second
.end();
249 need_to_purge
[p
->first
].erase(*q
);
// Drop the pool's entry entirely once nothing is left to purge in it.
250 if (need_to_purge
[p
->first
].empty())
251 need_to_purge
.erase(p
->first
);
// Send a TABLESERVER_OP_NOTIFY_PREP message for transaction `tid` to every
// entry of active_clients.  The payload `bl` receives pending_update,
// pending_destroy, last_created and last_destroyed, and `tid` is asserted to
// equal the current table version.
// NOTE(review): the declaration of `bl`, anything encoded ahead of these
// fields, the attachment of `bl` to each message and the return value are
// not visible in this excerpt.
255 bool SnapServer::_notify_prep(version_t tid
)
262 encode(pending_update
, bl
);
263 encode(pending_destroy
, bl
);
264 encode(last_created
, bl
);
265 encode(last_destroyed
, bl
);
266 ceph_assert(version
== tid
);
// One message per active client rank.
268 for (auto &p
: active_clients
) {
269 auto m
= make_message
<MMDSTableRequest
>(table
, TABLESERVER_OP_NOTIFY_PREP
, 0, version
);
271 mds
->send_message_mds(m
, p
);
// Answer a query from a table client.
//
// The reply is a TABLESERVER_OP_QUERY_REPLY carrying the request's reqid and
// the current table version.  In the branch visible here the client sends the
// version it already holds (asserted to be <= ours).  If it equals the
// current version only a `type` tag is encoded; otherwise the tag is followed
// by the full state: snaps, pending_update, pending_destroy, last_created and
// last_destroyed.  The reply is sent on the connection the request came from.
// NOTE(review): the dispatch on the query opcode and the values assigned to
// `type` are not visible in this excerpt.
276 void SnapServer::handle_query(const cref_t
<MMDSTableRequest
> &req
)
281 auto p
= req
->bl
.cbegin();
284 auto reply
= make_message
<MMDSTableRequest
>(table
, TABLESERVER_OP_QUERY_REPLY
, req
->reqid
, version
);
288 version_t have_version
;
289 decode(have_version
, p
);
// A client can never be ahead of the server.
290 ceph_assert(have_version
<= version
);
291 if (have_version
== version
) {
293 encode(type
, reply
->bl
);
296 encode(type
, reply
->bl
);
297 encode(snaps
, reply
->bl
);
298 encode(pending_update
, reply
->bl
);
299 encode(pending_destroy
, reply
->bl
);
300 encode(last_created
, reply
->bl
);
301 encode(last_destroyed
, reply
->bl
);
303 // FIXME: implement incremental change
309 mds
->send_message(reply
, req
->get_connection());
// Reconcile need_to_purge against the current OSDMap.
//
// Unless `force` is set, the check is skipped (per the log text) when the
// table version equals last_checked_osdmap.  Otherwise every (pool, snapid)
// in need_to_purge is sorted into one of two maps:
//  - all_purged: the pool no longer exists in the OSDMap, or the pool reports
//    the snap as removed (pg_pool_t::is_removed_snap).
//  - all_purge:  the snap still has to be removed.
// A non-empty all_purged is encoded and handed to do_server_update(); a
// non-empty all_purge is sent to the monitor in an MRemoveSnaps message.
// Finally last_checked_osdmap is set to the current version.
// NOTE(review): the early return after the "version unchanged" log line and
// the declaration of `id` (presumably p.first) are not visible here.
312 void SnapServer::check_osd_map(bool force
)
314 if (!force
&& version
== last_checked_osdmap
) {
315 dout(10) << "check_osd_map - version unchanged" << dendl
;
318 dout(10) << "check_osd_map need_to_purge=" << need_to_purge
<< dendl
;
320 map
<int32_t, vector
<snapid_t
> > all_purge
;
321 map
<int32_t, vector
<snapid_t
> > all_purged
;
323 // NOTE: this is only needed for support during upgrades from pre-octopus,
324 // since starting with octopus we now get an explicit ack after we remove a
326 mds
->objecter
->with_osdmap(
327 [this, &all_purged
, &all_purge
](const OSDMap
& osdmap
) {
328 for (const auto& p
: need_to_purge
) {
330 const pg_pool_t
*pi
= osdmap
.get_pg_pool(id
);
332 // The pool is gone. So are the snapshots.
333 all_purged
[id
] = std::vector
<snapid_t
>(p
.second
.begin(),
// Pool still exists: classify each snapid individually.
338 for (const auto& q
: p
.second
) {
339 if (pi
->is_removed_snap(q
)) {
340 dout(10) << " osdmap marks " << q
<< " as removed" << dendl
;
341 all_purged
[id
].push_back(q
);
343 all_purge
[id
].push_back(q
);
349 if (!all_purged
.empty()) {
350 // prepare to remove from need_to_purge list
353 encode(all_purged
, bl
);
354 do_server_update(bl
);
357 if (!all_purge
.empty()) {
358 dout(10) << "requesting removal of " << all_purge
<< dendl
;
359 auto m
= make_message
<MRemoveSnaps
>(all_purge
);
360 mon_client
->send_mon_message(m
.detach());
363 last_checked_osdmap
= version
;
// Handle an MRemoveSnaps message `m` (logged as a monitor report): for every
// pool in need_to_purge that also appears in m->snaps, each snapid we are
// waiting on that is listed for that pool is collected into all_purged.  The
// collected map is then encoded and passed to do_server_update().
// Note that the structured binding `snaps` in the loop names the per-pool
// set from need_to_purge and shadows the `snaps` member inside the loop.
// NOTE(review): the handling of pools absent from the message (presumably
// `continue`), the counter `num` and any guard around the final update are
// not visible in this excerpt.
366 void SnapServer::handle_remove_snaps(const cref_t
<MRemoveSnaps
> &m
)
368 dout(10) << __func__
<< " " << *m
<< dendl
;
370 map
<int32_t, vector
<snapid_t
> > all_purged
;
373 for (const auto& [id
, snaps
] : need_to_purge
) {
374 auto i
= m
->snaps
.find(id
);
375 if (i
== m
->snaps
.end()) {
378 for (const auto& q
: snaps
) {
// Linear search of the message's snapid list for this pool.
379 if (std::find(i
->second
.begin(), i
->second
.end(), q
) != i
->second
.end()) {
380 dout(10) << " mon reports " << q
<< " is removed" << dendl
;
381 all_purged
[id
].push_back(q
);
387 dout(10) << __func__
<< " " << num
<< " now removed" << dendl
;
391 encode(all_purged
, bl
);
392 do_server_update(bl
);
// Dump the table state to Formatter `f` inside a "snapserver" object:
//  - integers last_snap, last_created, last_destroyed;
//  - array "pending_noop": one unsigned "version" per entry;
//  - array "snaps": one "snap" object per entry of `snaps`;
//  - object "need_to_purge": one array per pool, named by the string built
//    in `css`, holding one unsigned "snapid" per snap;
//  - array "pending_update": "snap" objects with "version" and a nested
//    "snapinfo" object;
//  - array "pending_destroy": "snap" objects with "version", "removed_snap"
//    and "seq".
// NOTE(review): the SnapInfo dump calls, the text written into `css` and the
// matching close_section() calls are not visible in this excerpt.
397 void SnapServer::dump(Formatter
*f
) const
399 f
->open_object_section("snapserver");
401 f
->dump_int("last_snap", last_snap
);
402 f
->dump_int("last_created", last_created
);
403 f
->dump_int("last_destroyed", last_destroyed
);
405 f
->open_array_section("pending_noop");
406 for(set
<version_t
>::const_iterator i
= pending_noop
.begin(); i
!= pending_noop
.end(); ++i
) {
407 f
->dump_unsigned("version", *i
);
411 f
->open_array_section("snaps");
412 for (map
<snapid_t
, SnapInfo
>::const_iterator i
= snaps
.begin(); i
!= snaps
.end(); ++i
) {
413 f
->open_object_section("snap");
419 f
->open_object_section("need_to_purge");
420 for (map
<int, set
<snapid_t
> >::const_iterator i
= need_to_purge
.begin(); i
!= need_to_purge
.end(); ++i
) {
// The per-pool section name is produced through a string stream.
421 CachedStackStringStream css
;
423 f
->open_array_section(css
->strv());
424 for (set
<snapid_t
>::const_iterator s
= i
->second
.begin(); s
!= i
->second
.end(); ++s
) {
425 f
->dump_unsigned("snapid", s
->val
);
431 f
->open_array_section("pending_update");
432 for(map
<version_t
, SnapInfo
>::const_iterator i
= pending_update
.begin(); i
!= pending_update
.end(); ++i
) {
433 f
->open_object_section("snap");
434 f
->dump_unsigned("version", i
->first
);
435 f
->open_object_section("snapinfo");
442 f
->open_array_section("pending_destroy");
443 for(map
<version_t
, pair
<snapid_t
, snapid_t
> >::const_iterator i
= pending_destroy
.begin(); i
!= pending_destroy
.end(); ++i
) {
444 f
->open_object_section("snap");
445 f
->dump_unsigned("version", i
->first
);
// pair layout: first = snapid being removed, second = seq.
446 f
->dump_unsigned("removed_snap", i
->second
.first
);
447 f
->dump_unsigned("seq", i
->second
.second
);
// Produce sample SnapServer objects for encode/decode testing and append
// them to `ls`: a default-constructed `blank` instance and a `populated` one
// whose fields are filled with fixed values.  The SnapInfo used for the
// populated instance is a copy of the last instance generated by
// SnapInfo::generate_test_instances().
// NOTE(review): the body of the loop over snapinfo_instances (presumably
// freeing the temporaries) and the push_back of `blank` are not visible in
// this excerpt.
455 void SnapServer::generate_test_instances(std::list
<SnapServer
*>& ls
)
457 list
<SnapInfo
*> snapinfo_instances
;
458 SnapInfo::generate_test_instances(snapinfo_instances
);
// Copy the value out before the list of pointers is walked below.
459 SnapInfo populated_snapinfo
= *(snapinfo_instances
.back());
460 for (auto& info
: snapinfo_instances
) {
465 SnapServer
*blank
= new SnapServer();
467 SnapServer
*populated
= new SnapServer();
468 populated
->last_snap
= 123;
469 populated
->snaps
[456] = populated_snapinfo
;
// 012 is an octal literal, i.e. decimal 10.
470 populated
->need_to_purge
[2].insert(012);
471 populated
->pending_update
[234] = populated_snapinfo
;
472 populated
->pending_destroy
[345].first
= 567;
473 populated
->pending_destroy
[345].second
= 768;
474 populated
->pending_noop
.insert(890);
476 ls
.push_back(populated
);
// Force the table towards externally supplied values, logging each change
// at error level (derr):
//  - when `last` exceeds last_snap, the change is logged and last_destroyed
//    is set to `last`;
//  - when `v2_since` exceeds snaprealm_v2_since, it is adopted;
//  - when `_snaps` differs from the current `snaps`, the difference is logged.
// The visible tail clears need_to_purge, pending_update, pending_destroy and
// pending_noop and calls MDSTableServer::reset_state().
// NOTE(review): the assignments to last_snap / last_created / snaps, the
// updates of `modified`, the condition guarding the final clears and the
// return statement are not visible in this excerpt.
479 bool SnapServer::force_update(snapid_t last
, snapid_t v2_since
,
480 map
<snapid_t
, SnapInfo
>& _snaps
)
482 bool modified
= false;
483 if (last
> last_snap
) {
484 derr
<< " updating last_snap " << last_snap
<< " -> " << last
<< dendl
;
487 last_destroyed
= last
;
// snaprealm_v2_since only ever moves forward.
490 if (v2_since
> snaprealm_v2_since
) {
491 derr
<< " updating snaprealm_v2_since " << snaprealm_v2_since
492 << " -> " << v2_since
<< dendl
;
493 snaprealm_v2_since
= v2_since
;
496 if (snaps
!= _snaps
) {
497 derr
<< " updating snaps {" << snaps
<< "} -> {" << _snaps
<< "}" << dendl
;
// Discard all in-flight transactions and purge bookkeeping.
503 need_to_purge
.clear();
504 pending_update
.clear();
505 pending_destroy
.clear();
506 pending_noop
.clear();
507 MDSTableServer::reset_state();