]>
Commit | Line | Data |
---|---|---|
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- | |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | #include "SnapServer.h" | |
16 | #include "MDSRank.h" | |
17 | #include "osd/OSDMap.h" | |
18 | #include "osdc/Objecter.h" | |
19 | #include "mon/MonClient.h" | |
20 | ||
21 | #include "include/types.h" | |
22 | #include "messages/MMDSTableRequest.h" | |
23 | #include "messages/MRemoveSnaps.h" | |
24 | ||
25 | #include "msg/Messenger.h" | |
26 | ||
27 | #include "common/config.h" | |
28 | #include "include/ceph_assert.h" | |
29 | ||
30 | #define dout_context g_ceph_context | |
31 | #define dout_subsys ceph_subsys_mds | |
32 | #undef dout_prefix | |
33 | #define dout_prefix *_dout << "mds." << rank << ".snap " | |
34 | ||
35 | using namespace std; | |
36 | ||
37 | void SnapServer::reset_state() | |
38 | { | |
39 | last_snap = 1; /* snapid 1 reserved for initial root snaprealm */ | |
40 | snaps.clear(); | |
41 | need_to_purge.clear(); | |
42 | pending_update.clear(); | |
43 | pending_destroy.clear(); | |
44 | pending_noop.clear(); | |
45 | ||
46 | // find any removed snapshot in data pools | |
47 | if (mds) { // only if I'm running in a live MDS | |
48 | snapid_t first_free = 0; | |
49 | mds->objecter->with_osdmap([&](const OSDMap& o) { | |
50 | for (const auto p : mds->mdsmap->get_data_pools()) { | |
51 | const pg_pool_t *pi = o.get_pg_pool(p); | |
52 | if (!pi) { | |
53 | // If pool isn't in OSDMap yet then can't have any snaps | |
54 | // needing removal, skip. | |
55 | continue; | |
56 | } | |
57 | if (pi->snap_seq > first_free) { | |
58 | first_free = pi->snap_seq; | |
59 | } | |
60 | } | |
61 | }); | |
62 | if (first_free > last_snap) | |
63 | last_snap = first_free; | |
64 | } | |
65 | last_created = last_snap; | |
66 | last_destroyed = last_snap; | |
67 | snaprealm_v2_since = last_snap + 1; | |
68 | ||
69 | MDSTableServer::reset_state(); | |
70 | } | |
71 | ||
72 | ||
73 | // SERVER | |
74 | ||
75 | void SnapServer::_prepare(const bufferlist& bl, uint64_t reqid, mds_rank_t bymds, bufferlist& out) | |
76 | { | |
77 | using ceph::decode; | |
78 | using ceph::encode; | |
79 | auto p = bl.cbegin(); | |
80 | __u32 op; | |
81 | decode(op, p); | |
82 | ||
83 | switch (op) { | |
84 | case TABLE_OP_CREATE: | |
85 | { | |
86 | SnapInfo info; | |
87 | decode(info.ino, p); | |
88 | if (!p.end()) { | |
89 | decode(info.name, p); | |
90 | decode(info.stamp, p); | |
91 | info.snapid = ++last_snap; | |
92 | pending_update[version] = info; | |
93 | dout(10) << "prepare v" << version << " create " << info << dendl; | |
94 | } else { | |
95 | pending_noop.insert(version); | |
96 | dout(10) << "prepare v" << version << " noop" << dendl; | |
97 | } | |
98 | ||
99 | encode(last_snap, out); | |
100 | } | |
101 | break; | |
102 | ||
103 | case TABLE_OP_DESTROY: | |
104 | { | |
105 | inodeno_t ino; | |
106 | snapid_t snapid; | |
107 | decode(ino, p); // not used, currently. | |
108 | decode(snapid, p); | |
109 | ||
110 | // bump last_snap... we use it as a version value on the snaprealm. | |
111 | ++last_snap; | |
112 | ||
113 | pending_destroy[version] = {snapid, last_snap}; | |
114 | dout(10) << "prepare v" << version << " destroy " << snapid << " seq " << last_snap << dendl; | |
115 | ||
116 | encode(last_snap, out); | |
117 | } | |
118 | break; | |
119 | ||
120 | case TABLE_OP_UPDATE: | |
121 | { | |
122 | SnapInfo info; | |
123 | decode(info.ino, p); | |
124 | decode(info.snapid, p); | |
125 | decode(info.name, p); | |
126 | decode(info.stamp, p); | |
127 | ||
128 | pending_update[version] = info; | |
129 | dout(10) << "prepare v" << version << " update " << info << dendl; | |
130 | } | |
131 | break; | |
132 | ||
133 | default: | |
134 | ceph_abort(); | |
135 | } | |
136 | //dump(); | |
137 | } | |
138 | ||
139 | void SnapServer::_get_reply_buffer(version_t tid, bufferlist *pbl) const | |
140 | { | |
141 | using ceph::encode; | |
142 | auto p = pending_update.find(tid); | |
143 | if (p != pending_update.end()) { | |
144 | if (pbl && !snaps.count(p->second.snapid)) // create | |
145 | encode(p->second.snapid, *pbl); | |
146 | return; | |
147 | } | |
148 | auto q = pending_destroy.find(tid); | |
149 | if (q != pending_destroy.end()) { | |
150 | if (pbl) | |
151 | encode(q->second.second, *pbl); | |
152 | return; | |
153 | } | |
154 | auto r = pending_noop.find(tid); | |
155 | if (r != pending_noop.end()) { | |
156 | if (pbl) | |
157 | encode(last_snap, *pbl); | |
158 | return; | |
159 | } | |
160 | assert (0 == "tid not found"); | |
161 | } | |
162 | ||
163 | void SnapServer::_commit(version_t tid, cref_t<MMDSTableRequest> req) | |
164 | { | |
165 | if (pending_update.count(tid)) { | |
166 | SnapInfo &info = pending_update[tid]; | |
167 | string opname; | |
168 | if (snaps.count(info.snapid)) { | |
169 | opname = "update"; | |
170 | if (info.stamp == utime_t()) | |
171 | info.stamp = snaps[info.snapid].stamp; | |
172 | } else { | |
173 | opname = "create"; | |
174 | if (info.snapid > last_created) | |
175 | last_created = info.snapid; | |
176 | } | |
177 | dout(7) << "commit " << tid << " " << opname << " " << info << dendl; | |
178 | snaps[info.snapid] = info; | |
179 | pending_update.erase(tid); | |
180 | } | |
181 | ||
182 | else if (pending_destroy.count(tid)) { | |
183 | snapid_t sn = pending_destroy[tid].first; | |
184 | snapid_t seq = pending_destroy[tid].second; | |
185 | dout(7) << "commit " << tid << " destroy " << sn << " seq " << seq << dendl; | |
186 | snaps.erase(sn); | |
187 | if (seq > last_destroyed) | |
188 | last_destroyed = seq; | |
189 | ||
190 | for (const auto p : mds->mdsmap->get_data_pools()) { | |
191 | need_to_purge[p].insert(sn); | |
192 | need_to_purge[p].insert(seq); | |
193 | } | |
194 | ||
195 | pending_destroy.erase(tid); | |
196 | } | |
197 | else if (pending_noop.count(tid)) { | |
198 | dout(7) << "commit " << tid << " noop" << dendl; | |
199 | pending_noop.erase(tid); | |
200 | } | |
201 | else | |
202 | ceph_abort(); | |
203 | ||
204 | //dump(); | |
205 | } | |
206 | ||
207 | void SnapServer::_rollback(version_t tid) | |
208 | { | |
209 | if (pending_update.count(tid)) { | |
210 | SnapInfo &info = pending_update[tid]; | |
211 | string opname; | |
212 | if (snaps.count(info.snapid)) | |
213 | opname = "update"; | |
214 | else | |
215 | opname = "create"; | |
216 | dout(7) << "rollback " << tid << " " << opname << " " << info << dendl; | |
217 | pending_update.erase(tid); | |
218 | } | |
219 | ||
220 | else if (pending_destroy.count(tid)) { | |
221 | dout(7) << "rollback " << tid << " destroy " << pending_destroy[tid] << dendl; | |
222 | pending_destroy.erase(tid); | |
223 | } | |
224 | ||
225 | else if (pending_noop.count(tid)) { | |
226 | dout(7) << "rollback " << tid << " noop" << dendl; | |
227 | pending_noop.erase(tid); | |
228 | } | |
229 | ||
230 | else | |
231 | ceph_abort(); | |
232 | ||
233 | //dump(); | |
234 | } | |
235 | ||
236 | void SnapServer::_server_update(bufferlist& bl) | |
237 | { | |
238 | using ceph::decode; | |
239 | auto p = bl.cbegin(); | |
240 | map<int, vector<snapid_t> > purge; | |
241 | decode(purge, p); | |
242 | ||
243 | dout(7) << "_server_update purged " << purge << dendl; | |
244 | for (map<int, vector<snapid_t> >::iterator p = purge.begin(); | |
245 | p != purge.end(); | |
246 | ++p) { | |
247 | for (vector<snapid_t>::iterator q = p->second.begin(); | |
248 | q != p->second.end(); | |
249 | ++q) | |
250 | need_to_purge[p->first].erase(*q); | |
251 | if (need_to_purge[p->first].empty()) | |
252 | need_to_purge.erase(p->first); | |
253 | } | |
254 | } | |
255 | ||
256 | bool SnapServer::_notify_prep(version_t tid) | |
257 | { | |
258 | using ceph::encode; | |
259 | bufferlist bl; | |
260 | char type = 'F'; | |
261 | encode(type, bl); | |
262 | encode(snaps, bl); | |
263 | encode(pending_update, bl); | |
264 | encode(pending_destroy, bl); | |
265 | encode(last_created, bl); | |
266 | encode(last_destroyed, bl); | |
267 | ceph_assert(version == tid); | |
268 | ||
269 | for (auto &p : active_clients) { | |
270 | auto m = make_message<MMDSTableRequest>(table, TABLESERVER_OP_NOTIFY_PREP, 0, version); | |
271 | m->bl = bl; | |
272 | mds->send_message_mds(m, p); | |
273 | } | |
274 | return true; | |
275 | } | |
276 | ||
277 | void SnapServer::handle_query(const cref_t<MMDSTableRequest> &req) | |
278 | { | |
279 | using ceph::encode; | |
280 | using ceph::decode; | |
281 | char op; | |
282 | auto p = req->bl.cbegin(); | |
283 | decode(op, p); | |
284 | ||
285 | auto reply = make_message<MMDSTableRequest>(table, TABLESERVER_OP_QUERY_REPLY, req->reqid, version); | |
286 | ||
287 | switch (op) { | |
288 | case 'F': // full | |
289 | version_t have_version; | |
290 | decode(have_version, p); | |
291 | ceph_assert(have_version <= version); | |
292 | if (have_version == version) { | |
293 | char type = 'U'; | |
294 | encode(type, reply->bl); | |
295 | } else { | |
296 | char type = 'F'; | |
297 | encode(type, reply->bl); | |
298 | encode(snaps, reply->bl); | |
299 | encode(pending_update, reply->bl); | |
300 | encode(pending_destroy, reply->bl); | |
301 | encode(last_created, reply->bl); | |
302 | encode(last_destroyed, reply->bl); | |
303 | } | |
304 | // FIXME: implement incremental change | |
305 | break; | |
306 | default: | |
307 | ceph_abort(); | |
308 | }; | |
309 | ||
310 | mds->send_message(reply, req->get_connection()); | |
311 | } | |
312 | ||
313 | void SnapServer::check_osd_map(bool force) | |
314 | { | |
315 | if (!force && version == last_checked_osdmap) { | |
316 | dout(10) << "check_osd_map - version unchanged" << dendl; | |
317 | return; | |
318 | } | |
319 | dout(10) << "check_osd_map need_to_purge=" << need_to_purge << dendl; | |
320 | ||
321 | map<int32_t, vector<snapid_t> > all_purge; | |
322 | map<int32_t, vector<snapid_t> > all_purged; | |
323 | ||
324 | // NOTE: this is only needed for support during upgrades from pre-octopus, | |
325 | // since starting with octopus we now get an explicit ack after we remove a | |
326 | // snap. | |
327 | mds->objecter->with_osdmap( | |
328 | [this, &all_purged, &all_purge](const OSDMap& osdmap) { | |
329 | for (const auto& p : need_to_purge) { | |
330 | int id = p.first; | |
331 | const pg_pool_t *pi = osdmap.get_pg_pool(id); | |
332 | if (pi == NULL) { | |
333 | // The pool is gone. So are the snapshots. | |
334 | all_purged[id] = std::vector<snapid_t>(p.second.begin(), | |
335 | p.second.end()); | |
336 | continue; | |
337 | } | |
338 | ||
339 | for (const auto& q : p.second) { | |
340 | if (pi->is_removed_snap(q)) { | |
341 | dout(10) << " osdmap marks " << q << " as removed" << dendl; | |
342 | all_purged[id].push_back(q); | |
343 | } else { | |
344 | all_purge[id].push_back(q); | |
345 | } | |
346 | } | |
347 | } | |
348 | }); | |
349 | ||
350 | if (!all_purged.empty()) { | |
351 | // prepare to remove from need_to_purge list | |
352 | bufferlist bl; | |
353 | using ceph::encode; | |
354 | encode(all_purged, bl); | |
355 | do_server_update(bl); | |
356 | } | |
357 | ||
358 | if (!all_purge.empty()) { | |
359 | dout(10) << "requesting removal of " << all_purge << dendl; | |
360 | auto m = make_message<MRemoveSnaps>(all_purge); | |
361 | mon_client->send_mon_message(m.detach()); | |
362 | } | |
363 | ||
364 | last_checked_osdmap = version; | |
365 | } | |
366 | ||
367 | void SnapServer::handle_remove_snaps(const cref_t<MRemoveSnaps> &m) | |
368 | { | |
369 | dout(10) << __func__ << " " << *m << dendl; | |
370 | ||
371 | map<int32_t, vector<snapid_t> > all_purged; | |
372 | int num = 0; | |
373 | ||
374 | for (const auto& [id, snaps] : need_to_purge) { | |
375 | auto i = m->snaps.find(id); | |
376 | if (i == m->snaps.end()) { | |
377 | continue; | |
378 | } | |
379 | for (const auto& q : snaps) { | |
380 | if (std::find(i->second.begin(), i->second.end(), q) != i->second.end()) { | |
381 | dout(10) << " mon reports " << q << " is removed" << dendl; | |
382 | all_purged[id].push_back(q); | |
383 | ++num; | |
384 | } | |
385 | } | |
386 | } | |
387 | ||
388 | dout(10) << __func__ << " " << num << " now removed" << dendl; | |
389 | if (num) { | |
390 | bufferlist bl; | |
391 | using ceph::encode; | |
392 | encode(all_purged, bl); | |
393 | do_server_update(bl); | |
394 | } | |
395 | } | |
396 | ||
397 | ||
398 | void SnapServer::dump(Formatter *f) const | |
399 | { | |
400 | f->open_object_section("snapserver"); | |
401 | ||
402 | f->dump_int("last_snap", last_snap); | |
403 | f->dump_int("last_created", last_created); | |
404 | f->dump_int("last_destroyed", last_destroyed); | |
405 | ||
406 | f->open_array_section("pending_noop"); | |
407 | for(set<version_t>::const_iterator i = pending_noop.begin(); i != pending_noop.end(); ++i) { | |
408 | f->dump_unsigned("version", *i); | |
409 | } | |
410 | f->close_section(); | |
411 | ||
412 | f->open_array_section("snaps"); | |
413 | for (map<snapid_t, SnapInfo>::const_iterator i = snaps.begin(); i != snaps.end(); ++i) { | |
414 | f->open_object_section("snap"); | |
415 | i->second.dump(f); | |
416 | f->close_section(); | |
417 | } | |
418 | f->close_section(); | |
419 | ||
420 | f->open_object_section("need_to_purge"); | |
421 | for (map<int, set<snapid_t> >::const_iterator i = need_to_purge.begin(); i != need_to_purge.end(); ++i) { | |
422 | CachedStackStringStream css; | |
423 | *css << i->first; | |
424 | f->open_array_section(css->strv()); | |
425 | for (set<snapid_t>::const_iterator s = i->second.begin(); s != i->second.end(); ++s) { | |
426 | f->dump_unsigned("snapid", s->val); | |
427 | } | |
428 | f->close_section(); | |
429 | } | |
430 | f->close_section(); | |
431 | ||
432 | f->open_array_section("pending_update"); | |
433 | for(map<version_t, SnapInfo>::const_iterator i = pending_update.begin(); i != pending_update.end(); ++i) { | |
434 | f->open_object_section("snap"); | |
435 | f->dump_unsigned("version", i->first); | |
436 | f->open_object_section("snapinfo"); | |
437 | i->second.dump(f); | |
438 | f->close_section(); | |
439 | f->close_section(); | |
440 | } | |
441 | f->close_section(); | |
442 | ||
443 | f->open_array_section("pending_destroy"); | |
444 | for(map<version_t, pair<snapid_t, snapid_t> >::const_iterator i = pending_destroy.begin(); i != pending_destroy.end(); ++i) { | |
445 | f->open_object_section("snap"); | |
446 | f->dump_unsigned("version", i->first); | |
447 | f->dump_unsigned("removed_snap", i->second.first); | |
448 | f->dump_unsigned("seq", i->second.second); | |
449 | f->close_section(); | |
450 | } | |
451 | f->close_section(); | |
452 | ||
453 | f->close_section(); | |
454 | } | |
455 | ||
456 | void SnapServer::generate_test_instances(std::list<SnapServer*>& ls) | |
457 | { | |
458 | list<SnapInfo*> snapinfo_instances; | |
459 | SnapInfo::generate_test_instances(snapinfo_instances); | |
460 | SnapInfo populated_snapinfo = *(snapinfo_instances.back()); | |
461 | for (auto& info : snapinfo_instances) { | |
462 | delete info; | |
463 | info = nullptr; | |
464 | } | |
465 | ||
466 | SnapServer *blank = new SnapServer(); | |
467 | ls.push_back(blank); | |
468 | SnapServer *populated = new SnapServer(); | |
469 | populated->last_snap = 123; | |
470 | populated->snaps[456] = populated_snapinfo; | |
471 | populated->need_to_purge[2].insert(012); | |
472 | populated->pending_update[234] = populated_snapinfo; | |
473 | populated->pending_destroy[345].first = 567; | |
474 | populated->pending_destroy[345].second = 768; | |
475 | populated->pending_noop.insert(890); | |
476 | ||
477 | ls.push_back(populated); | |
478 | } | |
479 | ||
480 | bool SnapServer::force_update(snapid_t last, snapid_t v2_since, | |
481 | map<snapid_t, SnapInfo>& _snaps) | |
482 | { | |
483 | bool modified = false; | |
484 | if (last > last_snap) { | |
485 | derr << " updating last_snap " << last_snap << " -> " << last << dendl; | |
486 | last_snap = last; | |
487 | last_created = last; | |
488 | last_destroyed = last; | |
489 | modified = true; | |
490 | } | |
491 | if (v2_since > snaprealm_v2_since) { | |
492 | derr << " updating snaprealm_v2_since " << snaprealm_v2_since | |
493 | << " -> " << v2_since << dendl; | |
494 | snaprealm_v2_since = v2_since; | |
495 | modified = true; | |
496 | } | |
497 | if (snaps != _snaps) { | |
498 | derr << " updating snaps {" << snaps << "} -> {" << _snaps << "}" << dendl; | |
499 | snaps = _snaps; | |
500 | modified = true; | |
501 | } | |
502 | ||
503 | if (modified) { | |
504 | need_to_purge.clear(); | |
505 | pending_update.clear(); | |
506 | pending_destroy.clear(); | |
507 | pending_noop.clear(); | |
508 | MDSTableServer::reset_state(); | |
509 | } | |
510 | return modified; | |
511 | } |