git.proxmox.com Git - ceph.git/blame - ceph/src/mds/SnapRealm.cc
update sources to v12.2.5
[ceph.git] / ceph / src / mds / SnapRealm.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15#include "SnapRealm.h"
16#include "MDCache.h"
17#include "MDSRank.h"
18
94b18763
FG
19#include <boost/utility/string_view.hpp>
20
7c673cae
FG
21#include "messages/MClientSnap.h"
22
23
24/*
25 * SnapRealm
26 */
27
28#define dout_context g_ceph_context
29#define dout_subsys ceph_subsys_mds
30#undef dout_prefix
31#define dout_prefix _prefix(_dout, mdcache->mds->get_nodeid(), inode, srnode.seq, this)
32static ostream& _prefix(std::ostream *_dout, int whoami, const CInode *inode,
33 uint64_t seq, const SnapRealm *realm) {
34 return *_dout << " mds." << whoami
35 << ".cache.snaprealm(" << inode->ino()
36 << " seq " << seq << " " << realm << ") ";
37}
38
39ostream& operator<<(ostream& out, const SnapRealm& realm)
40{
41 out << "snaprealm(" << realm.inode->ino()
42 << " seq " << realm.srnode.seq
43 << " lc " << realm.srnode.last_created
44 << " cr " << realm.srnode.created;
45 if (realm.srnode.created != realm.srnode.current_parent_since)
46 out << " cps " << realm.srnode.current_parent_since;
47 out << " snaps=" << realm.srnode.snaps;
48 if (realm.srnode.past_parents.size()) {
49 out << " past_parents=(";
50 for (map<snapid_t, snaplink_t>::const_iterator p = realm.srnode.past_parents.begin();
51 p != realm.srnode.past_parents.end();
52 ++p) {
53 if (p != realm.srnode.past_parents.begin()) out << ",";
54 out << p->second.first << "-" << p->first
55 << "=" << p->second.ino;
56 }
57 out << ")";
58 }
59 out << " " << &realm << ")";
60 return out;
61}
62
63
64void SnapRealm::add_open_past_parent(SnapRealm *parent, snapid_t last)
65{
66 auto p = open_past_parents.find(parent->inode->ino());
67 if (p != open_past_parents.end()) {
68 assert(p->second.second.count(last) == 0);
69 p->second.second.insert(last);
70 } else {
71 open_past_parents[parent->inode->ino()].first = parent;
72 open_past_parents[parent->inode->ino()].second.insert(last);
73 parent->open_past_children.insert(this);
74 parent->inode->get(CInode::PIN_PASTSNAPPARENT);
75 }
76 ++num_open_past_parents;
77}
78
79void SnapRealm::remove_open_past_parent(inodeno_t ino, snapid_t last)
80{
81 auto p = open_past_parents.find(ino);
82 assert(p != open_past_parents.end());
83 auto q = p->second.second.find(last);
84 assert(q != p->second.second.end());
85 p->second.second.erase(q);
86 --num_open_past_parents;
87 if (p->second.second.empty()) {
88 SnapRealm *parent = p->second.first;
89 open_past_parents.erase(p);
90 parent->open_past_children.erase(this);
91 parent->inode->put(CInode::PIN_PASTSNAPPARENT);
92 }
93}
94
95struct C_SR_RetryOpenParents : public MDSInternalContextBase {
96 SnapRealm *sr;
97 snapid_t first, last, parent_last;
98 inodeno_t parent;
99 MDSInternalContextBase* fin;
100 C_SR_RetryOpenParents(SnapRealm *s, snapid_t f, snapid_t l, snapid_t pl,
101 inodeno_t p, MDSInternalContextBase *c) :
102 sr(s), first(f), last(l), parent_last(pl), parent(p), fin(c) {
103 sr->inode->get(CInode::PIN_OPENINGSNAPPARENTS);
104 }
105 MDSRank *get_mds() override { return sr->mdcache->mds; }
106 void finish(int r) override {
107 if (r < 0)
108 sr->_remove_missing_parent(parent_last, parent, r);
109 if (sr->_open_parents(fin, first, last))
110 fin->complete(0);
111 sr->inode->put(CInode::PIN_OPENINGSNAPPARENTS);
112 }
113};
114
115void SnapRealm::_remove_missing_parent(snapid_t snapid, inodeno_t parent, int err)
116{
117 map<snapid_t, snaplink_t>::iterator p = srnode.past_parents.find(snapid);
118 if (p != srnode.past_parents.end()) {
119 dout(10) << __func__ << " " << parent << " [" << p->second.first << ","
120 << p->first << "] errno " << err << dendl;
121 srnode.past_parents.erase(p);
122 } else {
123 dout(10) << __func__ << " " << parent << " not found" << dendl;
124 }
125}
126
127bool SnapRealm::_open_parents(MDSInternalContextBase *finish, snapid_t first, snapid_t last)
128{
129 dout(10) << "open_parents [" << first << "," << last << "]" << dendl;
130 if (open)
131 return true;
132
133 // make sure my current parents' parents are open...
134 if (parent) {
135 dout(10) << " current parent [" << srnode.current_parent_since << ",head] is " << *parent
136 << " on " << *parent->inode << dendl;
137 if (last >= srnode.current_parent_since &&
138 !parent->_open_parents(finish, MAX(first, srnode.current_parent_since), last))
139 return false;
140 }
141
142 // and my past parents too!
143 assert(srnode.past_parents.size() >= num_open_past_parents);
144 if (srnode.past_parents.size() > num_open_past_parents) {
145 for (map<snapid_t, snaplink_t>::iterator p = srnode.past_parents.begin();
146 p != srnode.past_parents.end(); ) {
147 dout(10) << " past_parent [" << p->second.first << "," << p->first << "] is "
148 << p->second.ino << dendl;
149 CInode *parent = mdcache->get_inode(p->second.ino);
150 if (!parent) {
151 C_SR_RetryOpenParents *fin = new C_SR_RetryOpenParents(this, first, last, p->first,
152 p->second.ino, finish);
153 mdcache->open_ino(p->second.ino, mdcache->mds->mdsmap->get_metadata_pool(), fin);
154 return false;
155 }
156 if (parent->state_test(CInode::STATE_PURGING)) {
157 dout(10) << " skip purging past_parent " << *parent << dendl;
158 srnode.past_parents.erase(p++);
159 continue;
160 }
161 assert(parent->snaprealm); // hmm!
162 if (!parent->snaprealm->_open_parents(finish, p->second.first, p->first))
163 return false;
164 auto q = open_past_parents.find(p->second.ino);
165 if (q == open_past_parents.end() ||
166 q->second.second.count(p->first) == 0) {
167 add_open_past_parent(parent->snaprealm, p->first);
168 }
169 ++p;
170 }
171 }
172
173 open = true;
174 return true;
175}
176
177bool SnapRealm::open_parents(MDSInternalContextBase *retryorfinish) {
178 if (!_open_parents(retryorfinish))
179 return false;
180 delete retryorfinish;
181 return true;
182}
183
184bool SnapRealm::have_past_parents_open(snapid_t first, snapid_t last)
185{
186 dout(10) << "have_past_parents_open [" << first << "," << last << "]" << dendl;
187 if (open)
188 return true;
189
190 for (map<snapid_t, snaplink_t>::iterator p = srnode.past_parents.lower_bound(first);
191 p != srnode.past_parents.end();
192 ++p) {
193 if (p->second.first > last)
194 break;
195 dout(10) << " past parent [" << p->second.first << "," << p->first << "] was "
196 << p->second.ino << dendl;
197 if (open_past_parents.count(p->second.ino) == 0) {
198 dout(10) << " past parent " << p->second.ino << " is not open" << dendl;
199 return false;
200 }
201 SnapRealm *parent_realm = open_past_parents[p->second.ino].first;
202 if (!parent_realm->have_past_parents_open(MAX(first, p->second.first),
203 MIN(last, p->first)))
204 return false;
205 }
206
207 open = true;
208 return true;
209}
210
211void SnapRealm::close_parents()
212{
213 for (auto p = open_past_parents.begin(); p != open_past_parents.end(); ++p) {
214 num_open_past_parents -= p->second.second.size();
215 p->second.first->inode->put(CInode::PIN_PASTSNAPPARENT);
216 p->second.first->open_past_children.erase(this);
217 }
218 open_past_parents.clear();
219}
220
221
222/*
223 * get list of snaps for this realm. we must include parents' snaps
224 * for the intervals during which they were our parent.
225 */
226void SnapRealm::build_snap_set(set<snapid_t> &s,
227 snapid_t& max_seq, snapid_t& max_last_created, snapid_t& max_last_destroyed,
228 snapid_t first, snapid_t last) const
229{
230 dout(10) << "build_snap_set [" << first << "," << last << "] on " << *this << dendl;
231
232 if (srnode.seq > max_seq)
233 max_seq = srnode.seq;
234 if (srnode.last_created > max_last_created)
235 max_last_created = srnode.last_created;
236 if (srnode.last_destroyed > max_last_destroyed)
237 max_last_destroyed = srnode.last_destroyed;
238
239 // include my snaps within interval [first,last]
240 for (map<snapid_t, SnapInfo>::const_iterator p = srnode.snaps.lower_bound(first); // first element >= first
241 p != srnode.snaps.end() && p->first <= last;
242 ++p)
243 s.insert(p->first);
244
245 // include snaps for parents during intervals that intersect [first,last]
246 for (map<snapid_t, snaplink_t>::const_iterator p = srnode.past_parents.lower_bound(first);
247 p != srnode.past_parents.end() && p->first >= first && p->second.first <= last;
248 ++p) {
249 const CInode *oldparent = mdcache->get_inode(p->second.ino);
250 assert(oldparent); // call open_parents first!
251 assert(oldparent->snaprealm);
252 oldparent->snaprealm->build_snap_set(s, max_seq, max_last_created, max_last_destroyed,
253 MAX(first, p->second.first),
254 MIN(last, p->first));
255 }
256 if (srnode.current_parent_since <= last && parent)
257 parent->build_snap_set(s, max_seq, max_last_created, max_last_destroyed,
258 MAX(first, srnode.current_parent_since), last);
259}
260
261
262void SnapRealm::check_cache() const
263{
264 assert(open);
265 if (cached_seq >= srnode.seq)
266 return;
267
268 cached_snaps.clear();
269 cached_snap_context.clear();
270
271 cached_last_created = srnode.last_created;
272 cached_last_destroyed = srnode.last_destroyed;
273 cached_seq = srnode.seq;
274 build_snap_set(cached_snaps, cached_seq, cached_last_created, cached_last_destroyed,
275 0, CEPH_NOSNAP);
276
277 cached_snap_trace.clear();
278 build_snap_trace(cached_snap_trace);
279
280 dout(10) << "check_cache rebuilt " << cached_snaps
281 << " seq " << srnode.seq
282 << " cached_seq " << cached_seq
283 << " cached_last_created " << cached_last_created
284 << " cached_last_destroyed " << cached_last_destroyed
285 << ")" << dendl;
286}
287
288const set<snapid_t>& SnapRealm::get_snaps() const
289{
290 check_cache();
291 dout(10) << "get_snaps " << cached_snaps
292 << " (seq " << srnode.seq << " cached_seq " << cached_seq << ")"
293 << dendl;
294 return cached_snaps;
295}
296
297/*
298 * build vector in reverse sorted order
299 */
300const SnapContext& SnapRealm::get_snap_context() const
301{
302 check_cache();
303
304 if (!cached_snap_context.seq) {
305 cached_snap_context.seq = cached_seq;
306 cached_snap_context.snaps.resize(cached_snaps.size());
307 unsigned i = 0;
308 for (set<snapid_t>::reverse_iterator p = cached_snaps.rbegin();
309 p != cached_snaps.rend();
310 ++p)
311 cached_snap_context.snaps[i++] = *p;
312 }
313
314 return cached_snap_context;
315}
316
317void SnapRealm::get_snap_info(map<snapid_t,SnapInfo*>& infomap, snapid_t first, snapid_t last)
318{
319 const set<snapid_t>& snaps = get_snaps();
320 dout(10) << "get_snap_info snaps " << snaps << dendl;
321
322 // include my snaps within interval [first,last]
323 for (map<snapid_t, SnapInfo>::iterator p = srnode.snaps.lower_bound(first); // first element >= first
324 p != srnode.snaps.end() && p->first <= last;
325 ++p)
326 infomap[p->first] = &p->second;
327
328 // include snaps for parents during intervals that intersect [first,last]
329 for (map<snapid_t, snaplink_t>::iterator p = srnode.past_parents.lower_bound(first);
330 p != srnode.past_parents.end() && p->first >= first && p->second.first <= last;
331 ++p) {
332 CInode *oldparent = mdcache->get_inode(p->second.ino);
333 assert(oldparent); // call open_parents first!
334 assert(oldparent->snaprealm);
335 oldparent->snaprealm->get_snap_info(infomap,
336 MAX(first, p->second.first),
337 MIN(last, p->first));
338 }
339 if (srnode.current_parent_since <= last && parent)
340 parent->get_snap_info(infomap, MAX(first, srnode.current_parent_since), last);
341}
342
94b18763 343boost::string_view SnapRealm::get_snapname(snapid_t snapid, inodeno_t atino)
7c673cae
FG
344{
345 auto srnode_snaps_entry = srnode.snaps.find(snapid);
346 if (srnode_snaps_entry != srnode.snaps.end()) {
347 if (atino == inode->ino())
348 return srnode_snaps_entry->second.name;
349 else
350 return srnode_snaps_entry->second.get_long_name();
351 }
352
353 map<snapid_t,snaplink_t>::iterator p = srnode.past_parents.lower_bound(snapid);
354 if (p != srnode.past_parents.end() && p->second.first <= snapid) {
355 CInode *oldparent = mdcache->get_inode(p->second.ino);
356 assert(oldparent); // call open_parents first!
357 assert(oldparent->snaprealm);
358 return oldparent->snaprealm->get_snapname(snapid, atino);
359 }
360
361 assert(srnode.current_parent_since <= snapid);
362 assert(parent);
363 return parent->get_snapname(snapid, atino);
364}
365
94b18763 366snapid_t SnapRealm::resolve_snapname(boost::string_view n, inodeno_t atino, snapid_t first, snapid_t last)
7c673cae
FG
367{
368 // first try me
369 dout(10) << "resolve_snapname '" << n << "' in [" << first << "," << last << "]" << dendl;
370
371 //snapid_t num;
372 //if (n[0] == '~') num = atoll(n.c_str()+1);
373
374 bool actual = (atino == inode->ino());
375 string pname;
376 inodeno_t pino;
377 if (!actual) {
378 if (!n.length() ||
379 n[0] != '_') return 0;
380 int next_ = n.find('_', 1);
381 if (next_ < 0) return 0;
94b18763
FG
382 pname = std::string(n.substr(1, next_ - 1));
383 pino = atoll(n.data() + next_ + 1);
7c673cae
FG
384 dout(10) << " " << n << " parses to name '" << pname << "' dirino " << pino << dendl;
385 }
386
387 for (map<snapid_t, SnapInfo>::iterator p = srnode.snaps.lower_bound(first); // first element >= first
388 p != srnode.snaps.end() && p->first <= last;
389 ++p) {
390 dout(15) << " ? " << p->second << dendl;
391 //if (num && p->second.snapid == num)
392 //return p->first;
393 if (actual && p->second.name == n)
394 return p->first;
395 if (!actual && p->second.name == pname && p->second.ino == pino)
396 return p->first;
397 }
398
399 // include snaps for parents during intervals that intersect [first,last]
400 for (map<snapid_t, snaplink_t>::iterator p = srnode.past_parents.lower_bound(first);
401 p != srnode.past_parents.end() && p->first >= first && p->second.first <= last;
402 ++p) {
403 CInode *oldparent = mdcache->get_inode(p->second.ino);
404 assert(oldparent); // call open_parents first!
405 assert(oldparent->snaprealm);
406 snapid_t r = oldparent->snaprealm->resolve_snapname(n, atino,
407 MAX(first, p->second.first),
408 MIN(last, p->first));
409 if (r)
410 return r;
411 }
412 if (parent && srnode.current_parent_since <= last)
413 return parent->resolve_snapname(n, atino, MAX(first, srnode.current_parent_since), last);
414 return 0;
415}
416
417
418void SnapRealm::adjust_parent()
419{
420 SnapRealm *newparent = inode->get_parent_dn()->get_dir()->get_inode()->find_snaprealm();
421 if (newparent != parent) {
422 dout(10) << "adjust_parent " << parent << " -> " << newparent << dendl;
423 if (parent)
424 parent->open_children.erase(this);
425 parent = newparent;
426 if (parent)
427 parent->open_children.insert(this);
428
429 invalidate_cached_snaps();
430 }
431}
432
433void SnapRealm::split_at(SnapRealm *child)
434{
435 dout(10) << "split_at " << *child
436 << " on " << *child->inode << dendl;
437
438 if (inode->is_mdsdir() || !child->inode->is_dir()) {
439 // it's not a dir.
440 if (child->inode->containing_realm) {
441 // - no open children.
442 // - only need to move this child's inode's caps.
443 child->inode->move_to_realm(child);
444 } else {
445 // no caps, nothing to move/split.
446 dout(20) << " split no-op, no caps to move on file " << *child->inode << dendl;
447 assert(!child->inode->is_any_caps());
448 }
449 return;
450 }
451
452 // it's a dir.
453
454 // split open_children
455 dout(10) << " open_children are " << open_children << dendl;
456 for (set<SnapRealm*>::iterator p = open_children.begin();
457 p != open_children.end(); ) {
458 SnapRealm *realm = *p;
459 if (realm != child &&
460 child->inode->is_projected_ancestor_of(realm->inode)) {
461 dout(20) << " child gets child realm " << *realm << " on " << *realm->inode << dendl;
462 realm->parent = child;
463 child->open_children.insert(realm);
464 open_children.erase(p++);
465 } else {
466 dout(20) << " keeping child realm " << *realm << " on " << *realm->inode << dendl;
467 ++p;
468 }
469 }
470
471 // split inodes_with_caps
472 elist<CInode*>::iterator p = inodes_with_caps.begin(member_offset(CInode, item_caps));
473 while (!p.end()) {
474 CInode *in = *p;
475 ++p;
476
477 // does inode fall within the child realm?
478 bool under_child = false;
479
480 if (in == child->inode) {
481 under_child = true;
482 } else {
483 CInode *t = in;
484 while (t->get_parent_dn()) {
485 t = t->get_parent_dn()->get_dir()->get_inode();
486 if (t == child->inode) {
487 under_child = true;
488 break;
489 }
490 if (t == in)
491 break;
492 }
493 }
494 if (under_child) {
495 dout(20) << " child gets " << *in << dendl;
496 in->move_to_realm(child);
497 } else {
498 dout(20) << " keeping " << *in << dendl;
499 }
500 }
501
502}
503
504const bufferlist& SnapRealm::get_snap_trace()
505{
506 check_cache();
507 return cached_snap_trace;
508}
509
510void SnapRealm::build_snap_trace(bufferlist& snapbl) const
511{
512 SnapRealmInfo info(inode->ino(), srnode.created, srnode.seq, srnode.current_parent_since);
513
514 if (parent) {
515 info.h.parent = parent->inode->ino();
516 if (!srnode.past_parents.empty()) {
517 snapid_t last = srnode.past_parents.rbegin()->first;
518 set<snapid_t> past;
519 snapid_t max_seq, max_last_created, max_last_destroyed;
520 build_snap_set(past, max_seq, max_last_created, max_last_destroyed, 0, last);
521 info.prior_parent_snaps.reserve(past.size());
522 for (set<snapid_t>::reverse_iterator p = past.rbegin(); p != past.rend(); ++p)
523 info.prior_parent_snaps.push_back(*p);
524 dout(10) << "build_snap_trace prior_parent_snaps from [1," << last << "] "
525 << info.prior_parent_snaps << dendl;
526 }
527 } else
528 info.h.parent = 0;
529
530 info.my_snaps.reserve(srnode.snaps.size());
531 for (map<snapid_t,SnapInfo>::const_reverse_iterator p = srnode.snaps.rbegin();
532 p != srnode.snaps.rend();
533 ++p)
534 info.my_snaps.push_back(p->first);
535 dout(10) << "build_snap_trace my_snaps " << info.my_snaps << dendl;
536
537 ::encode(info, snapbl);
538
539 if (parent)
540 parent->build_snap_trace(snapbl);
541}
542
543
544
545void SnapRealm::prune_past_parents()
546{
547 dout(10) << "prune_past_parents" << dendl;
548 check_cache();
549 assert(open);
550
551 map<snapid_t, snaplink_t>::iterator p = srnode.past_parents.begin();
552 while (p != srnode.past_parents.end()) {
553 set<snapid_t>::iterator q = cached_snaps.lower_bound(p->second.first);
554 if (q == cached_snaps.end() ||
555 *q > p->first) {
556 dout(10) << "prune_past_parents pruning [" << p->second.first << "," << p->first
557 << "] " << p->second.ino << dendl;
558 remove_open_past_parent(p->second.ino, p->first);
559 srnode.past_parents.erase(p++);
560 } else {
561 dout(10) << "prune_past_parents keeping [" << p->second.first << "," << p->first
562 << "] " << p->second.ino << dendl;
563 ++p;
564 }
565 }
566}
567