// Source: git.proxmox.com Git - ceph.git/blob - ceph/src/mds/SnapRealm.cc
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
 * Ceph - scalable distributed file system
 *
 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
 *
 * This is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License version 2.1, as published by the Free Software
 * Foundation. See file COPYING.
 *
 */
15 #include "SnapRealm.h"
19 #include <boost/utility/string_view.hpp>
21 #include "messages/MClientSnap.h"
28 #define dout_context g_ceph_context
29 #define dout_subsys ceph_subsys_mds
31 #define dout_prefix _prefix(_dout, mdcache->mds->get_nodeid(), inode, srnode.seq, this)
32 static ostream
& _prefix(std::ostream
*_dout
, int whoami
, const CInode
*inode
,
33 uint64_t seq
, const SnapRealm
*realm
) {
34 return *_dout
<< " mds." << whoami
35 << ".cache.snaprealm(" << inode
->ino()
36 << " seq " << seq
<< " " << realm
<< ") ";
39 ostream
& operator<<(ostream
& out
, const SnapRealm
& realm
)
41 out
<< "snaprealm(" << realm
.inode
->ino()
42 << " seq " << realm
.srnode
.seq
43 << " lc " << realm
.srnode
.last_created
44 << " cr " << realm
.srnode
.created
;
45 if (realm
.srnode
.created
!= realm
.srnode
.current_parent_since
)
46 out
<< " cps " << realm
.srnode
.current_parent_since
;
47 out
<< " snaps=" << realm
.srnode
.snaps
;
48 if (realm
.srnode
.past_parents
.size()) {
49 out
<< " past_parents=(";
50 for (map
<snapid_t
, snaplink_t
>::const_iterator p
= realm
.srnode
.past_parents
.begin();
51 p
!= realm
.srnode
.past_parents
.end();
53 if (p
!= realm
.srnode
.past_parents
.begin()) out
<< ",";
54 out
<< p
->second
.first
<< "-" << p
->first
55 << "=" << p
->second
.ino
;
59 out
<< " " << &realm
<< ")";
64 void SnapRealm::add_open_past_parent(SnapRealm
*parent
, snapid_t last
)
66 auto p
= open_past_parents
.find(parent
->inode
->ino());
67 if (p
!= open_past_parents
.end()) {
68 assert(p
->second
.second
.count(last
) == 0);
69 p
->second
.second
.insert(last
);
71 open_past_parents
[parent
->inode
->ino()].first
= parent
;
72 open_past_parents
[parent
->inode
->ino()].second
.insert(last
);
73 parent
->open_past_children
.insert(this);
74 parent
->inode
->get(CInode::PIN_PASTSNAPPARENT
);
76 ++num_open_past_parents
;
79 void SnapRealm::remove_open_past_parent(inodeno_t ino
, snapid_t last
)
81 auto p
= open_past_parents
.find(ino
);
82 assert(p
!= open_past_parents
.end());
83 auto q
= p
->second
.second
.find(last
);
84 assert(q
!= p
->second
.second
.end());
85 p
->second
.second
.erase(q
);
86 --num_open_past_parents
;
87 if (p
->second
.second
.empty()) {
88 SnapRealm
*parent
= p
->second
.first
;
89 open_past_parents
.erase(p
);
90 parent
->open_past_children
.erase(this);
91 parent
->inode
->put(CInode::PIN_PASTSNAPPARENT
);
95 struct C_SR_RetryOpenParents
: public MDSInternalContextBase
{
97 snapid_t first
, last
, parent_last
;
99 MDSInternalContextBase
* fin
;
100 C_SR_RetryOpenParents(SnapRealm
*s
, snapid_t f
, snapid_t l
, snapid_t pl
,
101 inodeno_t p
, MDSInternalContextBase
*c
) :
102 sr(s
), first(f
), last(l
), parent_last(pl
), parent(p
), fin(c
) {
103 sr
->inode
->get(CInode::PIN_OPENINGSNAPPARENTS
);
105 MDSRank
*get_mds() override
{ return sr
->mdcache
->mds
; }
106 void finish(int r
) override
{
108 sr
->_remove_missing_parent(parent_last
, parent
, r
);
109 if (sr
->_open_parents(fin
, first
, last
))
111 sr
->inode
->put(CInode::PIN_OPENINGSNAPPARENTS
);
115 void SnapRealm::_remove_missing_parent(snapid_t snapid
, inodeno_t parent
, int err
)
117 map
<snapid_t
, snaplink_t
>::iterator p
= srnode
.past_parents
.find(snapid
);
118 if (p
!= srnode
.past_parents
.end()) {
119 dout(10) << __func__
<< " " << parent
<< " [" << p
->second
.first
<< ","
120 << p
->first
<< "] errno " << err
<< dendl
;
121 srnode
.past_parents
.erase(p
);
123 dout(10) << __func__
<< " " << parent
<< " not found" << dendl
;
127 bool SnapRealm::_open_parents(MDSInternalContextBase
*finish
, snapid_t first
, snapid_t last
)
129 dout(10) << "open_parents [" << first
<< "," << last
<< "]" << dendl
;
133 // make sure my current parents' parents are open...
135 dout(10) << " current parent [" << srnode
.current_parent_since
<< ",head] is " << *parent
136 << " on " << *parent
->inode
<< dendl
;
137 if (last
>= srnode
.current_parent_since
&&
138 !parent
->_open_parents(finish
, MAX(first
, srnode
.current_parent_since
), last
))
142 // and my past parents too!
143 assert(srnode
.past_parents
.size() >= num_open_past_parents
);
144 if (srnode
.past_parents
.size() > num_open_past_parents
) {
145 for (map
<snapid_t
, snaplink_t
>::iterator p
= srnode
.past_parents
.begin();
146 p
!= srnode
.past_parents
.end(); ) {
147 dout(10) << " past_parent [" << p
->second
.first
<< "," << p
->first
<< "] is "
148 << p
->second
.ino
<< dendl
;
149 CInode
*parent
= mdcache
->get_inode(p
->second
.ino
);
151 C_SR_RetryOpenParents
*fin
= new C_SR_RetryOpenParents(this, first
, last
, p
->first
,
152 p
->second
.ino
, finish
);
153 mdcache
->open_ino(p
->second
.ino
, mdcache
->mds
->mdsmap
->get_metadata_pool(), fin
);
156 if (parent
->state_test(CInode::STATE_PURGING
)) {
157 dout(10) << " skip purging past_parent " << *parent
<< dendl
;
158 srnode
.past_parents
.erase(p
++);
161 assert(parent
->snaprealm
); // hmm!
162 if (!parent
->snaprealm
->_open_parents(finish
, p
->second
.first
, p
->first
))
164 auto q
= open_past_parents
.find(p
->second
.ino
);
165 if (q
== open_past_parents
.end() ||
166 q
->second
.second
.count(p
->first
) == 0) {
167 add_open_past_parent(parent
->snaprealm
, p
->first
);
177 bool SnapRealm::open_parents(MDSInternalContextBase
*retryorfinish
) {
178 if (!_open_parents(retryorfinish
))
180 delete retryorfinish
;
184 bool SnapRealm::have_past_parents_open(snapid_t first
, snapid_t last
)
186 dout(10) << "have_past_parents_open [" << first
<< "," << last
<< "]" << dendl
;
190 for (map
<snapid_t
, snaplink_t
>::iterator p
= srnode
.past_parents
.lower_bound(first
);
191 p
!= srnode
.past_parents
.end();
193 if (p
->second
.first
> last
)
195 dout(10) << " past parent [" << p
->second
.first
<< "," << p
->first
<< "] was "
196 << p
->second
.ino
<< dendl
;
197 if (open_past_parents
.count(p
->second
.ino
) == 0) {
198 dout(10) << " past parent " << p
->second
.ino
<< " is not open" << dendl
;
201 SnapRealm
*parent_realm
= open_past_parents
[p
->second
.ino
].first
;
202 if (!parent_realm
->have_past_parents_open(MAX(first
, p
->second
.first
),
203 MIN(last
, p
->first
)))
211 void SnapRealm::close_parents()
213 for (auto p
= open_past_parents
.begin(); p
!= open_past_parents
.end(); ++p
) {
214 num_open_past_parents
-= p
->second
.second
.size();
215 p
->second
.first
->inode
->put(CInode::PIN_PASTSNAPPARENT
);
216 p
->second
.first
->open_past_children
.erase(this);
218 open_past_parents
.clear();
223 * get list of snaps for this realm. we must include parents' snaps
224 * for the intervals during which they were our parent.
226 void SnapRealm::build_snap_set(set
<snapid_t
> &s
,
227 snapid_t
& max_seq
, snapid_t
& max_last_created
, snapid_t
& max_last_destroyed
,
228 snapid_t first
, snapid_t last
) const
230 dout(10) << "build_snap_set [" << first
<< "," << last
<< "] on " << *this << dendl
;
232 if (srnode
.seq
> max_seq
)
233 max_seq
= srnode
.seq
;
234 if (srnode
.last_created
> max_last_created
)
235 max_last_created
= srnode
.last_created
;
236 if (srnode
.last_destroyed
> max_last_destroyed
)
237 max_last_destroyed
= srnode
.last_destroyed
;
239 // include my snaps within interval [first,last]
240 for (map
<snapid_t
, SnapInfo
>::const_iterator p
= srnode
.snaps
.lower_bound(first
); // first element >= first
241 p
!= srnode
.snaps
.end() && p
->first
<= last
;
245 // include snaps for parents during intervals that intersect [first,last]
246 for (map
<snapid_t
, snaplink_t
>::const_iterator p
= srnode
.past_parents
.lower_bound(first
);
247 p
!= srnode
.past_parents
.end() && p
->first
>= first
&& p
->second
.first
<= last
;
249 const CInode
*oldparent
= mdcache
->get_inode(p
->second
.ino
);
250 assert(oldparent
); // call open_parents first!
251 assert(oldparent
->snaprealm
);
252 oldparent
->snaprealm
->build_snap_set(s
, max_seq
, max_last_created
, max_last_destroyed
,
253 MAX(first
, p
->second
.first
),
254 MIN(last
, p
->first
));
256 if (srnode
.current_parent_since
<= last
&& parent
)
257 parent
->build_snap_set(s
, max_seq
, max_last_created
, max_last_destroyed
,
258 MAX(first
, srnode
.current_parent_since
), last
);
262 void SnapRealm::check_cache() const
265 if (cached_seq
>= srnode
.seq
)
268 cached_snaps
.clear();
269 cached_snap_context
.clear();
271 cached_last_created
= srnode
.last_created
;
272 cached_last_destroyed
= srnode
.last_destroyed
;
273 cached_seq
= srnode
.seq
;
274 build_snap_set(cached_snaps
, cached_seq
, cached_last_created
, cached_last_destroyed
,
277 cached_snap_trace
.clear();
278 build_snap_trace(cached_snap_trace
);
280 dout(10) << "check_cache rebuilt " << cached_snaps
281 << " seq " << srnode
.seq
282 << " cached_seq " << cached_seq
283 << " cached_last_created " << cached_last_created
284 << " cached_last_destroyed " << cached_last_destroyed
288 const set
<snapid_t
>& SnapRealm::get_snaps() const
291 dout(10) << "get_snaps " << cached_snaps
292 << " (seq " << srnode
.seq
<< " cached_seq " << cached_seq
<< ")"
298 * build vector in reverse sorted order
300 const SnapContext
& SnapRealm::get_snap_context() const
304 if (!cached_snap_context
.seq
) {
305 cached_snap_context
.seq
= cached_seq
;
306 cached_snap_context
.snaps
.resize(cached_snaps
.size());
308 for (set
<snapid_t
>::reverse_iterator p
= cached_snaps
.rbegin();
309 p
!= cached_snaps
.rend();
311 cached_snap_context
.snaps
[i
++] = *p
;
314 return cached_snap_context
;
317 void SnapRealm::get_snap_info(map
<snapid_t
,SnapInfo
*>& infomap
, snapid_t first
, snapid_t last
)
319 const set
<snapid_t
>& snaps
= get_snaps();
320 dout(10) << "get_snap_info snaps " << snaps
<< dendl
;
322 // include my snaps within interval [first,last]
323 for (map
<snapid_t
, SnapInfo
>::iterator p
= srnode
.snaps
.lower_bound(first
); // first element >= first
324 p
!= srnode
.snaps
.end() && p
->first
<= last
;
326 infomap
[p
->first
] = &p
->second
;
328 // include snaps for parents during intervals that intersect [first,last]
329 for (map
<snapid_t
, snaplink_t
>::iterator p
= srnode
.past_parents
.lower_bound(first
);
330 p
!= srnode
.past_parents
.end() && p
->first
>= first
&& p
->second
.first
<= last
;
332 CInode
*oldparent
= mdcache
->get_inode(p
->second
.ino
);
333 assert(oldparent
); // call open_parents first!
334 assert(oldparent
->snaprealm
);
335 oldparent
->snaprealm
->get_snap_info(infomap
,
336 MAX(first
, p
->second
.first
),
337 MIN(last
, p
->first
));
339 if (srnode
.current_parent_since
<= last
&& parent
)
340 parent
->get_snap_info(infomap
, MAX(first
, srnode
.current_parent_since
), last
);
343 boost::string_view
SnapRealm::get_snapname(snapid_t snapid
, inodeno_t atino
)
345 auto srnode_snaps_entry
= srnode
.snaps
.find(snapid
);
346 if (srnode_snaps_entry
!= srnode
.snaps
.end()) {
347 if (atino
== inode
->ino())
348 return srnode_snaps_entry
->second
.name
;
350 return srnode_snaps_entry
->second
.get_long_name();
353 map
<snapid_t
,snaplink_t
>::iterator p
= srnode
.past_parents
.lower_bound(snapid
);
354 if (p
!= srnode
.past_parents
.end() && p
->second
.first
<= snapid
) {
355 CInode
*oldparent
= mdcache
->get_inode(p
->second
.ino
);
356 assert(oldparent
); // call open_parents first!
357 assert(oldparent
->snaprealm
);
358 return oldparent
->snaprealm
->get_snapname(snapid
, atino
);
361 assert(srnode
.current_parent_since
<= snapid
);
363 return parent
->get_snapname(snapid
, atino
);
366 snapid_t
SnapRealm::resolve_snapname(boost::string_view n
, inodeno_t atino
, snapid_t first
, snapid_t last
)
369 dout(10) << "resolve_snapname '" << n
<< "' in [" << first
<< "," << last
<< "]" << dendl
;
372 //if (n[0] == '~') num = atoll(n.c_str()+1);
374 bool actual
= (atino
== inode
->ino());
379 n
[0] != '_') return 0;
380 int next_
= n
.find('_', 1);
381 if (next_
< 0) return 0;
382 pname
= std::string(n
.substr(1, next_
- 1));
383 pino
= atoll(n
.data() + next_
+ 1);
384 dout(10) << " " << n
<< " parses to name '" << pname
<< "' dirino " << pino
<< dendl
;
387 for (map
<snapid_t
, SnapInfo
>::iterator p
= srnode
.snaps
.lower_bound(first
); // first element >= first
388 p
!= srnode
.snaps
.end() && p
->first
<= last
;
390 dout(15) << " ? " << p
->second
<< dendl
;
391 //if (num && p->second.snapid == num)
393 if (actual
&& p
->second
.name
== n
)
395 if (!actual
&& p
->second
.name
== pname
&& p
->second
.ino
== pino
)
399 // include snaps for parents during intervals that intersect [first,last]
400 for (map
<snapid_t
, snaplink_t
>::iterator p
= srnode
.past_parents
.lower_bound(first
);
401 p
!= srnode
.past_parents
.end() && p
->first
>= first
&& p
->second
.first
<= last
;
403 CInode
*oldparent
= mdcache
->get_inode(p
->second
.ino
);
404 assert(oldparent
); // call open_parents first!
405 assert(oldparent
->snaprealm
);
406 snapid_t r
= oldparent
->snaprealm
->resolve_snapname(n
, atino
,
407 MAX(first
, p
->second
.first
),
408 MIN(last
, p
->first
));
412 if (parent
&& srnode
.current_parent_since
<= last
)
413 return parent
->resolve_snapname(n
, atino
, MAX(first
, srnode
.current_parent_since
), last
);
418 void SnapRealm::adjust_parent()
420 SnapRealm
*newparent
= inode
->get_parent_dn()->get_dir()->get_inode()->find_snaprealm();
421 if (newparent
!= parent
) {
422 dout(10) << "adjust_parent " << parent
<< " -> " << newparent
<< dendl
;
424 parent
->open_children
.erase(this);
427 parent
->open_children
.insert(this);
429 invalidate_cached_snaps();
433 void SnapRealm::split_at(SnapRealm
*child
)
435 dout(10) << "split_at " << *child
436 << " on " << *child
->inode
<< dendl
;
438 if (inode
->is_mdsdir() || !child
->inode
->is_dir()) {
440 if (child
->inode
->containing_realm
) {
441 // - no open children.
442 // - only need to move this child's inode's caps.
443 child
->inode
->move_to_realm(child
);
445 // no caps, nothing to move/split.
446 dout(20) << " split no-op, no caps to move on file " << *child
->inode
<< dendl
;
447 assert(!child
->inode
->is_any_caps());
454 // split open_children
455 dout(10) << " open_children are " << open_children
<< dendl
;
456 for (set
<SnapRealm
*>::iterator p
= open_children
.begin();
457 p
!= open_children
.end(); ) {
458 SnapRealm
*realm
= *p
;
459 if (realm
!= child
&&
460 child
->inode
->is_projected_ancestor_of(realm
->inode
)) {
461 dout(20) << " child gets child realm " << *realm
<< " on " << *realm
->inode
<< dendl
;
462 realm
->parent
= child
;
463 child
->open_children
.insert(realm
);
464 open_children
.erase(p
++);
466 dout(20) << " keeping child realm " << *realm
<< " on " << *realm
->inode
<< dendl
;
471 // split inodes_with_caps
472 elist
<CInode
*>::iterator p
= inodes_with_caps
.begin(member_offset(CInode
, item_caps
));
477 // does inode fall within the child realm?
478 bool under_child
= false;
480 if (in
== child
->inode
) {
484 while (t
->get_parent_dn()) {
485 t
= t
->get_parent_dn()->get_dir()->get_inode();
486 if (t
== child
->inode
) {
495 dout(20) << " child gets " << *in
<< dendl
;
496 in
->move_to_realm(child
);
498 dout(20) << " keeping " << *in
<< dendl
;
504 const bufferlist
& SnapRealm::get_snap_trace()
507 return cached_snap_trace
;
510 void SnapRealm::build_snap_trace(bufferlist
& snapbl
) const
512 SnapRealmInfo
info(inode
->ino(), srnode
.created
, srnode
.seq
, srnode
.current_parent_since
);
515 info
.h
.parent
= parent
->inode
->ino();
516 if (!srnode
.past_parents
.empty()) {
517 snapid_t last
= srnode
.past_parents
.rbegin()->first
;
519 snapid_t max_seq
, max_last_created
, max_last_destroyed
;
520 build_snap_set(past
, max_seq
, max_last_created
, max_last_destroyed
, 0, last
);
521 info
.prior_parent_snaps
.reserve(past
.size());
522 for (set
<snapid_t
>::reverse_iterator p
= past
.rbegin(); p
!= past
.rend(); ++p
)
523 info
.prior_parent_snaps
.push_back(*p
);
524 dout(10) << "build_snap_trace prior_parent_snaps from [1," << last
<< "] "
525 << info
.prior_parent_snaps
<< dendl
;
530 info
.my_snaps
.reserve(srnode
.snaps
.size());
531 for (map
<snapid_t
,SnapInfo
>::const_reverse_iterator p
= srnode
.snaps
.rbegin();
532 p
!= srnode
.snaps
.rend();
534 info
.my_snaps
.push_back(p
->first
);
535 dout(10) << "build_snap_trace my_snaps " << info
.my_snaps
<< dendl
;
537 ::encode(info
, snapbl
);
540 parent
->build_snap_trace(snapbl
);
545 void SnapRealm::prune_past_parents()
547 dout(10) << "prune_past_parents" << dendl
;
551 map
<snapid_t
, snaplink_t
>::iterator p
= srnode
.past_parents
.begin();
552 while (p
!= srnode
.past_parents
.end()) {
553 set
<snapid_t
>::iterator q
= cached_snaps
.lower_bound(p
->second
.first
);
554 if (q
== cached_snaps
.end() ||
556 dout(10) << "prune_past_parents pruning [" << p
->second
.first
<< "," << p
->first
557 << "] " << p
->second
.ino
<< dendl
;
558 remove_open_past_parent(p
->second
.ino
, p
->first
);
559 srnode
.past_parents
.erase(p
++);
561 dout(10) << "prune_past_parents keeping [" << p
->second
.first
<< "," << p
->first
562 << "] " << p
->second
.ino
<< dendl
;