]> git.proxmox.com Git - ceph.git/blame_incremental - ceph/src/mds/SnapRealm.cc
bump version to 18.2.4-pve3
[ceph.git] / ceph / src / mds / SnapRealm.cc
... / ...
CommitLineData
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15#include "SnapRealm.h"
16#include "MDCache.h"
17#include "MDSRank.h"
18#include "SnapClient.h"
19
20#include <string_view>
21
22
23/*
24 * SnapRealm
25 */
26
27#define dout_context g_ceph_context
28#define dout_subsys ceph_subsys_mds
29#undef dout_prefix
30#define dout_prefix _prefix(_dout, mdcache->mds->get_nodeid(), inode, srnode.seq, this)
31
32using namespace std;
33
34static std::ostream& _prefix(std::ostream *_dout, int whoami, const CInode *inode,
35 uint64_t seq, const SnapRealm *realm) {
36 return *_dout << " mds." << whoami
37 << ".cache.snaprealm(" << inode->ino()
38 << " seq " << seq << " " << realm << ") ";
39}
40
41ostream& operator<<(ostream& out, const SnapRealm& realm)
42{
43 out << "snaprealm(" << realm.inode->ino()
44 << " seq " << realm.srnode.seq
45 << " lc " << realm.srnode.last_created
46 << " cr " << realm.srnode.created;
47 if (realm.srnode.created != realm.srnode.current_parent_since)
48 out << " cps " << realm.srnode.current_parent_since;
49 out << " snaps=" << realm.srnode.snaps;
50 if (realm.srnode.past_parent_snaps.size() > 0) {
51 out << " past_parent_snaps=" << realm.srnode.past_parent_snaps;
52 }
53
54 if (realm.srnode.is_parent_global())
55 out << " global ";
56 out << " last_modified " << realm.srnode.last_modified
57 << " change_attr " << realm.srnode.change_attr;
58 out << " " << &realm << ")";
59 return out;
60}
61
62SnapRealm::SnapRealm(MDCache *c, CInode *in) :
63 mdcache(c), inode(in), inodes_with_caps(member_offset(CInode, item_caps))
64{
65 global = (inode->ino() == CEPH_INO_GLOBAL_SNAPREALM);
66 if (inode->ino() == CEPH_INO_ROOT) {
67 srnode.last_modified = in->get_inode()->mtime;
68 }
69}
70
71/*
72 * get list of snaps for this realm. we must include parents' snaps
73 * for the intervals during which they were our parent.
74 */
75void SnapRealm::build_snap_set() const
76{
77 dout(10) << "build_snap_set on " << *this << dendl;
78
79 cached_snaps.clear();
80
81 if (global) {
82 mdcache->mds->snapclient->get_snaps(cached_snaps);
83 return;
84 }
85
86 // include my snaps
87 for (const auto& p : srnode.snaps)
88 cached_snaps.insert(p.first);
89
90 if (!srnode.past_parent_snaps.empty()) {
91 set<snapid_t> snaps = mdcache->mds->snapclient->filter(srnode.past_parent_snaps);
92 if (!snaps.empty()) {
93 snapid_t last = *snaps.rbegin();
94 cached_seq = std::max(cached_seq, last);
95 cached_last_created = std::max(cached_last_created, last);
96 }
97 cached_snaps.insert(snaps.begin(), snaps.end());
98 }
99
100 snapid_t parent_seq = parent ? parent->get_newest_seq() : snapid_t(0);
101 if (parent_seq >= srnode.current_parent_since) {
102 auto& snaps = parent->get_snaps();
103 auto p = snaps.lower_bound(srnode.current_parent_since);
104 cached_snaps.insert(p, snaps.end());
105 cached_seq = std::max(cached_seq, parent_seq);
106 cached_last_created = std::max(cached_last_created, parent->get_last_created());
107 }
108}
109
110void SnapRealm::check_cache() const
111{
112 snapid_t seq;
113 snapid_t last_created;
114 snapid_t last_destroyed = mdcache->mds->snapclient->get_last_destroyed();
115 if (global || srnode.is_parent_global()) {
116 last_created = mdcache->mds->snapclient->get_last_created();
117 seq = std::max(last_created, last_destroyed);
118 } else {
119 last_created = srnode.last_created;
120 seq = srnode.seq;
121 }
122 if (cached_seq >= seq &&
123 cached_last_destroyed == last_destroyed)
124 return;
125
126 cached_snap_context.clear();
127
128 cached_seq = seq;
129 cached_last_created = last_created;
130 cached_last_destroyed = last_destroyed;
131
132 cached_subvolume_ino = 0;
133 if (parent)
134 cached_subvolume_ino = parent->get_subvolume_ino();
135 if (!cached_subvolume_ino && srnode.is_subvolume())
136 cached_subvolume_ino = inode->ino();
137
138 build_snap_set();
139
140 build_snap_trace();
141
142 dout(10) << "check_cache rebuilt " << cached_snaps
143 << " seq " << seq
144 << " cached_seq " << cached_seq
145 << " cached_last_created " << cached_last_created
146 << " cached_last_destroyed " << cached_last_destroyed
147 << ")" << dendl;
148}
149
150const set<snapid_t>& SnapRealm::get_snaps() const
151{
152 check_cache();
153 dout(10) << "get_snaps " << cached_snaps
154 << " (seq " << srnode.seq << " cached_seq " << cached_seq << ")"
155 << dendl;
156 return cached_snaps;
157}
158
159/*
160 * build vector in reverse sorted order
161 */
162const SnapContext& SnapRealm::get_snap_context() const
163{
164 check_cache();
165
166 if (!cached_snap_context.seq) {
167 cached_snap_context.seq = cached_seq;
168 cached_snap_context.snaps.resize(cached_snaps.size());
169 unsigned i = 0;
170 for (set<snapid_t>::reverse_iterator p = cached_snaps.rbegin();
171 p != cached_snaps.rend();
172 ++p)
173 cached_snap_context.snaps[i++] = *p;
174 }
175
176 return cached_snap_context;
177}
178
179void SnapRealm::get_snap_info(map<snapid_t, const SnapInfo*>& infomap, snapid_t first, snapid_t last)
180{
181 const set<snapid_t>& snaps = get_snaps();
182 dout(10) << "get_snap_info snaps " << snaps << dendl;
183
184 // include my snaps within interval [first,last]
185 for (auto p = srnode.snaps.lower_bound(first); // first element >= first
186 p != srnode.snaps.end() && p->first <= last;
187 ++p)
188 infomap[p->first] = &p->second;
189
190 if (!srnode.past_parent_snaps.empty()) {
191 set<snapid_t> snaps;
192 for (auto p = srnode.past_parent_snaps.lower_bound(first); // first element >= first
193 p != srnode.past_parent_snaps.end() && *p <= last;
194 ++p) {
195 snaps.insert(*p);
196 }
197
198 map<snapid_t, const SnapInfo*> _infomap;
199 mdcache->mds->snapclient->get_snap_infos(_infomap, snaps);
200 infomap.insert(_infomap.begin(), _infomap.end());
201 }
202
203 if (srnode.current_parent_since <= last && parent)
204 parent->get_snap_info(infomap, std::max(first, srnode.current_parent_since), last);
205}
206
207std::string_view SnapRealm::get_snapname(snapid_t snapid, inodeno_t atino)
208{
209 auto srnode_snaps_entry = srnode.snaps.find(snapid);
210 if (srnode_snaps_entry != srnode.snaps.end()) {
211 if (atino == inode->ino())
212 return srnode_snaps_entry->second.name;
213 else
214 return srnode_snaps_entry->second.get_long_name();
215 }
216
217 if (!srnode.past_parent_snaps.empty()) {
218 if (srnode.past_parent_snaps.count(snapid)) {
219 const SnapInfo *sinfo = mdcache->mds->snapclient->get_snap_info(snapid);
220 if (sinfo) {
221 if (atino == sinfo->ino)
222 return sinfo->name;
223 else
224 return sinfo->get_long_name();
225 }
226 }
227 }
228
229 ceph_assert(srnode.current_parent_since <= snapid);
230 ceph_assert(parent);
231 return parent->get_snapname(snapid, atino);
232}
233
234snapid_t SnapRealm::resolve_snapname(std::string_view n, inodeno_t atino, snapid_t first, snapid_t last)
235{
236 // first try me
237 dout(10) << "resolve_snapname '" << n << "' in [" << first << "," << last << "]" << dendl;
238
239 bool actual = (atino == inode->ino());
240 string pname;
241 inodeno_t pino;
242 if (n.length() && n[0] == '_') {
243 size_t next_ = n.find_last_of('_');
244 if (next_ > 1 && next_ + 1 < n.length()) {
245 pname = n.substr(1, next_ - 1);
246 pino = atoll(n.data() + next_ + 1);
247 dout(10) << " " << n << " parses to name '" << pname << "' dirino " << pino << dendl;
248 }
249 }
250
251 for (auto p = srnode.snaps.lower_bound(first); // first element >= first
252 p != srnode.snaps.end() && p->first <= last;
253 ++p) {
254 dout(15) << " ? " << p->second << dendl;
255 //if (num && p->second.snapid == num)
256 //return p->first;
257 if (actual && p->second.name == n)
258 return p->first;
259 if (!actual && p->second.name == pname && p->second.ino == pino)
260 return p->first;
261 }
262
263 if (!srnode.past_parent_snaps.empty()) {
264 set<snapid_t> snaps;
265 for (auto p = srnode.past_parent_snaps.lower_bound(first); // first element >= first
266 p != srnode.past_parent_snaps.end() && *p <= last;
267 ++p)
268 snaps.insert(*p);
269
270 map<snapid_t, const SnapInfo*> _infomap;
271 mdcache->mds->snapclient->get_snap_infos(_infomap, snaps);
272
273 for (auto& it : _infomap) {
274 dout(15) << " ? " << *it.second << dendl;
275 actual = (it.second->ino == atino);
276 if (actual && it.second->name == n)
277 return it.first;
278 if (!actual && it.second->name == pname && it.second->ino == pino)
279 return it.first;
280 }
281 }
282
283 if (parent && srnode.current_parent_since <= last)
284 return parent->resolve_snapname(n, atino, std::max(first, srnode.current_parent_since), last);
285 return 0;
286}
287
288
289void SnapRealm::adjust_parent()
290{
291 SnapRealm *newparent;
292 if (srnode.is_parent_global()) {
293 newparent = mdcache->get_global_snaprealm();
294 } else {
295 CDentry *pdn = inode->get_parent_dn();
296 newparent = pdn ? pdn->get_dir()->get_inode()->find_snaprealm() : NULL;
297 }
298 if (newparent != parent) {
299 dout(10) << "adjust_parent " << parent << " -> " << newparent << dendl;
300 if (parent)
301 parent->open_children.erase(this);
302 parent = newparent;
303 if (parent)
304 parent->open_children.insert(this);
305
306 invalidate_cached_snaps();
307 }
308}
309
310void SnapRealm::split_at(SnapRealm *child)
311{
312 dout(10) << __func__ << ": " << *child
313 << " on " << *child->inode << dendl;
314
315 if (inode->is_mdsdir() || !child->inode->is_dir()) {
316 // it's not a dir.
317 if (child->inode->containing_realm) {
318 // - no open children.
319 // - only need to move this child's inode's caps.
320 child->inode->move_to_realm(child);
321 } else {
322 // no caps, nothing to move/split.
323 dout(20) << " split no-op, no caps to move on file " << *child->inode << dendl;
324 ceph_assert(!child->inode->is_any_caps());
325 }
326 return;
327 }
328
329 // it's a dir.
330
331 if (child->inode->get_projected_parent_dir()->inode->is_stray()) {
332 if (child->inode->containing_realm) {
333 dout(10) << " moving unlinked directory inode" << dendl;
334 child->inode->move_to_realm(child);
335 } else {
336 /* This shouldn't happen because an unlinked directory will have caps
337 * issued to the caller executing rmdir (for today's clients).
338 */
339 dout(10) << " skipping unlinked directory inode w/o caps" << dendl;
340 }
341 return;
342 }
343
344 // split open_children
345 if (!open_children.empty()) {
346 dout(10) << " open_children are " << open_children << dendl;
347 }
348 for (set<SnapRealm*>::iterator p = open_children.begin();
349 p != open_children.end(); ) {
350 SnapRealm *realm = *p;
351 if (realm != child &&
352 child->inode->is_ancestor_of(realm->inode)) {
353 dout(20) << " child gets child realm " << *realm << " on " << *realm->inode << dendl;
354 realm->parent = child;
355 child->open_children.insert(realm);
356 open_children.erase(p++);
357 } else {
358 dout(20) << " keeping child realm " << *realm << " on " << *realm->inode << dendl;
359 ++p;
360 }
361 }
362
363 // split inodes_with_caps
364 std::unordered_map<CInode const*,bool> visited;
365 uint64_t count = 0;
366 dout(20) << " reserving space for " << CDir::count() << " dirs" << dendl;
367 visited.reserve(CDir::count()); /* a reasonable starting poing: keep in mind there may be CInode directories without fragments in cache */
368 for (auto p = inodes_with_caps.begin(); !p.end(); ) {
369 CInode *in = *p;
370 ++p;
371 // does inode fall within the child realm?
372 if (child->inode->is_ancestor_of(in, &visited)) {
373 dout(25) << " child gets " << *in << dendl;
374 in->move_to_realm(child);
375 ++count;
376 } else {
377 dout(25) << " keeping " << *in << dendl;
378 }
379 }
380 dout(20) << " visited " << visited.size() << " directories" << dendl;
381
382 dout(10) << __func__ << ": split " << count << " inodes" << dendl;
383}
384
385void SnapRealm::merge_to(SnapRealm *newparent)
386{
387 if (!newparent)
388 newparent = parent;
389 dout(10) << "merge to " << *newparent << " on " << *newparent->inode << dendl;
390
391 dout(10) << " open_children are " << open_children << dendl;
392 for (auto realm : open_children) {
393 dout(20) << " child realm " << *realm << " on " << *realm->inode << dendl;
394 newparent->open_children.insert(realm);
395 realm->parent = newparent;
396 }
397 open_children.clear();
398
399 for (auto p = inodes_with_caps.begin(); !p.end(); ) {
400 CInode *in = *p;
401 ++p;
402 in->move_to_realm(newparent);
403 }
404 ceph_assert(inodes_with_caps.empty());
405
406 // delete this
407 inode->close_snaprealm();
408}
409
410const bufferlist& SnapRealm::get_snap_trace() const
411{
412 check_cache();
413 return cached_snap_trace;
414}
415
416const bufferlist& SnapRealm::get_snap_trace_new() const
417{
418 check_cache();
419 return cached_snap_trace_new;
420}
421
422void SnapRealm::build_snap_trace() const
423{
424 cached_snap_trace.clear();
425 cached_snap_trace_new.clear();
426
427 if (global) {
428 SnapRealmInfo info(inode->ino(), 0, cached_seq, 0);
429 info.my_snaps.reserve(cached_snaps.size());
430 for (auto p = cached_snaps.rbegin(); p != cached_snaps.rend(); ++p)
431 info.my_snaps.push_back(*p);
432
433 dout(10) << "build_snap_trace my_snaps " << info.my_snaps << dendl;
434
435 SnapRealmInfoNew ninfo(info, srnode.last_modified, srnode.change_attr);
436 encode(info, cached_snap_trace);
437 encode(ninfo, cached_snap_trace_new);
438 return;
439 }
440
441 SnapRealmInfo info(inode->ino(), srnode.created, srnode.seq, srnode.current_parent_since);
442 if (parent) {
443 info.h.parent = parent->inode->ino();
444
445 set<snapid_t> past;
446 if (!srnode.past_parent_snaps.empty()) {
447 past = mdcache->mds->snapclient->filter(srnode.past_parent_snaps);
448 if (srnode.is_parent_global()) {
449 auto p = past.lower_bound(srnode.current_parent_since);
450 past.erase(p, past.end());
451 }
452 }
453
454 if (!past.empty()) {
455 info.prior_parent_snaps.reserve(past.size());
456 for (set<snapid_t>::reverse_iterator p = past.rbegin(); p != past.rend(); ++p)
457 info.prior_parent_snaps.push_back(*p);
458 dout(10) << "build_snap_trace prior_parent_snaps from [1," << *past.rbegin() << "] "
459 << info.prior_parent_snaps << dendl;
460 }
461 }
462
463 info.my_snaps.reserve(srnode.snaps.size());
464 for (auto p = srnode.snaps.rbegin();
465 p != srnode.snaps.rend();
466 ++p)
467 info.my_snaps.push_back(p->first);
468 dout(10) << "build_snap_trace my_snaps " << info.my_snaps << dendl;
469
470 SnapRealmInfoNew ninfo(info, srnode.last_modified, srnode.change_attr);
471
472 encode(info, cached_snap_trace);
473 encode(ninfo, cached_snap_trace_new);
474
475 if (parent) {
476 cached_snap_trace.append(parent->get_snap_trace());
477 cached_snap_trace_new.append(parent->get_snap_trace_new());
478 }
479}
480
481void SnapRealm::prune_past_parent_snaps()
482{
483 dout(10) << __func__ << dendl;
484 check_cache();
485
486 for (auto p = srnode.past_parent_snaps.begin();
487 p != srnode.past_parent_snaps.end(); ) {
488 auto q = cached_snaps.find(*p);
489 if (q == cached_snaps.end()) {
490 dout(10) << __func__ << " pruning " << *p << dendl;
491 srnode.past_parent_snaps.erase(p++);
492 } else {
493 dout(10) << __func__ << " keeping " << *p << dendl;
494 ++p;
495 }
496 }
497}
498