]>
Commit | Line | Data |
---|---|---|
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- | |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | #include "SnapRealm.h" | |
16 | #include "MDCache.h" | |
17 | #include "MDSRank.h" | |
18 | #include "SnapClient.h" | |
19 | ||
20 | #include <string_view> | |
21 | ||
22 | ||
23 | /* | |
24 | * SnapRealm | |
25 | */ | |
26 | ||
27 | #define dout_context g_ceph_context | |
28 | #define dout_subsys ceph_subsys_mds | |
29 | #undef dout_prefix | |
30 | #define dout_prefix _prefix(_dout, mdcache->mds->get_nodeid(), inode, srnode.seq, this) | |
31 | ||
32 | using namespace std; | |
33 | ||
34 | static std::ostream& _prefix(std::ostream *_dout, int whoami, const CInode *inode, | |
35 | uint64_t seq, const SnapRealm *realm) { | |
36 | return *_dout << " mds." << whoami | |
37 | << ".cache.snaprealm(" << inode->ino() | |
38 | << " seq " << seq << " " << realm << ") "; | |
39 | } | |
40 | ||
41 | ostream& operator<<(ostream& out, const SnapRealm& realm) | |
42 | { | |
43 | out << "snaprealm(" << realm.inode->ino() | |
44 | << " seq " << realm.srnode.seq | |
45 | << " lc " << realm.srnode.last_created | |
46 | << " cr " << realm.srnode.created; | |
47 | if (realm.srnode.created != realm.srnode.current_parent_since) | |
48 | out << " cps " << realm.srnode.current_parent_since; | |
49 | out << " snaps=" << realm.srnode.snaps; | |
50 | if (realm.srnode.past_parent_snaps.size() > 0) { | |
51 | out << " past_parent_snaps=" << realm.srnode.past_parent_snaps; | |
52 | } | |
53 | ||
54 | if (realm.srnode.is_parent_global()) | |
55 | out << " global "; | |
56 | out << " last_modified " << realm.srnode.last_modified | |
57 | << " change_attr " << realm.srnode.change_attr; | |
58 | out << " " << &realm << ")"; | |
59 | return out; | |
60 | } | |
61 | ||
62 | SnapRealm::SnapRealm(MDCache *c, CInode *in) : | |
63 | mdcache(c), inode(in), inodes_with_caps(member_offset(CInode, item_caps)) | |
64 | { | |
65 | global = (inode->ino() == CEPH_INO_GLOBAL_SNAPREALM); | |
66 | if (inode->ino() == CEPH_INO_ROOT) { | |
67 | srnode.last_modified = in->get_inode()->mtime; | |
68 | } | |
69 | } | |
70 | ||
71 | /* | |
72 | * get list of snaps for this realm. we must include parents' snaps | |
73 | * for the intervals during which they were our parent. | |
74 | */ | |
75 | void SnapRealm::build_snap_set() const | |
76 | { | |
77 | dout(10) << "build_snap_set on " << *this << dendl; | |
78 | ||
79 | cached_snaps.clear(); | |
80 | ||
81 | if (global) { | |
82 | mdcache->mds->snapclient->get_snaps(cached_snaps); | |
83 | return; | |
84 | } | |
85 | ||
86 | // include my snaps | |
87 | for (const auto& p : srnode.snaps) | |
88 | cached_snaps.insert(p.first); | |
89 | ||
90 | if (!srnode.past_parent_snaps.empty()) { | |
91 | set<snapid_t> snaps = mdcache->mds->snapclient->filter(srnode.past_parent_snaps); | |
92 | if (!snaps.empty()) { | |
93 | snapid_t last = *snaps.rbegin(); | |
94 | cached_seq = std::max(cached_seq, last); | |
95 | cached_last_created = std::max(cached_last_created, last); | |
96 | } | |
97 | cached_snaps.insert(snaps.begin(), snaps.end()); | |
98 | } | |
99 | ||
100 | snapid_t parent_seq = parent ? parent->get_newest_seq() : snapid_t(0); | |
101 | if (parent_seq >= srnode.current_parent_since) { | |
102 | auto& snaps = parent->get_snaps(); | |
103 | auto p = snaps.lower_bound(srnode.current_parent_since); | |
104 | cached_snaps.insert(p, snaps.end()); | |
105 | cached_seq = std::max(cached_seq, parent_seq); | |
106 | cached_last_created = std::max(cached_last_created, parent->get_last_created()); | |
107 | } | |
108 | } | |
109 | ||
110 | void SnapRealm::check_cache() const | |
111 | { | |
112 | snapid_t seq; | |
113 | snapid_t last_created; | |
114 | snapid_t last_destroyed = mdcache->mds->snapclient->get_last_destroyed(); | |
115 | if (global || srnode.is_parent_global()) { | |
116 | last_created = mdcache->mds->snapclient->get_last_created(); | |
117 | seq = std::max(last_created, last_destroyed); | |
118 | } else { | |
119 | last_created = srnode.last_created; | |
120 | seq = srnode.seq; | |
121 | } | |
122 | if (cached_seq >= seq && | |
123 | cached_last_destroyed == last_destroyed) | |
124 | return; | |
125 | ||
126 | cached_snap_context.clear(); | |
127 | ||
128 | cached_seq = seq; | |
129 | cached_last_created = last_created; | |
130 | cached_last_destroyed = last_destroyed; | |
131 | ||
132 | cached_subvolume_ino = 0; | |
133 | if (parent) | |
134 | cached_subvolume_ino = parent->get_subvolume_ino(); | |
135 | if (!cached_subvolume_ino && srnode.is_subvolume()) | |
136 | cached_subvolume_ino = inode->ino(); | |
137 | ||
138 | build_snap_set(); | |
139 | ||
140 | build_snap_trace(); | |
141 | ||
142 | dout(10) << "check_cache rebuilt " << cached_snaps | |
143 | << " seq " << seq | |
144 | << " cached_seq " << cached_seq | |
145 | << " cached_last_created " << cached_last_created | |
146 | << " cached_last_destroyed " << cached_last_destroyed | |
147 | << ")" << dendl; | |
148 | } | |
149 | ||
150 | const set<snapid_t>& SnapRealm::get_snaps() const | |
151 | { | |
152 | check_cache(); | |
153 | dout(10) << "get_snaps " << cached_snaps | |
154 | << " (seq " << srnode.seq << " cached_seq " << cached_seq << ")" | |
155 | << dendl; | |
156 | return cached_snaps; | |
157 | } | |
158 | ||
159 | /* | |
160 | * build vector in reverse sorted order | |
161 | */ | |
162 | const SnapContext& SnapRealm::get_snap_context() const | |
163 | { | |
164 | check_cache(); | |
165 | ||
166 | if (!cached_snap_context.seq) { | |
167 | cached_snap_context.seq = cached_seq; | |
168 | cached_snap_context.snaps.resize(cached_snaps.size()); | |
169 | unsigned i = 0; | |
170 | for (set<snapid_t>::reverse_iterator p = cached_snaps.rbegin(); | |
171 | p != cached_snaps.rend(); | |
172 | ++p) | |
173 | cached_snap_context.snaps[i++] = *p; | |
174 | } | |
175 | ||
176 | return cached_snap_context; | |
177 | } | |
178 | ||
179 | void SnapRealm::get_snap_info(map<snapid_t, const SnapInfo*>& infomap, snapid_t first, snapid_t last) | |
180 | { | |
181 | const set<snapid_t>& snaps = get_snaps(); | |
182 | dout(10) << "get_snap_info snaps " << snaps << dendl; | |
183 | ||
184 | // include my snaps within interval [first,last] | |
185 | for (auto p = srnode.snaps.lower_bound(first); // first element >= first | |
186 | p != srnode.snaps.end() && p->first <= last; | |
187 | ++p) | |
188 | infomap[p->first] = &p->second; | |
189 | ||
190 | if (!srnode.past_parent_snaps.empty()) { | |
191 | set<snapid_t> snaps; | |
192 | for (auto p = srnode.past_parent_snaps.lower_bound(first); // first element >= first | |
193 | p != srnode.past_parent_snaps.end() && *p <= last; | |
194 | ++p) { | |
195 | snaps.insert(*p); | |
196 | } | |
197 | ||
198 | map<snapid_t, const SnapInfo*> _infomap; | |
199 | mdcache->mds->snapclient->get_snap_infos(_infomap, snaps); | |
200 | infomap.insert(_infomap.begin(), _infomap.end()); | |
201 | } | |
202 | ||
203 | if (srnode.current_parent_since <= last && parent) | |
204 | parent->get_snap_info(infomap, std::max(first, srnode.current_parent_since), last); | |
205 | } | |
206 | ||
207 | std::string_view SnapRealm::get_snapname(snapid_t snapid, inodeno_t atino) | |
208 | { | |
209 | auto srnode_snaps_entry = srnode.snaps.find(snapid); | |
210 | if (srnode_snaps_entry != srnode.snaps.end()) { | |
211 | if (atino == inode->ino()) | |
212 | return srnode_snaps_entry->second.name; | |
213 | else | |
214 | return srnode_snaps_entry->second.get_long_name(); | |
215 | } | |
216 | ||
217 | if (!srnode.past_parent_snaps.empty()) { | |
218 | if (srnode.past_parent_snaps.count(snapid)) { | |
219 | const SnapInfo *sinfo = mdcache->mds->snapclient->get_snap_info(snapid); | |
220 | if (sinfo) { | |
221 | if (atino == sinfo->ino) | |
222 | return sinfo->name; | |
223 | else | |
224 | return sinfo->get_long_name(); | |
225 | } | |
226 | } | |
227 | } | |
228 | ||
229 | ceph_assert(srnode.current_parent_since <= snapid); | |
230 | ceph_assert(parent); | |
231 | return parent->get_snapname(snapid, atino); | |
232 | } | |
233 | ||
234 | snapid_t SnapRealm::resolve_snapname(std::string_view n, inodeno_t atino, snapid_t first, snapid_t last) | |
235 | { | |
236 | // first try me | |
237 | dout(10) << "resolve_snapname '" << n << "' in [" << first << "," << last << "]" << dendl; | |
238 | ||
239 | bool actual = (atino == inode->ino()); | |
240 | string pname; | |
241 | inodeno_t pino; | |
242 | if (n.length() && n[0] == '_') { | |
243 | size_t next_ = n.find_last_of('_'); | |
244 | if (next_ > 1 && next_ + 1 < n.length()) { | |
245 | pname = n.substr(1, next_ - 1); | |
246 | pino = atoll(n.data() + next_ + 1); | |
247 | dout(10) << " " << n << " parses to name '" << pname << "' dirino " << pino << dendl; | |
248 | } | |
249 | } | |
250 | ||
251 | for (auto p = srnode.snaps.lower_bound(first); // first element >= first | |
252 | p != srnode.snaps.end() && p->first <= last; | |
253 | ++p) { | |
254 | dout(15) << " ? " << p->second << dendl; | |
255 | //if (num && p->second.snapid == num) | |
256 | //return p->first; | |
257 | if (actual && p->second.name == n) | |
258 | return p->first; | |
259 | if (!actual && p->second.name == pname && p->second.ino == pino) | |
260 | return p->first; | |
261 | } | |
262 | ||
263 | if (!srnode.past_parent_snaps.empty()) { | |
264 | set<snapid_t> snaps; | |
265 | for (auto p = srnode.past_parent_snaps.lower_bound(first); // first element >= first | |
266 | p != srnode.past_parent_snaps.end() && *p <= last; | |
267 | ++p) | |
268 | snaps.insert(*p); | |
269 | ||
270 | map<snapid_t, const SnapInfo*> _infomap; | |
271 | mdcache->mds->snapclient->get_snap_infos(_infomap, snaps); | |
272 | ||
273 | for (auto& it : _infomap) { | |
274 | dout(15) << " ? " << *it.second << dendl; | |
275 | actual = (it.second->ino == atino); | |
276 | if (actual && it.second->name == n) | |
277 | return it.first; | |
278 | if (!actual && it.second->name == pname && it.second->ino == pino) | |
279 | return it.first; | |
280 | } | |
281 | } | |
282 | ||
283 | if (parent && srnode.current_parent_since <= last) | |
284 | return parent->resolve_snapname(n, atino, std::max(first, srnode.current_parent_since), last); | |
285 | return 0; | |
286 | } | |
287 | ||
288 | ||
289 | void SnapRealm::adjust_parent() | |
290 | { | |
291 | SnapRealm *newparent; | |
292 | if (srnode.is_parent_global()) { | |
293 | newparent = mdcache->get_global_snaprealm(); | |
294 | } else { | |
295 | CDentry *pdn = inode->get_parent_dn(); | |
296 | newparent = pdn ? pdn->get_dir()->get_inode()->find_snaprealm() : NULL; | |
297 | } | |
298 | if (newparent != parent) { | |
299 | dout(10) << "adjust_parent " << parent << " -> " << newparent << dendl; | |
300 | if (parent) | |
301 | parent->open_children.erase(this); | |
302 | parent = newparent; | |
303 | if (parent) | |
304 | parent->open_children.insert(this); | |
305 | ||
306 | invalidate_cached_snaps(); | |
307 | } | |
308 | } | |
309 | ||
310 | void SnapRealm::split_at(SnapRealm *child) | |
311 | { | |
312 | dout(10) << __func__ << ": " << *child | |
313 | << " on " << *child->inode << dendl; | |
314 | ||
315 | if (inode->is_mdsdir() || !child->inode->is_dir()) { | |
316 | // it's not a dir. | |
317 | if (child->inode->containing_realm) { | |
318 | // - no open children. | |
319 | // - only need to move this child's inode's caps. | |
320 | child->inode->move_to_realm(child); | |
321 | } else { | |
322 | // no caps, nothing to move/split. | |
323 | dout(20) << " split no-op, no caps to move on file " << *child->inode << dendl; | |
324 | ceph_assert(!child->inode->is_any_caps()); | |
325 | } | |
326 | return; | |
327 | } | |
328 | ||
329 | // it's a dir. | |
330 | ||
331 | if (child->inode->get_projected_parent_dir()->inode->is_stray()) { | |
332 | if (child->inode->containing_realm) { | |
333 | dout(10) << " moving unlinked directory inode" << dendl; | |
334 | child->inode->move_to_realm(child); | |
335 | } else { | |
336 | /* This shouldn't happen because an unlinked directory will have caps | |
337 | * issued to the caller executing rmdir (for today's clients). | |
338 | */ | |
339 | dout(10) << " skipping unlinked directory inode w/o caps" << dendl; | |
340 | } | |
341 | return; | |
342 | } | |
343 | ||
344 | // split open_children | |
345 | if (!open_children.empty()) { | |
346 | dout(10) << " open_children are " << open_children << dendl; | |
347 | } | |
348 | for (set<SnapRealm*>::iterator p = open_children.begin(); | |
349 | p != open_children.end(); ) { | |
350 | SnapRealm *realm = *p; | |
351 | if (realm != child && | |
352 | child->inode->is_ancestor_of(realm->inode)) { | |
353 | dout(20) << " child gets child realm " << *realm << " on " << *realm->inode << dendl; | |
354 | realm->parent = child; | |
355 | child->open_children.insert(realm); | |
356 | open_children.erase(p++); | |
357 | } else { | |
358 | dout(20) << " keeping child realm " << *realm << " on " << *realm->inode << dendl; | |
359 | ++p; | |
360 | } | |
361 | } | |
362 | ||
363 | // split inodes_with_caps | |
364 | std::unordered_map<CInode const*,bool> visited; | |
365 | uint64_t count = 0; | |
366 | dout(20) << " reserving space for " << CDir::count() << " dirs" << dendl; | |
367 | visited.reserve(CDir::count()); /* a reasonable starting poing: keep in mind there may be CInode directories without fragments in cache */ | |
368 | for (auto p = inodes_with_caps.begin(); !p.end(); ) { | |
369 | CInode *in = *p; | |
370 | ++p; | |
371 | // does inode fall within the child realm? | |
372 | if (child->inode->is_ancestor_of(in, &visited)) { | |
373 | dout(25) << " child gets " << *in << dendl; | |
374 | in->move_to_realm(child); | |
375 | ++count; | |
376 | } else { | |
377 | dout(25) << " keeping " << *in << dendl; | |
378 | } | |
379 | } | |
380 | dout(20) << " visited " << visited.size() << " directories" << dendl; | |
381 | ||
382 | dout(10) << __func__ << ": split " << count << " inodes" << dendl; | |
383 | } | |
384 | ||
385 | void SnapRealm::merge_to(SnapRealm *newparent) | |
386 | { | |
387 | if (!newparent) | |
388 | newparent = parent; | |
389 | dout(10) << "merge to " << *newparent << " on " << *newparent->inode << dendl; | |
390 | ||
391 | dout(10) << " open_children are " << open_children << dendl; | |
392 | for (auto realm : open_children) { | |
393 | dout(20) << " child realm " << *realm << " on " << *realm->inode << dendl; | |
394 | newparent->open_children.insert(realm); | |
395 | realm->parent = newparent; | |
396 | } | |
397 | open_children.clear(); | |
398 | ||
399 | for (auto p = inodes_with_caps.begin(); !p.end(); ) { | |
400 | CInode *in = *p; | |
401 | ++p; | |
402 | in->move_to_realm(newparent); | |
403 | } | |
404 | ceph_assert(inodes_with_caps.empty()); | |
405 | ||
406 | // delete this | |
407 | inode->close_snaprealm(); | |
408 | } | |
409 | ||
410 | const bufferlist& SnapRealm::get_snap_trace() const | |
411 | { | |
412 | check_cache(); | |
413 | return cached_snap_trace; | |
414 | } | |
415 | ||
416 | const bufferlist& SnapRealm::get_snap_trace_new() const | |
417 | { | |
418 | check_cache(); | |
419 | return cached_snap_trace_new; | |
420 | } | |
421 | ||
422 | void SnapRealm::build_snap_trace() const | |
423 | { | |
424 | cached_snap_trace.clear(); | |
425 | cached_snap_trace_new.clear(); | |
426 | ||
427 | if (global) { | |
428 | SnapRealmInfo info(inode->ino(), 0, cached_seq, 0); | |
429 | info.my_snaps.reserve(cached_snaps.size()); | |
430 | for (auto p = cached_snaps.rbegin(); p != cached_snaps.rend(); ++p) | |
431 | info.my_snaps.push_back(*p); | |
432 | ||
433 | dout(10) << "build_snap_trace my_snaps " << info.my_snaps << dendl; | |
434 | ||
435 | SnapRealmInfoNew ninfo(info, srnode.last_modified, srnode.change_attr); | |
436 | encode(info, cached_snap_trace); | |
437 | encode(ninfo, cached_snap_trace_new); | |
438 | return; | |
439 | } | |
440 | ||
441 | SnapRealmInfo info(inode->ino(), srnode.created, srnode.seq, srnode.current_parent_since); | |
442 | if (parent) { | |
443 | info.h.parent = parent->inode->ino(); | |
444 | ||
445 | set<snapid_t> past; | |
446 | if (!srnode.past_parent_snaps.empty()) { | |
447 | past = mdcache->mds->snapclient->filter(srnode.past_parent_snaps); | |
448 | if (srnode.is_parent_global()) { | |
449 | auto p = past.lower_bound(srnode.current_parent_since); | |
450 | past.erase(p, past.end()); | |
451 | } | |
452 | } | |
453 | ||
454 | if (!past.empty()) { | |
455 | info.prior_parent_snaps.reserve(past.size()); | |
456 | for (set<snapid_t>::reverse_iterator p = past.rbegin(); p != past.rend(); ++p) | |
457 | info.prior_parent_snaps.push_back(*p); | |
458 | dout(10) << "build_snap_trace prior_parent_snaps from [1," << *past.rbegin() << "] " | |
459 | << info.prior_parent_snaps << dendl; | |
460 | } | |
461 | } | |
462 | ||
463 | info.my_snaps.reserve(srnode.snaps.size()); | |
464 | for (auto p = srnode.snaps.rbegin(); | |
465 | p != srnode.snaps.rend(); | |
466 | ++p) | |
467 | info.my_snaps.push_back(p->first); | |
468 | dout(10) << "build_snap_trace my_snaps " << info.my_snaps << dendl; | |
469 | ||
470 | SnapRealmInfoNew ninfo(info, srnode.last_modified, srnode.change_attr); | |
471 | ||
472 | encode(info, cached_snap_trace); | |
473 | encode(ninfo, cached_snap_trace_new); | |
474 | ||
475 | if (parent) { | |
476 | cached_snap_trace.append(parent->get_snap_trace()); | |
477 | cached_snap_trace_new.append(parent->get_snap_trace_new()); | |
478 | } | |
479 | } | |
480 | ||
481 | void SnapRealm::prune_past_parent_snaps() | |
482 | { | |
483 | dout(10) << __func__ << dendl; | |
484 | check_cache(); | |
485 | ||
486 | for (auto p = srnode.past_parent_snaps.begin(); | |
487 | p != srnode.past_parent_snaps.end(); ) { | |
488 | auto q = cached_snaps.find(*p); | |
489 | if (q == cached_snaps.end()) { | |
490 | dout(10) << __func__ << " pruning " << *p << dendl; | |
491 | srnode.past_parent_snaps.erase(p++); | |
492 | } else { | |
493 | dout(10) << __func__ << " keeping " << *p << dendl; | |
494 | ++p; | |
495 | } | |
496 | } | |
497 | } | |
498 |