]> git.proxmox.com Git - ceph.git/blob - ceph/src/client/Inode.cc
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / src / client / Inode.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #include "Client.h"
5 #include "Inode.h"
6 #include "Dentry.h"
7 #include "Dir.h"
8 #include "Fh.h"
9 #include "MetaSession.h"
10 #include "ClientSnapRealm.h"
11
12 #include "mds/flock.h"
13
14 Inode::~Inode()
15 {
16 cap_item.remove_myself();
17 snaprealm_item.remove_myself();
18
19 if (snapdir_parent) {
20 snapdir_parent->flags &= ~I_SNAPDIR_OPEN;
21 snapdir_parent.reset();
22 }
23
24 if (!oset.objects.empty()) {
25 lsubdout(client->cct, client, 0) << __func__ << ": leftover objects on inode 0x"
26 << std::hex << ino << std::dec << dendl;
27 assert(oset.objects.empty());
28 }
29
30 delete fcntl_locks;
31 delete flock_locks;
32 }
33
34 ostream& operator<<(ostream &out, const Inode &in)
35 {
36 out << in.vino() << "("
37 << "faked_ino=" << in.faked_ino
38 << " ref=" << in._ref
39 << " ll_ref=" << in.ll_ref
40 << " cap_refs=" << in.cap_refs
41 << " open=" << in.open_by_mode
42 << " mode=" << oct << in.mode << dec
43 << " size=" << in.size << "/" << in.max_size
44 << " mtime=" << in.mtime
45 << " caps=" << ccap_string(in.caps_issued());
46 if (!in.caps.empty()) {
47 out << "(";
48 for (auto p = in.caps.begin(); p != in.caps.end(); ++p) {
49 if (p != in.caps.begin())
50 out << ',';
51 out << p->first << '=' << ccap_string(p->second->issued);
52 }
53 out << ")";
54 }
55 if (in.dirty_caps)
56 out << " dirty_caps=" << ccap_string(in.dirty_caps);
57 if (in.flushing_caps)
58 out << " flushing_caps=" << ccap_string(in.flushing_caps);
59
60 if (in.flags & I_COMPLETE)
61 out << " COMPLETE";
62
63 if (in.is_file())
64 out << " " << in.oset;
65
66 if (!in.dn_set.empty())
67 out << " parents=" << in.dn_set;
68
69 if (in.is_dir() && in.has_dir_layout())
70 out << " has_dir_layout";
71
72 if (in.quota.is_enable())
73 out << " " << in.quota;
74
75 out << ' ' << &in << ")";
76 return out;
77 }
78
79
80 void Inode::make_long_path(filepath& p)
81 {
82 if (!dn_set.empty()) {
83 assert((*dn_set.begin())->dir && (*dn_set.begin())->dir->parent_inode);
84 (*dn_set.begin())->dir->parent_inode->make_long_path(p);
85 p.push_dentry((*dn_set.begin())->name);
86 } else if (snapdir_parent) {
87 snapdir_parent->make_nosnap_relative_path(p);
88 string empty;
89 p.push_dentry(empty);
90 } else
91 p = filepath(ino);
92 }
93
94 /*
95 * make a filepath suitable for an mds request:
96 * - if we are non-snapped/live, the ino is sufficient, e.g. #1234
97 * - if we are snapped, make filepath relative to first non-snapped parent.
98 */
99 void Inode::make_nosnap_relative_path(filepath& p)
100 {
101 if (snapid == CEPH_NOSNAP) {
102 p = filepath(ino);
103 } else if (snapdir_parent) {
104 snapdir_parent->make_nosnap_relative_path(p);
105 string empty;
106 p.push_dentry(empty);
107 } else if (!dn_set.empty()) {
108 assert((*dn_set.begin())->dir && (*dn_set.begin())->dir->parent_inode);
109 (*dn_set.begin())->dir->parent_inode->make_nosnap_relative_path(p);
110 p.push_dentry((*dn_set.begin())->name);
111 } else {
112 p = filepath(ino);
113 }
114 }
115
116 void Inode::get_open_ref(int mode)
117 {
118 open_by_mode[mode]++;
119 }
120
121 bool Inode::put_open_ref(int mode)
122 {
123 //cout << "open_by_mode[" << mode << "] " << open_by_mode[mode] << " -> " << (open_by_mode[mode]-1) << std::endl;
124 if (--open_by_mode[mode] == 0)
125 return true;
126 return false;
127 }
128
129 void Inode::get_cap_ref(int cap)
130 {
131 int n = 0;
132 while (cap) {
133 if (cap & 1) {
134 int c = 1 << n;
135 cap_refs[c]++;
136 //cout << "inode " << *this << " get " << cap_string(c) << " " << (cap_refs[c]-1) << " -> " << cap_refs[c] << std::endl;
137 }
138 cap >>= 1;
139 n++;
140 }
141 }
142
143 int Inode::put_cap_ref(int cap)
144 {
145 // if cap is always a single bit (which it seems to be)
146 // all this logic is equivalent to:
147 // if (--cap_refs[c]) return false; else return true;
148 int last = 0;
149 int n = 0;
150 while (cap) {
151 if (cap & 1) {
152 int c = 1 << n;
153 if (cap_refs[c] <= 0) {
154 lderr(client->cct) << "put_cap_ref " << ccap_string(c) << " went negative on " << *this << dendl;
155 assert(cap_refs[c] > 0);
156 }
157 if (--cap_refs[c] == 0)
158 last |= c;
159 //cout << "inode " << *this << " put " << cap_string(c) << " " << (cap_refs[c]+1) << " -> " << cap_refs[c] << std::endl;
160 }
161 cap >>= 1;
162 n++;
163 }
164 return last;
165 }
166
167 bool Inode::is_any_caps()
168 {
169 return !caps.empty() || snap_caps;
170 }
171
172 bool Inode::cap_is_valid(Cap* cap) const
173 {
174 /*cout << "cap_gen " << cap->session-> cap_gen << std::endl
175 << "session gen " << cap->gen << std::endl
176 << "cap expire " << cap->session->cap_ttl << std::endl
177 << "cur time " << ceph_clock_now(cct) << std::endl;*/
178 if ((cap->session->cap_gen <= cap->gen)
179 && (ceph_clock_now() < cap->session->cap_ttl)) {
180 return true;
181 }
182 return false;
183 }
184
185 int Inode::caps_issued(int *implemented) const
186 {
187 int c = snap_caps;
188 int i = 0;
189 for (map<mds_rank_t,Cap*>::const_iterator it = caps.begin();
190 it != caps.end();
191 ++it)
192 if (cap_is_valid(it->second)) {
193 c |= it->second->issued;
194 i |= it->second->implemented;
195 }
196 if (implemented)
197 *implemented = i;
198 return c;
199 }
200
201 void Inode::touch_cap(Cap *cap)
202 {
203 // move to back of LRU
204 cap->session->caps.push_back(&cap->cap_item);
205 }
206
207 void Inode::try_touch_cap(mds_rank_t mds)
208 {
209 if (caps.count(mds))
210 touch_cap(caps[mds]);
211 }
212
213 bool Inode::caps_issued_mask(unsigned mask)
214 {
215 int c = snap_caps;
216 if ((c & mask) == mask)
217 return true;
218 // prefer auth cap
219 if (auth_cap &&
220 cap_is_valid(auth_cap) &&
221 (auth_cap->issued & mask) == mask) {
222 touch_cap(auth_cap);
223 return true;
224 }
225 // try any cap
226 for (map<mds_rank_t,Cap*>::iterator it = caps.begin();
227 it != caps.end();
228 ++it) {
229 if (cap_is_valid(it->second)) {
230 if ((it->second->issued & mask) == mask) {
231 touch_cap(it->second);
232 return true;
233 }
234 c |= it->second->issued;
235 }
236 }
237 if ((c & mask) == mask) {
238 // bah.. touch them all
239 for (map<mds_rank_t,Cap*>::iterator it = caps.begin();
240 it != caps.end();
241 ++it)
242 touch_cap(it->second);
243 return true;
244 }
245 return false;
246 }
247
248 int Inode::caps_used()
249 {
250 int w = 0;
251 for (map<int,int>::iterator p = cap_refs.begin();
252 p != cap_refs.end();
253 ++p)
254 if (p->second)
255 w |= p->first;
256 return w;
257 }
258
259 int Inode::caps_file_wanted()
260 {
261 int want = 0;
262 for (map<int,int>::iterator p = open_by_mode.begin();
263 p != open_by_mode.end();
264 ++p)
265 if (p->second)
266 want |= ceph_caps_for_mode(p->first);
267 return want;
268 }
269
270 int Inode::caps_wanted()
271 {
272 int want = caps_file_wanted() | caps_used();
273 if (want & CEPH_CAP_FILE_BUFFER)
274 want |= CEPH_CAP_FILE_EXCL;
275 return want;
276 }
277
278 int Inode::caps_mds_wanted()
279 {
280 int want = 0;
281 for (auto it = caps.begin(); it != caps.end(); ++it)
282 want |= it->second->wanted;
283 return want;
284 }
285
286 int Inode::caps_dirty()
287 {
288 return dirty_caps | flushing_caps;
289 }
290
291 const UserPerm* Inode::get_best_perms()
292 {
293 const UserPerm *perms = NULL;
294 for (const auto ci : caps) {
295 const UserPerm& iperm = ci.second->latest_perms;
296 if (!perms) { // we don't have any, take what's present
297 perms = &iperm;
298 } else if (iperm.uid() == uid) {
299 if (iperm.gid() == gid) { // we have the best possible, return
300 return &iperm;
301 }
302 if (perms->uid() != uid) { // take uid > gid every time
303 perms = &iperm;
304 }
305 } else if (perms->uid() != uid && iperm.gid() == gid) {
306 perms = &iperm; // a matching gid is better than nothing
307 }
308 }
309 return perms;
310 }
311
312 bool Inode::have_valid_size()
313 {
314 // RD+RDCACHE or WR+WRBUFFER => valid size
315 if (caps_issued() & (CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL))
316 return true;
317 return false;
318 }
319
320 // open Dir for an inode. if it's not open, allocated it (and pin dentry in memory).
321 Dir *Inode::open_dir()
322 {
323 if (!dir) {
324 dir = new Dir(this);
325 lsubdout(client->cct, client, 15) << "open_dir " << dir << " on " << this << dendl;
326 assert(dn_set.size() < 2); // dirs can't be hard-linked
327 if (!dn_set.empty())
328 (*dn_set.begin())->get(); // pin dentry
329 get(); // pin inode
330 }
331 return dir;
332 }
333
334 bool Inode::check_mode(const UserPerm& perms, unsigned want)
335 {
336 if (uid == perms.uid()) {
337 // if uid is owner, owner entry determines access
338 want = want << 6;
339 } else if (perms.gid_in_groups(gid)) {
340 // if a gid or sgid matches the owning group, group entry determines access
341 want = want << 3;
342 }
343
344 return (mode & want) == want;
345 }
346
347 void Inode::get() {
348 _ref++;
349 lsubdout(client->cct, client, 15) << "inode.get on " << this << " " << ino << '.' << snapid
350 << " now " << _ref << dendl;
351 }
352
353 //private method to put a reference; see Client::put_inode()
354 int Inode::_put(int n) {
355 _ref -= n;
356 lsubdout(client->cct, client, 15) << "inode.put on " << this << " " << ino << '.' << snapid
357 << " now " << _ref << dendl;
358 assert(_ref >= 0);
359 return _ref;
360 }
361
362
363 void Inode::dump(Formatter *f) const
364 {
365 f->dump_stream("ino") << ino;
366 f->dump_stream("snapid") << snapid;
367 if (rdev)
368 f->dump_unsigned("rdev", rdev);
369 f->dump_stream("ctime") << ctime;
370 f->dump_stream("btime") << btime;
371 f->dump_stream("mode") << '0' << std::oct << mode << std::dec;
372 f->dump_unsigned("uid", uid);
373 f->dump_unsigned("gid", gid);
374 f->dump_unsigned("nlink", nlink);
375
376 f->dump_int("size", size);
377 f->dump_int("max_size", max_size);
378 f->dump_int("truncate_seq", truncate_seq);
379 f->dump_int("truncate_size", truncate_size);
380 f->dump_stream("mtime") << mtime;
381 f->dump_stream("atime") << atime;
382 f->dump_int("time_warp_seq", time_warp_seq);
383 f->dump_int("change_attr", change_attr);
384
385 f->dump_object("layout", layout);
386 if (is_dir()) {
387 f->open_object_section("dir_layout");
388 ::dump(dir_layout, f);
389 f->close_section();
390
391 f->dump_bool("complete", flags & I_COMPLETE);
392 f->dump_bool("ordered", flags & I_DIR_ORDERED);
393
394 /* FIXME when wip-mds-encoding is merged ***
395 f->open_object_section("dir_stat");
396 dirstat.dump(f);
397 f->close_section();
398
399 f->open_object_section("rstat");
400 rstat.dump(f);
401 f->close_section();
402 */
403 }
404
405 f->dump_unsigned("version", version);
406 f->dump_unsigned("xattr_version", xattr_version);
407 f->dump_unsigned("flags", flags);
408
409 if (is_dir()) {
410 if (!dir_contacts.empty()) {
411 f->open_object_section("dir_contants");
412 for (set<int>::iterator p = dir_contacts.begin(); p != dir_contacts.end(); ++p)
413 f->dump_int("mds", *p);
414 f->close_section();
415 }
416 f->dump_int("dir_hashed", (int)dir_hashed);
417 f->dump_int("dir_replicated", (int)dir_replicated);
418 }
419
420 f->open_array_section("caps");
421 for (map<mds_rank_t,Cap*>::const_iterator p = caps.begin(); p != caps.end(); ++p) {
422 f->open_object_section("cap");
423 f->dump_int("mds", p->first);
424 if (p->second == auth_cap)
425 f->dump_int("auth", 1);
426 p->second->dump(f);
427 f->close_section();
428 }
429 f->close_section();
430 if (auth_cap)
431 f->dump_int("auth_cap", auth_cap->session->mds_num);
432
433 f->dump_stream("dirty_caps") << ccap_string(dirty_caps);
434 if (flushing_caps) {
435 f->dump_stream("flushings_caps") << ccap_string(flushing_caps);
436 f->open_object_section("flushing_cap_tid");
437 for (map<ceph_tid_t, int>::const_iterator p = flushing_cap_tids.begin();
438 p != flushing_cap_tids.end();
439 ++p) {
440 string n(ccap_string(p->second));
441 f->dump_unsigned(n.c_str(), p->first);
442 }
443 f->close_section();
444 }
445 f->dump_int("shared_gen", shared_gen);
446 f->dump_int("cache_gen", cache_gen);
447 if (snap_caps) {
448 f->dump_int("snap_caps", snap_caps);
449 f->dump_int("snap_cap_refs", snap_cap_refs);
450 }
451
452 f->dump_stream("hold_caps_until") << hold_caps_until;
453
454 if (snaprealm) {
455 f->open_object_section("snaprealm");
456 snaprealm->dump(f);
457 f->close_section();
458 }
459 if (!cap_snaps.empty()) {
460 for (const auto &p : cap_snaps) {
461 f->open_object_section("cap_snap");
462 f->dump_stream("follows") << p.first;
463 p.second.dump(f);
464 f->close_section();
465 }
466 }
467
468 // open
469 if (!open_by_mode.empty()) {
470 f->open_array_section("open_by_mode");
471 for (map<int,int>::const_iterator p = open_by_mode.begin(); p != open_by_mode.end(); ++p) {
472 f->open_object_section("ref");
473 f->dump_unsigned("mode", p->first);
474 f->dump_unsigned("refs", p->second);
475 f->close_section();
476 }
477 f->close_section();
478 }
479 if (!cap_refs.empty()) {
480 f->open_array_section("cap_refs");
481 for (map<int,int>::const_iterator p = cap_refs.begin(); p != cap_refs.end(); ++p) {
482 f->open_object_section("cap_ref");
483 f->dump_stream("cap") << ccap_string(p->first);
484 f->dump_int("refs", p->second);
485 f->close_section();
486 }
487 f->close_section();
488 }
489
490 f->dump_unsigned("reported_size", reported_size);
491 if (wanted_max_size != max_size)
492 f->dump_unsigned("wanted_max_size", wanted_max_size);
493 if (requested_max_size != max_size)
494 f->dump_unsigned("requested_max_size", requested_max_size);
495
496 f->dump_int("ref", _ref);
497 f->dump_int("ll_ref", ll_ref);
498
499 if (!dn_set.empty()) {
500 f->open_array_section("parents");
501 for (set<Dentry*>::const_iterator p = dn_set.begin(); p != dn_set.end(); ++p) {
502 f->open_object_section("dentry");
503 f->dump_stream("dir_ino") << (*p)->dir->parent_inode->ino;
504 f->dump_string("name", (*p)->name);
505 f->close_section();
506 }
507 f->close_section();
508 }
509 }
510
511 void Cap::dump(Formatter *f) const
512 {
513 f->dump_int("mds", session->mds_num);
514 f->dump_stream("ino") << inode->ino;
515 f->dump_unsigned("cap_id", cap_id);
516 f->dump_stream("issued") << ccap_string(issued);
517 if (implemented != issued)
518 f->dump_stream("implemented") << ccap_string(implemented);
519 f->dump_stream("wanted") << ccap_string(wanted);
520 f->dump_unsigned("seq", seq);
521 f->dump_unsigned("issue_seq", issue_seq);
522 f->dump_unsigned("mseq", mseq);
523 f->dump_unsigned("gen", gen);
524 }
525
526 void CapSnap::dump(Formatter *f) const
527 {
528 f->dump_stream("ino") << in->ino;
529 f->dump_stream("issued") << ccap_string(issued);
530 f->dump_stream("dirty") << ccap_string(dirty);
531 f->dump_unsigned("size", size);
532 f->dump_stream("ctime") << ctime;
533 f->dump_stream("mtime") << mtime;
534 f->dump_stream("atime") << atime;
535 f->dump_int("time_warp_seq", time_warp_seq);
536 f->dump_stream("mode") << '0' << std::oct << mode << std::dec;
537 f->dump_unsigned("uid", uid);
538 f->dump_unsigned("gid", gid);
539 if (!xattrs.empty()) {
540 f->open_object_section("xattr_lens");
541 for (map<string,bufferptr>::const_iterator p = xattrs.begin(); p != xattrs.end(); ++p)
542 f->dump_int(p->first.c_str(), p->second.length());
543 f->close_section();
544 }
545 f->dump_unsigned("xattr_version", xattr_version);
546 f->dump_int("writing", (int)writing);
547 f->dump_int("dirty_data", (int)dirty_data);
548 f->dump_unsigned("flush_tid", flush_tid);
549 }
550
551 void Inode::set_async_err(int r)
552 {
553 for (const auto &fh : fhs) {
554 fh->async_err = r;
555 }
556 }
557