]> git.proxmox.com Git - ceph.git/blob - ceph/src/rgw/rgw_file.cc
c204c1886f2c37393bdb374fb16eb92387285f71
[ceph.git] / ceph / src / rgw / rgw_file.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab ft=cpp
3
4 #include "include/compat.h"
5 #include "include/rados/rgw_file.h"
6
7 #include <sys/types.h>
8 #include <sys/stat.h>
9
10 #include "rgw_lib.h"
11 #include "rgw_rados.h"
12 #include "rgw_resolve.h"
13 #include "rgw_op.h"
14 #include "rgw_rest.h"
15 #include "rgw_acl.h"
16 #include "rgw_acl_s3.h"
17 #include "rgw_frontend.h"
18 #include "rgw_request.h"
19 #include "rgw_process.h"
20 #include "rgw_rest_user.h"
21 #include "rgw_rest_s3.h"
22 #include "rgw_os_lib.h"
23 #include "rgw_auth_s3.h"
24 #include "rgw_user.h"
25 #include "rgw_bucket.h"
26 #include "rgw_zone.h"
27 #include "rgw_file.h"
28 #include "rgw_lib_frontend.h"
29 #include "rgw_perf_counters.h"
30 #include "common/errno.h"
31
32 #include "services/svc_zone.h"
33
34 #include <atomic>
35
36 #define dout_subsys ceph_subsys_rgw
37
38 using namespace rgw;
39
40 namespace rgw {
41
42 extern RGWLib rgwlib;
43
44 const string RGWFileHandle::root_name = "/";
45
46 std::atomic<uint32_t> RGWLibFS::fs_inst_counter;
47
48 uint32_t RGWLibFS::write_completion_interval_s = 10;
49
50 ceph::timer<ceph::mono_clock> RGWLibFS::write_timer{
51 ceph::construct_suspended};
52
53 inline int valid_fs_bucket_name(const string& name) {
54 int rc = valid_s3_bucket_name(name, false /* relaxed */);
55 if (rc != 0) {
56 if (name.size() > 255)
57 return -ENAMETOOLONG;
58 return -EINVAL;
59 }
60 return 0;
61 }
62
63 inline int valid_fs_object_name(const string& name) {
64 int rc = valid_s3_object_name(name);
65 if (rc != 0) {
66 if (name.size() > 1024)
67 return -ENAMETOOLONG;
68 return -EINVAL;
69 }
70 return 0;
71 }
72
73 LookupFHResult RGWLibFS::stat_bucket(RGWFileHandle* parent, const char *path,
74 RGWLibFS::BucketStats& bs,
75 uint32_t flags)
76 {
77 LookupFHResult fhr{nullptr, 0};
78 std::string bucket_name{path};
79 rgw::sal::RGWRadosUser ruser(rgwlib.get_store(), user);
80 RGWStatBucketRequest req(cct, &ruser, bucket_name, bs);
81
82 int rc = rgwlib.get_fe()->execute_req(&req);
83 if ((rc == 0) &&
84 (req.get_ret() == 0) &&
85 (req.matched())) {
86 fhr = lookup_fh(parent, path,
87 (flags & RGWFileHandle::FLAG_LOCKED)|
88 RGWFileHandle::FLAG_CREATE|
89 RGWFileHandle::FLAG_BUCKET);
90 if (get<0>(fhr)) {
91 RGWFileHandle* rgw_fh = get<0>(fhr);
92 if (! (flags & RGWFileHandle::FLAG_LOCKED)) {
93 rgw_fh->mtx.lock();
94 }
95 rgw_fh->set_times(req.get_ctime());
96 /* restore attributes */
97 auto ux_key = req.get_attr(RGW_ATTR_UNIX_KEY1);
98 auto ux_attrs = req.get_attr(RGW_ATTR_UNIX1);
99 if (ux_key && ux_attrs) {
100 DecodeAttrsResult dar = rgw_fh->decode_attrs(ux_key, ux_attrs);
101 if (get<0>(dar) || get<1>(dar)) {
102 update_fh(rgw_fh);
103 }
104 }
105 if (! (flags & RGWFileHandle::FLAG_LOCKED)) {
106 rgw_fh->mtx.unlock();
107 }
108 }
109 }
110 return fhr;
111 }
112
113 LookupFHResult RGWLibFS::fake_leaf(RGWFileHandle* parent,
114 const char *path,
115 enum rgw_fh_type type,
116 struct stat *st, uint32_t st_mask,
117 uint32_t flags)
118 {
119 /* synthesize a minimal handle from parent, path, type, and st */
120 using std::get;
121
122 flags |= RGWFileHandle::FLAG_CREATE;
123
124 switch (type) {
125 case RGW_FS_TYPE_DIRECTORY:
126 flags |= RGWFileHandle::FLAG_DIRECTORY;
127 break;
128 default:
129 /* file */
130 break;
131 };
132
133 LookupFHResult fhr = lookup_fh(parent, path, flags);
134 if (get<0>(fhr)) {
135 RGWFileHandle* rgw_fh = get<0>(fhr);
136 if (st) {
137 lock_guard guard(rgw_fh->mtx);
138 if (st_mask & RGW_SETATTR_SIZE) {
139 rgw_fh->set_size(st->st_size);
140 }
141 if (st_mask & RGW_SETATTR_MTIME) {
142 rgw_fh->set_times(st->st_mtim);
143 }
144 } /* st */
145 } /* rgw_fh */
146 return fhr;
147 } /* RGWLibFS::fake_leaf */
148
149 LookupFHResult RGWLibFS::stat_leaf(RGWFileHandle* parent,
150 const char *path,
151 enum rgw_fh_type type,
152 uint32_t flags)
153 {
154 /* find either-of <object_name>, <object_name/>, only one of
155 * which should exist; atomicity? */
156 using std::get;
157
158 LookupFHResult fhr{nullptr, 0};
159
160 /* XXX the need for two round-trip operations to identify file or
161 * directory leaf objects is unecessary--the current proposed
162 * mechanism to avoid this is to store leaf object names with an
163 * object locator w/o trailing slash */
164
165 std::string obj_path = parent->format_child_name(path, false);
166 rgw::sal::RGWRadosUser ruser(rgwlib.get_store(), user);
167
168 for (auto ix : { 0, 1, 2 }) {
169 switch (ix) {
170 case 0:
171 {
172 /* type hint */
173 if (type == RGW_FS_TYPE_DIRECTORY)
174 continue;
175
176 RGWStatObjRequest req(cct, &ruser,
177 parent->bucket_name(), obj_path,
178 RGWStatObjRequest::FLAG_NONE);
179 int rc = rgwlib.get_fe()->execute_req(&req);
180 if ((rc == 0) &&
181 (req.get_ret() == 0)) {
182 fhr = lookup_fh(parent, path, RGWFileHandle::FLAG_CREATE);
183 if (get<0>(fhr)) {
184 RGWFileHandle* rgw_fh = get<0>(fhr);
185 lock_guard guard(rgw_fh->mtx);
186 rgw_fh->set_size(req.get_size());
187 rgw_fh->set_times(req.get_mtime());
188 /* restore attributes */
189 auto ux_key = req.get_attr(RGW_ATTR_UNIX_KEY1);
190 auto ux_attrs = req.get_attr(RGW_ATTR_UNIX1);
191 rgw_fh->set_etag(*(req.get_attr(RGW_ATTR_ETAG)));
192 rgw_fh->set_acls(*(req.get_attr(RGW_ATTR_ACL)));
193 if (ux_key && ux_attrs) {
194 DecodeAttrsResult dar = rgw_fh->decode_attrs(ux_key, ux_attrs);
195 if (get<0>(dar) || get<1>(dar)) {
196 update_fh(rgw_fh);
197 }
198 }
199 }
200 goto done;
201 }
202 }
203 break;
204 case 1:
205 {
206 /* try dir form */
207 /* type hint */
208 if (type == RGW_FS_TYPE_FILE)
209 continue;
210
211 obj_path += "/";
212 RGWStatObjRequest req(cct, &ruser,
213 parent->bucket_name(), obj_path,
214 RGWStatObjRequest::FLAG_NONE);
215 int rc = rgwlib.get_fe()->execute_req(&req);
216 if ((rc == 0) &&
217 (req.get_ret() == 0)) {
218 fhr = lookup_fh(parent, path, RGWFileHandle::FLAG_DIRECTORY);
219 if (get<0>(fhr)) {
220 RGWFileHandle* rgw_fh = get<0>(fhr);
221 lock_guard guard(rgw_fh->mtx);
222 rgw_fh->set_size(req.get_size());
223 rgw_fh->set_times(req.get_mtime());
224 /* restore attributes */
225 auto ux_key = req.get_attr(RGW_ATTR_UNIX_KEY1);
226 auto ux_attrs = req.get_attr(RGW_ATTR_UNIX1);
227 rgw_fh->set_etag(*(req.get_attr(RGW_ATTR_ETAG)));
228 rgw_fh->set_acls(*(req.get_attr(RGW_ATTR_ACL)));
229 if (ux_key && ux_attrs) {
230 DecodeAttrsResult dar = rgw_fh->decode_attrs(ux_key, ux_attrs);
231 if (get<0>(dar) || get<1>(dar)) {
232 update_fh(rgw_fh);
233 }
234 }
235 }
236 goto done;
237 }
238 }
239 break;
240 case 2:
241 {
242 std::string object_name{path};
243 RGWStatLeafRequest req(cct, &ruser, parent, object_name);
244 int rc = rgwlib.get_fe()->execute_req(&req);
245 if ((rc == 0) &&
246 (req.get_ret() == 0)) {
247 if (req.matched) {
248 /* we need rgw object's key name equal to file name, if
249 * not return NULL */
250 if ((flags & RGWFileHandle::FLAG_EXACT_MATCH) &&
251 !req.exact_matched) {
252 lsubdout(get_context(), rgw, 15)
253 << __func__
254 << ": stat leaf not exact match file name = "
255 << path << dendl;
256 goto done;
257 }
258 fhr = lookup_fh(parent, path,
259 RGWFileHandle::FLAG_CREATE|
260 ((req.is_dir) ?
261 RGWFileHandle::FLAG_DIRECTORY :
262 RGWFileHandle::FLAG_NONE));
263 /* XXX we don't have an object--in general, there need not
264 * be one (just a path segment in some other object). In
265 * actual leaf an object exists, but we'd need another round
266 * trip to get attrs */
267 if (get<0>(fhr)) {
268 /* for now use the parent object's mtime */
269 RGWFileHandle* rgw_fh = get<0>(fhr);
270 lock_guard guard(rgw_fh->mtx);
271 rgw_fh->set_mtime(parent->get_mtime());
272 }
273 }
274 }
275 }
276 break;
277 default:
278 /* not reached */
279 break;
280 }
281 }
282 done:
283 return fhr;
284 } /* RGWLibFS::stat_leaf */
285
286 int RGWLibFS::read(RGWFileHandle* rgw_fh, uint64_t offset, size_t length,
287 size_t* bytes_read, void* buffer, uint32_t flags)
288 {
289 if (! rgw_fh->is_file())
290 return -EINVAL;
291
292 if (rgw_fh->deleted())
293 return -ESTALE;
294
295 rgw::sal::RGWRadosUser ruser(rgwlib.get_store(), user);
296 RGWReadRequest req(get_context(), &ruser, rgw_fh, offset, length,
297 buffer);
298
299 int rc = rgwlib.get_fe()->execute_req(&req);
300 if ((rc == 0) &&
301 (req.get_ret() == 0)) {
302 lock_guard guard(rgw_fh->mtx);
303 rgw_fh->set_atime(real_clock::to_timespec(real_clock::now()));
304 *bytes_read = req.nread;
305 }
306
307 return rc;
308 }
309
310 int RGWLibFS::readlink(RGWFileHandle* rgw_fh, uint64_t offset, size_t length,
311 size_t* bytes_read, void* buffer, uint32_t flags)
312 {
313 if (! rgw_fh->is_link())
314 return -EINVAL;
315
316 if (rgw_fh->deleted())
317 return -ESTALE;
318
319 rgw::sal::RGWRadosUser ruser(rgwlib.get_store(), user);
320 RGWReadRequest req(get_context(), &ruser, rgw_fh, offset, length,
321 buffer);
322
323 int rc = rgwlib.get_fe()->execute_req(&req);
324 if ((rc == 0) &&
325 (req.get_ret() == 0)) {
326 lock_guard(rgw_fh->mtx);
327 rgw_fh->set_atime(real_clock::to_timespec(real_clock::now()));
328 *bytes_read = req.nread;
329 }
330
331 return rc;
332 }
333
334 int RGWLibFS::unlink(RGWFileHandle* rgw_fh, const char* name, uint32_t flags)
335 {
336 int rc = 0;
337 BucketStats bs;
338 RGWFileHandle* parent = nullptr;
339 RGWFileHandle* bkt_fh = nullptr;
340
341 if (unlikely(flags & RGWFileHandle::FLAG_UNLINK_THIS)) {
342 /* LOCKED */
343 parent = rgw_fh->get_parent();
344 } else {
345 /* atomicity */
346 parent = rgw_fh;
347 LookupFHResult fhr = lookup_fh(parent, name, RGWFileHandle::FLAG_LOCK);
348 rgw_fh = get<0>(fhr);
349 /* LOCKED */
350 }
351
352 rgw::sal::RGWRadosUser ruser(rgwlib.get_store(), user);
353 if (parent->is_root()) {
354 /* a bucket may have an object storing Unix attributes, check
355 * for and delete it */
356 LookupFHResult fhr;
357 fhr = stat_bucket(parent, name, bs, (rgw_fh) ?
358 RGWFileHandle::FLAG_LOCKED :
359 RGWFileHandle::FLAG_NONE);
360 bkt_fh = get<0>(fhr);
361 if (unlikely(! bkt_fh)) {
362 /* implies !rgw_fh, so also !LOCKED */
363 return -ENOENT;
364 }
365
366 if (bs.num_entries > 1) {
367 unref(bkt_fh); /* return stat_bucket ref */
368 if (likely(!! rgw_fh)) { /* return lock and ref from
369 * lookup_fh (or caller in the
370 * special case of
371 * RGWFileHandle::FLAG_UNLINK_THIS) */
372 rgw_fh->mtx.unlock();
373 unref(rgw_fh);
374 }
375 return -ENOTEMPTY;
376 } else {
377 /* delete object w/key "<bucket>/" (uxattrs), if any */
378 string oname{"/"};
379 RGWDeleteObjRequest req(cct, &ruser, bkt_fh->bucket_name(), oname);
380 rc = rgwlib.get_fe()->execute_req(&req);
381 /* don't care if ENOENT */
382 unref(bkt_fh);
383 }
384
385 string bname{name};
386 RGWDeleteBucketRequest req(cct, &ruser, bname);
387 rc = rgwlib.get_fe()->execute_req(&req);
388 if (! rc) {
389 rc = req.get_ret();
390 }
391 } else {
392 /*
393 * leaf object
394 */
395 if (! rgw_fh) {
396 /* XXX for now, peform a hard lookup to deduce the type of
397 * object to be deleted ("foo" vs. "foo/")--also, ensures
398 * atomicity at this endpoint */
399 struct rgw_file_handle *fh;
400 rc = rgw_lookup(get_fs(), parent->get_fh(), name, &fh,
401 nullptr /* st */, 0 /* mask */,
402 RGW_LOOKUP_FLAG_NONE);
403 if (!! rc)
404 return rc;
405
406 /* rgw_fh ref+ */
407 rgw_fh = get_rgwfh(fh);
408 rgw_fh->mtx.lock(); /* LOCKED */
409 }
410
411 std::string oname = rgw_fh->relative_object_name();
412 if (rgw_fh->is_dir()) {
413 /* for the duration of our cache timer, trust positive
414 * child cache */
415 if (rgw_fh->has_children()) {
416 rgw_fh->mtx.unlock();
417 unref(rgw_fh);
418 return(-ENOTEMPTY);
419 }
420 oname += "/";
421 }
422 RGWDeleteObjRequest req(cct, &ruser, parent->bucket_name(),
423 oname);
424 rc = rgwlib.get_fe()->execute_req(&req);
425 if (! rc) {
426 rc = req.get_ret();
427 }
428 }
429
430 /* ENOENT when raced with other s3 gateway */
431 if (! rc || rc == -ENOENT) {
432 rgw_fh->flags |= RGWFileHandle::FLAG_DELETED;
433 fh_cache.remove(rgw_fh->fh.fh_hk.object, rgw_fh,
434 RGWFileHandle::FHCache::FLAG_LOCK);
435 }
436
437 if (! rc) {
438 real_time t = real_clock::now();
439 parent->set_mtime(real_clock::to_timespec(t));
440 parent->set_ctime(real_clock::to_timespec(t));
441 }
442
443 rgw_fh->mtx.unlock();
444 unref(rgw_fh);
445
446 return rc;
447 } /* RGWLibFS::unlink */
448
449 int RGWLibFS::rename(RGWFileHandle* src_fh, RGWFileHandle* dst_fh,
450 const char *_src_name, const char *_dst_name)
451
452 {
453 /* XXX initial implementation: try-copy, and delete if copy
454 * succeeds */
455 int rc = -EINVAL;
456 rgw::sal::RGWRadosUser ruser(rgwlib.get_store(), user);
457
458 real_time t;
459
460 std::string src_name{_src_name};
461 std::string dst_name{_dst_name};
462
463 /* atomicity */
464 LookupFHResult fhr = lookup_fh(src_fh, _src_name, RGWFileHandle::FLAG_LOCK);
465 RGWFileHandle* rgw_fh = get<0>(fhr);
466
467 /* should not happen */
468 if (! rgw_fh) {
469 ldout(get_context(), 0) << __func__
470 << " BUG no such src renaming path="
471 << src_name
472 << dendl;
473 goto out;
474 }
475
476 /* forbid renaming of directories (unreasonable at scale) */
477 if (rgw_fh->is_dir()) {
478 ldout(get_context(), 12) << __func__
479 << " rejecting attempt to rename directory path="
480 << rgw_fh->full_object_name()
481 << dendl;
482 rc = -EPERM;
483 goto unlock;
484 }
485
486 /* forbid renaming open files (violates intent, for now) */
487 if (rgw_fh->is_open()) {
488 ldout(get_context(), 12) << __func__
489 << " rejecting attempt to rename open file path="
490 << rgw_fh->full_object_name()
491 << dendl;
492 rc = -EPERM;
493 goto unlock;
494 }
495
496 t = real_clock::now();
497
498 for (int ix : {0, 1}) {
499 switch (ix) {
500 case 0:
501 {
502 RGWCopyObjRequest req(cct, &ruser, src_fh, dst_fh, src_name,
503 dst_name);
504 int rc = rgwlib.get_fe()->execute_req(&req);
505 if ((rc != 0) ||
506 ((rc = req.get_ret()) != 0)) {
507 ldout(get_context(), 1)
508 << __func__
509 << " rename step 0 failed src="
510 << src_fh->full_object_name() << " " << src_name
511 << " dst=" << dst_fh->full_object_name()
512 << " " << dst_name
513 << "rc " << rc
514 << dendl;
515 goto unlock;
516 }
517 ldout(get_context(), 12)
518 << __func__
519 << " rename step 0 success src="
520 << src_fh->full_object_name() << " " << src_name
521 << " dst=" << dst_fh->full_object_name()
522 << " " << dst_name
523 << " rc " << rc
524 << dendl;
525 /* update dst change id */
526 dst_fh->set_times(t);
527 }
528 break;
529 case 1:
530 {
531 rc = this->unlink(rgw_fh /* LOCKED */, _src_name,
532 RGWFileHandle::FLAG_UNLINK_THIS);
533 /* !LOCKED, -ref */
534 if (! rc) {
535 ldout(get_context(), 12)
536 << __func__
537 << " rename step 1 success src="
538 << src_fh->full_object_name() << " " << src_name
539 << " dst=" << dst_fh->full_object_name()
540 << " " << dst_name
541 << " rc " << rc
542 << dendl;
543 /* update src change id */
544 src_fh->set_times(t);
545 } else {
546 ldout(get_context(), 1)
547 << __func__
548 << " rename step 1 failed src="
549 << src_fh->full_object_name() << " " << src_name
550 << " dst=" << dst_fh->full_object_name()
551 << " " << dst_name
552 << " rc " << rc
553 << dendl;
554 }
555 }
556 goto out;
557 default:
558 ceph_abort();
559 } /* switch */
560 } /* ix */
561 unlock:
562 rgw_fh->mtx.unlock(); /* !LOCKED */
563 unref(rgw_fh); /* -ref */
564
565 out:
566 return rc;
567 } /* RGWLibFS::rename */
568
569 MkObjResult RGWLibFS::mkdir(RGWFileHandle* parent, const char *name,
570 struct stat *st, uint32_t mask, uint32_t flags)
571 {
572 int rc, rc2;
573 rgw_file_handle *lfh;
574 rgw::sal::RGWRadosUser ruser(rgwlib.get_store(), user);
575
576 rc = rgw_lookup(get_fs(), parent->get_fh(), name, &lfh,
577 nullptr /* st */, 0 /* mask */,
578 RGW_LOOKUP_FLAG_NONE);
579 if (! rc) {
580 /* conflict! */
581 rc = rgw_fh_rele(get_fs(), lfh, RGW_FH_RELE_FLAG_NONE);
582 return MkObjResult{nullptr, -EEXIST};
583 }
584
585 MkObjResult mkr{nullptr, -EINVAL};
586 LookupFHResult fhr;
587 RGWFileHandle* rgw_fh = nullptr;
588 buffer::list ux_key, ux_attrs;
589
590 fhr = lookup_fh(parent, name,
591 RGWFileHandle::FLAG_CREATE|
592 RGWFileHandle::FLAG_DIRECTORY|
593 RGWFileHandle::FLAG_LOCK);
594 rgw_fh = get<0>(fhr);
595 if (rgw_fh) {
596 rgw_fh->create_stat(st, mask);
597 rgw_fh->set_times(real_clock::now());
598 /* save attrs */
599 rgw_fh->encode_attrs(ux_key, ux_attrs);
600 if (st)
601 rgw_fh->stat(st, RGWFileHandle::FLAG_LOCKED);
602 get<0>(mkr) = rgw_fh;
603 } else {
604 get<1>(mkr) = -EIO;
605 return mkr;
606 }
607
608 if (parent->is_root()) {
609 /* bucket */
610 string bname{name};
611 /* enforce S3 name restrictions */
612 rc = valid_fs_bucket_name(bname);
613 if (rc != 0) {
614 rgw_fh->flags |= RGWFileHandle::FLAG_DELETED;
615 fh_cache.remove(rgw_fh->fh.fh_hk.object, rgw_fh,
616 RGWFileHandle::FHCache::FLAG_LOCK);
617 rgw_fh->mtx.unlock();
618 unref(rgw_fh);
619 get<0>(mkr) = nullptr;
620 get<1>(mkr) = rc;
621 return mkr;
622 }
623
624 RGWCreateBucketRequest req(get_context(), &ruser, bname);
625
626 /* save attrs */
627 req.emplace_attr(RGW_ATTR_UNIX_KEY1, std::move(ux_key));
628 req.emplace_attr(RGW_ATTR_UNIX1, std::move(ux_attrs));
629
630 rc = rgwlib.get_fe()->execute_req(&req);
631 rc2 = req.get_ret();
632 } else {
633 /* create an object representing the directory */
634 buffer::list bl;
635 string dir_name = parent->format_child_name(name, true);
636
637 /* need valid S3 name (characters, length <= 1024, etc) */
638 rc = valid_fs_object_name(dir_name);
639 if (rc != 0) {
640 rgw_fh->flags |= RGWFileHandle::FLAG_DELETED;
641 fh_cache.remove(rgw_fh->fh.fh_hk.object, rgw_fh,
642 RGWFileHandle::FHCache::FLAG_LOCK);
643 rgw_fh->mtx.unlock();
644 unref(rgw_fh);
645 get<0>(mkr) = nullptr;
646 get<1>(mkr) = rc;
647 return mkr;
648 }
649
650 RGWPutObjRequest req(get_context(), &ruser, parent->bucket_name(),
651 dir_name, bl);
652
653 /* save attrs */
654 req.emplace_attr(RGW_ATTR_UNIX_KEY1, std::move(ux_key));
655 req.emplace_attr(RGW_ATTR_UNIX1, std::move(ux_attrs));
656
657 rc = rgwlib.get_fe()->execute_req(&req);
658 rc2 = req.get_ret();
659 }
660
661 if (! ((rc == 0) &&
662 (rc2 == 0))) {
663 /* op failed */
664 rgw_fh->flags |= RGWFileHandle::FLAG_DELETED;
665 rgw_fh->mtx.unlock(); /* !LOCKED */
666 unref(rgw_fh);
667 get<0>(mkr) = nullptr;
668 /* fixup rc */
669 if (!rc)
670 rc = rc2;
671 } else {
672 real_time t = real_clock::now();
673 parent->set_mtime(real_clock::to_timespec(t));
674 parent->set_ctime(real_clock::to_timespec(t));
675 rgw_fh->mtx.unlock(); /* !LOCKED */
676 }
677
678 get<1>(mkr) = rc;
679
680 return mkr;
681 } /* RGWLibFS::mkdir */
682
683 MkObjResult RGWLibFS::create(RGWFileHandle* parent, const char *name,
684 struct stat *st, uint32_t mask, uint32_t flags)
685 {
686 int rc, rc2;
687
688 using std::get;
689
690 rgw_file_handle *lfh;
691 rc = rgw_lookup(get_fs(), parent->get_fh(), name, &lfh,
692 nullptr /* st */, 0 /* mask */,
693 RGW_LOOKUP_FLAG_NONE);
694 if (! rc) {
695 /* conflict! */
696 rc = rgw_fh_rele(get_fs(), lfh, RGW_FH_RELE_FLAG_NONE);
697 return MkObjResult{nullptr, -EEXIST};
698 }
699
700 /* expand and check name */
701 std::string obj_name = parent->format_child_name(name, false);
702 rc = valid_fs_object_name(obj_name);
703 if (rc != 0) {
704 return MkObjResult{nullptr, rc};
705 }
706
707 /* create it */
708 buffer::list bl;
709 rgw::sal::RGWRadosUser ruser(rgwlib.get_store(), user);
710 RGWPutObjRequest req(cct, &ruser, parent->bucket_name(), obj_name, bl);
711 MkObjResult mkr{nullptr, -EINVAL};
712
713 rc = rgwlib.get_fe()->execute_req(&req);
714 rc2 = req.get_ret();
715
716 if ((rc == 0) &&
717 (rc2 == 0)) {
718 /* XXX atomicity */
719 LookupFHResult fhr = lookup_fh(parent, name, RGWFileHandle::FLAG_CREATE |
720 RGWFileHandle::FLAG_LOCK);
721 RGWFileHandle* rgw_fh = get<0>(fhr);
722 if (rgw_fh) {
723 if (get<1>(fhr) & RGWFileHandle::FLAG_CREATE) {
724 /* fill in stat data */
725 real_time t = real_clock::now();
726 rgw_fh->create_stat(st, mask);
727 rgw_fh->set_times(t);
728
729 parent->set_mtime(real_clock::to_timespec(t));
730 parent->set_ctime(real_clock::to_timespec(t));
731 }
732 if (st)
733 (void) rgw_fh->stat(st, RGWFileHandle::FLAG_LOCKED);
734
735 rgw_fh->set_etag(*(req.get_attr(RGW_ATTR_ETAG)));
736 rgw_fh->set_acls(*(req.get_attr(RGW_ATTR_ACL)));
737
738 get<0>(mkr) = rgw_fh;
739 rgw_fh->mtx.unlock();
740 } else
741 rc = -EIO;
742 }
743
744 get<1>(mkr) = rc;
745
746 /* case like : quota exceed will be considered as fail too*/
747 if(rc2 < 0)
748 get<1>(mkr) = rc2;
749
750 return mkr;
751 } /* RGWLibFS::create */
752
753 MkObjResult RGWLibFS::symlink(RGWFileHandle* parent, const char *name,
754 const char* link_path, struct stat *st, uint32_t mask, uint32_t flags)
755 {
756 int rc, rc2;
757
758 using std::get;
759
760 rgw_file_handle *lfh;
761 rc = rgw_lookup(get_fs(), parent->get_fh(), name, &lfh,
762 nullptr /* st */, 0 /* mask */,
763 RGW_LOOKUP_FLAG_NONE);
764 if (! rc) {
765 /* conflict! */
766 rc = rgw_fh_rele(get_fs(), lfh, RGW_FH_RELE_FLAG_NONE);
767 return MkObjResult{nullptr, -EEXIST};
768 }
769
770 MkObjResult mkr{nullptr, -EINVAL};
771 LookupFHResult fhr;
772 RGWFileHandle* rgw_fh = nullptr;
773 buffer::list ux_key, ux_attrs;
774
775 fhr = lookup_fh(parent, name,
776 RGWFileHandle::FLAG_CREATE|
777 RGWFileHandle::FLAG_SYMBOLIC_LINK|
778 RGWFileHandle::FLAG_LOCK);
779 rgw_fh = get<0>(fhr);
780 if (rgw_fh) {
781 rgw_fh->create_stat(st, mask);
782 rgw_fh->set_times(real_clock::now());
783 /* save attrs */
784 rgw_fh->encode_attrs(ux_key, ux_attrs);
785 if (st)
786 rgw_fh->stat(st);
787 get<0>(mkr) = rgw_fh;
788 } else {
789 get<1>(mkr) = -EIO;
790 return mkr;
791 }
792
793 /* need valid S3 name (characters, length <= 1024, etc) */
794 rc = valid_fs_object_name(name);
795 if (rc != 0) {
796 rgw_fh->flags |= RGWFileHandle::FLAG_DELETED;
797 fh_cache.remove(rgw_fh->fh.fh_hk.object, rgw_fh,
798 RGWFileHandle::FHCache::FLAG_LOCK);
799 rgw_fh->mtx.unlock();
800 unref(rgw_fh);
801 get<0>(mkr) = nullptr;
802 get<1>(mkr) = rc;
803 return mkr;
804 }
805
806 string obj_name = std::string(name);
807 /* create an object representing the directory */
808 buffer::list bl;
809
810 /* XXXX */
811 #if 0
812 bl.push_back(
813 buffer::create_static(len, static_cast<char*>(buffer)));
814 #else
815
816 bl.push_back(
817 buffer::copy(link_path, strlen(link_path)));
818 #endif
819
820 rgw::sal::RGWRadosUser ruser(rgwlib.get_store(), user);
821 RGWPutObjRequest req(get_context(), &ruser, parent->bucket_name(),
822 obj_name, bl);
823
824 /* save attrs */
825 req.emplace_attr(RGW_ATTR_UNIX_KEY1, std::move(ux_key));
826 req.emplace_attr(RGW_ATTR_UNIX1, std::move(ux_attrs));
827
828 rc = rgwlib.get_fe()->execute_req(&req);
829 rc2 = req.get_ret();
830 if (! ((rc == 0) &&
831 (rc2 == 0))) {
832 /* op failed */
833 rgw_fh->flags |= RGWFileHandle::FLAG_DELETED;
834 rgw_fh->mtx.unlock(); /* !LOCKED */
835 unref(rgw_fh);
836 get<0>(mkr) = nullptr;
837 /* fixup rc */
838 if (!rc)
839 rc = rc2;
840 } else {
841 real_time t = real_clock::now();
842 parent->set_mtime(real_clock::to_timespec(t));
843 parent->set_ctime(real_clock::to_timespec(t));
844 rgw_fh->mtx.unlock(); /* !LOCKED */
845 }
846
847 get<1>(mkr) = rc;
848
849 return mkr;
850 } /* RGWLibFS::symlink */
851
852 int RGWLibFS::getattr(RGWFileHandle* rgw_fh, struct stat* st)
853 {
854 switch(rgw_fh->fh.fh_type) {
855 case RGW_FS_TYPE_FILE:
856 {
857 if (rgw_fh->deleted())
858 return -ESTALE;
859 }
860 break;
861 default:
862 break;
863 };
864 /* if rgw_fh is a directory, mtime will be advanced */
865 return rgw_fh->stat(st);
866 } /* RGWLibFS::getattr */
867
868 int RGWLibFS::setattr(RGWFileHandle* rgw_fh, struct stat* st, uint32_t mask,
869 uint32_t flags)
870 {
871 int rc, rc2;
872 buffer::list ux_key, ux_attrs;
873 buffer::list etag = rgw_fh->get_etag();
874 buffer::list acls = rgw_fh->get_acls();
875
876 lock_guard guard(rgw_fh->mtx);
877
878 switch(rgw_fh->fh.fh_type) {
879 case RGW_FS_TYPE_FILE:
880 {
881 if (rgw_fh->deleted())
882 return -ESTALE;
883 }
884 break;
885 default:
886 break;
887 };
888
889 string obj_name{rgw_fh->relative_object_name()};
890
891 if (rgw_fh->is_dir() &&
892 (likely(! rgw_fh->is_bucket()))) {
893 obj_name += "/";
894 }
895
896 rgw::sal::RGWRadosUser ruser(rgwlib.get_store(), user);
897 RGWSetAttrsRequest req(cct, &ruser, rgw_fh->bucket_name(), obj_name);
898
899 rgw_fh->create_stat(st, mask);
900 rgw_fh->encode_attrs(ux_key, ux_attrs);
901
902 /* save attrs */
903 req.emplace_attr(RGW_ATTR_UNIX_KEY1, std::move(ux_key));
904 req.emplace_attr(RGW_ATTR_UNIX1, std::move(ux_attrs));
905 req.emplace_attr(RGW_ATTR_ETAG, std::move(etag));
906 req.emplace_attr(RGW_ATTR_ACL, std::move(acls));
907
908 rc = rgwlib.get_fe()->execute_req(&req);
909 rc2 = req.get_ret();
910
911 if (rc == -ENOENT) {
912 /* special case: materialize placeholder dir */
913 buffer::list bl;
914 RGWPutObjRequest req(get_context(), &ruser, rgw_fh->bucket_name(),
915 obj_name, bl);
916
917 rgw_fh->encode_attrs(ux_key, ux_attrs); /* because std::moved */
918
919 /* save attrs */
920 req.emplace_attr(RGW_ATTR_UNIX_KEY1, std::move(ux_key));
921 req.emplace_attr(RGW_ATTR_UNIX1, std::move(ux_attrs));
922
923 rc = rgwlib.get_fe()->execute_req(&req);
924 rc2 = req.get_ret();
925 }
926
927 if ((rc != 0) || (rc2 != 0)) {
928 return -EIO;
929 }
930
931 rgw_fh->set_ctime(real_clock::to_timespec(real_clock::now()));
932
933 return 0;
934 } /* RGWLibFS::setattr */
935
936 /* called under rgw_fh->mtx held */
937 void RGWLibFS::update_fh(RGWFileHandle *rgw_fh)
938 {
939 int rc, rc2;
940 string obj_name{rgw_fh->relative_object_name()};
941 buffer::list ux_key, ux_attrs;
942
943 if (rgw_fh->is_dir() &&
944 (likely(! rgw_fh->is_bucket()))) {
945 obj_name += "/";
946 }
947
948 lsubdout(get_context(), rgw, 17)
949 << __func__
950 << " update old versioned fh : " << obj_name
951 << dendl;
952
953 rgw::sal::RGWRadosUser ruser(rgwlib.get_store(), user);
954 RGWSetAttrsRequest req(cct, &ruser, rgw_fh->bucket_name(), obj_name);
955
956 rgw_fh->encode_attrs(ux_key, ux_attrs);
957
958 req.emplace_attr(RGW_ATTR_UNIX_KEY1, std::move(ux_key));
959 req.emplace_attr(RGW_ATTR_UNIX1, std::move(ux_attrs));
960
961 rc = rgwlib.get_fe()->execute_req(&req);
962 rc2 = req.get_ret();
963
964 if ((rc != 0) || (rc2 != 0)) {
965 lsubdout(get_context(), rgw, 17)
966 << __func__
967 << " update fh failed : " << obj_name
968 << dendl;
969 }
970 } /* RGWLibFS::update_fh */
971
972 void RGWLibFS::close()
973 {
974 state.flags |= FLAG_CLOSED;
975
976 class ObjUnref
977 {
978 RGWLibFS* fs;
979 public:
980 explicit ObjUnref(RGWLibFS* _fs) : fs(_fs) {}
981 void operator()(RGWFileHandle* fh) const {
982 lsubdout(fs->get_context(), rgw, 5)
983 << __func__
984 << fh->name
985 << " before ObjUnref refs=" << fh->get_refcnt()
986 << dendl;
987 fs->unref(fh);
988 }
989 };
990
991 /* force cache drain, forces objects to evict */
992 fh_cache.drain(ObjUnref(this),
993 RGWFileHandle::FHCache::FLAG_LOCK);
994 rgwlib.get_fe()->get_process()->unregister_fs(this);
995 rele();
996 } /* RGWLibFS::close */
997
998 inline std::ostream& operator<<(std::ostream &os, fh_key const &fhk) {
999 os << "<fh_key: bucket=";
1000 os << fhk.fh_hk.bucket;
1001 os << "; object=";
1002 os << fhk.fh_hk.object;
1003 os << ">";
1004 return os;
1005 }
1006
/* Print a timespec as "<timespec: tv_sec=S; tv_nsec=N>". */
inline std::ostream& operator<<(std::ostream &os, struct timespec const &ts) {
  os << "<timespec: tv_sec=" << ts.tv_sec;
  os << "; tv_nsec=" << ts.tv_nsec;
  os << ">";
  return os;
}
1015
1016 std::ostream& operator<<(std::ostream &os, RGWLibFS::event const &ev) {
1017 os << "<event:";
1018 switch (ev.t) {
1019 case RGWLibFS::event::type::READDIR:
1020 os << "type=READDIR;";
1021 break;
1022 default:
1023 os << "type=UNKNOWN;";
1024 break;
1025 };
1026 os << "fid=" << ev.fhk.fh_hk.bucket << ":" << ev.fhk.fh_hk.object
1027 << ";ts=" << ev.ts << ">";
1028 return os;
1029 }
1030
1031 void RGWLibFS::gc()
1032 {
1033 using std::get;
1034 using directory = RGWFileHandle::directory;
1035
1036 /* dirent invalidate timeout--basically, the upper-bound on
1037 * inconsistency with the S3 namespace */
1038 auto expire_s
1039 = get_context()->_conf->rgw_nfs_namespace_expire_secs;
1040
1041 /* max events to gc in one cycle */
1042 uint32_t max_ev = get_context()->_conf->rgw_nfs_max_gc;
1043
1044 struct timespec now, expire_ts;
1045 event_vector ve;
1046 bool stop = false;
1047 std::deque<event> &events = state.events;
1048
1049 do {
1050 (void) clock_gettime(CLOCK_MONOTONIC_COARSE, &now);
1051 lsubdout(get_context(), rgw, 15)
1052 << "GC: top of expire loop"
1053 << " now=" << now
1054 << " expire_s=" << expire_s
1055 << dendl;
1056 {
1057 lock_guard guard(state.mtx); /* LOCKED */
1058 lsubdout(get_context(), rgw, 15)
1059 << "GC: processing"
1060 << " count=" << events.size()
1061 << " events"
1062 << dendl;
1063 /* just return if no events */
1064 if (events.empty()) {
1065 return;
1066 }
1067 uint32_t _max_ev =
1068 (events.size() < 500) ? max_ev : (events.size() / 4);
1069 for (uint32_t ix = 0; (ix < _max_ev) && (events.size() > 0); ++ix) {
1070 event& ev = events.front();
1071 expire_ts = ev.ts;
1072 expire_ts.tv_sec += expire_s;
1073 if (expire_ts > now) {
1074 stop = true;
1075 break;
1076 }
1077 ve.push_back(ev);
1078 events.pop_front();
1079 }
1080 } /* anon */
1081 /* !LOCKED */
1082 for (auto& ev : ve) {
1083 lsubdout(get_context(), rgw, 15)
1084 << "try-expire ev: " << ev << dendl;
1085 if (likely(ev.t == event::type::READDIR)) {
1086 RGWFileHandle* rgw_fh = lookup_handle(ev.fhk.fh_hk);
1087 lsubdout(get_context(), rgw, 15)
1088 << "ev rgw_fh: " << rgw_fh << dendl;
1089 if (rgw_fh) {
1090 RGWFileHandle::directory* d;
1091 if (unlikely(! rgw_fh->is_dir())) {
1092 lsubdout(get_context(), rgw, 0)
1093 << __func__
1094 << " BUG non-directory found with READDIR event "
1095 << "(" << rgw_fh->bucket_name() << ","
1096 << rgw_fh->object_name() << ")"
1097 << dendl;
1098 goto rele;
1099 }
1100 /* maybe clear state */
1101 d = get<directory>(&rgw_fh->variant_type);
1102 if (d) {
1103 struct timespec ev_ts = ev.ts;
1104 lock_guard guard(rgw_fh->mtx);
1105 struct timespec d_last_readdir = d->last_readdir;
1106 if (unlikely(ev_ts < d_last_readdir)) {
1107 /* readdir cycle in progress, don't invalidate */
1108 lsubdout(get_context(), rgw, 15)
1109 << "GC: delay expiration for "
1110 << rgw_fh->object_name()
1111 << " ev.ts=" << ev_ts
1112 << " last_readdir=" << d_last_readdir
1113 << dendl;
1114 continue;
1115 } else {
1116 lsubdout(get_context(), rgw, 15)
1117 << "GC: expiring "
1118 << rgw_fh->object_name()
1119 << dendl;
1120 rgw_fh->clear_state();
1121 rgw_fh->invalidate();
1122 }
1123 }
1124 rele:
1125 unref(rgw_fh);
1126 } /* rgw_fh */
1127 } /* event::type::READDIR */
1128 } /* ev */
1129 ve.clear();
1130 } while (! (stop || shutdown));
1131 } /* RGWLibFS::gc */
1132
1133 std::ostream& operator<<(std::ostream &os,
1134 RGWFileHandle const &rgw_fh)
1135 {
1136 const auto& fhk = rgw_fh.get_key();
1137 const auto& fh = const_cast<RGWFileHandle&>(rgw_fh).get_fh();
1138 os << "<RGWFileHandle:";
1139 os << "addr=" << &rgw_fh << ";";
1140 switch (fh->fh_type) {
1141 case RGW_FS_TYPE_DIRECTORY:
1142 os << "type=DIRECTORY;";
1143 break;
1144 case RGW_FS_TYPE_FILE:
1145 os << "type=FILE;";
1146 break;
1147 default:
1148 os << "type=UNKNOWN;";
1149 break;
1150 };
1151 os << "fid=" << fhk.fh_hk.bucket << ":" << fhk.fh_hk.object << ";";
1152 os << "name=" << rgw_fh.object_name() << ";";
1153 os << "refcnt=" << rgw_fh.get_refcnt() << ";";
1154 os << ">";
1155 return os;
1156 }
1157
1158 RGWFileHandle::~RGWFileHandle() {
1159 /* !recycle case, handle may STILL be in handle table, BUT
1160 * the partition lock is not held in this path */
1161 if (fh_hook.is_linked()) {
1162 fs->fh_cache.remove(fh.fh_hk.object, this, FHCache::FLAG_LOCK);
1163 }
1164 /* cond-unref parent */
1165 if (parent && (! parent->is_mount())) {
1166 /* safe because if parent->unref causes its deletion,
1167 * there are a) by refcnt, no other objects/paths pointing
1168 * to it and b) by the semantics of valid iteration of
1169 * fh_lru (observed, e.g., by cohort_lru<T,...>::drain())
1170 * no unsafe iterators reaching it either--n.b., this constraint
1171 * is binding oncode which may in future attempt to e.g.,
1172 * cause the eviction of objects in LRU order */
1173 (void) get_fs()->unref(parent);
1174 }
1175 }
1176
1177 fh_key RGWFileHandle::make_fhk(const std::string& name)
1178 {
1179 std::string tenant = get_fs()->get_user()->user_id.to_str();
1180 if (depth == 0) {
1181 /* S3 bucket -- assert mount-at-bucket case reaches here */
1182 return fh_key(name, name, tenant);
1183 } else {
1184 std::string key_name = make_key_name(name.c_str());
1185 return fh_key(fhk.fh_hk.bucket, key_name.c_str(), tenant);
1186 }
1187 }
1188
1189 void RGWFileHandle::encode_attrs(ceph::buffer::list& ux_key1,
1190 ceph::buffer::list& ux_attrs1)
1191 {
1192 using ceph::encode;
1193 fh_key fhk(this->fh.fh_hk);
1194 encode(fhk, ux_key1);
1195 encode(*this, ux_attrs1);
1196 } /* RGWFileHandle::encode_attrs */
1197
1198 DecodeAttrsResult RGWFileHandle::decode_attrs(const ceph::buffer::list* ux_key1,
1199 const ceph::buffer::list* ux_attrs1)
1200 {
1201 using ceph::decode;
1202 DecodeAttrsResult dar { false, false };
1203 fh_key fhk;
1204 auto bl_iter_key1 = ux_key1->cbegin();
1205 decode(fhk, bl_iter_key1);
1206 get<0>(dar) = true;
1207
1208 auto bl_iter_unix1 = ux_attrs1->cbegin();
1209 decode(*this, bl_iter_unix1);
1210 if (this->state.version < 2) {
1211 get<1>(dar) = true;
1212 }
1213
1214 return dar;
1215 } /* RGWFileHandle::decode_attrs */
1216
1217 bool RGWFileHandle::reclaim(const cohort::lru::ObjectFactory* newobj_fac) {
1218 lsubdout(fs->get_context(), rgw, 17)
1219 << __func__ << " " << *this
1220 << dendl;
1221 auto factory = dynamic_cast<const RGWFileHandle::Factory*>(newobj_fac);
1222 if (factory == nullptr) {
1223 return false;
1224 }
1225 /* make sure the reclaiming object is the same partiton with newobject factory,
1226 * then we can recycle the object, and replace with newobject */
1227 if (!fs->fh_cache.is_same_partition(factory->fhk.fh_hk.object, fh.fh_hk.object)) {
1228 return false;
1229 }
1230 /* in the non-delete case, handle may still be in handle table */
1231 if (fh_hook.is_linked()) {
1232 /* in this case, we are being called from a context which holds
1233 * the partition lock */
1234 fs->fh_cache.remove(fh.fh_hk.object, this, FHCache::FLAG_NONE);
1235 }
1236 return true;
1237 } /* RGWFileHandle::reclaim */
1238
1239 bool RGWFileHandle::has_children() const
1240 {
1241 rgw::sal::RGWRadosUser ruser(rgwlib.get_store(), *fs->get_user());
1242 if (unlikely(! is_dir()))
1243 return false;
1244
1245 RGWRMdirCheck req(fs->get_context(), &ruser, this);
1246 int rc = rgwlib.get_fe()->execute_req(&req);
1247 if (! rc) {
1248 return req.valid && req.has_children;
1249 }
1250
1251 return false;
1252 }
1253
1254 std::ostream& operator<<(std::ostream &os,
1255 RGWFileHandle::readdir_offset const &offset)
1256 {
1257 using boost::get;
1258 if (unlikely(!! get<uint64_t*>(&offset))) {
1259 uint64_t* ioff = get<uint64_t*>(offset);
1260 os << *ioff;
1261 }
1262 else
1263 os << get<const char*>(offset);
1264 return os;
1265 }
1266
1267 int RGWFileHandle::readdir(rgw_readdir_cb rcb, void *cb_arg,
1268 readdir_offset offset,
1269 bool *eof, uint32_t flags)
1270 {
1271 using event = RGWLibFS::event;
1272 using boost::get;
1273 int rc = 0;
1274 struct timespec now;
1275 CephContext* cct = fs->get_context();
1276 rgw::sal::RGWRadosUser ruser(rgwlib.get_store(), *fs->get_user());
1277
1278 lsubdout(cct, rgw, 10)
1279 << __func__ << " readdir called on "
1280 << object_name()
1281 << dendl;
1282
1283 directory* d = get<directory>(&variant_type);
1284 if (d) {
1285 (void) clock_gettime(CLOCK_MONOTONIC_COARSE, &now); /* !LOCKED */
1286 lock_guard guard(mtx);
1287 d->last_readdir = now;
1288 }
1289
1290 bool initial_off;
1291 char* mk{nullptr};
1292
1293 if (likely(!! get<const char*>(&offset))) {
1294 mk = const_cast<char*>(get<const char*>(offset));
1295 initial_off = !mk;
1296 } else {
1297 initial_off = (*get<uint64_t*>(offset) == 0);
1298 }
1299
1300 if (is_root()) {
1301 RGWListBucketsRequest req(cct, &ruser, this, rcb, cb_arg,
1302 offset);
1303 rc = rgwlib.get_fe()->execute_req(&req);
1304 if (! rc) {
1305 (void) clock_gettime(CLOCK_MONOTONIC_COARSE, &now); /* !LOCKED */
1306 lock_guard guard(mtx);
1307 state.atime = now;
1308 if (initial_off)
1309 set_nlink(2);
1310 inc_nlink(req.d_count);
1311 *eof = req.eof();
1312 }
1313 } else {
1314 RGWReaddirRequest req(cct, &ruser, this, rcb, cb_arg, offset);
1315 rc = rgwlib.get_fe()->execute_req(&req);
1316 if (! rc) {
1317 (void) clock_gettime(CLOCK_MONOTONIC_COARSE, &now); /* !LOCKED */
1318 lock_guard guard(mtx);
1319 state.atime = now;
1320 if (initial_off)
1321 set_nlink(2);
1322 inc_nlink(req.d_count);
1323 *eof = req.eof();
1324 }
1325 }
1326
1327 event ev(event::type::READDIR, get_key(), state.atime);
1328 lock_guard sguard(fs->state.mtx);
1329 fs->state.push_event(ev);
1330
1331 lsubdout(fs->get_context(), rgw, 15)
1332 << __func__
1333 << " final link count=" << state.nlink
1334 << dendl;
1335
1336 return rc;
1337 } /* RGWFileHandle::readdir */
1338
1339 int RGWFileHandle::write(uint64_t off, size_t len, size_t *bytes_written,
1340 void *buffer)
1341 {
1342 using std::get;
1343 using WriteCompletion = RGWLibFS::WriteCompletion;
1344 rgw::sal::RGWRadosUser ruser(rgwlib.get_store(), *fs->get_user());
1345
1346 lock_guard guard(mtx);
1347
1348 int rc = 0;
1349
1350 file* f = get<file>(&variant_type);
1351 if (! f)
1352 return -EISDIR;
1353
1354 if (deleted()) {
1355 lsubdout(fs->get_context(), rgw, 5)
1356 << __func__
1357 << " write attempted on deleted object "
1358 << this->object_name()
1359 << dendl;
1360 /* zap write transaction, if any */
1361 if (f->write_req) {
1362 delete f->write_req;
1363 f->write_req = nullptr;
1364 }
1365 return -ESTALE;
1366 }
1367
1368 if (! f->write_req) {
1369 /* guard--we do not support (e.g., COW-backed) partial writes */
1370 if (off != 0) {
1371 lsubdout(fs->get_context(), rgw, 5)
1372 << __func__
1373 << " " << object_name()
1374 << " non-0 initial write position " << off
1375 << " (mounting with -o sync required)"
1376 << dendl;
1377 return -EIO;
1378 }
1379
1380 /* start */
1381 std::string object_name = relative_object_name();
1382 f->write_req =
1383 new RGWWriteRequest(fs->get_context(), &ruser, this,
1384 bucket_name(), object_name);
1385 rc = rgwlib.get_fe()->start_req(f->write_req);
1386 if (rc < 0) {
1387 lsubdout(fs->get_context(), rgw, 5)
1388 << __func__
1389 << this->object_name()
1390 << " write start failed " << off
1391 << " (" << rc << ")"
1392 << dendl;
1393 /* zap failed write transaction */
1394 delete f->write_req;
1395 f->write_req = nullptr;
1396 return -EIO;
1397 } else {
1398 if (stateless_open()) {
1399 /* start write timer */
1400 f->write_req->timer_id =
1401 RGWLibFS::write_timer.add_event(
1402 std::chrono::seconds(RGWLibFS::write_completion_interval_s),
1403 WriteCompletion(*this));
1404 }
1405 }
1406 }
1407
1408 int overlap = 0;
1409 if ((static_cast<off_t>(off) < f->write_req->real_ofs) &&
1410 ((f->write_req->real_ofs - off) <= len)) {
1411 overlap = f->write_req->real_ofs - off;
1412 off = f->write_req->real_ofs;
1413 buffer = static_cast<char*>(buffer) + overlap;
1414 len -= overlap;
1415 }
1416
1417 buffer::list bl;
1418 /* XXXX */
1419 #if 0
1420 bl.push_back(
1421 buffer::create_static(len, static_cast<char*>(buffer)));
1422 #else
1423 bl.push_back(
1424 buffer::copy(static_cast<char*>(buffer), len));
1425 #endif
1426
1427 f->write_req->put_data(off, bl);
1428 rc = f->write_req->exec_continue();
1429
1430 if (rc == 0) {
1431 size_t min_size = off + len;
1432 if (min_size > get_size())
1433 set_size(min_size);
1434 if (stateless_open()) {
1435 /* bump write timer */
1436 RGWLibFS::write_timer.adjust_event(
1437 f->write_req->timer_id, std::chrono::seconds(10));
1438 }
1439 } else {
1440 /* continuation failed (e.g., non-contiguous write position) */
1441 lsubdout(fs->get_context(), rgw, 5)
1442 << __func__
1443 << object_name()
1444 << " failed write at position " << off
1445 << " (fails write transaction) "
1446 << dendl;
1447 /* zap failed write transaction */
1448 delete f->write_req;
1449 f->write_req = nullptr;
1450 rc = -EIO;
1451 }
1452
1453 *bytes_written = (rc == 0) ? (len + overlap) : 0;
1454 return rc;
1455 } /* RGWFileHandle::write */
1456
1457 int RGWFileHandle::write_finish(uint32_t flags)
1458 {
1459 unique_lock guard{mtx, std::defer_lock};
1460 int rc = 0;
1461
1462 if (! (flags & FLAG_LOCKED)) {
1463 guard.lock();
1464 }
1465
1466 file* f = get<file>(&variant_type);
1467 if (f && (f->write_req)) {
1468 lsubdout(fs->get_context(), rgw, 10)
1469 << __func__
1470 << " finishing write trans on " << object_name()
1471 << dendl;
1472 rc = rgwlib.get_fe()->finish_req(f->write_req);
1473 if (! rc) {
1474 rc = f->write_req->get_ret();
1475 }
1476 delete f->write_req;
1477 f->write_req = nullptr;
1478 }
1479
1480 return rc;
1481 } /* RGWFileHandle::write_finish */
1482
1483 int RGWFileHandle::close()
1484 {
1485 lock_guard guard(mtx);
1486
1487 int rc = write_finish(FLAG_LOCKED);
1488
1489 flags &= ~FLAG_OPEN;
1490 flags &= ~FLAG_STATELESS_OPEN;
1491
1492 return rc;
1493 } /* RGWFileHandle::close */
1494
1495 RGWFileHandle::file::~file()
1496 {
1497 delete write_req;
1498 }
1499
1500 void RGWFileHandle::clear_state()
1501 {
1502 directory* d = get<directory>(&variant_type);
1503 if (d) {
1504 state.nlink = 2;
1505 d->last_marker = rgw_obj_key{};
1506 }
1507 }
1508
1509 void RGWFileHandle::advance_mtime(uint32_t flags) {
1510 /* intended for use on directories, fast-forward mtime so as to
1511 * ensure a new, higher value for the change attribute */
1512 unique_lock uniq(mtx, std::defer_lock);
1513 if (likely(! (flags & RGWFileHandle::FLAG_LOCKED))) {
1514 uniq.lock();
1515 }
1516
1517 /* advance mtime only if stored mtime is older than the
1518 * configured namespace expiration */
1519 auto now = real_clock::now();
1520 auto cmptime = state.mtime;
1521 cmptime.tv_sec +=
1522 fs->get_context()->_conf->rgw_nfs_namespace_expire_secs;
1523 if (cmptime < real_clock::to_timespec(now)) {
1524 /* sets ctime as well as mtime, to avoid masking updates should
1525 * ctime inexplicably hold a higher value */
1526 set_times(now);
1527 }
1528 }
1529
1530 void RGWFileHandle::invalidate() {
1531 RGWLibFS *fs = get_fs();
1532 if (fs->invalidate_cb) {
1533 fs->invalidate_cb(fs->invalidate_arg, get_key().fh_hk);
1534 }
1535 }
1536
1537 int RGWWriteRequest::exec_start() {
1538 struct req_state* s = get_state();
1539
1540 auto compression_type =
1541 get_store()->svc()->zone->get_zone_params().get_compression_type(
1542 s->bucket_info.placement_rule);
1543
1544 /* not obviously supportable */
1545 ceph_assert(! dlo_manifest);
1546 ceph_assert(! slo_info);
1547
1548 perfcounter->inc(l_rgw_put);
1549 op_ret = -EINVAL;
1550 rgw_obj obj{s->bucket, s->object};
1551
1552 if (s->object.empty()) {
1553 ldout(s->cct, 0) << __func__ << " called on empty object" << dendl;
1554 goto done;
1555 }
1556
1557 op_ret = get_params();
1558 if (op_ret < 0)
1559 goto done;
1560
1561 op_ret = get_system_versioning_params(s, &olh_epoch, &version_id);
1562 if (op_ret < 0) {
1563 goto done;
1564 }
1565
1566 /* user-supplied MD5 check skipped (not supplied) */
1567 /* early quota check skipped--we don't have size yet */
1568 /* skipping user-supplied etag--we might have one in future, but
1569 * like data it and other attrs would arrive after open */
1570
1571 aio.emplace(s->cct->_conf->rgw_put_obj_min_window_size);
1572
1573 if (s->bucket_info.versioning_enabled()) {
1574 if (!version_id.empty()) {
1575 obj.key.set_instance(version_id);
1576 } else {
1577 get_store()->getRados()->gen_rand_obj_instance_name(&obj);
1578 version_id = obj.key.instance;
1579 }
1580 }
1581 processor.emplace(&*aio, get_store(), s->bucket_info,
1582 &s->dest_placement,
1583 s->bucket_owner.get_id(),
1584 *static_cast<RGWObjectCtx *>(s->obj_ctx),
1585 obj, olh_epoch, s->req_id, this, s->yield);
1586
1587 op_ret = processor->prepare(s->yield);
1588 if (op_ret < 0) {
1589 ldout(s->cct, 20) << "processor->prepare() returned ret=" << op_ret
1590 << dendl;
1591 goto done;
1592 }
1593 filter = &*processor;
1594 if (compression_type != "none") {
1595 plugin = Compressor::create(s->cct, compression_type);
1596 if (! plugin) {
1597 ldout(s->cct, 1) << "Cannot load plugin for rgw_compression_type "
1598 << compression_type << dendl;
1599 } else {
1600 compressor.emplace(s->cct, plugin, filter);
1601 filter = &*compressor;
1602 }
1603 }
1604
1605 done:
1606 return op_ret;
1607 } /* exec_start */
1608
1609 int RGWWriteRequest::exec_continue()
1610 {
1611 struct req_state* s = get_state();
1612 op_ret = 0;
1613
1614 /* check guards (e.g., contig write) */
1615 if (eio) {
1616 ldout(s->cct, 5)
1617 << " chunks arrived in wrong order"
1618 << " (mounting with -o sync required)"
1619 << dendl;
1620 return -EIO;
1621 }
1622
1623 op_ret = get_store()->getRados()->check_quota(s->bucket_owner.get_id(), s->bucket,
1624 user_quota, bucket_quota, real_ofs, true);
1625 /* max_size exceed */
1626 if (op_ret < 0)
1627 return -EIO;
1628
1629 size_t len = data.length();
1630 if (! len)
1631 return 0;
1632
1633 hash.Update((const unsigned char *)data.c_str(), data.length());
1634 op_ret = filter->process(std::move(data), ofs);
1635 if (op_ret < 0) {
1636 goto done;
1637 }
1638 bytes_written += len;
1639
1640 done:
1641 return op_ret;
1642 } /* exec_continue */
1643
1644 int RGWWriteRequest::exec_finish()
1645 {
1646 buffer::list bl, aclbl, ux_key, ux_attrs;
1647 map<string, string>::iterator iter;
1648 char calc_md5[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 1];
1649 unsigned char m[CEPH_CRYPTO_MD5_DIGESTSIZE];
1650 struct req_state* s = get_state();
1651
1652 size_t osize = rgw_fh->get_size();
1653 struct timespec octime = rgw_fh->get_ctime();
1654 struct timespec omtime = rgw_fh->get_mtime();
1655 real_time appx_t = real_clock::now();
1656
1657 s->obj_size = bytes_written;
1658 perfcounter->inc(l_rgw_put_b, s->obj_size);
1659
1660 // flush data in filters
1661 op_ret = filter->process({}, s->obj_size);
1662 if (op_ret < 0) {
1663 goto done;
1664 }
1665
1666 op_ret = get_store()->getRados()->check_quota(s->bucket_owner.get_id(), s->bucket,
1667 user_quota, bucket_quota, s->obj_size, true);
1668 /* max_size exceed */
1669 if (op_ret < 0) {
1670 goto done;
1671 }
1672
1673 hash.Final(m);
1674
1675 if (compressor && compressor->is_compressed()) {
1676 bufferlist tmp;
1677 RGWCompressionInfo cs_info;
1678 cs_info.compression_type = plugin->get_type_name();
1679 cs_info.orig_size = s->obj_size;
1680 cs_info.blocks = std::move(compressor->get_compression_blocks());
1681 encode(cs_info, tmp);
1682 attrs[RGW_ATTR_COMPRESSION] = tmp;
1683 ldout(s->cct, 20) << "storing " << RGW_ATTR_COMPRESSION
1684 << " with type=" << cs_info.compression_type
1685 << ", orig_size=" << cs_info.orig_size
1686 << ", blocks=" << cs_info.blocks.size() << dendl;
1687 }
1688
1689 buf_to_hex(m, CEPH_CRYPTO_MD5_DIGESTSIZE, calc_md5);
1690 etag = calc_md5;
1691
1692 bl.append(etag.c_str(), etag.size() + 1);
1693 emplace_attr(RGW_ATTR_ETAG, std::move(bl));
1694
1695 policy.encode(aclbl);
1696 emplace_attr(RGW_ATTR_ACL, std::move(aclbl));
1697
1698 /* unix attrs */
1699 rgw_fh->set_mtime(real_clock::to_timespec(appx_t));
1700 rgw_fh->set_ctime(real_clock::to_timespec(appx_t));
1701 rgw_fh->set_size(bytes_written);
1702 rgw_fh->encode_attrs(ux_key, ux_attrs);
1703
1704 emplace_attr(RGW_ATTR_UNIX_KEY1, std::move(ux_key));
1705 emplace_attr(RGW_ATTR_UNIX1, std::move(ux_attrs));
1706
1707 for (iter = s->generic_attrs.begin(); iter != s->generic_attrs.end();
1708 ++iter) {
1709 buffer::list& attrbl = attrs[iter->first];
1710 const string& val = iter->second;
1711 attrbl.append(val.c_str(), val.size() + 1);
1712 }
1713
1714 op_ret = rgw_get_request_metadata(s->cct, s->info, attrs);
1715 if (op_ret < 0) {
1716 goto done;
1717 }
1718 encode_delete_at_attr(delete_at, attrs);
1719
1720 /* Add a custom metadata to expose the information whether an object
1721 * is an SLO or not. Appending the attribute must be performed AFTER
1722 * processing any input from user in order to prohibit overwriting. */
1723 if (unlikely(!! slo_info)) {
1724 buffer::list slo_userindicator_bl;
1725 using ceph::encode;
1726 encode("True", slo_userindicator_bl);
1727 emplace_attr(RGW_ATTR_SLO_UINDICATOR, std::move(slo_userindicator_bl));
1728 }
1729
1730 op_ret = processor->complete(s->obj_size, etag, &mtime, real_time(), attrs,
1731 (delete_at ? *delete_at : real_time()),
1732 if_match, if_nomatch, nullptr, nullptr, nullptr,
1733 s->yield);
1734 if (op_ret != 0) {
1735 /* revert attr updates */
1736 rgw_fh->set_mtime(omtime);
1737 rgw_fh->set_ctime(octime);
1738 rgw_fh->set_size(osize);
1739 }
1740
1741 done:
1742 perfcounter->tinc(l_rgw_put_lat, s->time_elapsed());
1743 return op_ret;
1744 } /* exec_finish */
1745
1746 } /* namespace rgw */
1747
1748 /* librgw */
1749 extern "C" {
1750
1751 void rgwfile_version(int *major, int *minor, int *extra)
1752 {
1753 if (major)
1754 *major = LIBRGW_FILE_VER_MAJOR;
1755 if (minor)
1756 *minor = LIBRGW_FILE_VER_MINOR;
1757 if (extra)
1758 *extra = LIBRGW_FILE_VER_EXTRA;
1759 }
1760
1761 /*
1762 attach rgw namespace
1763 */
1764 int rgw_mount(librgw_t rgw, const char *uid, const char *acc_key,
1765 const char *sec_key, struct rgw_fs **rgw_fs,
1766 uint32_t flags)
1767 {
1768 int rc = 0;
1769
1770 /* stash access data for "mount" */
1771 RGWLibFS* new_fs = new RGWLibFS(static_cast<CephContext*>(rgw), uid, acc_key,
1772 sec_key, "/");
1773 ceph_assert(new_fs);
1774
1775 rc = new_fs->authorize(rgwlib.get_store());
1776 if (rc != 0) {
1777 delete new_fs;
1778 return -EINVAL;
1779 }
1780
1781 /* register fs for shared gc */
1782 rgwlib.get_fe()->get_process()->register_fs(new_fs);
1783
1784 struct rgw_fs *fs = new_fs->get_fs();
1785 fs->rgw = rgw;
1786
1787 /* XXX we no longer assume "/" is unique, but we aren't tracking the
1788 * roots atm */
1789
1790 *rgw_fs = fs;
1791
1792 return 0;
1793 }
1794
1795 int rgw_mount2(librgw_t rgw, const char *uid, const char *acc_key,
1796 const char *sec_key, const char *root, struct rgw_fs **rgw_fs,
1797 uint32_t flags)
1798 {
1799 int rc = 0;
1800
1801 /* stash access data for "mount" */
1802 RGWLibFS* new_fs = new RGWLibFS(static_cast<CephContext*>(rgw), uid, acc_key,
1803 sec_key, root);
1804 ceph_assert(new_fs);
1805
1806 rc = new_fs->authorize(rgwlib.get_store());
1807 if (rc != 0) {
1808 delete new_fs;
1809 return -EINVAL;
1810 }
1811
1812 /* register fs for shared gc */
1813 rgwlib.get_fe()->get_process()->register_fs(new_fs);
1814
1815 struct rgw_fs *fs = new_fs->get_fs();
1816 fs->rgw = rgw;
1817
1818 /* XXX we no longer assume "/" is unique, but we aren't tracking the
1819 * roots atm */
1820
1821 *rgw_fs = fs;
1822
1823 return 0;
1824 }
1825
1826 /*
1827 register invalidate callbacks
1828 */
1829 int rgw_register_invalidate(struct rgw_fs *rgw_fs, rgw_fh_callback_t cb,
1830 void *arg, uint32_t flags)
1831
1832 {
1833 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1834 return fs->register_invalidate(cb, arg, flags);
1835 }
1836
1837 /*
1838 detach rgw namespace
1839 */
1840 int rgw_umount(struct rgw_fs *rgw_fs, uint32_t flags)
1841 {
1842 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1843 fs->close();
1844 return 0;
1845 }
1846
1847 /*
1848 get filesystem attributes
1849 */
1850 int rgw_statfs(struct rgw_fs *rgw_fs,
1851 struct rgw_file_handle *parent_fh,
1852 struct rgw_statvfs *vfs_st, uint32_t flags)
1853 {
1854 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1855 struct rados_cluster_stat_t stats;
1856 rgw::sal::RGWRadosUser ruser(rgwlib.get_store(), *fs->get_user());
1857
1858 RGWGetClusterStatReq req(fs->get_context(), &ruser, stats);
1859 int rc = rgwlib.get_fe()->execute_req(&req);
1860 if (rc < 0) {
1861 lderr(fs->get_context()) << "ERROR: getting total cluster usage"
1862 << cpp_strerror(-rc) << dendl;
1863 return rc;
1864 }
1865
1866 //Set block size to 1M.
1867 constexpr uint32_t CEPH_BLOCK_SHIFT = 20;
1868 vfs_st->f_bsize = 1 << CEPH_BLOCK_SHIFT;
1869 vfs_st->f_frsize = 1 << CEPH_BLOCK_SHIFT;
1870 vfs_st->f_blocks = stats.kb >> (CEPH_BLOCK_SHIFT - 10);
1871 vfs_st->f_bfree = stats.kb_avail >> (CEPH_BLOCK_SHIFT - 10);
1872 vfs_st->f_bavail = stats.kb_avail >> (CEPH_BLOCK_SHIFT - 10);
1873 vfs_st->f_files = stats.num_objects;
1874 vfs_st->f_ffree = -1;
1875 vfs_st->f_fsid[0] = fs->get_fsid();
1876 vfs_st->f_fsid[1] = fs->get_fsid();
1877 vfs_st->f_flag = 0;
1878 vfs_st->f_namemax = 4096;
1879 return 0;
1880 }
1881
1882 /*
1883 generic create -- create an empty regular file
1884 */
1885 int rgw_create(struct rgw_fs *rgw_fs, struct rgw_file_handle *parent_fh,
1886 const char *name, struct stat *st, uint32_t mask,
1887 struct rgw_file_handle **fh, uint32_t posix_flags,
1888 uint32_t flags)
1889 {
1890 using std::get;
1891
1892 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1893 RGWFileHandle* parent = get_rgwfh(parent_fh);
1894
1895 if ((! parent) ||
1896 (parent->is_root()) ||
1897 (parent->is_file())) {
1898 /* bad parent */
1899 return -EINVAL;
1900 }
1901
1902 MkObjResult fhr = fs->create(parent, name, st, mask, flags);
1903 RGWFileHandle *nfh = get<0>(fhr); // nullptr if !success
1904
1905 if (nfh)
1906 *fh = nfh->get_fh();
1907
1908 return get<1>(fhr);
1909 } /* rgw_create */
1910
1911 /*
1912 create a symbolic link
1913 */
1914 int rgw_symlink(struct rgw_fs *rgw_fs, struct rgw_file_handle *parent_fh,
1915 const char *name, const char *link_path, struct stat *st, uint32_t mask,
1916 struct rgw_file_handle **fh, uint32_t posix_flags,
1917 uint32_t flags)
1918 {
1919 using std::get;
1920
1921 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1922 RGWFileHandle* parent = get_rgwfh(parent_fh);
1923
1924 if ((! parent) ||
1925 (parent->is_root()) ||
1926 (parent->is_file())) {
1927 /* bad parent */
1928 return -EINVAL;
1929 }
1930
1931 MkObjResult fhr = fs->symlink(parent, name, link_path, st, mask, flags);
1932 RGWFileHandle *nfh = get<0>(fhr); // nullptr if !success
1933
1934 if (nfh)
1935 *fh = nfh->get_fh();
1936
1937 return get<1>(fhr);
1938 } /* rgw_symlink */
1939
1940 /*
1941 create a new directory
1942 */
1943 int rgw_mkdir(struct rgw_fs *rgw_fs,
1944 struct rgw_file_handle *parent_fh,
1945 const char *name, struct stat *st, uint32_t mask,
1946 struct rgw_file_handle **fh, uint32_t flags)
1947 {
1948 using std::get;
1949
1950 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1951 RGWFileHandle* parent = get_rgwfh(parent_fh);
1952
1953 if (! parent) {
1954 /* bad parent */
1955 return -EINVAL;
1956 }
1957
1958 MkObjResult fhr = fs->mkdir(parent, name, st, mask, flags);
1959 RGWFileHandle *nfh = get<0>(fhr); // nullptr if !success
1960
1961 if (nfh)
1962 *fh = nfh->get_fh();
1963
1964 return get<1>(fhr);
1965 } /* rgw_mkdir */
1966
1967 /*
1968 rename object
1969 */
1970 int rgw_rename(struct rgw_fs *rgw_fs,
1971 struct rgw_file_handle *src, const char* src_name,
1972 struct rgw_file_handle *dst, const char* dst_name,
1973 uint32_t flags)
1974 {
1975 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1976
1977 RGWFileHandle* src_fh = get_rgwfh(src);
1978 RGWFileHandle* dst_fh = get_rgwfh(dst);
1979
1980 return fs->rename(src_fh, dst_fh, src_name, dst_name);
1981 }
1982
1983 /*
1984 remove file or directory
1985 */
1986 int rgw_unlink(struct rgw_fs *rgw_fs, struct rgw_file_handle *parent_fh,
1987 const char *name, uint32_t flags)
1988 {
1989 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1990 RGWFileHandle* parent = get_rgwfh(parent_fh);
1991
1992 return fs->unlink(parent, name);
1993 }
1994
1995 /*
1996 lookup object by name (POSIX style)
1997 */
1998 int rgw_lookup(struct rgw_fs *rgw_fs,
1999 struct rgw_file_handle *parent_fh, const char* path,
2000 struct rgw_file_handle **fh,
2001 struct stat *st, uint32_t mask, uint32_t flags)
2002 {
2003 //CephContext* cct = static_cast<CephContext*>(rgw_fs->rgw);
2004 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
2005
2006 RGWFileHandle* parent = get_rgwfh(parent_fh);
2007 if ((! parent) ||
2008 (! parent->is_dir())) {
2009 /* bad parent */
2010 return -EINVAL;
2011 }
2012
2013 RGWFileHandle* rgw_fh;
2014 LookupFHResult fhr;
2015
2016 if (parent->is_root()) {
2017 /* special: parent lookup--note lack of ref()! */
2018 if (unlikely((strcmp(path, "..") == 0) ||
2019 (strcmp(path, "/") == 0))) {
2020 rgw_fh = parent;
2021 } else {
2022 RGWLibFS::BucketStats bstat;
2023 fhr = fs->stat_bucket(parent, path, bstat, RGWFileHandle::FLAG_NONE);
2024 rgw_fh = get<0>(fhr);
2025 if (! rgw_fh)
2026 return -ENOENT;
2027 }
2028 } else {
2029 /* special: after readdir--note extra ref()! */
2030 if (unlikely((strcmp(path, "..") == 0))) {
2031 rgw_fh = parent;
2032 lsubdout(fs->get_context(), rgw, 17)
2033 << __func__ << " BANG"<< *rgw_fh
2034 << dendl;
2035 fs->ref(rgw_fh);
2036 } else {
2037 enum rgw_fh_type fh_type = fh_type_of(flags);
2038
2039 uint32_t sl_flags = (flags & RGW_LOOKUP_FLAG_RCB)
2040 ? RGWFileHandle::FLAG_NONE
2041 : RGWFileHandle::FLAG_EXACT_MATCH;
2042
2043 bool fast_attrs= fs->get_context()->_conf->rgw_nfs_s3_fast_attrs;
2044
2045 if ((flags & RGW_LOOKUP_FLAG_RCB) && fast_attrs) {
2046 /* FAKE STAT--this should mean, interpolate special
2047 * owner, group, and perms masks */
2048 fhr = fs->fake_leaf(parent, path, fh_type, st, mask, sl_flags);
2049 } else {
2050 if ((fh_type == RGW_FS_TYPE_DIRECTORY) && fast_attrs) {
2051 /* trust cached dir, if present */
2052 fhr = fs->lookup_fh(parent, path, RGWFileHandle::FLAG_DIRECTORY);
2053 if (get<0>(fhr)) {
2054 rgw_fh = get<0>(fhr);
2055 goto done;
2056 }
2057 }
2058 fhr = fs->stat_leaf(parent, path, fh_type, sl_flags);
2059 }
2060 if (! get<0>(fhr)) {
2061 if (! (flags & RGW_LOOKUP_FLAG_CREATE))
2062 return -ENOENT;
2063 else
2064 fhr = fs->lookup_fh(parent, path, RGWFileHandle::FLAG_CREATE);
2065 }
2066 rgw_fh = get<0>(fhr);
2067 }
2068 } /* !root */
2069
2070 done:
2071 struct rgw_file_handle *rfh = rgw_fh->get_fh();
2072 *fh = rfh;
2073
2074 return 0;
2075 } /* rgw_lookup */
2076
2077 /*
2078 lookup object by handle (NFS style)
2079 */
2080 int rgw_lookup_handle(struct rgw_fs *rgw_fs, struct rgw_fh_hk *fh_hk,
2081 struct rgw_file_handle **fh, uint32_t flags)
2082 {
2083 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
2084
2085 RGWFileHandle* rgw_fh = fs->lookup_handle(*fh_hk);
2086 if (! rgw_fh) {
2087 /* not found */
2088 return -ENOENT;
2089 }
2090
2091 struct rgw_file_handle *rfh = rgw_fh->get_fh();
2092 *fh = rfh;
2093
2094 return 0;
2095 }
2096
2097 /*
2098 * release file handle
2099 */
2100 int rgw_fh_rele(struct rgw_fs *rgw_fs, struct rgw_file_handle *fh,
2101 uint32_t flags)
2102 {
2103 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
2104 RGWFileHandle* rgw_fh = get_rgwfh(fh);
2105
2106 lsubdout(fs->get_context(), rgw, 17)
2107 << __func__ << " " << *rgw_fh
2108 << dendl;
2109
2110 fs->unref(rgw_fh);
2111 return 0;
2112 }
2113
2114 /*
2115 get unix attributes for object
2116 */
2117 int rgw_getattr(struct rgw_fs *rgw_fs,
2118 struct rgw_file_handle *fh, struct stat *st, uint32_t flags)
2119 {
2120 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
2121 RGWFileHandle* rgw_fh = get_rgwfh(fh);
2122
2123 return fs->getattr(rgw_fh, st);
2124 }
2125
2126 /*
2127 set unix attributes for object
2128 */
2129 int rgw_setattr(struct rgw_fs *rgw_fs,
2130 struct rgw_file_handle *fh, struct stat *st,
2131 uint32_t mask, uint32_t flags)
2132 {
2133 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
2134 RGWFileHandle* rgw_fh = get_rgwfh(fh);
2135
2136 return fs->setattr(rgw_fh, st, mask, flags);
2137 }
2138
/*
 truncate file

 Currently a no-op: none of the arguments are examined and success (0)
 is always reported.
*/
int rgw_truncate(struct rgw_fs *rgw_fs,
                 struct rgw_file_handle *fh, uint64_t size, uint32_t flags)
{
  constexpr int success = 0;
  return success;
}
2147
2148 /*
2149 open file
2150 */
2151 int rgw_open(struct rgw_fs *rgw_fs,
2152 struct rgw_file_handle *fh, uint32_t posix_flags, uint32_t flags)
2153 {
2154 RGWFileHandle* rgw_fh = get_rgwfh(fh);
2155
2156 /* XXX
2157 * need to track specific opens--at least read opens and
2158 * a write open; we need to know when a write open is returned,
2159 * that closes a write transaction
2160 *
2161 * for now, we will support single-open only, it's preferable to
2162 * anything we can otherwise do without access to the NFS state
2163 */
2164 if (! rgw_fh->is_file())
2165 return -EISDIR;
2166
2167 return rgw_fh->open(flags);
2168 }
2169
2170 /*
2171 close file
2172 */
2173 int rgw_close(struct rgw_fs *rgw_fs,
2174 struct rgw_file_handle *fh, uint32_t flags)
2175 {
2176 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
2177 RGWFileHandle* rgw_fh = get_rgwfh(fh);
2178 int rc = rgw_fh->close(/* XXX */);
2179
2180 if (flags & RGW_CLOSE_FLAG_RELE)
2181 fs->unref(rgw_fh);
2182
2183 return rc;
2184 }
2185
2186 int rgw_readdir(struct rgw_fs *rgw_fs,
2187 struct rgw_file_handle *parent_fh, uint64_t *offset,
2188 rgw_readdir_cb rcb, void *cb_arg, bool *eof,
2189 uint32_t flags)
2190 {
2191 RGWFileHandle* parent = get_rgwfh(parent_fh);
2192 if (! parent) {
2193 /* bad parent */
2194 return -EINVAL;
2195 }
2196
2197 lsubdout(parent->get_fs()->get_context(), rgw, 15)
2198 << __func__
2199 << " offset=" << *offset
2200 << dendl;
2201
2202 if ((*offset == 0) &&
2203 (flags & RGW_READDIR_FLAG_DOTDOT)) {
2204 /* send '.' and '..' with their NFS-defined offsets */
2205 rcb(".", cb_arg, 1, nullptr, 0, RGW_LOOKUP_FLAG_DIR);
2206 rcb("..", cb_arg, 2, nullptr, 0, RGW_LOOKUP_FLAG_DIR);
2207 }
2208
2209 int rc = parent->readdir(rcb, cb_arg, offset, eof, flags);
2210 return rc;
2211 } /* rgw_readdir */
2212
2213 /* enumeration continuing from name */
2214 int rgw_readdir2(struct rgw_fs *rgw_fs,
2215 struct rgw_file_handle *parent_fh, const char *name,
2216 rgw_readdir_cb rcb, void *cb_arg, bool *eof,
2217 uint32_t flags)
2218 {
2219 RGWFileHandle* parent = get_rgwfh(parent_fh);
2220 if (! parent) {
2221 /* bad parent */
2222 return -EINVAL;
2223 }
2224
2225 lsubdout(parent->get_fs()->get_context(), rgw, 15)
2226 << __func__
2227 << " offset=" << ((name) ? name : "(nil)")
2228 << dendl;
2229
2230 if ((! name) &&
2231 (flags & RGW_READDIR_FLAG_DOTDOT)) {
2232 /* send '.' and '..' with their NFS-defined offsets */
2233 rcb(".", cb_arg, 1, nullptr, 0, RGW_LOOKUP_FLAG_DIR);
2234 rcb("..", cb_arg, 2, nullptr, 0, RGW_LOOKUP_FLAG_DIR);
2235 }
2236
2237 int rc = parent->readdir(rcb, cb_arg, name, eof, flags);
2238 return rc;
2239 } /* rgw_readdir2 */
2240
2241 /* project offset of dirent name */
2242 int rgw_dirent_offset(struct rgw_fs *rgw_fs,
2243 struct rgw_file_handle *parent_fh,
2244 const char *name, int64_t *offset,
2245 uint32_t flags)
2246 {
2247 RGWFileHandle* parent = get_rgwfh(parent_fh);
2248 if ((! parent)) {
2249 /* bad parent */
2250 return -EINVAL;
2251 }
2252 std::string sname{name};
2253 int rc = parent->offset_of(sname, offset, flags);
2254 return rc;
2255 }
2256
2257 /*
2258 read data from file
2259 */
2260 int rgw_read(struct rgw_fs *rgw_fs,
2261 struct rgw_file_handle *fh, uint64_t offset,
2262 size_t length, size_t *bytes_read, void *buffer,
2263 uint32_t flags)
2264 {
2265 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
2266 RGWFileHandle* rgw_fh = get_rgwfh(fh);
2267
2268 return fs->read(rgw_fh, offset, length, bytes_read, buffer, flags);
2269 }
2270
2271 /*
2272 read symbolic link
2273 */
2274 int rgw_readlink(struct rgw_fs *rgw_fs,
2275 struct rgw_file_handle *fh, uint64_t offset,
2276 size_t length, size_t *bytes_read, void *buffer,
2277 uint32_t flags)
2278 {
2279 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
2280 RGWFileHandle* rgw_fh = get_rgwfh(fh);
2281
2282 return fs->readlink(rgw_fh, offset, length, bytes_read, buffer, flags);
2283 }
2284
2285 /*
2286 write data to file
2287 */
2288 int rgw_write(struct rgw_fs *rgw_fs,
2289 struct rgw_file_handle *fh, uint64_t offset,
2290 size_t length, size_t *bytes_written, void *buffer,
2291 uint32_t flags)
2292 {
2293 RGWFileHandle* rgw_fh = get_rgwfh(fh);
2294 int rc;
2295
2296 *bytes_written = 0;
2297
2298 if (! rgw_fh->is_file())
2299 return -EISDIR;
2300
2301 if (! rgw_fh->is_open()) {
2302 if (flags & RGW_OPEN_FLAG_V3) {
2303 rc = rgw_fh->open(flags);
2304 if (!! rc)
2305 return rc;
2306 } else
2307 return -EPERM;
2308 }
2309
2310 rc = rgw_fh->write(offset, length, bytes_written, buffer);
2311
2312 return rc;
2313 }
2314
2315 /*
2316 read data from file (vector)
2317 */
2318 class RGWReadV
2319 {
2320 buffer::list bl;
2321 struct rgw_vio* vio;
2322
2323 public:
2324 RGWReadV(buffer::list& _bl, rgw_vio* _vio) : vio(_vio) {
2325 bl.claim(_bl);
2326 }
2327
2328 struct rgw_vio* get_vio() { return vio; }
2329
2330 const auto& buffers() { return bl.buffers(); }
2331
2332 unsigned /* XXX */ length() { return bl.length(); }
2333
2334 };
2335
2336 void rgw_readv_rele(struct rgw_uio *uio, uint32_t flags)
2337 {
2338 RGWReadV* rdv = static_cast<RGWReadV*>(uio->uio_p1);
2339 rdv->~RGWReadV();
2340 ::operator delete(rdv);
2341 }
2342
2343 int rgw_readv(struct rgw_fs *rgw_fs,
2344 struct rgw_file_handle *fh, rgw_uio *uio, uint32_t flags)
2345 {
2346 #if 0 /* XXX */
2347 CephContext* cct = static_cast<CephContext*>(rgw_fs->rgw);
2348 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
2349 RGWFileHandle* rgw_fh = get_rgwfh(fh);
2350
2351 if (! rgw_fh->is_file())
2352 return -EINVAL;
2353
2354 int rc = 0;
2355
2356 buffer::list bl;
2357 RGWGetObjRequest req(cct, fs->get_user(), rgw_fh->bucket_name(),
2358 rgw_fh->object_name(), uio->uio_offset, uio->uio_resid,
2359 bl);
2360 req.do_hexdump = false;
2361
2362 rc = rgwlib.get_fe()->execute_req(&req);
2363
2364 if (! rc) {
2365 RGWReadV* rdv = static_cast<RGWReadV*>(
2366 ::operator new(sizeof(RGWReadV) +
2367 (bl.buffers().size() * sizeof(struct rgw_vio))));
2368
2369 (void) new (rdv)
2370 RGWReadV(bl, reinterpret_cast<rgw_vio*>(rdv+sizeof(RGWReadV)));
2371
2372 uio->uio_p1 = rdv;
2373 uio->uio_cnt = rdv->buffers().size();
2374 uio->uio_resid = rdv->length();
2375 uio->uio_vio = rdv->get_vio();
2376 uio->uio_rele = rgw_readv_rele;
2377
2378 int ix = 0;
2379 auto& buffers = rdv->buffers();
2380 for (auto& bp : buffers) {
2381 rgw_vio *vio = &(uio->uio_vio[ix]);
2382 vio->vio_base = const_cast<char*>(bp.c_str());
2383 vio->vio_len = bp.length();
2384 vio->vio_u1 = nullptr;
2385 vio->vio_p1 = nullptr;
2386 ++ix;
2387 }
2388 }
2389
2390 return rc;
2391 #else
2392 return 0;
2393 #endif
2394 }
2395
2396 /*
2397 write data to file (vector)
2398 */
2399 int rgw_writev(struct rgw_fs *rgw_fs, struct rgw_file_handle *fh,
2400 rgw_uio *uio, uint32_t flags)
2401 {
2402
2403 return -ENOTSUP;
2404
2405 CephContext* cct = static_cast<CephContext*>(rgw_fs->rgw);
2406 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
2407 RGWFileHandle* rgw_fh = get_rgwfh(fh);
2408 rgw::sal::RGWRadosUser ruser(rgwlib.get_store(), *fs->get_user());
2409
2410 if (! rgw_fh->is_file())
2411 return -EINVAL;
2412
2413 buffer::list bl;
2414 for (unsigned int ix = 0; ix < uio->uio_cnt; ++ix) {
2415 rgw_vio *vio = &(uio->uio_vio[ix]);
2416 bl.push_back(
2417 buffer::create_static(vio->vio_len,
2418 static_cast<char*>(vio->vio_base)));
2419 }
2420
2421 std::string oname = rgw_fh->relative_object_name();
2422 RGWPutObjRequest req(cct, &ruser, rgw_fh->bucket_name(),
2423 oname, bl);
2424
2425 int rc = rgwlib.get_fe()->execute_req(&req);
2426
2427 /* XXX update size (in request) */
2428
2429 return rc;
2430 }
2431
/*
  sync written data

  No work is done here: none of the arguments is inspected and the
  call always returns 0.
*/
int rgw_fsync(struct rgw_fs *rgw_fs, struct rgw_file_handle *handle,
              uint32_t flags)
{
  constexpr int ok = 0;
  return ok;
}
2440
2441 int rgw_commit(struct rgw_fs *rgw_fs, struct rgw_file_handle *fh,
2442 uint64_t offset, uint64_t length, uint32_t flags)
2443 {
2444 RGWFileHandle* rgw_fh = get_rgwfh(fh);
2445
2446 return rgw_fh->commit(offset, length, RGWFileHandle::FLAG_NONE);
2447 }
2448
2449 } /* extern "C" */