1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab ft=cpp
4 #include "include/compat.h"
5 #include "include/rados/rgw_file.h"
11 #include "rgw_resolve.h"
15 #include "rgw_acl_s3.h"
16 #include "rgw_frontend.h"
17 #include "rgw_request.h"
18 #include "rgw_process.h"
19 #include "rgw_rest_user.h"
20 #include "rgw_rest_s3.h"
21 #include "rgw_os_lib.h"
22 #include "rgw_auth_s3.h"
24 #include "rgw_bucket.h"
27 #include "rgw_lib_frontend.h"
28 #include "rgw_perf_counters.h"
29 #include "common/errno.h"
31 #include "services/svc_zone.h"
35 #define dout_subsys ceph_subsys_rgw
42 const string
RGWFileHandle::root_name
= "/";
44 std::atomic
<uint32_t> RGWLibFS::fs_inst_counter
;
46 uint32_t RGWLibFS::write_completion_interval_s
= 10;
48 ceph::timer
<ceph::mono_clock
> RGWLibFS::write_timer
{
49 ceph::construct_suspended
};
51 inline int valid_fs_bucket_name(const string
& name
) {
52 int rc
= valid_s3_bucket_name(name
, false /* relaxed */);
54 if (name
.size() > 255)
61 inline int valid_fs_object_name(const string
& name
) {
62 int rc
= valid_s3_object_name(name
);
64 if (name
.size() > 1024)
74 std::size_t operator()(const rgw_xattrstr
& att
) const noexcept
{
75 return XXH64(att
.val
, att
.len
, 5882300);
82 bool operator()(const rgw_xattrstr
& lhs
, const rgw_xattrstr
& rhs
) const {
83 return ((lhs
.len
== rhs
.len
) &&
84 (strncmp(lhs
.val
, rhs
.val
, lhs
.len
) == 0));
88 /* well-known attributes */
89 static const std::unordered_set
<
90 rgw_xattrstr
, XattrHash
, XattrEqual
> rgw_exposed_attrs
= {
91 rgw_xattrstr
{const_cast<char*>(RGW_ATTR_ETAG
), sizeof(RGW_ATTR_ETAG
)-1}
94 static inline bool is_exposed_attr(const rgw_xattrstr
& k
) {
95 return (rgw_exposed_attrs
.find(k
) != rgw_exposed_attrs
.end());
98 LookupFHResult
RGWLibFS::stat_bucket(RGWFileHandle
* parent
, const char *path
,
99 RGWLibFS::BucketStats
& bs
,
102 LookupFHResult fhr
{nullptr, 0};
103 std::string bucket_name
{path
};
104 RGWStatBucketRequest
req(cct
, user
->clone(), bucket_name
, bs
);
106 int rc
= g_rgwlib
->get_fe()->execute_req(&req
);
108 (req
.get_ret() == 0) &&
110 fhr
= lookup_fh(parent
, path
,
111 (flags
& RGWFileHandle::FLAG_LOCKED
)|
112 RGWFileHandle::FLAG_CREATE
|
113 RGWFileHandle::FLAG_BUCKET
);
115 RGWFileHandle
* rgw_fh
= get
<0>(fhr
);
116 if (! (flags
& RGWFileHandle::FLAG_LOCKED
)) {
119 rgw_fh
->set_times(req
.get_ctime());
120 /* restore attributes */
121 auto ux_key
= req
.get_attr(RGW_ATTR_UNIX_KEY1
);
122 auto ux_attrs
= req
.get_attr(RGW_ATTR_UNIX1
);
123 if (ux_key
&& ux_attrs
) {
124 DecodeAttrsResult dar
= rgw_fh
->decode_attrs(ux_key
, ux_attrs
);
125 if (get
<0>(dar
) || get
<1>(dar
)) {
129 if (! (flags
& RGWFileHandle::FLAG_LOCKED
)) {
130 rgw_fh
->mtx
.unlock();
137 LookupFHResult
RGWLibFS::fake_leaf(RGWFileHandle
* parent
,
139 enum rgw_fh_type type
,
140 struct stat
*st
, uint32_t st_mask
,
143 /* synthesize a minimal handle from parent, path, type, and st */
146 flags
|= RGWFileHandle::FLAG_CREATE
;
149 case RGW_FS_TYPE_DIRECTORY
:
150 flags
|= RGWFileHandle::FLAG_DIRECTORY
;
157 LookupFHResult fhr
= lookup_fh(parent
, path
, flags
);
159 RGWFileHandle
* rgw_fh
= get
<0>(fhr
);
161 lock_guard
guard(rgw_fh
->mtx
);
162 if (st_mask
& RGW_SETATTR_SIZE
) {
163 rgw_fh
->set_size(st
->st_size
);
165 if (st_mask
& RGW_SETATTR_MTIME
) {
166 rgw_fh
->set_times(st
->st_mtim
);
171 } /* RGWLibFS::fake_leaf */
173 LookupFHResult
RGWLibFS::stat_leaf(RGWFileHandle
* parent
,
175 enum rgw_fh_type type
,
178 /* find either-of <object_name>, <object_name/>, only one of
179 * which should exist; atomicity? */
182 LookupFHResult fhr
{nullptr, 0};
184 /* XXX the need for two round-trip operations to identify file or
185 * directory leaf objects is unnecessary--the current proposed
186 * mechanism to avoid this is to store leaf object names with an
187 * object locator w/o trailing slash */
189 std::string obj_path
= parent
->format_child_name(path
, false);
191 for (auto ix
: { 0, 1, 2 }) {
196 if (type
== RGW_FS_TYPE_DIRECTORY
)
199 RGWStatObjRequest
req(cct
, user
->clone(),
200 parent
->bucket_name(), obj_path
,
201 RGWStatObjRequest::FLAG_NONE
);
202 int rc
= g_rgwlib
->get_fe()->execute_req(&req
);
204 (req
.get_ret() == 0)) {
205 fhr
= lookup_fh(parent
, path
, RGWFileHandle::FLAG_CREATE
);
207 RGWFileHandle
* rgw_fh
= get
<0>(fhr
);
208 lock_guard
guard(rgw_fh
->mtx
);
209 rgw_fh
->set_size(req
.get_size());
210 rgw_fh
->set_times(req
.get_mtime());
211 /* restore attributes */
212 auto ux_key
= req
.get_attr(RGW_ATTR_UNIX_KEY1
);
213 auto ux_attrs
= req
.get_attr(RGW_ATTR_UNIX1
);
214 rgw_fh
->set_etag(*(req
.get_attr(RGW_ATTR_ETAG
)));
215 rgw_fh
->set_acls(*(req
.get_attr(RGW_ATTR_ACL
)));
216 if (!(flags
& RGWFileHandle::FLAG_IN_CB
) &&
217 ux_key
&& ux_attrs
) {
218 DecodeAttrsResult dar
= rgw_fh
->decode_attrs(ux_key
, ux_attrs
);
219 if (get
<0>(dar
) || get
<1>(dar
)) {
232 if (type
== RGW_FS_TYPE_FILE
)
236 RGWStatObjRequest
req(cct
, user
->clone(),
237 parent
->bucket_name(), obj_path
,
238 RGWStatObjRequest::FLAG_NONE
);
239 int rc
= g_rgwlib
->get_fe()->execute_req(&req
);
241 (req
.get_ret() == 0)) {
242 fhr
= lookup_fh(parent
, path
, RGWFileHandle::FLAG_DIRECTORY
);
244 RGWFileHandle
* rgw_fh
= get
<0>(fhr
);
245 lock_guard
guard(rgw_fh
->mtx
);
246 rgw_fh
->set_size(req
.get_size());
247 rgw_fh
->set_times(req
.get_mtime());
248 /* restore attributes */
249 auto ux_key
= req
.get_attr(RGW_ATTR_UNIX_KEY1
);
250 auto ux_attrs
= req
.get_attr(RGW_ATTR_UNIX1
);
251 rgw_fh
->set_etag(*(req
.get_attr(RGW_ATTR_ETAG
)));
252 rgw_fh
->set_acls(*(req
.get_attr(RGW_ATTR_ACL
)));
253 if (!(flags
& RGWFileHandle::FLAG_IN_CB
) &&
254 ux_key
&& ux_attrs
) {
255 DecodeAttrsResult dar
= rgw_fh
->decode_attrs(ux_key
, ux_attrs
);
256 if (get
<0>(dar
) || get
<1>(dar
)) {
267 std::string object_name
{path
};
268 RGWStatLeafRequest
req(cct
, user
->clone(),
269 parent
, object_name
);
270 int rc
= g_rgwlib
->get_fe()->execute_req(&req
);
272 (req
.get_ret() == 0)) {
274 /* we need rgw object's key name equal to file name, if
276 if ((flags
& RGWFileHandle::FLAG_EXACT_MATCH
) &&
277 !req
.exact_matched
) {
278 lsubdout(get_context(), rgw
, 15)
280 << ": stat leaf not exact match file name = "
284 fhr
= lookup_fh(parent
, path
,
285 RGWFileHandle::FLAG_CREATE
|
287 RGWFileHandle::FLAG_DIRECTORY
:
288 RGWFileHandle::FLAG_NONE
));
289 /* XXX we don't have an object--in general, there need not
290 * be one (just a path segment in some other object). In
291 * actual leaf an object exists, but we'd need another round
292 * trip to get attrs */
294 /* for now use the parent object's mtime */
295 RGWFileHandle
* rgw_fh
= get
<0>(fhr
);
296 lock_guard
guard(rgw_fh
->mtx
);
297 rgw_fh
->set_mtime(parent
->get_mtime());
310 } /* RGWLibFS::stat_leaf */
312 int RGWLibFS::read(RGWFileHandle
* rgw_fh
, uint64_t offset
, size_t length
,
313 size_t* bytes_read
, void* buffer
, uint32_t flags
)
315 if (! rgw_fh
->is_file())
318 if (rgw_fh
->deleted())
321 RGWReadRequest
req(get_context(), user
->clone(), rgw_fh
, offset
, length
, buffer
);
323 int rc
= g_rgwlib
->get_fe()->execute_req(&req
);
325 ((rc
= req
.get_ret()) == 0)) {
326 lock_guard
guard(rgw_fh
->mtx
);
327 rgw_fh
->set_atime(real_clock::to_timespec(real_clock::now()));
328 *bytes_read
= req
.nread
;
/* Read through a symbolic-link handle.  The visible fragments check
 * is_link() and deleted(), hand offset/length/buffer to an
 * RGWReadRequest, and--when the request's get_ret() is 0--update the
 * handle's atime and store req.nread in *bytes_read.
 * NOTE(review): the lines between the numbered fragments (braces,
 * early returns) are not present in this extraction. */
334 int RGWLibFS::readlink(RGWFileHandle
* rgw_fh
, uint64_t offset
, size_t length
,
335 size_t* bytes_read
, void* buffer
, uint32_t flags
)
337 if (! rgw_fh
->is_link())
340 if (rgw_fh
->deleted())
343 RGWReadRequest
req(get_context(), user
->clone(), rgw_fh
, offset
, length
, buffer
);
345 int rc
= g_rgwlib
->get_fe()->execute_req(&req
);
347 ((rc
= req
.get_ret()) == 0)) {
/* the guard must be a named object: an unnamed lock_guard temporary
 * is destroyed at the end of its own statement, leaving the atime
 * update below unprotected (RGWLibFS::read uses the named form) */
348 lock_guard guard(rgw_fh
->mtx
);
349 rgw_fh
->set_atime(real_clock::to_timespec(real_clock::now()));
350 *bytes_read
= req
.nread
;
356 int RGWLibFS::unlink(RGWFileHandle
* rgw_fh
, const char* name
, uint32_t flags
)
360 RGWFileHandle
* parent
= nullptr;
361 RGWFileHandle
* bkt_fh
= nullptr;
363 if (unlikely(flags
& RGWFileHandle::FLAG_UNLINK_THIS
)) {
365 parent
= rgw_fh
->get_parent();
369 LookupFHResult fhr
= lookup_fh(parent
, name
, RGWFileHandle::FLAG_LOCK
);
370 rgw_fh
= get
<0>(fhr
);
374 if (parent
->is_root()) {
375 /* a bucket may have an object storing Unix attributes, check
376 * for and delete it */
378 fhr
= stat_bucket(parent
, name
, bs
, (rgw_fh
) ?
379 RGWFileHandle::FLAG_LOCKED
:
380 RGWFileHandle::FLAG_NONE
);
381 bkt_fh
= get
<0>(fhr
);
382 if (unlikely(! bkt_fh
)) {
383 /* implies !rgw_fh, so also !LOCKED */
387 if (bs
.num_entries
> 1) {
388 unref(bkt_fh
); /* return stat_bucket ref */
389 if (likely(!! rgw_fh
)) { /* return lock and ref from
390 * lookup_fh (or caller in the
392 * RGWFileHandle::FLAG_UNLINK_THIS) */
393 rgw_fh
->mtx
.unlock();
398 /* delete object w/key "<bucket>/" (uxattrs), if any */
400 RGWDeleteObjRequest
req(cct
, user
->clone(), bkt_fh
->bucket_name(), oname
);
401 rc
= g_rgwlib
->get_fe()->execute_req(&req
);
402 /* don't care if ENOENT */
407 RGWDeleteBucketRequest
req(cct
, user
->clone(), bname
);
408 rc
= g_rgwlib
->get_fe()->execute_req(&req
);
417 /* XXX for now, perform a hard lookup to deduce the type of
418 * object to be deleted ("foo" vs. "foo/")--also, ensures
419 * atomicity at this endpoint */
420 struct rgw_file_handle
*fh
;
421 rc
= rgw_lookup(get_fs(), parent
->get_fh(), name
, &fh
,
422 nullptr /* st */, 0 /* mask */,
423 RGW_LOOKUP_FLAG_NONE
);
428 rgw_fh
= get_rgwfh(fh
);
429 rgw_fh
->mtx
.lock(); /* LOCKED */
432 std::string oname
= rgw_fh
->relative_object_name();
433 if (rgw_fh
->is_dir()) {
434 /* for the duration of our cache timer, trust positive
436 if (rgw_fh
->has_children()) {
437 rgw_fh
->mtx
.unlock();
443 RGWDeleteObjRequest
req(cct
, user
->clone(), parent
->bucket_name(), oname
);
444 rc
= g_rgwlib
->get_fe()->execute_req(&req
);
450 /* ENOENT when raced with other s3 gateway */
451 if (! rc
|| rc
== -ENOENT
) {
452 rgw_fh
->flags
|= RGWFileHandle::FLAG_DELETED
;
453 fh_cache
.remove(rgw_fh
->fh
.fh_hk
.object
, rgw_fh
,
454 RGWFileHandle::FHCache::FLAG_LOCK
);
458 real_time t
= real_clock::now();
459 parent
->set_mtime(real_clock::to_timespec(t
));
460 parent
->set_ctime(real_clock::to_timespec(t
));
463 rgw_fh
->mtx
.unlock();
467 } /* RGWLibFS::unlink */
469 int RGWLibFS::rename(RGWFileHandle
* src_fh
, RGWFileHandle
* dst_fh
,
470 const char *_src_name
, const char *_dst_name
)
473 /* XXX initial implementation: try-copy, and delete if copy
478 std::string src_name
{_src_name
};
479 std::string dst_name
{_dst_name
};
482 LookupFHResult fhr
= lookup_fh(src_fh
, _src_name
, RGWFileHandle::FLAG_LOCK
);
483 RGWFileHandle
* rgw_fh
= get
<0>(fhr
);
485 /* should not happen */
487 ldout(get_context(), 0) << __func__
488 << " BUG no such src renaming path="
494 /* forbid renaming of directories (unreasonable at scale) */
495 if (rgw_fh
->is_dir()) {
496 ldout(get_context(), 12) << __func__
497 << " rejecting attempt to rename directory path="
498 << rgw_fh
->full_object_name()
504 /* forbid renaming open files (violates intent, for now) */
505 if (rgw_fh
->is_open()) {
506 ldout(get_context(), 12) << __func__
507 << " rejecting attempt to rename open file path="
508 << rgw_fh
->full_object_name()
514 t
= real_clock::now();
516 for (int ix
: {0, 1}) {
520 RGWCopyObjRequest
req(cct
, user
->clone(), src_fh
, dst_fh
, src_name
, dst_name
);
521 int rc
= g_rgwlib
->get_fe()->execute_req(&req
);
523 ((rc
= req
.get_ret()) != 0)) {
524 ldout(get_context(), 1)
526 << " rename step 0 failed src="
527 << src_fh
->full_object_name() << " " << src_name
528 << " dst=" << dst_fh
->full_object_name()
534 ldout(get_context(), 12)
536 << " rename step 0 success src="
537 << src_fh
->full_object_name() << " " << src_name
538 << " dst=" << dst_fh
->full_object_name()
542 /* update dst change id */
543 dst_fh
->set_times(t
);
548 rc
= this->unlink(rgw_fh
/* LOCKED */, _src_name
,
549 RGWFileHandle::FLAG_UNLINK_THIS
);
552 ldout(get_context(), 12)
554 << " rename step 1 success src="
555 << src_fh
->full_object_name() << " " << src_name
556 << " dst=" << dst_fh
->full_object_name()
560 /* update src change id */
561 src_fh
->set_times(t
);
563 ldout(get_context(), 1)
565 << " rename step 1 failed src="
566 << src_fh
->full_object_name() << " " << src_name
567 << " dst=" << dst_fh
->full_object_name()
579 rgw_fh
->mtx
.unlock(); /* !LOCKED */
580 unref(rgw_fh
); /* -ref */
584 } /* RGWLibFS::rename */
586 MkObjResult
RGWLibFS::mkdir(RGWFileHandle
* parent
, const char *name
,
587 struct stat
*st
, uint32_t mask
, uint32_t flags
)
590 rgw_file_handle
*lfh
;
592 rc
= rgw_lookup(get_fs(), parent
->get_fh(), name
, &lfh
,
593 nullptr /* st */, 0 /* mask */,
594 RGW_LOOKUP_FLAG_NONE
);
597 rc
= rgw_fh_rele(get_fs(), lfh
, RGW_FH_RELE_FLAG_NONE
);
598 // ignore return code
599 return MkObjResult
{nullptr, -EEXIST
};
602 MkObjResult mkr
{nullptr, -EINVAL
};
604 RGWFileHandle
* rgw_fh
= nullptr;
605 buffer::list ux_key
, ux_attrs
;
607 fhr
= lookup_fh(parent
, name
,
608 RGWFileHandle::FLAG_CREATE
|
609 RGWFileHandle::FLAG_DIRECTORY
|
610 RGWFileHandle::FLAG_LOCK
);
611 rgw_fh
= get
<0>(fhr
);
613 rgw_fh
->create_stat(st
, mask
);
614 rgw_fh
->set_times(real_clock::now());
616 rgw_fh
->encode_attrs(ux_key
, ux_attrs
);
618 rgw_fh
->stat(st
, RGWFileHandle::FLAG_LOCKED
);
619 get
<0>(mkr
) = rgw_fh
;
625 if (parent
->is_root()) {
628 /* enforce S3 name restrictions */
629 rc
= valid_fs_bucket_name(bname
);
631 rgw_fh
->flags
|= RGWFileHandle::FLAG_DELETED
;
632 fh_cache
.remove(rgw_fh
->fh
.fh_hk
.object
, rgw_fh
,
633 RGWFileHandle::FHCache::FLAG_LOCK
);
634 rgw_fh
->mtx
.unlock();
636 get
<0>(mkr
) = nullptr;
641 RGWCreateBucketRequest
req(get_context(), user
->clone(), bname
);
644 req
.emplace_attr(RGW_ATTR_UNIX_KEY1
, std::move(ux_key
));
645 req
.emplace_attr(RGW_ATTR_UNIX1
, std::move(ux_attrs
));
647 rc
= g_rgwlib
->get_fe()->execute_req(&req
);
650 /* create an object representing the directory */
652 string dir_name
= parent
->format_child_name(name
, true);
654 /* need valid S3 name (characters, length <= 1024, etc) */
655 rc
= valid_fs_object_name(dir_name
);
657 rgw_fh
->flags
|= RGWFileHandle::FLAG_DELETED
;
658 fh_cache
.remove(rgw_fh
->fh
.fh_hk
.object
, rgw_fh
,
659 RGWFileHandle::FHCache::FLAG_LOCK
);
660 rgw_fh
->mtx
.unlock();
662 get
<0>(mkr
) = nullptr;
667 RGWPutObjRequest
req(get_context(), user
->clone(), parent
->bucket_name(), dir_name
, bl
);
670 req
.emplace_attr(RGW_ATTR_UNIX_KEY1
, std::move(ux_key
));
671 req
.emplace_attr(RGW_ATTR_UNIX1
, std::move(ux_attrs
));
673 rc
= g_rgwlib
->get_fe()->execute_req(&req
);
680 rgw_fh
->flags
|= RGWFileHandle::FLAG_DELETED
;
681 rgw_fh
->mtx
.unlock(); /* !LOCKED */
683 get
<0>(mkr
) = nullptr;
688 real_time t
= real_clock::now();
689 parent
->set_mtime(real_clock::to_timespec(t
));
690 parent
->set_ctime(real_clock::to_timespec(t
));
691 rgw_fh
->mtx
.unlock(); /* !LOCKED */
697 } /* RGWLibFS::mkdir */
699 MkObjResult
RGWLibFS::create(RGWFileHandle
* parent
, const char *name
,
700 struct stat
*st
, uint32_t mask
, uint32_t flags
)
706 rgw_file_handle
*lfh
;
707 rc
= rgw_lookup(get_fs(), parent
->get_fh(), name
, &lfh
,
708 nullptr /* st */, 0 /* mask */,
709 RGW_LOOKUP_FLAG_NONE
);
712 rc
= rgw_fh_rele(get_fs(), lfh
, RGW_FH_RELE_FLAG_NONE
);
713 // ignore return code
714 return MkObjResult
{nullptr, -EEXIST
};
717 /* expand and check name */
718 std::string obj_name
= parent
->format_child_name(name
, false);
719 rc
= valid_fs_object_name(obj_name
);
721 return MkObjResult
{nullptr, rc
};
726 RGWPutObjRequest
req(cct
, user
->clone(), parent
->bucket_name(), obj_name
, bl
);
727 MkObjResult mkr
{nullptr, -EINVAL
};
729 rc
= g_rgwlib
->get_fe()->execute_req(&req
);
735 LookupFHResult fhr
= lookup_fh(parent
, name
, RGWFileHandle::FLAG_CREATE
|
736 RGWFileHandle::FLAG_LOCK
);
737 RGWFileHandle
* rgw_fh
= get
<0>(fhr
);
739 if (get
<1>(fhr
) & RGWFileHandle::FLAG_CREATE
) {
740 /* fill in stat data */
741 real_time t
= real_clock::now();
742 rgw_fh
->create_stat(st
, mask
);
743 rgw_fh
->set_times(t
);
745 parent
->set_mtime(real_clock::to_timespec(t
));
746 parent
->set_ctime(real_clock::to_timespec(t
));
749 (void) rgw_fh
->stat(st
, RGWFileHandle::FLAG_LOCKED
);
751 rgw_fh
->set_etag(*(req
.get_attr(RGW_ATTR_ETAG
)));
752 rgw_fh
->set_acls(*(req
.get_attr(RGW_ATTR_ACL
)));
754 get
<0>(mkr
) = rgw_fh
;
755 rgw_fh
->file_ondisk_version
= 0; // initial version
756 rgw_fh
->mtx
.unlock();
763 /* cases such as quota exceeded are also treated as failures */
768 } /* RGWLibFS::create */
770 MkObjResult
RGWLibFS::symlink(RGWFileHandle
* parent
, const char *name
,
771 const char* link_path
, struct stat
*st
, uint32_t mask
, uint32_t flags
)
777 rgw_file_handle
*lfh
;
778 rc
= rgw_lookup(get_fs(), parent
->get_fh(), name
, &lfh
,
779 nullptr /* st */, 0 /* mask */,
780 RGW_LOOKUP_FLAG_NONE
);
783 rc
= rgw_fh_rele(get_fs(), lfh
, RGW_FH_RELE_FLAG_NONE
);
784 // ignore return code
785 return MkObjResult
{nullptr, -EEXIST
};
788 MkObjResult mkr
{nullptr, -EINVAL
};
790 RGWFileHandle
* rgw_fh
= nullptr;
791 buffer::list ux_key
, ux_attrs
;
793 fhr
= lookup_fh(parent
, name
,
794 RGWFileHandle::FLAG_CREATE
|
795 RGWFileHandle::FLAG_SYMBOLIC_LINK
|
796 RGWFileHandle::FLAG_LOCK
);
797 rgw_fh
= get
<0>(fhr
);
799 rgw_fh
->create_stat(st
, mask
);
800 rgw_fh
->set_times(real_clock::now());
802 rgw_fh
->encode_attrs(ux_key
, ux_attrs
);
805 get
<0>(mkr
) = rgw_fh
;
811 /* need valid S3 name (characters, length <= 1024, etc) */
812 rc
= valid_fs_object_name(name
);
814 rgw_fh
->flags
|= RGWFileHandle::FLAG_DELETED
;
815 fh_cache
.remove(rgw_fh
->fh
.fh_hk
.object
, rgw_fh
,
816 RGWFileHandle::FHCache::FLAG_LOCK
);
817 rgw_fh
->mtx
.unlock();
819 get
<0>(mkr
) = nullptr;
824 string obj_name
= std::string(name
);
825 /* create an object representing the symbolic link */
831 buffer::create_static(len
, static_cast<char*>(buffer
)));
835 buffer::copy(link_path
, strlen(link_path
)));
838 RGWPutObjRequest
req(get_context(), user
->clone(), parent
->bucket_name(), obj_name
, bl
);
841 req
.emplace_attr(RGW_ATTR_UNIX_KEY1
, std::move(ux_key
));
842 req
.emplace_attr(RGW_ATTR_UNIX1
, std::move(ux_attrs
));
844 rc
= g_rgwlib
->get_fe()->execute_req(&req
);
849 rgw_fh
->flags
|= RGWFileHandle::FLAG_DELETED
;
850 rgw_fh
->mtx
.unlock(); /* !LOCKED */
852 get
<0>(mkr
) = nullptr;
857 real_time t
= real_clock::now();
858 parent
->set_mtime(real_clock::to_timespec(t
));
859 parent
->set_ctime(real_clock::to_timespec(t
));
860 rgw_fh
->mtx
.unlock(); /* !LOCKED */
866 } /* RGWLibFS::symlink */
868 int RGWLibFS::getattr(RGWFileHandle
* rgw_fh
, struct stat
* st
)
870 switch(rgw_fh
->fh
.fh_type
) {
871 case RGW_FS_TYPE_FILE
:
873 if (rgw_fh
->deleted())
880 /* if rgw_fh is a directory, mtime will be advanced */
881 return rgw_fh
->stat(st
);
882 } /* RGWLibFS::getattr */
884 int RGWLibFS::setattr(RGWFileHandle
* rgw_fh
, struct stat
* st
, uint32_t mask
,
888 buffer::list ux_key
, ux_attrs
;
889 buffer::list etag
= rgw_fh
->get_etag();
890 buffer::list acls
= rgw_fh
->get_acls();
892 lock_guard
guard(rgw_fh
->mtx
);
894 switch(rgw_fh
->fh
.fh_type
) {
895 case RGW_FS_TYPE_FILE
:
897 if (rgw_fh
->deleted())
905 string obj_name
{rgw_fh
->relative_object_name()};
907 if (rgw_fh
->is_dir() &&
908 (likely(! rgw_fh
->is_bucket()))) {
912 RGWSetAttrsRequest
req(cct
, user
->clone(), rgw_fh
->bucket_name(), obj_name
);
914 rgw_fh
->create_stat(st
, mask
);
915 rgw_fh
->encode_attrs(ux_key
, ux_attrs
);
918 req
.emplace_attr(RGW_ATTR_UNIX_KEY1
, std::move(ux_key
));
919 req
.emplace_attr(RGW_ATTR_UNIX1
, std::move(ux_attrs
));
920 req
.emplace_attr(RGW_ATTR_ETAG
, std::move(etag
));
921 req
.emplace_attr(RGW_ATTR_ACL
, std::move(acls
));
923 rc
= g_rgwlib
->get_fe()->execute_req(&req
);
927 /* special case: materialize placeholder dir */
929 RGWPutObjRequest
req(get_context(), user
->clone(), rgw_fh
->bucket_name(), obj_name
, bl
);
931 rgw_fh
->encode_attrs(ux_key
, ux_attrs
); /* because std::moved */
934 req
.emplace_attr(RGW_ATTR_UNIX_KEY1
, std::move(ux_key
));
935 req
.emplace_attr(RGW_ATTR_UNIX1
, std::move(ux_attrs
));
937 rc
= g_rgwlib
->get_fe()->execute_req(&req
);
941 if ((rc
!= 0) || (rc2
!= 0)) {
945 rgw_fh
->set_ctime(real_clock::to_timespec(real_clock::now()));
948 } /* RGWLibFS::setattr */
950 static inline std::string
prefix_xattr_keystr(const rgw_xattrstr
& key
) {
952 keystr
.reserve(sizeof(RGW_ATTR_META_PREFIX
) + key
.len
);
953 keystr
+= string
{RGW_ATTR_META_PREFIX
};
954 keystr
+= string
{key
.val
, key
.len
};
958 static inline std::string_view
unprefix_xattr_keystr(const std::string
& key
)
960 std::string_view svk
{key
};
961 auto pos
= svk
.find(RGW_ATTR_META_PREFIX
);
962 if (pos
== std::string_view::npos
) {
963 return std::string_view
{""};
964 } else if (pos
== 0) {
965 svk
.remove_prefix(sizeof(RGW_ATTR_META_PREFIX
)-1);
970 int RGWLibFS::getxattrs(RGWFileHandle
* rgw_fh
, rgw_xattrlist
*attrs
,
971 rgw_getxattr_cb cb
, void *cb_arg
,
974 /* cannot store on fs_root, should not on buckets? */
975 if ((rgw_fh
->is_bucket()) ||
976 (rgw_fh
->is_root())) {
981 string obj_name
{rgw_fh
->relative_object_name2()};
983 RGWGetAttrsRequest
req(cct
, user
->clone(), rgw_fh
->bucket_name(), obj_name
);
985 for (uint32_t ix
= 0; ix
< attrs
->xattr_cnt
; ++ix
) {
986 auto& xattr
= attrs
->xattrs
[ix
];
988 /* pass exposed attr keys as given, else prefix */
989 std::string k
= is_exposed_attr(xattr
.key
)
990 ? std::string
{xattr
.key
.val
, xattr
.key
.len
}
991 : prefix_xattr_keystr(xattr
.key
);
993 req
.emplace_key(std::move(k
));
996 if (ldlog_p1(get_context(), ceph_subsys_rgw
, 15)) {
997 lsubdout(get_context(), rgw
, 15)
1000 << rgw_fh
->object_name()
1003 for (const auto& attr
: req
.get_attrs()) {
1004 lsubdout(get_context(), rgw
, 15)
1005 << "\tkey: " << attr
.first
<< dendl
;
1009 rc
= g_rgwlib
->get_fe()->execute_req(&req
);
1010 rc2
= req
.get_ret();
1011 rc3
= ((rc
== 0) && (rc2
== 0)) ? 0 : -EIO
;
1013 /* call back w/xattr data */
1015 const auto& attrs
= req
.get_attrs();
1016 for (const auto& attr
: attrs
) {
1018 if (!attr
.second
.has_value())
1021 const auto& k
= attr
.first
;
1022 const auto& v
= attr
.second
.value();
1024 /* return exposed attr keys as given, else unprefix --
1025 * yes, we could have memoized the exposed check, but
1026 * to be efficient it would need to be saved with
1027 * RGWGetAttrs::attrs, I think */
1028 std::string_view svk
=
1029 is_exposed_attr(rgw_xattrstr
{const_cast<char*>(k
.c_str()),
1030 uint32_t(k
.length())})
1032 : unprefix_xattr_keystr(k
);
1034 /* skip entries not matching prefix */
1038 rgw_xattrstr xattr_k
= { const_cast<char*>(svk
.data()),
1039 uint32_t(svk
.length())};
1040 rgw_xattrstr xattr_v
=
1041 {const_cast<char*>(const_cast<buffer::list
&>(v
).c_str()),
1042 uint32_t(v
.length())};
1043 rgw_xattr xattr
= { xattr_k
, xattr_v
};
1044 rgw_xattrlist xattrlist
= { &xattr
, 1 };
1046 cb(&xattrlist
, cb_arg
, RGW_GETXATTR_FLAG_NONE
);
1051 } /* RGWLibFS::getxattrs */
1053 int RGWLibFS::lsxattrs(
1054 RGWFileHandle
* rgw_fh
, rgw_xattrstr
*filter_prefix
, rgw_getxattr_cb cb
,
1055 void *cb_arg
, uint32_t flags
)
1057 /* cannot store on fs_root, should not on buckets? */
1058 if ((rgw_fh
->is_bucket()) ||
1059 (rgw_fh
->is_root())) {
1064 string obj_name
{rgw_fh
->relative_object_name2()};
1066 RGWGetAttrsRequest
req(cct
, user
->clone(), rgw_fh
->bucket_name(), obj_name
);
1068 rc
= g_rgwlib
->get_fe()->execute_req(&req
);
1069 rc2
= req
.get_ret();
1070 rc3
= ((rc
== 0) && (rc2
== 0)) ? 0 : -EIO
;
1072 /* call back w/xattr data--check for eof */
1074 const auto& keys
= req
.get_attrs();
1075 for (const auto& k
: keys
) {
1077 /* return exposed attr keys as given, else unprefix */
1078 std::string_view svk
=
1079 is_exposed_attr(rgw_xattrstr
{const_cast<char*>(k
.first
.c_str()),
1080 uint32_t(k
.first
.length())})
1082 : unprefix_xattr_keystr(k
.first
);
1084 /* skip entries not matching prefix */
1088 rgw_xattrstr xattr_k
= { const_cast<char*>(svk
.data()),
1089 uint32_t(svk
.length())};
1090 rgw_xattrstr xattr_v
= { nullptr, 0 };
1091 rgw_xattr xattr
= { xattr_k
, xattr_v
};
1092 rgw_xattrlist xattrlist
= { &xattr
, 1 };
1094 auto cbr
= cb(&xattrlist
, cb_arg
, RGW_LSXATTR_FLAG_NONE
);
1095 if (cbr
& RGW_LSXATTR_FLAG_STOP
)
1101 } /* RGWLibFS::lsxattrs */
1103 int RGWLibFS::setxattrs(RGWFileHandle
* rgw_fh
, rgw_xattrlist
*attrs
,
1106 /* cannot store on fs_root, should not on buckets? */
1107 if ((rgw_fh
->is_bucket()) ||
1108 (rgw_fh
->is_root())) {
1113 string obj_name
{rgw_fh
->relative_object_name2()};
1115 RGWSetAttrsRequest
req(cct
, user
->clone(), rgw_fh
->bucket_name(), obj_name
);
1117 for (uint32_t ix
= 0; ix
< attrs
->xattr_cnt
; ++ix
) {
1118 auto& xattr
= attrs
->xattrs
[ix
];
1119 buffer::list attr_bl
;
1120 /* don't allow storing at RGW_ATTR_META_PREFIX */
1121 if (! (xattr
.key
.len
> 0))
1124 /* reject lexical match with any exposed attr */
1125 if (is_exposed_attr(xattr
.key
))
1128 string k
= prefix_xattr_keystr(xattr
.key
);
1129 attr_bl
.append(xattr
.val
.val
, xattr
.val
.len
);
1130 req
.emplace_attr(k
.c_str(), std::move(attr_bl
));
1133 /* don't send null requests */
1134 if (! (req
.get_attrs().size() > 0)) {
1138 rc
= g_rgwlib
->get_fe()->execute_req(&req
);
1139 rc2
= req
.get_ret();
1141 return (((rc
== 0) && (rc2
== 0)) ? 0 : -EIO
);
1143 } /* RGWLibFS::setxattrs */
1145 int RGWLibFS::rmxattrs(RGWFileHandle
* rgw_fh
, rgw_xattrlist
* attrs
,
1148 /* cannot store on fs_root, should not on buckets? */
1149 if ((rgw_fh
->is_bucket()) ||
1150 (rgw_fh
->is_root())) {
1155 string obj_name
{rgw_fh
->relative_object_name2()};
1157 RGWRMAttrsRequest
req(cct
, user
->clone(), rgw_fh
->bucket_name(), obj_name
);
1159 for (uint32_t ix
= 0; ix
< attrs
->xattr_cnt
; ++ix
) {
1160 auto& xattr
= attrs
->xattrs
[ix
];
1161 /* don't allow storing at RGW_ATTR_META_PREFIX */
1162 if (! (xattr
.key
.len
> 0)) {
1165 string k
= prefix_xattr_keystr(xattr
.key
);
1166 req
.emplace_key(std::move(k
));
1169 /* don't send null requests */
1170 if (! (req
.get_attrs().size() > 0)) {
1174 rc
= g_rgwlib
->get_fe()->execute_req(&req
);
1175 rc2
= req
.get_ret();
1177 return (((rc
== 0) && (rc2
== 0)) ? 0 : -EIO
);
1179 } /* RGWLibFS::rmxattrs */
1181 /* called with rgw_fh->mtx held */
1182 void RGWLibFS::update_fh(RGWFileHandle
*rgw_fh
)
1185 string obj_name
{rgw_fh
->relative_object_name()};
1186 buffer::list ux_key
, ux_attrs
;
1188 if (rgw_fh
->is_dir() &&
1189 (likely(! rgw_fh
->is_bucket()))) {
1193 lsubdout(get_context(), rgw
, 17)
1195 << " update old versioned fh : " << obj_name
1198 RGWSetAttrsRequest
req(cct
, user
->clone(), rgw_fh
->bucket_name(), obj_name
);
1200 rgw_fh
->encode_attrs(ux_key
, ux_attrs
, false);
1202 req
.emplace_attr(RGW_ATTR_UNIX_KEY1
, std::move(ux_key
));
1203 req
.emplace_attr(RGW_ATTR_UNIX1
, std::move(ux_attrs
));
1205 rc
= g_rgwlib
->get_fe()->execute_req(&req
);
1206 rc2
= req
.get_ret();
1208 if ((rc
!= 0) || (rc2
!= 0)) {
1209 lsubdout(get_context(), rgw
, 17)
1211 << " update fh failed : " << obj_name
1214 } /* RGWLibFS::update_fh */
1216 void RGWLibFS::close()
1218 state
.flags
|= FLAG_CLOSED
;
1224 explicit ObjUnref(RGWLibFS
* _fs
) : fs(_fs
) {}
1225 void operator()(RGWFileHandle
* fh
) const {
1226 lsubdout(fs
->get_context(), rgw
, 5)
1227 << __PRETTY_FUNCTION__
1229 << " before ObjUnref refs=" << fh
->get_refcnt()
1235 /* force cache drain, forces objects to evict */
1236 fh_cache
.drain(ObjUnref(this),
1237 RGWFileHandle::FHCache::FLAG_LOCK
);
1238 g_rgwlib
->get_fe()->get_process()->unregister_fs(this);
1240 } /* RGWLibFS::close */
1242 inline std::ostream
& operator<<(std::ostream
&os
, fh_key
const &fhk
) {
1243 os
<< "<fh_key: bucket=";
1244 os
<< fhk
.fh_hk
.bucket
;
1246 os
<< fhk
.fh_hk
.object
;
1251 inline std::ostream
& operator<<(std::ostream
&os
, struct timespec
const &ts
) {
1252 os
<< "<timespec: tv_sec=";
1260 std::ostream
& operator<<(std::ostream
&os
, RGWLibFS::event
const &ev
) {
1263 case RGWLibFS::event::type::READDIR
:
1264 os
<< "type=READDIR;";
1267 os
<< "type=UNKNOWN;";
1270 os
<< "fid=" << ev
.fhk
.fh_hk
.bucket
<< ":" << ev
.fhk
.fh_hk
.object
1271 << ";ts=" << ev
.ts
<< ">";
1278 using directory
= RGWFileHandle::directory
;
1280 /* dirent invalidate timeout--basically, the upper-bound on
1281 * inconsistency with the S3 namespace */
1283 = get_context()->_conf
->rgw_nfs_namespace_expire_secs
;
1285 /* max events to gc in one cycle */
1286 uint32_t max_ev
= get_context()->_conf
->rgw_nfs_max_gc
;
1288 struct timespec now
, expire_ts
;
1291 std::deque
<event
> &events
= state
.events
;
1294 (void) clock_gettime(CLOCK_MONOTONIC_COARSE
, &now
);
1295 lsubdout(get_context(), rgw
, 15)
1296 << "GC: top of expire loop"
1298 << " expire_s=" << expire_s
1301 lock_guard
guard(state
.mtx
); /* LOCKED */
1302 lsubdout(get_context(), rgw
, 15)
1304 << " count=" << events
.size()
1307 /* just return if no events */
1308 if (events
.empty()) {
1312 (events
.size() < 500) ? max_ev
: (events
.size() / 4);
1313 for (uint32_t ix
= 0; (ix
< _max_ev
) && (events
.size() > 0); ++ix
) {
1314 event
& ev
= events
.front();
1316 expire_ts
.tv_sec
+= expire_s
;
1317 if (expire_ts
> now
) {
1326 for (auto& ev
: ve
) {
1327 lsubdout(get_context(), rgw
, 15)
1328 << "try-expire ev: " << ev
<< dendl
;
1329 if (likely(ev
.t
== event::type::READDIR
)) {
1330 RGWFileHandle
* rgw_fh
= lookup_handle(ev
.fhk
.fh_hk
);
1331 lsubdout(get_context(), rgw
, 15)
1332 << "ev rgw_fh: " << rgw_fh
<< dendl
;
1334 RGWFileHandle::directory
* d
;
1335 if (unlikely(! rgw_fh
->is_dir())) {
1336 lsubdout(get_context(), rgw
, 0)
1338 << " BUG non-directory found with READDIR event "
1339 << "(" << rgw_fh
->bucket_name() << ","
1340 << rgw_fh
->object_name() << ")"
1344 /* maybe clear state */
1345 d
= get
<directory
>(&rgw_fh
->variant_type
);
1347 struct timespec ev_ts
= ev
.ts
;
1348 lock_guard
guard(rgw_fh
->mtx
);
1349 struct timespec d_last_readdir
= d
->last_readdir
;
1350 if (unlikely(ev_ts
< d_last_readdir
)) {
1351 /* readdir cycle in progress, don't invalidate */
1352 lsubdout(get_context(), rgw
, 15)
1353 << "GC: delay expiration for "
1354 << rgw_fh
->object_name()
1355 << " ev.ts=" << ev_ts
1356 << " last_readdir=" << d_last_readdir
1360 lsubdout(get_context(), rgw
, 15)
1362 << rgw_fh
->object_name()
1364 rgw_fh
->clear_state();
1365 rgw_fh
->invalidate();
1371 } /* event::type::READDIR */
1374 } while (! (stop
|| shutdown
));
1375 } /* RGWLibFS::gc */
1377 std::ostream
& operator<<(std::ostream
&os
,
1378 RGWFileHandle
const &rgw_fh
)
1380 const auto& fhk
= rgw_fh
.get_key();
1381 const auto& fh
= const_cast<RGWFileHandle
&>(rgw_fh
).get_fh();
1382 os
<< "<RGWFileHandle:";
1383 os
<< "addr=" << &rgw_fh
<< ";";
1384 switch (fh
->fh_type
) {
1385 case RGW_FS_TYPE_DIRECTORY
:
1386 os
<< "type=DIRECTORY;";
1388 case RGW_FS_TYPE_FILE
:
1392 os
<< "type=UNKNOWN;";
1395 os
<< "fid=" << fhk
.fh_hk
.bucket
<< ":" << fhk
.fh_hk
.object
<< ";";
1396 os
<< "name=" << rgw_fh
.object_name() << ";";
1397 os
<< "refcnt=" << rgw_fh
.get_refcnt() << ";";
1402 RGWFileHandle::~RGWFileHandle() {
1403 /* !recycle case, handle may STILL be in handle table, BUT
1404 * the partition lock is not held in this path */
1405 if (fh_hook
.is_linked()) {
1406 fs
->fh_cache
.remove(fh
.fh_hk
.object
, this, FHCache::FLAG_LOCK
);
1408 /* cond-unref parent */
1409 if (parent
&& (! parent
->is_mount())) {
1410 /* safe because if parent->unref causes its deletion,
1411 * there are a) by refcnt, no other objects/paths pointing
1412 * to it and b) by the semantics of valid iteration of
1413 * fh_lru (observed, e.g., by cohort_lru<T,...>::drain())
1414 * no unsafe iterators reaching it either--n.b., this constraint
1415 * is binding oncode which may in future attempt to e.g.,
1416 * cause the eviction of objects in LRU order */
1417 (void) get_fs()->unref(parent
);
1421 fh_key
RGWFileHandle::make_fhk(const std::string
& name
)
1423 std::string tenant
= get_fs()->get_user()->user_id
.to_str();
1425 /* S3 bucket -- assert mount-at-bucket case reaches here */
1426 return fh_key(name
, name
, tenant
);
1428 std::string key_name
= make_key_name(name
.c_str());
1429 return fh_key(fhk
.fh_hk
.bucket
, key_name
.c_str(), tenant
);
1433 void RGWFileHandle::encode_attrs(ceph::buffer::list
& ux_key1
,
1434 ceph::buffer::list
& ux_attrs1
,
1438 fh_key
fhk(this->fh
.fh_hk
);
1439 encode(fhk
, ux_key1
);
1440 bool need_ondisk_version
=
1441 (fh
.fh_type
== RGW_FS_TYPE_FILE
||
1442 fh
.fh_type
== RGW_FS_TYPE_SYMBOLIC_LINK
);
1443 if (need_ondisk_version
&&
1444 file_ondisk_version
< 0) {
1445 file_ondisk_version
= 0;
1447 encode(*this, ux_attrs1
);
1448 if (need_ondisk_version
&& inc_ov
) {
1449 file_ondisk_version
++;
1451 } /* RGWFileHandle::encode_attrs */
1453 DecodeAttrsResult
RGWFileHandle::decode_attrs(const ceph::buffer::list
* ux_key1
,
1454 const ceph::buffer::list
* ux_attrs1
)
1457 DecodeAttrsResult dar
{ false, false };
1459 auto bl_iter_key1
= ux_key1
->cbegin();
1460 decode(fhk
, bl_iter_key1
);
1463 // decode to a temporary file handle which may not be
1464 // copied to the current file handle if its file_ondisk_version
1466 RGWFileHandle
tmp_fh(fs
);
1467 tmp_fh
.fh
.fh_type
= fh
.fh_type
;
1468 auto bl_iter_unix1
= ux_attrs1
->cbegin();
1469 decode(tmp_fh
, bl_iter_unix1
);
1471 fh
.fh_type
= tmp_fh
.fh
.fh_type
;
1472 // for file handles that represent files and whose file_ondisk_version
1473 // is newer, no updates are need, otherwise, go updating the current
1475 if (!((fh
.fh_type
== RGW_FS_TYPE_FILE
||
1476 fh
.fh_type
== RGW_FS_TYPE_SYMBOLIC_LINK
) &&
1477 file_ondisk_version
>= tmp_fh
.file_ondisk_version
)) {
1478 // make sure the following "encode" always encode a greater version
1479 file_ondisk_version
= tmp_fh
.file_ondisk_version
+ 1;
1480 state
.dev
= tmp_fh
.state
.dev
;
1481 state
.size
= tmp_fh
.state
.size
;
1482 state
.nlink
= tmp_fh
.state
.nlink
;
1483 state
.owner_uid
= tmp_fh
.state
.owner_uid
;
1484 state
.owner_gid
= tmp_fh
.state
.owner_gid
;
1485 state
.unix_mode
= tmp_fh
.state
.unix_mode
;
1486 state
.ctime
= tmp_fh
.state
.ctime
;
1487 state
.mtime
= tmp_fh
.state
.mtime
;
1488 state
.atime
= tmp_fh
.state
.atime
;
1489 state
.version
= tmp_fh
.state
.version
;
1492 if (this->state
.version
< 2) {
1497 } /* RGWFileHandle::decode_attrs */
1499 bool RGWFileHandle::reclaim(const cohort::lru::ObjectFactory
* newobj_fac
) {
1500 lsubdout(fs
->get_context(), rgw
, 17)
1501 << __func__
<< " " << *this
1503 auto factory
= dynamic_cast<const RGWFileHandle::Factory
*>(newobj_fac
);
1504 if (factory
== nullptr) {
1507 /* make sure the reclaiming object is the same partiton with newobject factory,
1508 * then we can recycle the object, and replace with newobject */
1509 if (!fs
->fh_cache
.is_same_partition(factory
->fhk
.fh_hk
.object
, fh
.fh_hk
.object
)) {
1512 /* in the non-delete case, handle may still be in handle table */
1513 if (fh_hook
.is_linked()) {
1514 /* in this case, we are being called from a context which holds
1515 * the partition lock */
1516 fs
->fh_cache
.remove(fh
.fh_hk
.object
, this, FHCache::FLAG_NONE
);
1519 } /* RGWFileHandle::reclaim */
1521 bool RGWFileHandle::has_children() const
1523 if (unlikely(! is_dir()))
1526 RGWRMdirCheck
req(fs
->get_context(),
1527 g_rgwlib
->get_driver()->get_user(fs
->get_user()->user_id
),
1529 int rc
= g_rgwlib
->get_fe()->execute_req(&req
);
1531 return req
.valid
&& req
.has_children
;
1537 std::ostream
& operator<<(std::ostream
&os
,
1538 RGWFileHandle::readdir_offset
const &offset
)
1541 if (unlikely(!! get
<uint64_t*>(&offset
))) {
1542 uint64_t* ioff
= get
<uint64_t*>(offset
);
1546 os
<< get
<const char*>(offset
);
1550 int RGWFileHandle::readdir(rgw_readdir_cb rcb
, void *cb_arg
,
1551 readdir_offset offset
,
1552 bool *eof
, uint32_t flags
)
1554 using event
= RGWLibFS::event
;
1557 struct timespec now
;
1558 CephContext
* cct
= fs
->get_context();
1560 lsubdout(cct
, rgw
, 10)
1561 << __func__
<< " readdir called on "
1565 directory
* d
= get
<directory
>(&variant_type
);
1567 (void) clock_gettime(CLOCK_MONOTONIC_COARSE
, &now
); /* !LOCKED */
1568 lock_guard
guard(mtx
);
1569 d
->last_readdir
= now
;
1575 if (likely(!! get
<const char*>(&offset
))) {
1576 mk
= const_cast<char*>(get
<const char*>(offset
));
1579 initial_off
= (*get
<uint64_t*>(offset
) == 0);
1583 RGWListBucketsRequest
req(cct
, g_rgwlib
->get_driver()->get_user(fs
->get_user()->user_id
),
1584 this, rcb
, cb_arg
, offset
);
1585 rc
= g_rgwlib
->get_fe()->execute_req(&req
);
1587 (void) clock_gettime(CLOCK_MONOTONIC_COARSE
, &now
); /* !LOCKED */
1588 lock_guard
guard(mtx
);
1592 inc_nlink(req
.d_count
);
1596 RGWReaddirRequest
req(cct
, g_rgwlib
->get_driver()->get_user(fs
->get_user()->user_id
),
1597 this, rcb
, cb_arg
, offset
);
1598 rc
= g_rgwlib
->get_fe()->execute_req(&req
);
1600 (void) clock_gettime(CLOCK_MONOTONIC_COARSE
, &now
); /* !LOCKED */
1601 lock_guard
guard(mtx
);
1605 inc_nlink(req
.d_count
);
1610 event
ev(event::type::READDIR
, get_key(), state
.atime
);
1611 lock_guard
sguard(fs
->state
.mtx
);
1612 fs
->state
.push_event(ev
);
1614 lsubdout(fs
->get_context(), rgw
, 15)
1616 << " final link count=" << state
.nlink
1620 } /* RGWFileHandle::readdir */
1622 int RGWFileHandle::write(uint64_t off
, size_t len
, size_t *bytes_written
,
1626 using WriteCompletion
= RGWLibFS::WriteCompletion
;
1628 lock_guard
guard(mtx
);
1632 file
* f
= get
<file
>(&variant_type
);
1637 lsubdout(fs
->get_context(), rgw
, 5)
1639 << " write attempted on deleted object "
1640 << this->object_name()
1642 /* zap write transaction, if any */
1644 delete f
->write_req
;
1645 f
->write_req
= nullptr;
1650 if (! f
->write_req
) {
1651 /* guard--we do not support (e.g., COW-backed) partial writes */
1653 lsubdout(fs
->get_context(), rgw
, 5)
1655 << " " << object_name()
1656 << " non-0 initial write position " << off
1657 << " (mounting with -o sync required)"
1662 const RGWProcessEnv
& penv
= g_rgwlib
->get_fe()->get_process()->get_env();
1665 std::string object_name
= relative_object_name();
1667 new RGWWriteRequest(g_rgwlib
->get_driver(), penv
,
1668 g_rgwlib
->get_driver()->get_user(fs
->get_user()->user_id
),
1669 this, bucket_name(), object_name
);
1670 rc
= g_rgwlib
->get_fe()->start_req(f
->write_req
);
1672 lsubdout(fs
->get_context(), rgw
, 5)
1674 << this->object_name()
1675 << " write start failed " << off
1676 << " (" << rc
<< ")"
1678 /* zap failed write transaction */
1679 delete f
->write_req
;
1680 f
->write_req
= nullptr;
1683 if (stateless_open()) {
1684 /* start write timer */
1685 f
->write_req
->timer_id
=
1686 RGWLibFS::write_timer
.add_event(
1687 std::chrono::seconds(RGWLibFS::write_completion_interval_s
),
1688 WriteCompletion(*this));
1694 if ((static_cast<off_t
>(off
) < f
->write_req
->real_ofs
) &&
1695 ((f
->write_req
->real_ofs
- off
) <= len
)) {
1696 overlap
= f
->write_req
->real_ofs
- off
;
1697 off
= f
->write_req
->real_ofs
;
1698 buffer
= static_cast<char*>(buffer
) + overlap
;
1706 buffer::create_static(len
, static_cast<char*>(buffer
)));
1709 buffer::copy(static_cast<char*>(buffer
), len
));
1712 f
->write_req
->put_data(off
, bl
);
1713 rc
= f
->write_req
->exec_continue();
1716 size_t min_size
= off
+ len
;
1717 if (min_size
> get_size())
1719 if (stateless_open()) {
1720 /* bump write timer */
1721 RGWLibFS::write_timer
.adjust_event(
1722 f
->write_req
->timer_id
, std::chrono::seconds(10));
1725 /* continuation failed (e.g., non-contiguous write position) */
1726 lsubdout(fs
->get_context(), rgw
, 5)
1729 << " failed write at position " << off
1730 << " (fails write transaction) "
1732 /* zap failed write transaction */
1733 delete f
->write_req
;
1734 f
->write_req
= nullptr;
1738 *bytes_written
= (rc
== 0) ? (len
+ overlap
) : 0;
1740 } /* RGWFileHandle::write */
1742 int RGWFileHandle::write_finish(uint32_t flags
)
1744 unique_lock guard
{mtx
, std::defer_lock
};
1747 if (! (flags
& FLAG_LOCKED
)) {
1751 file
* f
= get
<file
>(&variant_type
);
1752 if (f
&& (f
->write_req
)) {
1753 lsubdout(fs
->get_context(), rgw
, 10)
1755 << " finishing write trans on " << object_name()
1757 rc
= g_rgwlib
->get_fe()->finish_req(f
->write_req
);
1759 rc
= f
->write_req
->get_ret();
1761 delete f
->write_req
;
1762 f
->write_req
= nullptr;
1766 } /* RGWFileHandle::write_finish */
1768 int RGWFileHandle::close()
1770 lock_guard
guard(mtx
);
1772 int rc
= write_finish(FLAG_LOCKED
);
1774 flags
&= ~FLAG_OPEN
;
1775 flags
&= ~FLAG_STATELESS_OPEN
;
1778 } /* RGWFileHandle::close */
1780 RGWFileHandle::file::~file()
1785 void RGWFileHandle::clear_state()
1787 directory
* d
= get
<directory
>(&variant_type
);
1790 d
->last_marker
= rgw_obj_key
{};
1794 void RGWFileHandle::advance_mtime(uint32_t flags
) {
1795 /* intended for use on directories, fast-forward mtime so as to
1796 * ensure a new, higher value for the change attribute */
1797 unique_lock
uniq(mtx
, std::defer_lock
);
1798 if (likely(! (flags
& RGWFileHandle::FLAG_LOCKED
))) {
1802 /* advance mtime only if stored mtime is older than the
1803 * configured namespace expiration */
1804 auto now
= real_clock::now();
1805 auto cmptime
= state
.mtime
;
1807 fs
->get_context()->_conf
->rgw_nfs_namespace_expire_secs
;
1808 if (cmptime
< real_clock::to_timespec(now
)) {
1809 /* sets ctime as well as mtime, to avoid masking updates should
1810 * ctime inexplicably hold a higher value */
1815 void RGWFileHandle::invalidate() {
1816 RGWLibFS
*fs
= get_fs();
1817 if (fs
->invalidate_cb
) {
1818 fs
->invalidate_cb(fs
->invalidate_arg
, get_key().fh_hk
);
1822 int RGWWriteRequest::exec_start() {
1823 req_state
* state
= get_state();
1825 /* Object needs a bucket from this point */
1826 state
->object
->set_bucket(state
->bucket
.get());
1828 auto compression_type
=
1829 get_driver()->get_compression_type(state
->bucket
->get_placement_rule());
1831 /* not obviously supportable */
1832 ceph_assert(! dlo_manifest
);
1833 ceph_assert(! slo_info
);
1835 perfcounter
->inc(l_rgw_put
);
1838 if (state
->object
->empty()) {
1839 ldout(state
->cct
, 0) << __func__
<< " called on empty object" << dendl
;
1843 op_ret
= get_params(null_yield
);
1847 op_ret
= get_system_versioning_params(state
, &olh_epoch
, &version_id
);
1852 /* user-supplied MD5 check skipped (not supplied) */
1853 /* early quota check skipped--we don't have size yet */
1854 /* skipping user-supplied etag--we might have one in future, but
1855 * like data it and other attrs would arrive after open */
1857 aio
.emplace(state
->cct
->_conf
->rgw_put_obj_min_window_size
);
1859 if (state
->bucket
->versioning_enabled()) {
1860 if (!version_id
.empty()) {
1861 state
->object
->set_instance(version_id
);
1863 state
->object
->gen_rand_obj_instance_name();
1864 version_id
= state
->object
->get_instance();
1867 processor
= get_driver()->get_atomic_writer(this, state
->yield
, state
->object
.get(),
1868 state
->bucket_owner
.get_id(),
1869 &state
->dest_placement
, 0, state
->req_id
);
1871 op_ret
= processor
->prepare(state
->yield
);
1873 ldout(state
->cct
, 20) << "processor->prepare() returned ret=" << op_ret
1877 filter
= &*processor
;
1878 if (compression_type
!= "none") {
1879 plugin
= Compressor::create(state
->cct
, compression_type
);
1881 ldout(state
->cct
, 1) << "Cannot load plugin for rgw_compression_type "
1882 << compression_type
<< dendl
;
1884 compressor
.emplace(state
->cct
, plugin
, filter
);
1885 filter
= &*compressor
;
1893 int RGWWriteRequest::exec_continue()
1895 req_state
* state
= get_state();
1898 /* check guards (e.g., contig write) */
1900 ldout(state
->cct
, 5)
1901 << " chunks arrived in wrong order"
1902 << " (mounting with -o sync required)"
1907 op_ret
= state
->bucket
->check_quota(this, quota
, real_ofs
, null_yield
, true);
1908 /* max_size exceed */
1912 size_t len
= data
.length();
1916 hash
.Update((const unsigned char *)data
.c_str(), data
.length());
1917 op_ret
= filter
->process(std::move(data
), ofs
);
1921 bytes_written
+= len
;
1925 } /* exec_continue */
1927 int RGWWriteRequest::exec_finish()
1929 buffer::list bl
, aclbl
, ux_key
, ux_attrs
;
1930 map
<string
, string
>::iterator iter
;
1931 char calc_md5
[CEPH_CRYPTO_MD5_DIGESTSIZE
* 2 + 1];
1932 unsigned char m
[CEPH_CRYPTO_MD5_DIGESTSIZE
];
1933 req_state
* state
= get_state();
1935 size_t osize
= rgw_fh
->get_size();
1936 struct timespec octime
= rgw_fh
->get_ctime();
1937 struct timespec omtime
= rgw_fh
->get_mtime();
1938 real_time appx_t
= real_clock::now();
1940 state
->obj_size
= bytes_written
;
1941 perfcounter
->inc(l_rgw_put_b
, state
->obj_size
);
1943 // flush data in filters
1944 op_ret
= filter
->process({}, state
->obj_size
);
1949 op_ret
= state
->bucket
->check_quota(this, quota
, state
->obj_size
, null_yield
, true);
1950 /* max_size exceed */
1957 if (compressor
&& compressor
->is_compressed()) {
1959 RGWCompressionInfo cs_info
;
1960 cs_info
.compression_type
= plugin
->get_type_name();
1961 cs_info
.orig_size
= state
->obj_size
;
1962 cs_info
.blocks
= std::move(compressor
->get_compression_blocks());
1963 encode(cs_info
, tmp
);
1964 attrs
[RGW_ATTR_COMPRESSION
] = tmp
;
1965 ldpp_dout(this, 20) << "storing " << RGW_ATTR_COMPRESSION
1966 << " with type=" << cs_info
.compression_type
1967 << ", orig_size=" << cs_info
.orig_size
1968 << ", blocks=" << cs_info
.blocks
.size() << dendl
;
1971 buf_to_hex(m
, CEPH_CRYPTO_MD5_DIGESTSIZE
, calc_md5
);
1974 bl
.append(etag
.c_str(), etag
.size() + 1);
1975 emplace_attr(RGW_ATTR_ETAG
, std::move(bl
));
1977 policy
.encode(aclbl
);
1978 emplace_attr(RGW_ATTR_ACL
, std::move(aclbl
));
1981 rgw_fh
->set_mtime(real_clock::to_timespec(appx_t
));
1982 rgw_fh
->set_ctime(real_clock::to_timespec(appx_t
));
1983 rgw_fh
->set_size(bytes_written
);
1984 rgw_fh
->encode_attrs(ux_key
, ux_attrs
);
1986 emplace_attr(RGW_ATTR_UNIX_KEY1
, std::move(ux_key
));
1987 emplace_attr(RGW_ATTR_UNIX1
, std::move(ux_attrs
));
1989 for (iter
= state
->generic_attrs
.begin(); iter
!= state
->generic_attrs
.end();
1991 buffer::list
& attrbl
= attrs
[iter
->first
];
1992 const string
& val
= iter
->second
;
1993 attrbl
.append(val
.c_str(), val
.size() + 1);
1996 op_ret
= rgw_get_request_metadata(this, state
->cct
, state
->info
, attrs
);
2000 encode_delete_at_attr(delete_at
, attrs
);
2002 /* Add a custom metadata to expose the information whether an object
2003 * is an SLO or not. Appending the attribute must be performed AFTER
2004 * processing any input from user in order to prohibit overwriting. */
2005 if (unlikely(!! slo_info
)) {
2006 buffer::list slo_userindicator_bl
;
2008 encode("True", slo_userindicator_bl
);
2009 emplace_attr(RGW_ATTR_SLO_UINDICATOR
, std::move(slo_userindicator_bl
));
2012 op_ret
= processor
->complete(state
->obj_size
, etag
, &mtime
, real_time(), attrs
,
2013 (delete_at
? *delete_at
: real_time()),
2014 if_match
, if_nomatch
, nullptr, nullptr, nullptr,
2015 state
->yield
, rgw::sal::FLAG_LOG_OP
);
2017 /* revert attr updates */
2018 rgw_fh
->set_mtime(omtime
);
2019 rgw_fh
->set_ctime(octime
);
2020 rgw_fh
->set_size(osize
);
2024 perfcounter
->tinc(l_rgw_put_lat
, state
->time_elapsed());
2028 } /* namespace rgw */
2033 void rgwfile_version(int *major
, int *minor
, int *extra
)
2036 *major
= LIBRGW_FILE_VER_MAJOR
;
2038 *minor
= LIBRGW_FILE_VER_MINOR
;
2040 *extra
= LIBRGW_FILE_VER_EXTRA
;
2044 attach rgw namespace
2046 int rgw_mount(librgw_t rgw
, const char *uid
, const char *acc_key
,
2047 const char *sec_key
, struct rgw_fs
**rgw_fs
,
2052 /* stash access data for "mount" */
2053 RGWLibFS
* new_fs
= new RGWLibFS(static_cast<CephContext
*>(rgw
), uid
, acc_key
,
2055 ceph_assert(new_fs
);
2057 const DoutPrefix
dp(g_rgwlib
->get_driver()->ctx(), dout_subsys
, "rgw mount: ");
2058 rc
= new_fs
->authorize(&dp
, g_rgwlib
->get_driver());
2064 /* register fs for shared gc */
2065 g_rgwlib
->get_fe()->get_process()->register_fs(new_fs
);
2067 struct rgw_fs
*fs
= new_fs
->get_fs();
2070 /* XXX we no longer assume "/" is unique, but we aren't tracking the
2078 int rgw_mount2(librgw_t rgw
, const char *uid
, const char *acc_key
,
2079 const char *sec_key
, const char *root
, struct rgw_fs
**rgw_fs
,
2084 /* if the config has no value for path/root, choose "/" */
2085 RGWLibFS
* new_fs
{nullptr};
2087 (!strcmp(root
, ""))) {
2088 /* stash access data for "mount" */
2089 new_fs
= new RGWLibFS(
2090 static_cast<CephContext
*>(rgw
), uid
, acc_key
, sec_key
, "/");
2093 /* stash access data for "mount" */
2094 new_fs
= new RGWLibFS(
2095 static_cast<CephContext
*>(rgw
), uid
, acc_key
, sec_key
, root
);
2098 ceph_assert(new_fs
); /* should we be using ceph_assert? */
2100 const DoutPrefix
dp(g_rgwlib
->get_driver()->ctx(), dout_subsys
, "rgw mount2: ");
2101 rc
= new_fs
->authorize(&dp
, g_rgwlib
->get_driver());
2107 /* register fs for shared gc */
2108 g_rgwlib
->get_fe()->get_process()->register_fs(new_fs
);
2110 struct rgw_fs
*fs
= new_fs
->get_fs();
2113 /* XXX we no longer assume "/" is unique, but we aren't tracking the
2122 register invalidate callbacks
2124 int rgw_register_invalidate(struct rgw_fs
*rgw_fs
, rgw_fh_callback_t cb
,
2125 void *arg
, uint32_t flags
)
2128 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2129 return fs
->register_invalidate(cb
, arg
, flags
);
2133 detach rgw namespace
2135 int rgw_umount(struct rgw_fs
*rgw_fs
, uint32_t flags
)
2137 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2143 get filesystem attributes
2145 int rgw_statfs(struct rgw_fs
*rgw_fs
,
2146 struct rgw_file_handle
*parent_fh
,
2147 struct rgw_statvfs
*vfs_st
, uint32_t flags
)
2149 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2150 struct rados_cluster_stat_t stats
;
2152 RGWGetClusterStatReq
req(fs
->get_context(),
2153 g_rgwlib
->get_driver()->get_user(fs
->get_user()->user_id
),
2155 int rc
= g_rgwlib
->get_fe()->execute_req(&req
);
2157 lderr(fs
->get_context()) << "ERROR: getting total cluster usage"
2158 << cpp_strerror(-rc
) << dendl
;
2162 //Set block size to 1M.
2163 constexpr uint32_t CEPH_BLOCK_SHIFT
= 20;
2164 vfs_st
->f_bsize
= 1 << CEPH_BLOCK_SHIFT
;
2165 vfs_st
->f_frsize
= 1 << CEPH_BLOCK_SHIFT
;
2166 vfs_st
->f_blocks
= stats
.kb
>> (CEPH_BLOCK_SHIFT
- 10);
2167 vfs_st
->f_bfree
= stats
.kb_avail
>> (CEPH_BLOCK_SHIFT
- 10);
2168 vfs_st
->f_bavail
= stats
.kb_avail
>> (CEPH_BLOCK_SHIFT
- 10);
2169 vfs_st
->f_files
= stats
.num_objects
;
2170 vfs_st
->f_ffree
= -1;
2171 vfs_st
->f_fsid
[0] = fs
->get_fsid();
2172 vfs_st
->f_fsid
[1] = fs
->get_fsid();
2174 vfs_st
->f_namemax
= 4096;
2179 generic create -- create an empty regular file
2181 int rgw_create(struct rgw_fs
*rgw_fs
, struct rgw_file_handle
*parent_fh
,
2182 const char *name
, struct stat
*st
, uint32_t mask
,
2183 struct rgw_file_handle
**fh
, uint32_t posix_flags
,
2188 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2189 RGWFileHandle
* parent
= get_rgwfh(parent_fh
);
2192 (parent
->is_root()) ||
2193 (parent
->is_file())) {
2198 MkObjResult fhr
= fs
->create(parent
, name
, st
, mask
, flags
);
2199 RGWFileHandle
*nfh
= get
<0>(fhr
); // nullptr if !success
2202 *fh
= nfh
->get_fh();
2208 create a symbolic link
2210 int rgw_symlink(struct rgw_fs
*rgw_fs
, struct rgw_file_handle
*parent_fh
,
2211 const char *name
, const char *link_path
, struct stat
*st
, uint32_t mask
,
2212 struct rgw_file_handle
**fh
, uint32_t posix_flags
,
2217 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2218 RGWFileHandle
* parent
= get_rgwfh(parent_fh
);
2221 (parent
->is_root()) ||
2222 (parent
->is_file())) {
2227 MkObjResult fhr
= fs
->symlink(parent
, name
, link_path
, st
, mask
, flags
);
2228 RGWFileHandle
*nfh
= get
<0>(fhr
); // nullptr if !success
2231 *fh
= nfh
->get_fh();
2237 create a new directory
2239 int rgw_mkdir(struct rgw_fs
*rgw_fs
,
2240 struct rgw_file_handle
*parent_fh
,
2241 const char *name
, struct stat
*st
, uint32_t mask
,
2242 struct rgw_file_handle
**fh
, uint32_t flags
)
2246 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2247 RGWFileHandle
* parent
= get_rgwfh(parent_fh
);
2254 MkObjResult fhr
= fs
->mkdir(parent
, name
, st
, mask
, flags
);
2255 RGWFileHandle
*nfh
= get
<0>(fhr
); // nullptr if !success
2258 *fh
= nfh
->get_fh();
2266 int rgw_rename(struct rgw_fs
*rgw_fs
,
2267 struct rgw_file_handle
*src
, const char* src_name
,
2268 struct rgw_file_handle
*dst
, const char* dst_name
,
2271 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2273 RGWFileHandle
* src_fh
= get_rgwfh(src
);
2274 RGWFileHandle
* dst_fh
= get_rgwfh(dst
);
2276 return fs
->rename(src_fh
, dst_fh
, src_name
, dst_name
);
2280 remove file or directory
2282 int rgw_unlink(struct rgw_fs
*rgw_fs
, struct rgw_file_handle
*parent_fh
,
2283 const char *name
, uint32_t flags
)
2285 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2286 RGWFileHandle
* parent
= get_rgwfh(parent_fh
);
2288 return fs
->unlink(parent
, name
);
2292 lookup object by name (POSIX style)
2294 int rgw_lookup(struct rgw_fs
*rgw_fs
,
2295 struct rgw_file_handle
*parent_fh
, const char* path
,
2296 struct rgw_file_handle
**fh
,
2297 struct stat
*st
, uint32_t mask
, uint32_t flags
)
2299 //CephContext* cct = static_cast<CephContext*>(rgw_fs->rgw);
2300 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2302 RGWFileHandle
* parent
= get_rgwfh(parent_fh
);
2304 (! parent
->is_dir())) {
2309 RGWFileHandle
* rgw_fh
;
2312 if (parent
->is_root()) {
2313 /* special: parent lookup--note lack of ref()! */
2314 if (unlikely((strcmp(path
, "..") == 0) ||
2315 (strcmp(path
, "/") == 0))) {
2318 RGWLibFS::BucketStats bstat
;
2319 fhr
= fs
->stat_bucket(parent
, path
, bstat
, RGWFileHandle::FLAG_NONE
);
2320 rgw_fh
= get
<0>(fhr
);
2325 /* special: after readdir--note extra ref()! */
2326 if (unlikely((strcmp(path
, "..") == 0))) {
2328 lsubdout(fs
->get_context(), rgw
, 17)
2329 << __func__
<< " BANG"<< *rgw_fh
2333 enum rgw_fh_type fh_type
= fh_type_of(flags
);
2335 uint32_t sl_flags
= (flags
& RGW_LOOKUP_FLAG_RCB
)
2336 ? RGWFileHandle::FLAG_IN_CB
2337 : RGWFileHandle::FLAG_EXACT_MATCH
;
2339 bool fast_attrs
= fs
->get_context()->_conf
->rgw_nfs_s3_fast_attrs
;
2341 if ((flags
& RGW_LOOKUP_FLAG_RCB
) && fast_attrs
) {
2342 /* FAKE STAT--this should mean, interpolate special
2343 * owner, group, and perms masks */
2344 fhr
= fs
->fake_leaf(parent
, path
, fh_type
, st
, mask
, sl_flags
);
2346 if ((fh_type
== RGW_FS_TYPE_DIRECTORY
) && fast_attrs
) {
2347 /* trust cached dir, if present */
2348 fhr
= fs
->lookup_fh(parent
, path
, RGWFileHandle::FLAG_DIRECTORY
);
2350 rgw_fh
= get
<0>(fhr
);
2354 fhr
= fs
->stat_leaf(parent
, path
, fh_type
, sl_flags
);
2356 if (! get
<0>(fhr
)) {
2357 if (! (flags
& RGW_LOOKUP_FLAG_CREATE
))
2360 fhr
= fs
->lookup_fh(parent
, path
, RGWFileHandle::FLAG_CREATE
);
2362 rgw_fh
= get
<0>(fhr
);
2367 struct rgw_file_handle
*rfh
= rgw_fh
->get_fh();
2374 lookup object by handle (NFS style)
2376 int rgw_lookup_handle(struct rgw_fs
*rgw_fs
, struct rgw_fh_hk
*fh_hk
,
2377 struct rgw_file_handle
**fh
, uint32_t flags
)
2379 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2381 RGWFileHandle
* rgw_fh
= fs
->lookup_handle(*fh_hk
);
2387 struct rgw_file_handle
*rfh
= rgw_fh
->get_fh();
2394 * release file handle
2396 int rgw_fh_rele(struct rgw_fs
*rgw_fs
, struct rgw_file_handle
*fh
,
2399 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2400 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2402 lsubdout(fs
->get_context(), rgw
, 17)
2403 << __func__
<< " " << *rgw_fh
2411 get unix attributes for object
2413 int rgw_getattr(struct rgw_fs
*rgw_fs
,
2414 struct rgw_file_handle
*fh
, struct stat
*st
, uint32_t flags
)
2416 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2417 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2419 return fs
->getattr(rgw_fh
, st
);
2423 set unix attributes for object
2425 int rgw_setattr(struct rgw_fs
*rgw_fs
,
2426 struct rgw_file_handle
*fh
, struct stat
*st
,
2427 uint32_t mask
, uint32_t flags
)
2429 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2430 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2432 return fs
->setattr(rgw_fh
, st
, mask
, flags
);
2438 int rgw_truncate(struct rgw_fs
*rgw_fs
,
2439 struct rgw_file_handle
*fh
, uint64_t size
, uint32_t flags
)
2447 int rgw_open(struct rgw_fs
*rgw_fs
,
2448 struct rgw_file_handle
*fh
, uint32_t posix_flags
, uint32_t flags
)
2450 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2453 * need to track specific opens--at least read opens and
2454 * a write open; we need to know when a write open is returned,
2455 * that closes a write transaction
2457 * for now, we will support single-open only, it's preferable to
2458 * anything we can otherwise do without access to the NFS state
2460 if (! rgw_fh
->is_file())
2463 return rgw_fh
->open(flags
);
2469 int rgw_close(struct rgw_fs
*rgw_fs
,
2470 struct rgw_file_handle
*fh
, uint32_t flags
)
2472 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2473 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2474 int rc
= rgw_fh
->close(/* XXX */);
2476 if (flags
& RGW_CLOSE_FLAG_RELE
)
2482 int rgw_readdir(struct rgw_fs
*rgw_fs
,
2483 struct rgw_file_handle
*parent_fh
, uint64_t *offset
,
2484 rgw_readdir_cb rcb
, void *cb_arg
, bool *eof
,
2487 RGWFileHandle
* parent
= get_rgwfh(parent_fh
);
2493 lsubdout(parent
->get_fs()->get_context(), rgw
, 15)
2495 << " offset=" << *offset
2498 if ((*offset
== 0) &&
2499 (flags
& RGW_READDIR_FLAG_DOTDOT
)) {
2500 /* send '.' and '..' with their NFS-defined offsets */
2501 rcb(".", cb_arg
, 1, nullptr, 0, RGW_LOOKUP_FLAG_DIR
);
2502 rcb("..", cb_arg
, 2, nullptr, 0, RGW_LOOKUP_FLAG_DIR
);
2505 int rc
= parent
->readdir(rcb
, cb_arg
, offset
, eof
, flags
);
2509 /* enumeration continuing from name */
2510 int rgw_readdir2(struct rgw_fs
*rgw_fs
,
2511 struct rgw_file_handle
*parent_fh
, const char *name
,
2512 rgw_readdir_cb rcb
, void *cb_arg
, bool *eof
,
2515 RGWFileHandle
* parent
= get_rgwfh(parent_fh
);
2521 lsubdout(parent
->get_fs()->get_context(), rgw
, 15)
2523 << " offset=" << ((name
) ? name
: "(nil)")
2527 (flags
& RGW_READDIR_FLAG_DOTDOT
)) {
2528 /* send '.' and '..' with their NFS-defined offsets */
2529 rcb(".", cb_arg
, 1, nullptr, 0, RGW_LOOKUP_FLAG_DIR
);
2530 rcb("..", cb_arg
, 2, nullptr, 0, RGW_LOOKUP_FLAG_DIR
);
2533 int rc
= parent
->readdir(rcb
, cb_arg
, name
, eof
, flags
);
2535 } /* rgw_readdir2 */
2537 /* project offset of dirent name */
2538 int rgw_dirent_offset(struct rgw_fs
*rgw_fs
,
2539 struct rgw_file_handle
*parent_fh
,
2540 const char *name
, int64_t *offset
,
2543 RGWFileHandle
* parent
= get_rgwfh(parent_fh
);
2548 std::string sname
{name
};
2549 int rc
= parent
->offset_of(sname
, offset
, flags
);
2556 int rgw_read(struct rgw_fs
*rgw_fs
,
2557 struct rgw_file_handle
*fh
, uint64_t offset
,
2558 size_t length
, size_t *bytes_read
, void *buffer
,
2561 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2562 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2564 return fs
->read(rgw_fh
, offset
, length
, bytes_read
, buffer
, flags
);
2570 int rgw_readlink(struct rgw_fs
*rgw_fs
,
2571 struct rgw_file_handle
*fh
, uint64_t offset
,
2572 size_t length
, size_t *bytes_read
, void *buffer
,
2575 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2576 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2578 return fs
->readlink(rgw_fh
, offset
, length
, bytes_read
, buffer
, flags
);
2584 int rgw_write(struct rgw_fs
*rgw_fs
,
2585 struct rgw_file_handle
*fh
, uint64_t offset
,
2586 size_t length
, size_t *bytes_written
, void *buffer
,
2589 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2594 if (! rgw_fh
->is_file())
2597 if (! rgw_fh
->is_open()) {
2598 if (flags
& RGW_OPEN_FLAG_V3
) {
2599 rc
= rgw_fh
->open(flags
);
2606 rc
= rgw_fh
->write(offset
, length
, bytes_written
, buffer
);
2612 read data from file (vector)
2617 struct rgw_vio
* vio
;
2620 RGWReadV(buffer::list
& _bl
, rgw_vio
* _vio
) : vio(_vio
) {
2621 bl
= std::move(_bl
);
2624 struct rgw_vio
* get_vio() { return vio
; }
2626 const auto& buffers() { return bl
.buffers(); }
2628 unsigned /* XXX */ length() { return bl
.length(); }
2632 void rgw_readv_rele(struct rgw_uio
*uio
, uint32_t flags
)
2634 RGWReadV
* rdv
= static_cast<RGWReadV
*>(uio
->uio_p1
);
2636 ::operator delete(rdv
);
2639 int rgw_readv(struct rgw_fs
*rgw_fs
,
2640 struct rgw_file_handle
*fh
, rgw_uio
*uio
, uint32_t flags
)
2643 CephContext
* cct
= static_cast<CephContext
*>(rgw_fs
->rgw
);
2644 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2645 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2647 if (! rgw_fh
->is_file())
2653 RGWGetObjRequest
req(cct
, fs
->get_user(), rgw_fh
->bucket_name(),
2654 rgw_fh
->object_name(), uio
->uio_offset
, uio
->uio_resid
,
2656 req
.do_hexdump
= false;
2658 rc
= g_rgwlib
->get_fe()->execute_req(&req
);
2661 RGWReadV
* rdv
= static_cast<RGWReadV
*>(
2662 ::operator new(sizeof(RGWReadV
) +
2663 (bl
.buffers().size() * sizeof(struct rgw_vio
))));
2666 RGWReadV(bl
, reinterpret_cast<rgw_vio
*>(rdv
+sizeof(RGWReadV
)));
2669 uio
->uio_cnt
= rdv
->buffers().size();
2670 uio
->uio_resid
= rdv
->length();
2671 uio
->uio_vio
= rdv
->get_vio();
2672 uio
->uio_rele
= rgw_readv_rele
;
2675 auto& buffers
= rdv
->buffers();
2676 for (auto& bp
: buffers
) {
2677 rgw_vio
*vio
= &(uio
->uio_vio
[ix
]);
2678 vio
->vio_base
= const_cast<char*>(bp
.c_str());
2679 vio
->vio_len
= bp
.length();
2680 vio
->vio_u1
= nullptr;
2681 vio
->vio_p1
= nullptr;
2693 write data to file (vector)
2695 int rgw_writev(struct rgw_fs
*rgw_fs
, struct rgw_file_handle
*fh
,
2696 rgw_uio
*uio
, uint32_t flags
)
2699 // not supported - rest of function is ignored
2702 CephContext
* cct
= static_cast<CephContext
*>(rgw_fs
->rgw
);
2703 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2704 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2706 if (! rgw_fh
->is_file())
2710 for (unsigned int ix
= 0; ix
< uio
->uio_cnt
; ++ix
) {
2711 rgw_vio
*vio
= &(uio
->uio_vio
[ix
]);
2713 buffer::create_static(vio
->vio_len
,
2714 static_cast<char*>(vio
->vio_base
)));
2717 std::string oname
= rgw_fh
->relative_object_name();
2718 RGWPutObjRequest
req(cct
, g_rgwlib
->get_driver()->get_user(fs
->get_user()->user_id
),
2719 rgw_fh
->bucket_name(), oname
, bl
);
2721 int rc
= g_rgwlib
->get_fe()->execute_req(&req
);
2723 /* XXX update size (in request) */
2731 int rgw_fsync(struct rgw_fs
*rgw_fs
, struct rgw_file_handle
*handle
,
2737 int rgw_commit(struct rgw_fs
*rgw_fs
, struct rgw_file_handle
*fh
,
2738 uint64_t offset
, uint64_t length
, uint32_t flags
)
2740 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2742 return rgw_fh
->commit(offset
, length
, RGWFileHandle::FLAG_NONE
);
2749 int rgw_getxattrs(struct rgw_fs
*rgw_fs
, struct rgw_file_handle
*fh
,
2750 rgw_xattrlist
*attrs
, rgw_getxattr_cb cb
, void *cb_arg
,
2753 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2754 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2756 return fs
->getxattrs(rgw_fh
, attrs
, cb
, cb_arg
, flags
);
2759 int rgw_lsxattrs(struct rgw_fs
*rgw_fs
, struct rgw_file_handle
*fh
,
2760 rgw_xattrstr
*filter_prefix
/* ignored */,
2761 rgw_getxattr_cb cb
, void *cb_arg
, uint32_t flags
)
2763 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2764 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2766 return fs
->lsxattrs(rgw_fh
, filter_prefix
, cb
, cb_arg
, flags
);
2769 int rgw_setxattrs(struct rgw_fs
*rgw_fs
, struct rgw_file_handle
*fh
,
2770 rgw_xattrlist
*attrs
, uint32_t flags
)
2772 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2773 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2775 return fs
->setxattrs(rgw_fh
, attrs
, flags
);
2778 int rgw_rmxattrs(struct rgw_fs
*rgw_fs
, struct rgw_file_handle
*fh
,
2779 rgw_xattrlist
*attrs
, uint32_t flags
)
2781 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2782 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2784 return fs
->rmxattrs(rgw_fh
, attrs
, flags
);