1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab ft=cpp
4 #include "include/compat.h"
5 #include "include/rados/rgw_file.h"
11 #include "rgw_resolve.h"
15 #include "rgw_acl_s3.h"
16 #include "rgw_frontend.h"
17 #include "rgw_request.h"
18 #include "rgw_process.h"
19 #include "rgw_rest_user.h"
20 #include "rgw_rest_s3.h"
21 #include "rgw_os_lib.h"
22 #include "rgw_auth_s3.h"
24 #include "rgw_bucket.h"
27 #include "rgw_lib_frontend.h"
28 #include "rgw_perf_counters.h"
29 #include "common/errno.h"
31 #include "services/svc_zone.h"
35 #define dout_subsys ceph_subsys_rgw
/* Out-of-class definitions of static data members. */

/* RGWFileHandle::root_name is the string "/". */
44 const string
RGWFileHandle::root_name
= "/";
/* Atomic 32-bit counter; no explicit initializer is given here.
 * NOTE(review): presumably used to number RGWLibFS instances -- confirm
 * against its users. */
46 std::atomic
<uint32_t> RGWLibFS::fs_inst_counter
;
/* Defaults to 10. NOTE(review): the _s suffix suggests the unit is
 * seconds -- confirm. */
48 uint32_t RGWLibFS::write_completion_interval_s
= 10;
/* Monotonic-clock timer, constructed with ceph::construct_suspended.
 * NOTE(review): presumably it must be resumed elsewhere before it
 * fires -- confirm. */
50 ceph::timer
<ceph::mono_clock
> RGWLibFS::write_timer
{
51 ceph::construct_suspended
};
53 inline int valid_fs_bucket_name(const string
& name
) {
54 int rc
= valid_s3_bucket_name(name
, false /* relaxed */);
56 if (name
.size() > 255)
63 inline int valid_fs_object_name(const string
& name
) {
64 int rc
= valid_s3_object_name(name
);
66 if (name
.size() > 1024)
/* Hash call operator for rgw_xattrstr keys: feeds the att.len bytes at
 * att.val to XXH64 with the fixed seed 5882300 and returns the result
 * as std::size_t.
 * NOTE(review): presumably a member of the XattrHash functor named in
 * the rgw_exposed_attrs declaration -- the enclosing struct is not
 * visible here. */
76 std::size_t operator()(const rgw_xattrstr
& att
) const noexcept
{
77 return XXH64(att
.val
, att
.len
, 5882300);
/* Equality call operator for rgw_xattrstr keys: true when both lengths
 * match and strncmp() over the first lhs.len bytes returns 0.
 * Because strncmp() stops at a NUL byte, bytes following an embedded
 * NUL are not compared.
 * NOTE(review): presumably a member of the XattrEqual functor named in
 * the rgw_exposed_attrs declaration -- the enclosing struct is not
 * visible here. */
84 bool operator()(const rgw_xattrstr
& lhs
, const rgw_xattrstr
& rhs
) const {
85 return ((lhs
.len
== rhs
.len
) &&
86 (strncmp(lhs
.val
, rhs
.val
, lhs
.len
) == 0));
90 /* well-known attributes */
91 static const std::unordered_set
<
92 rgw_xattrstr
, XattrHash
, XattrEqual
> rgw_exposed_attrs
= {
93 rgw_xattrstr
{const_cast<char*>(RGW_ATTR_ETAG
), sizeof(RGW_ATTR_ETAG
)-1}
/* Returns true when key k is a member of the rgw_exposed_attrs set
 * (the "well-known attributes"), false otherwise. */
96 static inline bool is_exposed_attr(const rgw_xattrstr
& k
) {
97 return (rgw_exposed_attrs
.find(k
) != rgw_exposed_attrs
.end());
100 LookupFHResult
RGWLibFS::stat_bucket(RGWFileHandle
* parent
, const char *path
,
101 RGWLibFS::BucketStats
& bs
,
104 LookupFHResult fhr
{nullptr, 0};
105 std::string bucket_name
{path
};
106 RGWStatBucketRequest
req(cct
, user
->clone(), bucket_name
, bs
);
108 int rc
= rgwlib
.get_fe()->execute_req(&req
);
110 (req
.get_ret() == 0) &&
112 fhr
= lookup_fh(parent
, path
,
113 (flags
& RGWFileHandle::FLAG_LOCKED
)|
114 RGWFileHandle::FLAG_CREATE
|
115 RGWFileHandle::FLAG_BUCKET
);
117 RGWFileHandle
* rgw_fh
= get
<0>(fhr
);
118 if (! (flags
& RGWFileHandle::FLAG_LOCKED
)) {
121 rgw_fh
->set_times(req
.get_ctime());
122 /* restore attributes */
123 auto ux_key
= req
.get_attr(RGW_ATTR_UNIX_KEY1
);
124 auto ux_attrs
= req
.get_attr(RGW_ATTR_UNIX1
);
125 if (ux_key
&& ux_attrs
) {
126 DecodeAttrsResult dar
= rgw_fh
->decode_attrs(ux_key
, ux_attrs
);
127 if (get
<0>(dar
) || get
<1>(dar
)) {
131 if (! (flags
& RGWFileHandle::FLAG_LOCKED
)) {
132 rgw_fh
->mtx
.unlock();
139 LookupFHResult
RGWLibFS::fake_leaf(RGWFileHandle
* parent
,
141 enum rgw_fh_type type
,
142 struct stat
*st
, uint32_t st_mask
,
145 /* synthesize a minimal handle from parent, path, type, and st */
148 flags
|= RGWFileHandle::FLAG_CREATE
;
151 case RGW_FS_TYPE_DIRECTORY
:
152 flags
|= RGWFileHandle::FLAG_DIRECTORY
;
159 LookupFHResult fhr
= lookup_fh(parent
, path
, flags
);
161 RGWFileHandle
* rgw_fh
= get
<0>(fhr
);
163 lock_guard
guard(rgw_fh
->mtx
);
164 if (st_mask
& RGW_SETATTR_SIZE
) {
165 rgw_fh
->set_size(st
->st_size
);
167 if (st_mask
& RGW_SETATTR_MTIME
) {
168 rgw_fh
->set_times(st
->st_mtim
);
173 } /* RGWLibFS::fake_leaf */
175 LookupFHResult
RGWLibFS::stat_leaf(RGWFileHandle
* parent
,
177 enum rgw_fh_type type
,
180 /* find either-of <object_name>, <object_name/>, only one of
181 * which should exist; atomicity? */
184 LookupFHResult fhr
{nullptr, 0};
186 /* XXX the need for two round-trip operations to identify file or
187 * directory leaf objects is unnecessary--the current proposed
188 * mechanism to avoid this is to store leaf object names with an
189 * object locator w/o trailing slash */
191 std::string obj_path
= parent
->format_child_name(path
, false);
193 for (auto ix
: { 0, 1, 2 }) {
198 if (type
== RGW_FS_TYPE_DIRECTORY
)
201 RGWStatObjRequest
req(cct
, user
->clone(),
202 parent
->bucket_name(), obj_path
,
203 RGWStatObjRequest::FLAG_NONE
);
204 int rc
= rgwlib
.get_fe()->execute_req(&req
);
206 (req
.get_ret() == 0)) {
207 fhr
= lookup_fh(parent
, path
, RGWFileHandle::FLAG_CREATE
);
209 RGWFileHandle
* rgw_fh
= get
<0>(fhr
);
210 lock_guard
guard(rgw_fh
->mtx
);
211 rgw_fh
->set_size(req
.get_size());
212 rgw_fh
->set_times(req
.get_mtime());
213 /* restore attributes */
214 auto ux_key
= req
.get_attr(RGW_ATTR_UNIX_KEY1
);
215 auto ux_attrs
= req
.get_attr(RGW_ATTR_UNIX1
);
216 rgw_fh
->set_etag(*(req
.get_attr(RGW_ATTR_ETAG
)));
217 rgw_fh
->set_acls(*(req
.get_attr(RGW_ATTR_ACL
)));
218 if (!(flags
& RGWFileHandle::FLAG_IN_CB
) &&
219 ux_key
&& ux_attrs
) {
220 DecodeAttrsResult dar
= rgw_fh
->decode_attrs(ux_key
, ux_attrs
);
221 if (get
<0>(dar
) || get
<1>(dar
)) {
234 if (type
== RGW_FS_TYPE_FILE
)
238 RGWStatObjRequest
req(cct
, user
->clone(),
239 parent
->bucket_name(), obj_path
,
240 RGWStatObjRequest::FLAG_NONE
);
241 int rc
= rgwlib
.get_fe()->execute_req(&req
);
243 (req
.get_ret() == 0)) {
244 fhr
= lookup_fh(parent
, path
, RGWFileHandle::FLAG_DIRECTORY
);
246 RGWFileHandle
* rgw_fh
= get
<0>(fhr
);
247 lock_guard
guard(rgw_fh
->mtx
);
248 rgw_fh
->set_size(req
.get_size());
249 rgw_fh
->set_times(req
.get_mtime());
250 /* restore attributes */
251 auto ux_key
= req
.get_attr(RGW_ATTR_UNIX_KEY1
);
252 auto ux_attrs
= req
.get_attr(RGW_ATTR_UNIX1
);
253 rgw_fh
->set_etag(*(req
.get_attr(RGW_ATTR_ETAG
)));
254 rgw_fh
->set_acls(*(req
.get_attr(RGW_ATTR_ACL
)));
255 if (!(flags
& RGWFileHandle::FLAG_IN_CB
) &&
256 ux_key
&& ux_attrs
) {
257 DecodeAttrsResult dar
= rgw_fh
->decode_attrs(ux_key
, ux_attrs
);
258 if (get
<0>(dar
) || get
<1>(dar
)) {
269 std::string object_name
{path
};
270 RGWStatLeafRequest
req(cct
, user
->clone(),
271 parent
, object_name
);
272 int rc
= rgwlib
.get_fe()->execute_req(&req
);
274 (req
.get_ret() == 0)) {
276 /* we need rgw object's key name equal to file name, if
278 if ((flags
& RGWFileHandle::FLAG_EXACT_MATCH
) &&
279 !req
.exact_matched
) {
280 lsubdout(get_context(), rgw
, 15)
282 << ": stat leaf not exact match file name = "
286 fhr
= lookup_fh(parent
, path
,
287 RGWFileHandle::FLAG_CREATE
|
289 RGWFileHandle::FLAG_DIRECTORY
:
290 RGWFileHandle::FLAG_NONE
));
291 /* XXX we don't have an object--in general, there need not
292 * be one (just a path segment in some other object). In
293 * actual leaf an object exists, but we'd need another round
294 * trip to get attrs */
296 /* for now use the parent object's mtime */
297 RGWFileHandle
* rgw_fh
= get
<0>(fhr
);
298 lock_guard
guard(rgw_fh
->mtx
);
299 rgw_fh
->set_mtime(parent
->get_mtime());
312 } /* RGWLibFS::stat_leaf */
314 int RGWLibFS::read(RGWFileHandle
* rgw_fh
, uint64_t offset
, size_t length
,
315 size_t* bytes_read
, void* buffer
, uint32_t flags
)
317 if (! rgw_fh
->is_file())
320 if (rgw_fh
->deleted())
323 RGWReadRequest
req(get_context(), user
->clone(), rgw_fh
, offset
, length
, buffer
);
325 int rc
= rgwlib
.get_fe()->execute_req(&req
);
327 ((rc
= req
.get_ret()) == 0)) {
328 lock_guard
guard(rgw_fh
->mtx
);
329 rgw_fh
->set_atime(real_clock::to_timespec(real_clock::now()));
330 *bytes_read
= req
.nread
;
336 int RGWLibFS::readlink(RGWFileHandle
* rgw_fh
, uint64_t offset
, size_t length
,
337 size_t* bytes_read
, void* buffer
, uint32_t flags
)
339 if (! rgw_fh
->is_link())
342 if (rgw_fh
->deleted())
345 RGWReadRequest
req(get_context(), user
->clone(), rgw_fh
, offset
, length
, buffer
);
347 int rc
= rgwlib
.get_fe()->execute_req(&req
);
349 ((rc
= req
.get_ret()) == 0)) {
350 lock_guard(rgw_fh
->mtx
);
351 rgw_fh
->set_atime(real_clock::to_timespec(real_clock::now()));
352 *bytes_read
= req
.nread
;
358 int RGWLibFS::unlink(RGWFileHandle
* rgw_fh
, const char* name
, uint32_t flags
)
362 RGWFileHandle
* parent
= nullptr;
363 RGWFileHandle
* bkt_fh
= nullptr;
365 if (unlikely(flags
& RGWFileHandle::FLAG_UNLINK_THIS
)) {
367 parent
= rgw_fh
->get_parent();
371 LookupFHResult fhr
= lookup_fh(parent
, name
, RGWFileHandle::FLAG_LOCK
);
372 rgw_fh
= get
<0>(fhr
);
376 if (parent
->is_root()) {
377 /* a bucket may have an object storing Unix attributes, check
378 * for and delete it */
380 fhr
= stat_bucket(parent
, name
, bs
, (rgw_fh
) ?
381 RGWFileHandle::FLAG_LOCKED
:
382 RGWFileHandle::FLAG_NONE
);
383 bkt_fh
= get
<0>(fhr
);
384 if (unlikely(! bkt_fh
)) {
385 /* implies !rgw_fh, so also !LOCKED */
389 if (bs
.num_entries
> 1) {
390 unref(bkt_fh
); /* return stat_bucket ref */
391 if (likely(!! rgw_fh
)) { /* return lock and ref from
392 * lookup_fh (or caller in the
394 * RGWFileHandle::FLAG_UNLINK_THIS) */
395 rgw_fh
->mtx
.unlock();
400 /* delete object w/key "<bucket>/" (uxattrs), if any */
402 RGWDeleteObjRequest
req(cct
, user
->clone(), bkt_fh
->bucket_name(), oname
);
403 rc
= rgwlib
.get_fe()->execute_req(&req
);
404 /* don't care if ENOENT */
409 RGWDeleteBucketRequest
req(cct
, user
->clone(), bname
);
410 rc
= rgwlib
.get_fe()->execute_req(&req
);
419 /* XXX for now, perform a hard lookup to deduce the type of
420 * object to be deleted ("foo" vs. "foo/")--also, ensures
421 * atomicity at this endpoint */
422 struct rgw_file_handle
*fh
;
423 rc
= rgw_lookup(get_fs(), parent
->get_fh(), name
, &fh
,
424 nullptr /* st */, 0 /* mask */,
425 RGW_LOOKUP_FLAG_NONE
);
430 rgw_fh
= get_rgwfh(fh
);
431 rgw_fh
->mtx
.lock(); /* LOCKED */
434 std::string oname
= rgw_fh
->relative_object_name();
435 if (rgw_fh
->is_dir()) {
436 /* for the duration of our cache timer, trust positive
438 if (rgw_fh
->has_children()) {
439 rgw_fh
->mtx
.unlock();
445 RGWDeleteObjRequest
req(cct
, user
->clone(), parent
->bucket_name(), oname
);
446 rc
= rgwlib
.get_fe()->execute_req(&req
);
452 /* ENOENT when raced with other s3 gateway */
453 if (! rc
|| rc
== -ENOENT
) {
454 rgw_fh
->flags
|= RGWFileHandle::FLAG_DELETED
;
455 fh_cache
.remove(rgw_fh
->fh
.fh_hk
.object
, rgw_fh
,
456 RGWFileHandle::FHCache::FLAG_LOCK
);
460 real_time t
= real_clock::now();
461 parent
->set_mtime(real_clock::to_timespec(t
));
462 parent
->set_ctime(real_clock::to_timespec(t
));
465 rgw_fh
->mtx
.unlock();
469 } /* RGWLibFS::unlink */
471 int RGWLibFS::rename(RGWFileHandle
* src_fh
, RGWFileHandle
* dst_fh
,
472 const char *_src_name
, const char *_dst_name
)
475 /* XXX initial implementation: try-copy, and delete if copy
480 std::string src_name
{_src_name
};
481 std::string dst_name
{_dst_name
};
484 LookupFHResult fhr
= lookup_fh(src_fh
, _src_name
, RGWFileHandle::FLAG_LOCK
);
485 RGWFileHandle
* rgw_fh
= get
<0>(fhr
);
487 /* should not happen */
489 ldout(get_context(), 0) << __func__
490 << " BUG no such src renaming path="
496 /* forbid renaming of directories (unreasonable at scale) */
497 if (rgw_fh
->is_dir()) {
498 ldout(get_context(), 12) << __func__
499 << " rejecting attempt to rename directory path="
500 << rgw_fh
->full_object_name()
506 /* forbid renaming open files (violates intent, for now) */
507 if (rgw_fh
->is_open()) {
508 ldout(get_context(), 12) << __func__
509 << " rejecting attempt to rename open file path="
510 << rgw_fh
->full_object_name()
516 t
= real_clock::now();
518 for (int ix
: {0, 1}) {
522 RGWCopyObjRequest
req(cct
, user
->clone(), src_fh
, dst_fh
, src_name
, dst_name
);
523 int rc
= rgwlib
.get_fe()->execute_req(&req
);
525 ((rc
= req
.get_ret()) != 0)) {
526 ldout(get_context(), 1)
528 << " rename step 0 failed src="
529 << src_fh
->full_object_name() << " " << src_name
530 << " dst=" << dst_fh
->full_object_name()
536 ldout(get_context(), 12)
538 << " rename step 0 success src="
539 << src_fh
->full_object_name() << " " << src_name
540 << " dst=" << dst_fh
->full_object_name()
544 /* update dst change id */
545 dst_fh
->set_times(t
);
550 rc
= this->unlink(rgw_fh
/* LOCKED */, _src_name
,
551 RGWFileHandle::FLAG_UNLINK_THIS
);
554 ldout(get_context(), 12)
556 << " rename step 1 success src="
557 << src_fh
->full_object_name() << " " << src_name
558 << " dst=" << dst_fh
->full_object_name()
562 /* update src change id */
563 src_fh
->set_times(t
);
565 ldout(get_context(), 1)
567 << " rename step 1 failed src="
568 << src_fh
->full_object_name() << " " << src_name
569 << " dst=" << dst_fh
->full_object_name()
581 rgw_fh
->mtx
.unlock(); /* !LOCKED */
582 unref(rgw_fh
); /* -ref */
586 } /* RGWLibFS::rename */
588 MkObjResult
RGWLibFS::mkdir(RGWFileHandle
* parent
, const char *name
,
589 struct stat
*st
, uint32_t mask
, uint32_t flags
)
592 rgw_file_handle
*lfh
;
594 rc
= rgw_lookup(get_fs(), parent
->get_fh(), name
, &lfh
,
595 nullptr /* st */, 0 /* mask */,
596 RGW_LOOKUP_FLAG_NONE
);
599 rc
= rgw_fh_rele(get_fs(), lfh
, RGW_FH_RELE_FLAG_NONE
);
600 // ignore return code
601 return MkObjResult
{nullptr, -EEXIST
};
604 MkObjResult mkr
{nullptr, -EINVAL
};
606 RGWFileHandle
* rgw_fh
= nullptr;
607 buffer::list ux_key
, ux_attrs
;
609 fhr
= lookup_fh(parent
, name
,
610 RGWFileHandle::FLAG_CREATE
|
611 RGWFileHandle::FLAG_DIRECTORY
|
612 RGWFileHandle::FLAG_LOCK
);
613 rgw_fh
= get
<0>(fhr
);
615 rgw_fh
->create_stat(st
, mask
);
616 rgw_fh
->set_times(real_clock::now());
618 rgw_fh
->encode_attrs(ux_key
, ux_attrs
);
620 rgw_fh
->stat(st
, RGWFileHandle::FLAG_LOCKED
);
621 get
<0>(mkr
) = rgw_fh
;
627 if (parent
->is_root()) {
630 /* enforce S3 name restrictions */
631 rc
= valid_fs_bucket_name(bname
);
633 rgw_fh
->flags
|= RGWFileHandle::FLAG_DELETED
;
634 fh_cache
.remove(rgw_fh
->fh
.fh_hk
.object
, rgw_fh
,
635 RGWFileHandle::FHCache::FLAG_LOCK
);
636 rgw_fh
->mtx
.unlock();
638 get
<0>(mkr
) = nullptr;
643 RGWCreateBucketRequest
req(get_context(), user
->clone(), bname
);
646 req
.emplace_attr(RGW_ATTR_UNIX_KEY1
, std::move(ux_key
));
647 req
.emplace_attr(RGW_ATTR_UNIX1
, std::move(ux_attrs
));
649 rc
= rgwlib
.get_fe()->execute_req(&req
);
652 /* create an object representing the directory */
654 string dir_name
= parent
->format_child_name(name
, true);
656 /* need valid S3 name (characters, length <= 1024, etc) */
657 rc
= valid_fs_object_name(dir_name
);
659 rgw_fh
->flags
|= RGWFileHandle::FLAG_DELETED
;
660 fh_cache
.remove(rgw_fh
->fh
.fh_hk
.object
, rgw_fh
,
661 RGWFileHandle::FHCache::FLAG_LOCK
);
662 rgw_fh
->mtx
.unlock();
664 get
<0>(mkr
) = nullptr;
669 RGWPutObjRequest
req(get_context(), user
->clone(), parent
->bucket_name(), dir_name
, bl
);
672 req
.emplace_attr(RGW_ATTR_UNIX_KEY1
, std::move(ux_key
));
673 req
.emplace_attr(RGW_ATTR_UNIX1
, std::move(ux_attrs
));
675 rc
= rgwlib
.get_fe()->execute_req(&req
);
682 rgw_fh
->flags
|= RGWFileHandle::FLAG_DELETED
;
683 rgw_fh
->mtx
.unlock(); /* !LOCKED */
685 get
<0>(mkr
) = nullptr;
690 real_time t
= real_clock::now();
691 parent
->set_mtime(real_clock::to_timespec(t
));
692 parent
->set_ctime(real_clock::to_timespec(t
));
693 rgw_fh
->mtx
.unlock(); /* !LOCKED */
699 } /* RGWLibFS::mkdir */
701 MkObjResult
RGWLibFS::create(RGWFileHandle
* parent
, const char *name
,
702 struct stat
*st
, uint32_t mask
, uint32_t flags
)
708 rgw_file_handle
*lfh
;
709 rc
= rgw_lookup(get_fs(), parent
->get_fh(), name
, &lfh
,
710 nullptr /* st */, 0 /* mask */,
711 RGW_LOOKUP_FLAG_NONE
);
714 rc
= rgw_fh_rele(get_fs(), lfh
, RGW_FH_RELE_FLAG_NONE
);
715 // ignore return code
716 return MkObjResult
{nullptr, -EEXIST
};
719 /* expand and check name */
720 std::string obj_name
= parent
->format_child_name(name
, false);
721 rc
= valid_fs_object_name(obj_name
);
723 return MkObjResult
{nullptr, rc
};
728 RGWPutObjRequest
req(cct
, user
->clone(), parent
->bucket_name(), obj_name
, bl
);
729 MkObjResult mkr
{nullptr, -EINVAL
};
731 rc
= rgwlib
.get_fe()->execute_req(&req
);
737 LookupFHResult fhr
= lookup_fh(parent
, name
, RGWFileHandle::FLAG_CREATE
|
738 RGWFileHandle::FLAG_LOCK
);
739 RGWFileHandle
* rgw_fh
= get
<0>(fhr
);
741 if (get
<1>(fhr
) & RGWFileHandle::FLAG_CREATE
) {
742 /* fill in stat data */
743 real_time t
= real_clock::now();
744 rgw_fh
->create_stat(st
, mask
);
745 rgw_fh
->set_times(t
);
747 parent
->set_mtime(real_clock::to_timespec(t
));
748 parent
->set_ctime(real_clock::to_timespec(t
));
751 (void) rgw_fh
->stat(st
, RGWFileHandle::FLAG_LOCKED
);
753 rgw_fh
->set_etag(*(req
.get_attr(RGW_ATTR_ETAG
)));
754 rgw_fh
->set_acls(*(req
.get_attr(RGW_ATTR_ACL
)));
756 get
<0>(mkr
) = rgw_fh
;
757 rgw_fh
->file_ondisk_version
= 0; // inital version
758 rgw_fh
->mtx
.unlock();
765 /* cases such as quota exceeded are considered failures too */
770 } /* RGWLibFS::create */
772 MkObjResult
RGWLibFS::symlink(RGWFileHandle
* parent
, const char *name
,
773 const char* link_path
, struct stat
*st
, uint32_t mask
, uint32_t flags
)
779 rgw_file_handle
*lfh
;
780 rc
= rgw_lookup(get_fs(), parent
->get_fh(), name
, &lfh
,
781 nullptr /* st */, 0 /* mask */,
782 RGW_LOOKUP_FLAG_NONE
);
785 rc
= rgw_fh_rele(get_fs(), lfh
, RGW_FH_RELE_FLAG_NONE
);
786 // ignore return code
787 return MkObjResult
{nullptr, -EEXIST
};
790 MkObjResult mkr
{nullptr, -EINVAL
};
792 RGWFileHandle
* rgw_fh
= nullptr;
793 buffer::list ux_key
, ux_attrs
;
795 fhr
= lookup_fh(parent
, name
,
796 RGWFileHandle::FLAG_CREATE
|
797 RGWFileHandle::FLAG_SYMBOLIC_LINK
|
798 RGWFileHandle::FLAG_LOCK
);
799 rgw_fh
= get
<0>(fhr
);
801 rgw_fh
->create_stat(st
, mask
);
802 rgw_fh
->set_times(real_clock::now());
804 rgw_fh
->encode_attrs(ux_key
, ux_attrs
);
807 get
<0>(mkr
) = rgw_fh
;
813 /* need valid S3 name (characters, length <= 1024, etc) */
814 rc
= valid_fs_object_name(name
);
816 rgw_fh
->flags
|= RGWFileHandle::FLAG_DELETED
;
817 fh_cache
.remove(rgw_fh
->fh
.fh_hk
.object
, rgw_fh
,
818 RGWFileHandle::FHCache::FLAG_LOCK
);
819 rgw_fh
->mtx
.unlock();
821 get
<0>(mkr
) = nullptr;
826 string obj_name
= std::string(name
);
827 /* create an object representing the symbolic link */
833 buffer::create_static(len
, static_cast<char*>(buffer
)));
837 buffer::copy(link_path
, strlen(link_path
)));
840 RGWPutObjRequest
req(get_context(), user
->clone(), parent
->bucket_name(), obj_name
, bl
);
843 req
.emplace_attr(RGW_ATTR_UNIX_KEY1
, std::move(ux_key
));
844 req
.emplace_attr(RGW_ATTR_UNIX1
, std::move(ux_attrs
));
846 rc
= rgwlib
.get_fe()->execute_req(&req
);
851 rgw_fh
->flags
|= RGWFileHandle::FLAG_DELETED
;
852 rgw_fh
->mtx
.unlock(); /* !LOCKED */
854 get
<0>(mkr
) = nullptr;
859 real_time t
= real_clock::now();
860 parent
->set_mtime(real_clock::to_timespec(t
));
861 parent
->set_ctime(real_clock::to_timespec(t
));
862 rgw_fh
->mtx
.unlock(); /* !LOCKED */
868 } /* RGWLibFS::symlink */
870 int RGWLibFS::getattr(RGWFileHandle
* rgw_fh
, struct stat
* st
)
872 switch(rgw_fh
->fh
.fh_type
) {
873 case RGW_FS_TYPE_FILE
:
875 if (rgw_fh
->deleted())
882 /* if rgw_fh is a directory, mtime will be advanced */
883 return rgw_fh
->stat(st
);
884 } /* RGWLibFS::getattr */
886 int RGWLibFS::setattr(RGWFileHandle
* rgw_fh
, struct stat
* st
, uint32_t mask
,
890 buffer::list ux_key
, ux_attrs
;
891 buffer::list etag
= rgw_fh
->get_etag();
892 buffer::list acls
= rgw_fh
->get_acls();
894 lock_guard
guard(rgw_fh
->mtx
);
896 switch(rgw_fh
->fh
.fh_type
) {
897 case RGW_FS_TYPE_FILE
:
899 if (rgw_fh
->deleted())
907 string obj_name
{rgw_fh
->relative_object_name()};
909 if (rgw_fh
->is_dir() &&
910 (likely(! rgw_fh
->is_bucket()))) {
914 RGWSetAttrsRequest
req(cct
, user
->clone(), rgw_fh
->bucket_name(), obj_name
);
916 rgw_fh
->create_stat(st
, mask
);
917 rgw_fh
->encode_attrs(ux_key
, ux_attrs
);
920 req
.emplace_attr(RGW_ATTR_UNIX_KEY1
, std::move(ux_key
));
921 req
.emplace_attr(RGW_ATTR_UNIX1
, std::move(ux_attrs
));
922 req
.emplace_attr(RGW_ATTR_ETAG
, std::move(etag
));
923 req
.emplace_attr(RGW_ATTR_ACL
, std::move(acls
));
925 rc
= rgwlib
.get_fe()->execute_req(&req
);
929 /* special case: materialize placeholder dir */
931 RGWPutObjRequest
req(get_context(), user
->clone(), rgw_fh
->bucket_name(), obj_name
, bl
);
933 rgw_fh
->encode_attrs(ux_key
, ux_attrs
); /* because std::moved */
936 req
.emplace_attr(RGW_ATTR_UNIX_KEY1
, std::move(ux_key
));
937 req
.emplace_attr(RGW_ATTR_UNIX1
, std::move(ux_attrs
));
939 rc
= rgwlib
.get_fe()->execute_req(&req
);
943 if ((rc
!= 0) || (rc2
!= 0)) {
947 rgw_fh
->set_ctime(real_clock::to_timespec(real_clock::now()));
950 } /* RGWLibFS::setattr */
952 static inline std::string
prefix_xattr_keystr(const rgw_xattrstr
& key
) {
954 keystr
.reserve(sizeof(RGW_ATTR_META_PREFIX
) + key
.len
);
955 keystr
+= string
{RGW_ATTR_META_PREFIX
};
956 keystr
+= string
{key
.val
, key
.len
};
960 static inline std::string_view
unprefix_xattr_keystr(const std::string
& key
)
962 std::string_view svk
{key
};
963 auto pos
= svk
.find(RGW_ATTR_META_PREFIX
);
964 if (pos
== std::string_view::npos
) {
965 return std::string_view
{""};
966 } else if (pos
== 0) {
967 svk
.remove_prefix(sizeof(RGW_ATTR_META_PREFIX
)-1);
972 int RGWLibFS::getxattrs(RGWFileHandle
* rgw_fh
, rgw_xattrlist
*attrs
,
973 rgw_getxattr_cb cb
, void *cb_arg
,
976 /* cannot store on fs_root, should not on buckets? */
977 if ((rgw_fh
->is_bucket()) ||
978 (rgw_fh
->is_root())) {
983 string obj_name
{rgw_fh
->relative_object_name2()};
985 RGWGetAttrsRequest
req(cct
, user
->clone(), rgw_fh
->bucket_name(), obj_name
);
987 for (uint32_t ix
= 0; ix
< attrs
->xattr_cnt
; ++ix
) {
988 auto& xattr
= attrs
->xattrs
[ix
];
990 /* pass exposed attr keys as given, else prefix */
991 std::string k
= is_exposed_attr(xattr
.key
)
992 ? std::string
{xattr
.key
.val
, xattr
.key
.len
}
993 : prefix_xattr_keystr(xattr
.key
);
995 req
.emplace_key(std::move(k
));
998 if (ldlog_p1(get_context(), ceph_subsys_rgw
, 15)) {
999 lsubdout(get_context(), rgw
, 15)
1001 << " get keys for: "
1002 << rgw_fh
->object_name()
1005 for (const auto& attr
: req
.get_attrs()) {
1006 lsubdout(get_context(), rgw
, 15)
1007 << "\tkey: " << attr
.first
<< dendl
;
1011 rc
= rgwlib
.get_fe()->execute_req(&req
);
1012 rc2
= req
.get_ret();
1013 rc3
= ((rc
== 0) && (rc2
== 0)) ? 0 : -EIO
;
1015 /* call back w/xattr data */
1017 const auto& attrs
= req
.get_attrs();
1018 for (const auto& attr
: attrs
) {
1020 if (!attr
.second
.has_value())
1023 const auto& k
= attr
.first
;
1024 const auto& v
= attr
.second
.value();
1026 /* return exposed attr keys as given, else unprefix --
1027 * yes, we could have memoized the exposed check, but
1028 * to be efficient it would need to be saved with
1029 * RGWGetAttrs::attrs, I think */
1030 std::string_view svk
=
1031 is_exposed_attr(rgw_xattrstr
{const_cast<char*>(k
.c_str()),
1032 uint32_t(k
.length())})
1034 : unprefix_xattr_keystr(k
);
1036 /* skip entries not matching prefix */
1040 rgw_xattrstr xattr_k
= { const_cast<char*>(svk
.data()),
1041 uint32_t(svk
.length())};
1042 rgw_xattrstr xattr_v
=
1043 {const_cast<char*>(const_cast<buffer::list
&>(v
).c_str()),
1044 uint32_t(v
.length())};
1045 rgw_xattr xattr
= { xattr_k
, xattr_v
};
1046 rgw_xattrlist xattrlist
= { &xattr
, 1 };
1048 cb(&xattrlist
, cb_arg
, RGW_GETXATTR_FLAG_NONE
);
1053 } /* RGWLibFS::getxattrs */
1055 int RGWLibFS::lsxattrs(
1056 RGWFileHandle
* rgw_fh
, rgw_xattrstr
*filter_prefix
, rgw_getxattr_cb cb
,
1057 void *cb_arg
, uint32_t flags
)
1059 /* cannot store on fs_root, should not on buckets? */
1060 if ((rgw_fh
->is_bucket()) ||
1061 (rgw_fh
->is_root())) {
1066 string obj_name
{rgw_fh
->relative_object_name2()};
1068 RGWGetAttrsRequest
req(cct
, user
->clone(), rgw_fh
->bucket_name(), obj_name
);
1070 rc
= rgwlib
.get_fe()->execute_req(&req
);
1071 rc2
= req
.get_ret();
1072 rc3
= ((rc
== 0) && (rc2
== 0)) ? 0 : -EIO
;
1074 /* call back w/xattr data--check for eof */
1076 const auto& keys
= req
.get_attrs();
1077 for (const auto& k
: keys
) {
1079 /* return exposed attr keys as given, else unprefix */
1080 std::string_view svk
=
1081 is_exposed_attr(rgw_xattrstr
{const_cast<char*>(k
.first
.c_str()),
1082 uint32_t(k
.first
.length())})
1084 : unprefix_xattr_keystr(k
.first
);
1086 /* skip entries not matching prefix */
1090 rgw_xattrstr xattr_k
= { const_cast<char*>(svk
.data()),
1091 uint32_t(svk
.length())};
1092 rgw_xattrstr xattr_v
= { nullptr, 0 };
1093 rgw_xattr xattr
= { xattr_k
, xattr_v
};
1094 rgw_xattrlist xattrlist
= { &xattr
, 1 };
1096 auto cbr
= cb(&xattrlist
, cb_arg
, RGW_LSXATTR_FLAG_NONE
);
1097 if (cbr
& RGW_LSXATTR_FLAG_STOP
)
1103 } /* RGWLibFS::lsxattrs */
1105 int RGWLibFS::setxattrs(RGWFileHandle
* rgw_fh
, rgw_xattrlist
*attrs
,
1108 /* cannot store on fs_root, should not on buckets? */
1109 if ((rgw_fh
->is_bucket()) ||
1110 (rgw_fh
->is_root())) {
1115 string obj_name
{rgw_fh
->relative_object_name2()};
1117 RGWSetAttrsRequest
req(cct
, user
->clone(), rgw_fh
->bucket_name(), obj_name
);
1119 for (uint32_t ix
= 0; ix
< attrs
->xattr_cnt
; ++ix
) {
1120 auto& xattr
= attrs
->xattrs
[ix
];
1121 buffer::list attr_bl
;
1122 /* don't allow storing at RGW_ATTR_META_PREFIX */
1123 if (! (xattr
.key
.len
> 0))
1126 /* reject lexical match with any exposed attr */
1127 if (is_exposed_attr(xattr
.key
))
1130 string k
= prefix_xattr_keystr(xattr
.key
);
1131 attr_bl
.append(xattr
.val
.val
, xattr
.val
.len
);
1132 req
.emplace_attr(k
.c_str(), std::move(attr_bl
));
1135 /* don't send null requests */
1136 if (! (req
.get_attrs().size() > 0)) {
1140 rc
= rgwlib
.get_fe()->execute_req(&req
);
1141 rc2
= req
.get_ret();
1143 return (((rc
== 0) && (rc2
== 0)) ? 0 : -EIO
);
1145 } /* RGWLibFS::setxattrs */
1147 int RGWLibFS::rmxattrs(RGWFileHandle
* rgw_fh
, rgw_xattrlist
* attrs
,
1150 /* cannot store on fs_root, should not on buckets? */
1151 if ((rgw_fh
->is_bucket()) ||
1152 (rgw_fh
->is_root())) {
1157 string obj_name
{rgw_fh
->relative_object_name2()};
1159 RGWRMAttrsRequest
req(cct
, user
->clone(), rgw_fh
->bucket_name(), obj_name
);
1161 for (uint32_t ix
= 0; ix
< attrs
->xattr_cnt
; ++ix
) {
1162 auto& xattr
= attrs
->xattrs
[ix
];
1163 /* don't allow storing at RGW_ATTR_META_PREFIX */
1164 if (! (xattr
.key
.len
> 0)) {
1167 string k
= prefix_xattr_keystr(xattr
.key
);
1168 req
.emplace_key(std::move(k
));
1171 /* don't send null requests */
1172 if (! (req
.get_attrs().size() > 0)) {
1176 rc
= rgwlib
.get_fe()->execute_req(&req
);
1177 rc2
= req
.get_ret();
1179 return (((rc
== 0) && (rc2
== 0)) ? 0 : -EIO
);
1181 } /* RGWLibFS::rmxattrs */
1183 /* called with rgw_fh->mtx held */
1184 void RGWLibFS::update_fh(RGWFileHandle
*rgw_fh
)
1187 string obj_name
{rgw_fh
->relative_object_name()};
1188 buffer::list ux_key
, ux_attrs
;
1190 if (rgw_fh
->is_dir() &&
1191 (likely(! rgw_fh
->is_bucket()))) {
1195 lsubdout(get_context(), rgw
, 17)
1197 << " update old versioned fh : " << obj_name
1200 RGWSetAttrsRequest
req(cct
, user
->clone(), rgw_fh
->bucket_name(), obj_name
);
1202 rgw_fh
->encode_attrs(ux_key
, ux_attrs
, false);
1204 req
.emplace_attr(RGW_ATTR_UNIX_KEY1
, std::move(ux_key
));
1205 req
.emplace_attr(RGW_ATTR_UNIX1
, std::move(ux_attrs
));
1207 rc
= rgwlib
.get_fe()->execute_req(&req
);
1208 rc2
= req
.get_ret();
1210 if ((rc
!= 0) || (rc2
!= 0)) {
1211 lsubdout(get_context(), rgw
, 17)
1213 << " update fh failed : " << obj_name
1216 } /* RGWLibFS::update_fh */
1218 void RGWLibFS::close()
1220 state
.flags
|= FLAG_CLOSED
;
1226 explicit ObjUnref(RGWLibFS
* _fs
) : fs(_fs
) {}
1227 void operator()(RGWFileHandle
* fh
) const {
1228 lsubdout(fs
->get_context(), rgw
, 5)
1229 << __PRETTY_FUNCTION__
1231 << " before ObjUnref refs=" << fh
->get_refcnt()
1237 /* force cache drain, forces objects to evict */
1238 fh_cache
.drain(ObjUnref(this),
1239 RGWFileHandle::FHCache::FLAG_LOCK
);
1240 rgwlib
.get_fe()->get_process()->unregister_fs(this);
1242 } /* RGWLibFS::close */
1244 inline std::ostream
& operator<<(std::ostream
&os
, fh_key
const &fhk
) {
1245 os
<< "<fh_key: bucket=";
1246 os
<< fhk
.fh_hk
.bucket
;
1248 os
<< fhk
.fh_hk
.object
;
1253 inline std::ostream
& operator<<(std::ostream
&os
, struct timespec
const &ts
) {
1254 os
<< "<timespec: tv_sec=";
1262 std::ostream
& operator<<(std::ostream
&os
, RGWLibFS::event
const &ev
) {
1265 case RGWLibFS::event::type::READDIR
:
1266 os
<< "type=READDIR;";
1269 os
<< "type=UNKNOWN;";
1272 os
<< "fid=" << ev
.fhk
.fh_hk
.bucket
<< ":" << ev
.fhk
.fh_hk
.object
1273 << ";ts=" << ev
.ts
<< ">";
1280 using directory
= RGWFileHandle::directory
;
1282 /* dirent invalidate timeout--basically, the upper-bound on
1283 * inconsistency with the S3 namespace */
1285 = get_context()->_conf
->rgw_nfs_namespace_expire_secs
;
1287 /* max events to gc in one cycle */
1288 uint32_t max_ev
= get_context()->_conf
->rgw_nfs_max_gc
;
1290 struct timespec now
, expire_ts
;
1293 std::deque
<event
> &events
= state
.events
;
1296 (void) clock_gettime(CLOCK_MONOTONIC_COARSE
, &now
);
1297 lsubdout(get_context(), rgw
, 15)
1298 << "GC: top of expire loop"
1300 << " expire_s=" << expire_s
1303 lock_guard
guard(state
.mtx
); /* LOCKED */
1304 lsubdout(get_context(), rgw
, 15)
1306 << " count=" << events
.size()
1309 /* just return if no events */
1310 if (events
.empty()) {
1314 (events
.size() < 500) ? max_ev
: (events
.size() / 4);
1315 for (uint32_t ix
= 0; (ix
< _max_ev
) && (events
.size() > 0); ++ix
) {
1316 event
& ev
= events
.front();
1318 expire_ts
.tv_sec
+= expire_s
;
1319 if (expire_ts
> now
) {
1328 for (auto& ev
: ve
) {
1329 lsubdout(get_context(), rgw
, 15)
1330 << "try-expire ev: " << ev
<< dendl
;
1331 if (likely(ev
.t
== event::type::READDIR
)) {
1332 RGWFileHandle
* rgw_fh
= lookup_handle(ev
.fhk
.fh_hk
);
1333 lsubdout(get_context(), rgw
, 15)
1334 << "ev rgw_fh: " << rgw_fh
<< dendl
;
1336 RGWFileHandle::directory
* d
;
1337 if (unlikely(! rgw_fh
->is_dir())) {
1338 lsubdout(get_context(), rgw
, 0)
1340 << " BUG non-directory found with READDIR event "
1341 << "(" << rgw_fh
->bucket_name() << ","
1342 << rgw_fh
->object_name() << ")"
1346 /* maybe clear state */
1347 d
= get
<directory
>(&rgw_fh
->variant_type
);
1349 struct timespec ev_ts
= ev
.ts
;
1350 lock_guard
guard(rgw_fh
->mtx
);
1351 struct timespec d_last_readdir
= d
->last_readdir
;
1352 if (unlikely(ev_ts
< d_last_readdir
)) {
1353 /* readdir cycle in progress, don't invalidate */
1354 lsubdout(get_context(), rgw
, 15)
1355 << "GC: delay expiration for "
1356 << rgw_fh
->object_name()
1357 << " ev.ts=" << ev_ts
1358 << " last_readdir=" << d_last_readdir
1362 lsubdout(get_context(), rgw
, 15)
1364 << rgw_fh
->object_name()
1366 rgw_fh
->clear_state();
1367 rgw_fh
->invalidate();
1373 } /* event::type::READDIR */
1376 } while (! (stop
|| shutdown
));
1377 } /* RGWLibFS::gc */
/* Debug stream inserter for RGWFileHandle.  Emits a "<RGWFileHandle:" record
 * holding the handle address, a type tag chosen by switching on fh->fh_type,
 * the bucket:object hash-key pair taken from get_key(), the object name and
 * the current reference count.  get_fh() is reached through a const_cast
 * because rgw_fh arrives by const reference (presumably get_fh() is not
 * const-qualified -- confirm against the class declaration). */
1379 std::ostream
& operator<<(std::ostream
&os
,
1380 RGWFileHandle
const &rgw_fh
)
1382 const auto& fhk
= rgw_fh
.get_key();
1383 const auto& fh
= const_cast<RGWFileHandle
&>(rgw_fh
).get_fh();
1384 os
<< "<RGWFileHandle:";
1385 os
<< "addr=" << &rgw_fh
<< ";";
1386 switch (fh
->fh_type
) {
1387 case RGW_FS_TYPE_DIRECTORY
:
1388 os
<< "type=DIRECTORY;";
1390 case RGW_FS_TYPE_FILE
:
1394 os
<< "type=UNKNOWN;";
1397 os
<< "fid=" << fhk
.fh_hk
.bucket
<< ":" << fhk
.fh_hk
.object
<< ";";
1398 os
<< "name=" << rgw_fh
.object_name() << ";";
1399 os
<< "refcnt=" << rgw_fh
.get_refcnt() << ";";
/* Destructor.  If fh_hook is still linked, removes this handle from
 * fs->fh_cache (passing FLAG_LOCK to remove()); afterwards releases the
 * reference held on the parent handle unless the parent is the mount. */
1404 RGWFileHandle::~RGWFileHandle() {
1405 /* !recycle case, handle may STILL be in handle table, BUT
1406 * the partition lock is not held in this path */
1407 if (fh_hook
.is_linked()) {
1408 fs
->fh_cache
.remove(fh
.fh_hk
.object
, this, FHCache::FLAG_LOCK
);
1410 /* cond-unref parent */
1411 if (parent
&& (! parent
->is_mount())) {
1412 /* safe because if parent->unref causes its deletion,
1413 * there are a) by refcnt, no other objects/paths pointing
1414 * to it and b) by the semantics of valid iteration of
1415 * fh_lru (observed, e.g., by cohort_lru<T,...>::drain())
1416 * no unsafe iterators reaching it either--n.b., this constraint
1417 * is binding on code which may in future attempt to e.g.,
1418 * cause the eviction of objects in LRU order */
1419 (void) get_fs()->unref(parent
);
/* Build the fh_key for a child called `name`.  Two return paths are visible:
 * one keyed as (name, name, tenant) for the bucket case, and one keyed by
 * this handle's bucket hash plus the key name produced by make_key_name().
 * The local `tenant` is the string form of the mounting user's user_id.
 * NOTE(review): the condition selecting between the two returns is not
 * visible in this listing. */
1423 fh_key
RGWFileHandle::make_fhk(const std::string
& name
)
1425 std::string tenant
= get_fs()->get_user()->user_id
.to_str();
1427 /* S3 bucket -- assert mount-at-bucket case reaches here */
1428 return fh_key(name
, name
, tenant
);
1430 std::string key_name
= make_key_name(name
.c_str());
1431 return fh_key(fhk
.fh_hk
.bucket
, key_name
.c_str(), tenant
);
/* Serialize this handle for storage as object attributes: the hash key goes
 * into ux_key1 and the handle itself into ux_attrs1.  For regular files and
 * symbolic links a negative file_ondisk_version is first reset to 0, and the
 * version is incremented after encoding when inc_ov is set (inc_ov is
 * presumably the trailing parameter; its declaration is not visible here). */
1435 void RGWFileHandle::encode_attrs(ceph::buffer::list
& ux_key1
,
1436 ceph::buffer::list
& ux_attrs1
,
1440 fh_key
fhk(this->fh
.fh_hk
);
1441 encode(fhk
, ux_key1
);
1442 bool need_ondisk_version
=
1443 (fh
.fh_type
== RGW_FS_TYPE_FILE
||
1444 fh
.fh_type
== RGW_FS_TYPE_SYMBOLIC_LINK
);
1445 if (need_ondisk_version
&&
1446 file_ondisk_version
< 0) {
1447 file_ondisk_version
= 0;
1449 encode(*this, ux_attrs1
);
1450 if (need_ondisk_version
&& inc_ov
) {
1451 file_ondisk_version
++;
1453 } /* RGWFileHandle::encode_attrs */
/* Inverse of encode_attrs.  The key is decoded from ux_key1 into fhk and the
 * attributes from ux_attrs1 into a temporary handle (tmp_fh).  The handle
 * type is always taken from tmp_fh; the remaining state fields are copied
 * unless this is a file or symlink whose in-memory file_ondisk_version is
 * already >= the decoded one.  When the copy happens file_ondisk_version
 * becomes decoded + 1 so that the next encode writes a larger version. */
1455 DecodeAttrsResult
RGWFileHandle::decode_attrs(const ceph::buffer::list
* ux_key1
,
1456 const ceph::buffer::list
* ux_attrs1
)
1459 DecodeAttrsResult dar
{ false, false };
1461 auto bl_iter_key1
= ux_key1
->cbegin();
1462 decode(fhk
, bl_iter_key1
);
1465 // decode to a temporary file handle which may not be
1466 // copied to the current file handle if its file_ondisk_version
1468 RGWFileHandle
tmp_fh(fs
);
1469 tmp_fh
.fh
.fh_type
= fh
.fh_type
;
1470 auto bl_iter_unix1
= ux_attrs1
->cbegin();
1471 decode(tmp_fh
, bl_iter_unix1
);
1473 fh
.fh_type
= tmp_fh
.fh
.fh_type
;
1474 // for file handles that represent files and whose file_ondisk_version
1475 // is newer, no updates are needed, otherwise, go updating the current
1477 if (!((fh
.fh_type
== RGW_FS_TYPE_FILE
||
1478 fh
.fh_type
== RGW_FS_TYPE_SYMBOLIC_LINK
) &&
1479 file_ondisk_version
>= tmp_fh
.file_ondisk_version
)) {
1480 // make sure the following "encode" always encode a greater version
1481 file_ondisk_version
= tmp_fh
.file_ondisk_version
+ 1;
1482 state
.dev
= tmp_fh
.state
.dev
;
1483 state
.size
= tmp_fh
.state
.size
;
1484 state
.nlink
= tmp_fh
.state
.nlink
;
1485 state
.owner_uid
= tmp_fh
.state
.owner_uid
;
1486 state
.owner_gid
= tmp_fh
.state
.owner_gid
;
1487 state
.unix_mode
= tmp_fh
.state
.unix_mode
;
1488 state
.ctime
= tmp_fh
.state
.ctime
;
1489 state
.mtime
= tmp_fh
.state
.mtime
;
1490 state
.atime
= tmp_fh
.state
.atime
;
1491 state
.version
= tmp_fh
.state
.version
;
1494 if (this->state
.version
< 2) {
1499 } /* RGWFileHandle::decode_attrs */
/* LRU reclaim hook.  newobj_fac is down-cast to RGWFileHandle::Factory; a
 * failed cast and a partition mismatch between the factory key and this
 * handle are both tested before the handle is unlinked from fs->fh_cache
 * (FLAG_NONE: per the comment below, the caller already holds the
 * partition lock).  The values returned on each path are not visible in
 * this listing. */
1501 bool RGWFileHandle::reclaim(const cohort::lru::ObjectFactory
* newobj_fac
) {
1502 lsubdout(fs
->get_context(), rgw
, 17)
1503 << __func__
<< " " << *this
1505 auto factory
= dynamic_cast<const RGWFileHandle::Factory
*>(newobj_fac
);
1506 if (factory
== nullptr) {
1509 /* make sure the reclaiming object is in the same partition as the new-object factory,
1510 * then we can recycle the object, and replace it with the new object */
1511 if (!fs
->fh_cache
.is_same_partition(factory
->fhk
.fh_hk
.object
, fh
.fh_hk
.object
)) {
1514 /* in the non-delete case, handle may still be in handle table */
1515 if (fh_hook
.is_linked()) {
1516 /* in this case, we are being called from a context which holds
1517 * the partition lock */
1518 fs
->fh_cache
.remove(fh
.fh_hk
.object
, this, FHCache::FLAG_NONE
);
1521 } /* RGWFileHandle::reclaim */
/* Report whether this directory has any children by executing an
 * RGWRMdirCheck request on behalf of the mounting user; the answer is
 * req.valid && req.has_children.  A non-directory handle is diverted by the
 * first test (the value returned there is not visible in this listing).
 * NOTE(review): rc is assigned from execute_req() but no visible line
 * reads it -- confirm whether a failed request should be reported. */
1523 bool RGWFileHandle::has_children() const
1525 if (unlikely(! is_dir()))
1528 RGWRMdirCheck
req(fs
->get_context(),
1529 rgwlib
.get_store()->get_user(fs
->get_user()->user_id
),
1531 int rc
= rgwlib
.get_fe()->execute_req(&req
);
1533 return req
.valid
&& req
.has_children
;
/* Stream inserter for a readdir_offset variant.  When the variant holds a
 * uint64_t* the pointer is extracted into ioff (the line that prints it is
 * not visible in this listing); otherwise the const char* alternative is
 * streamed directly. */
1539 std::ostream
& operator<<(std::ostream
&os
,
1540 RGWFileHandle::readdir_offset
const &offset
)
1543 if (unlikely(!! get
<uint64_t*>(&offset
))) {
1544 uint64_t* ioff
= get
<uint64_t*>(offset
);
1548 os
<< get
<const char*>(offset
);
/* Enumerate this directory, reporting entries through rcb(cb_arg).
 * Visible flow:
 *  - d->last_readdir is stamped under mtx with a CLOCK_MONOTONIC_COARSE
 *    reading that was taken before the lock was acquired;
 *  - offset is a variant holding either a const char* marker or a
 *    uint64_t* numeric offset;
 *  - either an RGWListBucketsRequest or an RGWReaddirRequest is executed
 *    through rgwlib.get_fe()->execute_req(), after which req.d_count is
 *    added to the link count under mtx;
 *  - a READDIR event carrying get_key() and state.atime is pushed onto
 *    fs->state under fs->state.mtx.
 * NOTE(review): the conditions selecting between the two request types
 * and the handling of rc/eof are not visible in this listing. */
1552 int RGWFileHandle::readdir(rgw_readdir_cb rcb
, void *cb_arg
,
1553 readdir_offset offset
,
1554 bool *eof
, uint32_t flags
)
1556 using event
= RGWLibFS::event
;
1559 struct timespec now
;
1560 CephContext
* cct
= fs
->get_context();
1562 lsubdout(cct
, rgw
, 10)
1563 << __func__
<< " readdir called on "
1567 directory
* d
= get
<directory
>(&variant_type
);
1569 (void) clock_gettime(CLOCK_MONOTONIC_COARSE
, &now
); /* !LOCKED */
1570 lock_guard
guard(mtx
);
1571 d
->last_readdir
= now
;
1577 if (likely(!! get
<const char*>(&offset
))) {
1578 mk
= const_cast<char*>(get
<const char*>(offset
));
1581 initial_off
= (*get
<uint64_t*>(offset
) == 0);
1585 RGWListBucketsRequest
req(cct
, rgwlib
.get_store()->get_user(fs
->get_user()->user_id
),
1586 this, rcb
, cb_arg
, offset
);
1587 rc
= rgwlib
.get_fe()->execute_req(&req
);
1589 (void) clock_gettime(CLOCK_MONOTONIC_COARSE
, &now
); /* !LOCKED */
1590 lock_guard
guard(mtx
);
1594 inc_nlink(req
.d_count
);
1598 RGWReaddirRequest
req(cct
, rgwlib
.get_store()->get_user(fs
->get_user()->user_id
),
1599 this, rcb
, cb_arg
, offset
);
1600 rc
= rgwlib
.get_fe()->execute_req(&req
);
1602 (void) clock_gettime(CLOCK_MONOTONIC_COARSE
, &now
); /* !LOCKED */
1603 lock_guard
guard(mtx
);
1607 inc_nlink(req
.d_count
);
1612 event
ev(event::type::READDIR
, get_key(), state
.atime
);
1613 lock_guard
sguard(fs
->state
.mtx
);
1614 fs
->state
.push_event(ev
);
1616 lsubdout(fs
->get_context(), rgw
, 15)
1618 << " final link count=" << state
.nlink
1622 } /* RGWFileHandle::readdir */
/* Feed len bytes from buffer, destined for offset off, into this file's
 * streaming write transaction.  Everything visible runs under mtx.
 *  - A write against a deleted object is logged and any open transaction
 *    is deleted.
 *  - When no write_req exists a new RGWWriteRequest is created and started
 *    through start_req(); a non-zero initial offset is logged as
 *    unsupported, and a failed start deletes the request again.  For a
 *    stateless open a completion timer is armed.
 *  - A write that begins before write_req->real_ofs but reaches it has its
 *    already-written prefix skipped: `overlap` bytes are dropped from the
 *    front of buffer and off is moved up to real_ofs.
 *  - The data is passed to write_req->put_data() and exec_continue(); a
 *    failed continuation deletes the transaction.
 *  - *bytes_written is len + overlap on success and 0 otherwise.
 * NOTE(review): the timer is armed with
 * RGWLibFS::write_completion_interval_s but bumped with a literal
 * std::chrono::seconds(10).  The two agree only while the interval keeps
 * its initial value of 10 -- consider using the same constant in both
 * places. */
1624 int RGWFileHandle::write(uint64_t off
, size_t len
, size_t *bytes_written
,
1628 using WriteCompletion
= RGWLibFS::WriteCompletion
;
1630 lock_guard
guard(mtx
);
1634 file
* f
= get
<file
>(&variant_type
);
1639 lsubdout(fs
->get_context(), rgw
, 5)
1641 << " write attempted on deleted object "
1642 << this->object_name()
1644 /* zap write transaction, if any */
1646 delete f
->write_req
;
1647 f
->write_req
= nullptr;
1652 if (! f
->write_req
) {
1653 /* guard--we do not support (e.g., COW-backed) partial writes */
1655 lsubdout(fs
->get_context(), rgw
, 5)
1657 << " " << object_name()
1658 << " non-0 initial write position " << off
1659 << " (mounting with -o sync required)"
1665 std::string object_name
= relative_object_name();
1667 new RGWWriteRequest(rgwlib
.get_store(),
1668 rgwlib
.get_store()->get_user(fs
->get_user()->user_id
),
1669 this, bucket_name(), object_name
);
1670 rc
= rgwlib
.get_fe()->start_req(f
->write_req
);
1672 lsubdout(fs
->get_context(), rgw
, 5)
1674 << this->object_name()
1675 << " write start failed " << off
1676 << " (" << rc
<< ")"
1678 /* zap failed write transaction */
1679 delete f
->write_req
;
1680 f
->write_req
= nullptr;
1683 if (stateless_open()) {
1684 /* start write timer */
1685 f
->write_req
->timer_id
=
1686 RGWLibFS::write_timer
.add_event(
1687 std::chrono::seconds(RGWLibFS::write_completion_interval_s
),
1688 WriteCompletion(*this));
1694 if ((static_cast<off_t
>(off
) < f
->write_req
->real_ofs
) &&
1695 ((f
->write_req
->real_ofs
- off
) <= len
)) {
1696 overlap
= f
->write_req
->real_ofs
- off
;
1697 off
= f
->write_req
->real_ofs
;
1698 buffer
= static_cast<char*>(buffer
) + overlap
;
1706 buffer::create_static(len
, static_cast<char*>(buffer
)));
1709 buffer::copy(static_cast<char*>(buffer
), len
));
1712 f
->write_req
->put_data(off
, bl
);
1713 rc
= f
->write_req
->exec_continue();
1716 size_t min_size
= off
+ len
;
1717 if (min_size
> get_size())
1719 if (stateless_open()) {
1720 /* bump write timer */
1721 RGWLibFS::write_timer
.adjust_event(
1722 f
->write_req
->timer_id
, std::chrono::seconds(10));
1725 /* continuation failed (e.g., non-contiguous write position) */
1726 lsubdout(fs
->get_context(), rgw
, 5)
1729 << " failed write at position " << off
1730 << " (fails write transaction) "
1732 /* zap failed write transaction */
1733 delete f
->write_req
;
1734 f
->write_req
= nullptr;
1738 *bytes_written
= (rc
== 0) ? (len
+ overlap
) : 0;
1740 } /* RGWFileHandle::write */
/* Complete the pending write transaction, if there is one: finish_req() is
 * run on f->write_req, the request's get_ret() becomes the result, and the
 * request is deleted and cleared.  The unique_lock on mtx is constructed
 * deferred; judging by the FLAG_LOCKED test it is presumably acquired only
 * when the caller does not already hold mtx (the lock call itself is not
 * visible in this listing). */
1742 int RGWFileHandle::write_finish(uint32_t flags
)
1744 unique_lock guard
{mtx
, std::defer_lock
};
1747 if (! (flags
& FLAG_LOCKED
)) {
1751 file
* f
= get
<file
>(&variant_type
);
1752 if (f
&& (f
->write_req
)) {
1753 lsubdout(fs
->get_context(), rgw
, 10)
1755 << " finishing write trans on " << object_name()
1757 rc
= rgwlib
.get_fe()->finish_req(f
->write_req
);
1759 rc
= f
->write_req
->get_ret();
1761 delete f
->write_req
;
1762 f
->write_req
= nullptr;
1766 } /* RGWFileHandle::write_finish */
/* Close the handle: while holding mtx, finish any pending write (passing
 * FLAG_LOCKED to write_finish) and clear FLAG_OPEN and
 * FLAG_STATELESS_OPEN from flags. */
1768 int RGWFileHandle::close()
1770 lock_guard
guard(mtx
);
1772 int rc
= write_finish(FLAG_LOCKED
);
1774 flags
&= ~FLAG_OPEN
;
1775 flags
&= ~FLAG_STATELESS_OPEN
;
1778 } /* RGWFileHandle::close */
1780 RGWFileHandle::file::~file()
/* Reset cached directory state.  The visible part fetches the directory
 * alternative of variant_type and resets its last_marker to an empty
 * rgw_obj_key; other resets may exist on lines not shown here. */
1785 void RGWFileHandle::clear_state()
1787 directory
* d
= get
<directory
>(&variant_type
);
1790 d
->last_marker
= rgw_obj_key
{};
/* Fast-forward the modification time of a handle (see the comment below:
 * meant for directories).  mtx is locked here only when the caller did not
 * pass FLAG_LOCKED.  The stored mtime is combined with the configured
 * rgw_nfs_namespace_expire_secs and compared against the current real
 * time; the update itself (on lines not visible here) happens only when
 * that adjusted time is earlier than now. */
1794 void RGWFileHandle::advance_mtime(uint32_t flags
) {
1795 /* intended for use on directories, fast-forward mtime so as to
1796 * ensure a new, higher value for the change attribute */
1797 unique_lock
uniq(mtx
, std::defer_lock
);
1798 if (likely(! (flags
& RGWFileHandle::FLAG_LOCKED
))) {
1802 /* advance mtime only if stored mtime is older than the
1803 * configured namespace expiration */
1804 auto now
= real_clock::now();
1805 auto cmptime
= state
.mtime
;
1807 fs
->get_context()->_conf
->rgw_nfs_namespace_expire_secs
;
1808 if (cmptime
< real_clock::to_timespec(now
)) {
1809 /* sets ctime as well as mtime, to avoid masking updates should
1810 * ctime inexplicably hold a higher value */
/* Notify the upper layer that this handle is stale: when the owning
 * filesystem has an invalidate callback registered, call it with the
 * registered argument and this handle's hash key. */
1815 void RGWFileHandle::invalidate() {
1816 RGWLibFS
*fs
= get_fs();
1817 if (fs
->invalidate_cb
) {
1818 fs
->invalidate_cb(fs
->invalidate_arg
, get_key().fh_hk
);
/* First phase of a streaming PUT.  Visible steps, in order:
 *  - bind the request's object to its bucket and look up the zone's
 *    compression type for the bucket's placement rule;
 *  - assert that neither a DLO manifest nor SLO info is present;
 *  - count the operation (l_rgw_put) and log when the object name is empty;
 *  - load request params and system versioning params into op_ret;
 *  - create the aio throttle sized by rgw_put_obj_min_window_size;
 *  - on a versioning-enabled bucket use the supplied version_id as the
 *    object instance, or generate a random one and record it;
 *  - obtain an atomic writer from the store and prepare() it;
 *  - make the processor the data filter and, unless the compression type
 *    is "none", wrap it in a compressor built from the loaded plugin
 *    (a plugin load failure is logged at level 1).
 * Error exits between the steps are not visible in this listing. */
1822 int RGWWriteRequest::exec_start() {
1823 struct req_state
* state
= get_state();
1825 /* Object needs a bucket from this point */
1826 state
->object
->set_bucket(state
->bucket
.get());
1828 auto compression_type
=
1829 get_store()->get_zone()->get_params().get_compression_type(
1830 state
->bucket
->get_placement_rule());
1832 /* not obviously supportable */
1833 ceph_assert(! dlo_manifest
);
1834 ceph_assert(! slo_info
);
1836 perfcounter
->inc(l_rgw_put
);
1839 if (state
->object
->empty()) {
1840 ldout(state
->cct
, 0) << __func__
<< " called on empty object" << dendl
;
1844 op_ret
= get_params(null_yield
);
1848 op_ret
= get_system_versioning_params(state
, &olh_epoch
, &version_id
);
1853 /* user-supplied MD5 check skipped (not supplied) */
1854 /* early quota check skipped--we don't have size yet */
1855 /* skipping user-supplied etag--we might have one in future, but
1856 * like data it and other attrs would arrive after open */
1858 aio
.emplace(state
->cct
->_conf
->rgw_put_obj_min_window_size
);
1860 if (state
->bucket
->versioning_enabled()) {
1861 if (!version_id
.empty()) {
1862 state
->object
->set_instance(version_id
);
1864 state
->object
->gen_rand_obj_instance_name();
1865 version_id
= state
->object
->get_instance();
1868 processor
= get_store()->get_atomic_writer(this, state
->yield
, state
->object
->clone(),
1869 state
->bucket_owner
.get_id(), *state
->obj_ctx
,
1870 &state
->dest_placement
, 0, state
->req_id
);
1872 op_ret
= processor
->prepare(state
->yield
);
1874 ldout(state
->cct
, 20) << "processor->prepare() returned ret=" << op_ret
1878 filter
= &*processor
;
1879 if (compression_type
!= "none") {
1880 plugin
= Compressor::create(state
->cct
, compression_type
);
1882 ldout(state
->cct
, 1) << "Cannot load plugin for rgw_compression_type "
1883 << compression_type
<< dendl
;
1885 compressor
.emplace(state
->cct
, plugin
, filter
);
1886 filter
= &*compressor
;
/* Middle phase of a streaming PUT: consume the chunk previously staged in
 * `data`.  Visible steps: an ordering guard that logs out-of-order chunks,
 * a quota check against real_ofs, an update of the running hash over the
 * chunk, hand-off of the chunk to filter->process() at offset ofs, and
 * accumulation into bytes_written.  The chunk length is captured in `len`
 * before `data` is moved into the filter, so the later addition does not
 * read a moved-from buffer. */
1894 int RGWWriteRequest::exec_continue()
1896 struct req_state
* state
= get_state();
1899 /* check guards (e.g., contig write) */
1901 ldout(state
->cct
, 5)
1902 << " chunks arrived in wrong order"
1903 << " (mounting with -o sync required)"
1908 op_ret
= state
->bucket
->check_quota(this, user_quota
, bucket_quota
, real_ofs
, null_yield
, true);
1909 /* max_size exceed */
1913 size_t len
= data
.length();
1917 hash
.Update((const unsigned char *)data
.c_str(), data
.length());
1918 op_ret
= filter
->process(std::move(data
), ofs
);
1922 bytes_written
+= len
;
1926 } /* exec_continue */
/* Final phase of a streaming PUT.  Visible steps, in order:
 *  - remember the handle's current size, ctime and mtime so that they can
 *    be restored (see "revert attr updates" below);
 *  - record bytes_written as the object size and flush the filter chain
 *    with an empty buffer;
 *  - re-check quota against the final size;
 *  - when the compressor actually compressed, store an
 *    RGW_ATTR_COMPRESSION attribute describing type, original size and
 *    blocks;
 *  - hex-encode the MD5 digest and store the etag, the ACL, and the
 *    encoded unix key/attrs of the handle (after setting mtime/ctime to
 *    the approximate current time and size to bytes_written);
 *  - copy state->generic_attrs into attrs and merge request metadata;
 *  - call processor->complete() and account latency.
 * The etag and generic attribute values are appended with size() + 1, so
 * the terminating NUL is stored as part of each value.
 * NOTE(review): std::move() around compressor->get_compression_blocks()
 * is applied to a call result; if that call returns by value the move is
 * redundant -- confirm the return type. */
1928 int RGWWriteRequest::exec_finish()
1930 buffer::list bl
, aclbl
, ux_key
, ux_attrs
;
1931 map
<string
, string
>::iterator iter
;
1932 char calc_md5
[CEPH_CRYPTO_MD5_DIGESTSIZE
* 2 + 1];
1933 unsigned char m
[CEPH_CRYPTO_MD5_DIGESTSIZE
];
1934 struct req_state
* state
= get_state();
1936 size_t osize
= rgw_fh
->get_size();
1937 struct timespec octime
= rgw_fh
->get_ctime();
1938 struct timespec omtime
= rgw_fh
->get_mtime();
1939 real_time appx_t
= real_clock::now();
1941 state
->obj_size
= bytes_written
;
1942 perfcounter
->inc(l_rgw_put_b
, state
->obj_size
);
1944 // flush data in filters
1945 op_ret
= filter
->process({}, state
->obj_size
);
1950 op_ret
= state
->bucket
->check_quota(this, user_quota
, bucket_quota
, state
->obj_size
, null_yield
, true);
1951 /* max_size exceed */
1958 if (compressor
&& compressor
->is_compressed()) {
1960 RGWCompressionInfo cs_info
;
1961 cs_info
.compression_type
= plugin
->get_type_name();
1962 cs_info
.orig_size
= state
->obj_size
;
1963 cs_info
.blocks
= std::move(compressor
->get_compression_blocks());
1964 encode(cs_info
, tmp
);
1965 attrs
[RGW_ATTR_COMPRESSION
] = tmp
;
1966 ldpp_dout(this, 20) << "storing " << RGW_ATTR_COMPRESSION
1967 << " with type=" << cs_info
.compression_type
1968 << ", orig_size=" << cs_info
.orig_size
1969 << ", blocks=" << cs_info
.blocks
.size() << dendl
;
1972 buf_to_hex(m
, CEPH_CRYPTO_MD5_DIGESTSIZE
, calc_md5
);
1975 bl
.append(etag
.c_str(), etag
.size() + 1);
1976 emplace_attr(RGW_ATTR_ETAG
, std::move(bl
));
1978 policy
.encode(aclbl
);
1979 emplace_attr(RGW_ATTR_ACL
, std::move(aclbl
));
1982 rgw_fh
->set_mtime(real_clock::to_timespec(appx_t
));
1983 rgw_fh
->set_ctime(real_clock::to_timespec(appx_t
));
1984 rgw_fh
->set_size(bytes_written
);
1985 rgw_fh
->encode_attrs(ux_key
, ux_attrs
);
1987 emplace_attr(RGW_ATTR_UNIX_KEY1
, std::move(ux_key
));
1988 emplace_attr(RGW_ATTR_UNIX1
, std::move(ux_attrs
));
1990 for (iter
= state
->generic_attrs
.begin(); iter
!= state
->generic_attrs
.end();
1992 buffer::list
& attrbl
= attrs
[iter
->first
];
1993 const string
& val
= iter
->second
;
1994 attrbl
.append(val
.c_str(), val
.size() + 1);
1997 op_ret
= rgw_get_request_metadata(this, state
->cct
, state
->info
, attrs
);
2001 encode_delete_at_attr(delete_at
, attrs
);
2003 /* Add a custom metadata to expose the information whether an object
2004 * is an SLO or not. Appending the attribute must be performed AFTER
2005 * processing any input from user in order to prohibit overwriting. */
2006 if (unlikely(!! slo_info
)) {
2007 buffer::list slo_userindicator_bl
;
2009 encode("True", slo_userindicator_bl
);
2010 emplace_attr(RGW_ATTR_SLO_UINDICATOR
, std::move(slo_userindicator_bl
));
2013 op_ret
= processor
->complete(state
->obj_size
, etag
, &mtime
, real_time(), attrs
,
2014 (delete_at
? *delete_at
: real_time()),
2015 if_match
, if_nomatch
, nullptr, nullptr, nullptr,
2018 /* revert attr updates */
2019 rgw_fh
->set_mtime(omtime
);
2020 rgw_fh
->set_ctime(octime
);
2021 rgw_fh
->set_size(osize
);
2025 perfcounter
->tinc(l_rgw_put_lat
, state
->time_elapsed());
2029 } /* namespace rgw */
/* C API: report the library version by storing LIBRGW_FILE_VER_MAJOR,
 * LIBRGW_FILE_VER_MINOR and LIBRGW_FILE_VER_EXTRA through the three output
 * pointers.  Any null checks on the pointers are not visible in this
 * listing. */
2034 void rgwfile_version(int *major
, int *minor
, int *extra
)
2037 *major
= LIBRGW_FILE_VER_MAJOR
;
2039 *minor
= LIBRGW_FILE_VER_MINOR
;
2041 *extra
= LIBRGW_FILE_VER_EXTRA
;
2045 attach rgw namespace
/* C API: create an RGWLibFS for the given credentials, authorize it
 * against the store, register it with the frontend process for shared gc,
 * and expose its rgw_fs record.
 * NOTE(review): ceph_assert(new_fs) follows a plain `new` expression, which
 * reports failure by throwing rather than by returning nullptr, so the
 * assertion looks unreachable -- confirm before relying on it. */
2047 int rgw_mount(librgw_t rgw
, const char *uid
, const char *acc_key
,
2048 const char *sec_key
, struct rgw_fs
**rgw_fs
,
2053 /* stash access data for "mount" */
2054 RGWLibFS
* new_fs
= new RGWLibFS(static_cast<CephContext
*>(rgw
), uid
, acc_key
,
2056 ceph_assert(new_fs
);
2058 const DoutPrefix
dp(rgwlib
.get_store()->ctx(), dout_subsys
, "rgw mount: ");
2059 rc
= new_fs
->authorize(&dp
, rgwlib
.get_store());
2065 /* register fs for shared gc */
2066 rgwlib
.get_fe()->get_process()->register_fs(new_fs
);
2068 struct rgw_fs
*fs
= new_fs
->get_fs();
2071 /* XXX we no longer assume "/" is unique, but we aren't tracking the
/* C API: like rgw_mount but with an explicit root.  Two construction paths
 * are visible: one that mounts at "/" (taken, per the comment below, when
 * no usable root was supplied -- the visible part of the test compares root
 * with the empty string) and one that mounts at `root`.  The new RGWLibFS
 * is then authorized and registered for shared gc exactly as in rgw_mount.
 * NOTE(review): as the inline question suggests, ceph_assert(new_fs) after
 * a plain `new` looks unreachable, since `new` throws on failure. */
2079 int rgw_mount2(librgw_t rgw
, const char *uid
, const char *acc_key
,
2080 const char *sec_key
, const char *root
, struct rgw_fs
**rgw_fs
,
2085 /* if the config has no value for path/root, choose "/" */
2086 RGWLibFS
* new_fs
{nullptr};
2088 (!strcmp(root
, ""))) {
2089 /* stash access data for "mount" */
2090 new_fs
= new RGWLibFS(
2091 static_cast<CephContext
*>(rgw
), uid
, acc_key
, sec_key
, "/");
2094 /* stash access data for "mount" */
2095 new_fs
= new RGWLibFS(
2096 static_cast<CephContext
*>(rgw
), uid
, acc_key
, sec_key
, root
);
2099 ceph_assert(new_fs
); /* should we be using ceph_assert? */
2101 const DoutPrefix
dp(rgwlib
.get_store()->ctx(), dout_subsys
, "rgw mount2: ");
2102 rc
= new_fs
->authorize(&dp
, rgwlib
.get_store());
2108 /* register fs for shared gc */
2109 rgwlib
.get_fe()->get_process()->register_fs(new_fs
);
2111 struct rgw_fs
*fs
= new_fs
->get_fs();
2114 /* XXX we no longer assume "/" is unique, but we aren't tracking the
2123 register invalidate callbacks
/* C API: forward cb, arg and flags to RGWLibFS::register_invalidate() on
 * the filesystem stored in rgw_fs->fs_private and return its result. */
2125 int rgw_register_invalidate(struct rgw_fs
*rgw_fs
, rgw_fh_callback_t cb
,
2126 void *arg
, uint32_t flags
)
2129 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2130 return fs
->register_invalidate(cb
, arg
, flags
);
2134 detach rgw namespace
/* C API: detach a mounted filesystem.  Only the recovery of the RGWLibFS
 * from rgw_fs->fs_private is visible in this listing; the teardown calls
 * themselves are on lines not shown. */
2136 int rgw_umount(struct rgw_fs
*rgw_fs
, uint32_t flags
)
2138 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2144 get filesystem attributes
/* C API: fill vfs_st from cluster-wide usage obtained with an
 * RGWGetClusterStatReq.  A failure of the request is logged together with
 * cpp_strerror(-rc).  The block and fragment sizes are fixed at 1 MiB
 * (1 << CEPH_BLOCK_SHIFT); the kb and kb_avail figures are shifted right by
 * (CEPH_BLOCK_SHIFT - 10) to express them in those 1 MiB blocks.  f_files is
 * the cluster object count, f_ffree is set to -1, both f_fsid words carry
 * fs->get_fsid(), and f_namemax is 4096. */
2146 int rgw_statfs(struct rgw_fs
*rgw_fs
,
2147 struct rgw_file_handle
*parent_fh
,
2148 struct rgw_statvfs
*vfs_st
, uint32_t flags
)
2150 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2151 struct rados_cluster_stat_t stats
;
2153 RGWGetClusterStatReq
req(fs
->get_context(),
2154 rgwlib
.get_store()->get_user(fs
->get_user()->user_id
),
2156 int rc
= rgwlib
.get_fe()->execute_req(&req
);
2158 lderr(fs
->get_context()) << "ERROR: getting total cluster usage"
2159 << cpp_strerror(-rc
) << dendl
;
2163 //Set block size to 1M.
2164 constexpr uint32_t CEPH_BLOCK_SHIFT
= 20;
2165 vfs_st
->f_bsize
= 1 << CEPH_BLOCK_SHIFT
;
2166 vfs_st
->f_frsize
= 1 << CEPH_BLOCK_SHIFT
;
2167 vfs_st
->f_blocks
= stats
.kb
>> (CEPH_BLOCK_SHIFT
- 10);
2168 vfs_st
->f_bfree
= stats
.kb_avail
>> (CEPH_BLOCK_SHIFT
- 10);
2169 vfs_st
->f_bavail
= stats
.kb_avail
>> (CEPH_BLOCK_SHIFT
- 10);
2170 vfs_st
->f_files
= stats
.num_objects
;
2171 vfs_st
->f_ffree
= -1;
2172 vfs_st
->f_fsid
[0] = fs
->get_fsid();
2173 vfs_st
->f_fsid
[1] = fs
->get_fsid();
2175 vfs_st
->f_namemax
= 4096;
2180 generic create -- create an empty regular file
/* C API: create a regular file called `name` under parent_fh by calling
 * RGWLibFS::create().  A parent that is the root or is itself a file is
 * caught by the visible guard (presumably an error return; the code is not
 * shown in this listing).  The first element of the result is the new
 * handle, nullptr on failure, and its rgw_file_handle is stored in *fh. */
2182 int rgw_create(struct rgw_fs
*rgw_fs
, struct rgw_file_handle
*parent_fh
,
2183 const char *name
, struct stat
*st
, uint32_t mask
,
2184 struct rgw_file_handle
**fh
, uint32_t posix_flags
,
2189 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2190 RGWFileHandle
* parent
= get_rgwfh(parent_fh
);
2193 (parent
->is_root()) ||
2194 (parent
->is_file())) {
2199 MkObjResult fhr
= fs
->create(parent
, name
, st
, mask
, flags
);
2200 RGWFileHandle
*nfh
= get
<0>(fhr
); // nullptr if !success
2203 *fh
= nfh
->get_fh();
2209 create a symbolic link
/* C API: create a symbolic link called `name` under parent_fh that points
 * at link_path, by calling RGWLibFS::symlink().  The same parent guard as
 * in rgw_create is visible (root or file parent; presumably an error
 * return).  The first element of the result is the new handle, nullptr on
 * failure, and its rgw_file_handle is stored in *fh. */
2211 int rgw_symlink(struct rgw_fs
*rgw_fs
, struct rgw_file_handle
*parent_fh
,
2212 const char *name
, const char *link_path
, struct stat
*st
, uint32_t mask
,
2213 struct rgw_file_handle
**fh
, uint32_t posix_flags
,
2218 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2219 RGWFileHandle
* parent
= get_rgwfh(parent_fh
);
2222 (parent
->is_root()) ||
2223 (parent
->is_file())) {
2228 MkObjResult fhr
= fs
->symlink(parent
, name
, link_path
, st
, mask
, flags
);
2229 RGWFileHandle
*nfh
= get
<0>(fhr
); // nullptr if !success
2232 *fh
= nfh
->get_fh();
2238 create a new directory
/* C API: create a directory called `name` under parent_fh by calling
 * RGWLibFS::mkdir().  The first element of the result is the new handle,
 * nullptr on failure, and its rgw_file_handle is stored in *fh.  Any
 * validation of the parent is on lines not visible in this listing. */
2240 int rgw_mkdir(struct rgw_fs
*rgw_fs
,
2241 struct rgw_file_handle
*parent_fh
,
2242 const char *name
, struct stat
*st
, uint32_t mask
,
2243 struct rgw_file_handle
**fh
, uint32_t flags
)
2247 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2248 RGWFileHandle
* parent
= get_rgwfh(parent_fh
);
2255 MkObjResult fhr
= fs
->mkdir(parent
, name
, st
, mask
, flags
);
2256 RGWFileHandle
*nfh
= get
<0>(fhr
); // nullptr if !success
2259 *fh
= nfh
->get_fh();
/* C API: rename src_name in directory `src` to dst_name in directory `dst`.
 * Both handles are converted with get_rgwfh() and the call is forwarded to
 * RGWLibFS::rename(), whose result is returned. */
2267 int rgw_rename(struct rgw_fs
*rgw_fs
,
2268 struct rgw_file_handle
*src
, const char* src_name
,
2269 struct rgw_file_handle
*dst
, const char* dst_name
,
2272 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2274 RGWFileHandle
* src_fh
= get_rgwfh(src
);
2275 RGWFileHandle
* dst_fh
= get_rgwfh(dst
);
2277 return fs
->rename(src_fh
, dst_fh
, src_name
, dst_name
);
2281 remove file or directory
/* C API: remove the entry `name` from directory parent_fh by forwarding to
 * RGWLibFS::unlink() and returning its result.  The flags argument is not
 * passed on. */
2283 int rgw_unlink(struct rgw_fs
*rgw_fs
, struct rgw_file_handle
*parent_fh
,
2284 const char *name
, uint32_t flags
)
2286 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2287 RGWFileHandle
* parent
= get_rgwfh(parent_fh
);
2289 return fs
->unlink(parent
, name
);
2293 lookup object by name (POSIX style)
/* C API: POSIX-style lookup of `path` relative to parent_fh.
 * Visible structure:
 *  - a parent that is not a directory is caught by the first guard;
 *  - under the root, ".." and "/" are special-cased (per the inline comment
 *    no reference is taken), and any other name is resolved with
 *    stat_bucket();
 *  - under a non-root parent, ".." is special-cased again (per the inline
 *    comment an extra reference is taken); otherwise the handle type is
 *    derived from flags with fh_type_of() and one of three strategies is
 *    used: fake_leaf() when called from a readdir callback
 *    (RGW_LOOKUP_FLAG_RCB) with rgw_nfs_s3_fast_attrs enabled, a cached
 *    lookup_fh(FLAG_DIRECTORY) for directories with fast attrs, or
 *    stat_leaf();
 *  - when that yields no handle and RGW_LOOKUP_FLAG_CREATE is set,
 *    lookup_fh(FLAG_CREATE) is tried.
 * The final rgw_file_handle is taken from rgw_fh->get_fh().  Return codes
 * and the stores into *fh and *st are on lines not visible here. */
2295 int rgw_lookup(struct rgw_fs
*rgw_fs
,
2296 struct rgw_file_handle
*parent_fh
, const char* path
,
2297 struct rgw_file_handle
**fh
,
2298 struct stat
*st
, uint32_t mask
, uint32_t flags
)
2300 //CephContext* cct = static_cast<CephContext*>(rgw_fs->rgw);
2301 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2303 RGWFileHandle
* parent
= get_rgwfh(parent_fh
);
2305 (! parent
->is_dir())) {
2310 RGWFileHandle
* rgw_fh
;
2313 if (parent
->is_root()) {
2314 /* special: parent lookup--note lack of ref()! */
2315 if (unlikely((strcmp(path
, "..") == 0) ||
2316 (strcmp(path
, "/") == 0))) {
2319 RGWLibFS::BucketStats bstat
;
2320 fhr
= fs
->stat_bucket(parent
, path
, bstat
, RGWFileHandle::FLAG_NONE
);
2321 rgw_fh
= get
<0>(fhr
);
2326 /* special: after readdir--note extra ref()! */
2327 if (unlikely((strcmp(path
, "..") == 0))) {
2329 lsubdout(fs
->get_context(), rgw
, 17)
2330 << __func__
<< " BANG"<< *rgw_fh
2334 enum rgw_fh_type fh_type
= fh_type_of(flags
);
2336 uint32_t sl_flags
= (flags
& RGW_LOOKUP_FLAG_RCB
)
2337 ? RGWFileHandle::FLAG_IN_CB
2338 : RGWFileHandle::FLAG_EXACT_MATCH
;
2340 bool fast_attrs
= fs
->get_context()->_conf
->rgw_nfs_s3_fast_attrs
;
2342 if ((flags
& RGW_LOOKUP_FLAG_RCB
) && fast_attrs
) {
2343 /* FAKE STAT--this should mean, interpolate special
2344 * owner, group, and perms masks */
2345 fhr
= fs
->fake_leaf(parent
, path
, fh_type
, st
, mask
, sl_flags
);
2347 if ((fh_type
== RGW_FS_TYPE_DIRECTORY
) && fast_attrs
) {
2348 /* trust cached dir, if present */
2349 fhr
= fs
->lookup_fh(parent
, path
, RGWFileHandle::FLAG_DIRECTORY
);
2351 rgw_fh
= get
<0>(fhr
);
2355 fhr
= fs
->stat_leaf(parent
, path
, fh_type
, sl_flags
);
2357 if (! get
<0>(fhr
)) {
2358 if (! (flags
& RGW_LOOKUP_FLAG_CREATE
))
2361 fhr
= fs
->lookup_fh(parent
, path
, RGWFileHandle::FLAG_CREATE
);
2363 rgw_fh
= get
<0>(fhr
);
2368 struct rgw_file_handle
*rfh
= rgw_fh
->get_fh();
2375 lookup object by handle (NFS style)
/* C API: NFS-style lookup by hash key.  The key *fh_hk is resolved with
 * RGWLibFS::lookup_handle() and the resulting handle's rgw_file_handle is
 * obtained with get_fh().  Handling of a miss and the store into *fh are
 * on lines not visible in this listing. */
2377 int rgw_lookup_handle(struct rgw_fs
*rgw_fs
, struct rgw_fh_hk
*fh_hk
,
2378 struct rgw_file_handle
**fh
, uint32_t flags
)
2380 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2382 RGWFileHandle
* rgw_fh
= fs
->lookup_handle(*fh_hk
);
2388 struct rgw_file_handle
*rfh
= rgw_fh
->get_fh();
2395 * release file handle
/* C API: release a file handle.  The visible part converts fh with
 * get_rgwfh() and logs the handle at level 17; the call that actually
 * drops the reference is on a line not visible in this listing. */
2397 int rgw_fh_rele(struct rgw_fs
*rgw_fs
, struct rgw_file_handle
*fh
,
2400 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2401 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2403 lsubdout(fs
->get_context(), rgw
, 17)
2404 << __func__
<< " " << *rgw_fh
2412 get unix attributes for object
/* C API: fetch unix attributes of fh into *st by forwarding to
 * RGWLibFS::getattr() and returning its result.  The flags argument is not
 * passed on. */
2414 int rgw_getattr(struct rgw_fs
*rgw_fs
,
2415 struct rgw_file_handle
*fh
, struct stat
*st
, uint32_t flags
)
2417 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2418 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2420 return fs
->getattr(rgw_fh
, st
);
2424 set unix attributes for object
/* C API: apply the attributes selected by mask from *st to fh by
 * forwarding to RGWLibFS::setattr() and returning its result. */
2426 int rgw_setattr(struct rgw_fs
*rgw_fs
,
2427 struct rgw_file_handle
*fh
, struct stat
*st
,
2428 uint32_t mask
, uint32_t flags
)
2430 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2431 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2433 return fs
->setattr(rgw_fh
, st
, mask
, flags
);
2439 int rgw_truncate(struct rgw_fs
*rgw_fs
,
2440 struct rgw_file_handle
*fh
, uint64_t size
, uint32_t flags
)
/* C API: open a file handle.  Handles that are not regular files are
 * caught by the is_file() test (the error value is not visible in this
 * listing); otherwise the call is forwarded to RGWFileHandle::open(flags).
 * posix_flags is not used on any visible line. */
2448 int rgw_open(struct rgw_fs
*rgw_fs
,
2449 struct rgw_file_handle
*fh
, uint32_t posix_flags
, uint32_t flags
)
2451 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2454 * need to track specific opens--at least read opens and
2455 * a write open; we need to know when a write open is returned,
2456 * that closes a write transaction
2458 * for now, we will support single-open only, it's preferable to
2459 * anything we can otherwise do without access to the NFS state
2461 if (! rgw_fh
->is_file())
2464 return rgw_fh
->open(flags
);
/* C API: close a file handle through RGWFileHandle::close().  When
 * RGW_CLOSE_FLAG_RELE is set in flags a further action is taken
 * (presumably releasing the handle reference; the statement is not visible
 * in this listing). */
2470 int rgw_close(struct rgw_fs
*rgw_fs
,
2471 struct rgw_file_handle
*fh
, uint32_t flags
)
2473 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2474 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2475 int rc
= rgw_fh
->close(/* XXX */);
2477 if (flags
& RGW_CLOSE_FLAG_RELE
)
/* C API: enumerate parent_fh starting from the numeric position *offset.
 * When *offset is 0 and RGW_READDIR_FLAG_DOTDOT is set, "." and ".." are
 * reported first with offsets 1 and 2.  The enumeration itself is delegated
 * to RGWFileHandle::readdir() with the uint64_t* offset. */
2483 int rgw_readdir(struct rgw_fs
*rgw_fs
,
2484 struct rgw_file_handle
*parent_fh
, uint64_t *offset
,
2485 rgw_readdir_cb rcb
, void *cb_arg
, bool *eof
,
2488 RGWFileHandle
* parent
= get_rgwfh(parent_fh
);
2494 lsubdout(parent
->get_fs()->get_context(), rgw
, 15)
2496 << " offset=" << *offset
2499 if ((*offset
== 0) &&
2500 (flags
& RGW_READDIR_FLAG_DOTDOT
)) {
2501 /* send '.' and '..' with their NFS-defined offsets */
2502 rcb(".", cb_arg
, 1, nullptr, 0, RGW_LOOKUP_FLAG_DIR
);
2503 rcb("..", cb_arg
, 2, nullptr, 0, RGW_LOOKUP_FLAG_DIR
);
2506 int rc
= parent
->readdir(rcb
, cb_arg
, offset
, eof
, flags
);
2510 /* enumeration continuing from name */
/* C API: enumerate parent_fh continuing after the entry called `name`.
 * A null name is logged as "(nil)".  With RGW_READDIR_FLAG_DOTDOT set (and a
 * further condition on a line not visible here) "." and ".." are reported
 * first with offsets 1 and 2.  The enumeration is delegated to
 * RGWFileHandle::readdir() with the name as the offset. */
2511 int rgw_readdir2(struct rgw_fs
*rgw_fs
,
2512 struct rgw_file_handle
*parent_fh
, const char *name
,
2513 rgw_readdir_cb rcb
, void *cb_arg
, bool *eof
,
2516 RGWFileHandle
* parent
= get_rgwfh(parent_fh
);
2522 lsubdout(parent
->get_fs()->get_context(), rgw
, 15)
2524 << " offset=" << ((name
) ? name
: "(nil)")
2528 (flags
& RGW_READDIR_FLAG_DOTDOT
)) {
2529 /* send '.' and '..' with their NFS-defined offsets */
2530 rcb(".", cb_arg
, 1, nullptr, 0, RGW_LOOKUP_FLAG_DIR
);
2531 rcb("..", cb_arg
, 2, nullptr, 0, RGW_LOOKUP_FLAG_DIR
);
2534 int rc
= parent
->readdir(rcb
, cb_arg
, name
, eof
, flags
);
2536 } /* rgw_readdir2 */
2538 /* project offset of dirent name */
/* C API: compute the directory offset of the entry called `name` in
 * parent_fh by copying the name into a std::string and calling
 * RGWFileHandle::offset_of(), which writes the result through `offset`. */
2539 int rgw_dirent_offset(struct rgw_fs
*rgw_fs
,
2540 struct rgw_file_handle
*parent_fh
,
2541 const char *name
, int64_t *offset
,
2544 RGWFileHandle
* parent
= get_rgwfh(parent_fh
);
2549 std::string sname
{name
};
2550 int rc
= parent
->offset_of(sname
, offset
, flags
);
/* C API: read up to `length` bytes at `offset` from fh into buffer by
 * forwarding to RGWLibFS::read(); the number of bytes read is reported
 * through bytes_read and the callee's result is returned. */
2557 int rgw_read(struct rgw_fs
*rgw_fs
,
2558 struct rgw_file_handle
*fh
, uint64_t offset
,
2559 size_t length
, size_t *bytes_read
, void *buffer
,
2562 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2563 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2565 return fs
->read(rgw_fh
, offset
, length
, bytes_read
, buffer
, flags
);
/* C API: read the target of symbolic link fh into buffer by forwarding to
 * RGWLibFS::readlink() with the same offset/length/bytes_read contract as
 * rgw_read; the callee's result is returned. */
2571 int rgw_readlink(struct rgw_fs
*rgw_fs
,
2572 struct rgw_file_handle
*fh
, uint64_t offset
,
2573 size_t length
, size_t *bytes_read
, void *buffer
,
2576 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2577 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2579 return fs
->readlink(rgw_fh
, offset
, length
, bytes_read
, buffer
, flags
);
/* C API: write `length` bytes from buffer at `offset` to fh.  Handles that
 * are not regular files are caught by the is_file() test.  A handle that is
 * not open is opened on the fly when RGW_OPEN_FLAG_V3 is set in flags; what
 * happens without that flag is on lines not visible in this listing.  The
 * write is delegated to RGWFileHandle::write(). */
2585 int rgw_write(struct rgw_fs
*rgw_fs
,
2586 struct rgw_file_handle
*fh
, uint64_t offset
,
2587 size_t length
, size_t *bytes_written
, void *buffer
,
2590 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2595 if (! rgw_fh
->is_file())
2598 if (! rgw_fh
->is_open()) {
2599 if (flags
& RGW_OPEN_FLAG_V3
) {
2600 rc
= rgw_fh
->open(flags
);
2607 rc
= rgw_fh
->write(offset
, length
, bytes_written
, buffer
);
2613 read data from file (vector)
/* Members of RGWReadV (the class header is not visible in this listing):
 * a holder pairing a buffer list with the rgw_vio array that describes it.
 * The constructor takes the list by non-const reference and moves from it,
 * so the argument passed by the caller is left in a moved-from state. */
2618 struct rgw_vio
* vio
;
2621 RGWReadV(buffer::list
& _bl
, rgw_vio
* _vio
) : vio(_vio
) {
2622 bl
= std::move(_bl
);
2625 struct rgw_vio
* get_vio() { return vio
; }
2627 const auto& buffers() { return bl
.buffers(); }
2629 unsigned /* XXX */ length() { return bl
.length(); }
/* Release callback installed in uio->uio_rele by rgw_readv.  Recovers the
 * RGWReadV stored in uio->uio_p1 and frees its storage with
 * ::operator delete, matching the ::operator new used to allocate it in
 * rgw_readv. */
2633 void rgw_readv_rele(struct rgw_uio
*uio
, uint32_t flags
)
2635 RGWReadV
* rdv
= static_cast<RGWReadV
*>(uio
->uio_p1
);
2637 ::operator delete(rdv
);
/* C API: vectored read.  An RGWGetObjRequest reads uio_resid bytes at
 * uio_offset into a buffer list; a single raw allocation then holds an
 * RGWReadV followed by one rgw_vio per buffer, and the uio fields
 * (uio_cnt, uio_resid, uio_vio, uio_rele) are pointed at it.  Each rgw_vio
 * is filled with the base pointer and length of the matching buffer.
 * NOTE(review): `rdv+sizeof(RGWReadV)` is pointer arithmetic on an
 * RGWReadV*, so it advances by sizeof(RGWReadV) whole objects rather than
 * sizeof(RGWReadV) bytes.  The allocation only reserves one RGWReadV plus
 * the rgw_vio array, so the resulting rgw_vio* appears to point past the
 * storage that was reserved for the array; `rdv + 1` looks like the
 * intended address -- confirm. */
2640 int rgw_readv(struct rgw_fs
*rgw_fs
,
2641 struct rgw_file_handle
*fh
, rgw_uio
*uio
, uint32_t flags
)
2644 CephContext
* cct
= static_cast<CephContext
*>(rgw_fs
->rgw
);
2645 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2646 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2648 if (! rgw_fh
->is_file())
2654 RGWGetObjRequest
req(cct
, fs
->get_user(), rgw_fh
->bucket_name(),
2655 rgw_fh
->object_name(), uio
->uio_offset
, uio
->uio_resid
,
2657 req
.do_hexdump
= false;
2659 rc
= rgwlib
.get_fe()->execute_req(&req
);
2662 RGWReadV
* rdv
= static_cast<RGWReadV
*>(
2663 ::operator new(sizeof(RGWReadV
) +
2664 (bl
.buffers().size() * sizeof(struct rgw_vio
))));
2667 RGWReadV(bl
, reinterpret_cast<rgw_vio
*>(rdv
+sizeof(RGWReadV
)));
2670 uio
->uio_cnt
= rdv
->buffers().size();
2671 uio
->uio_resid
= rdv
->length();
2672 uio
->uio_vio
= rdv
->get_vio();
2673 uio
->uio_rele
= rgw_readv_rele
;
2676 auto& buffers
= rdv
->buffers();
2677 for (auto& bp
: buffers
) {
2678 rgw_vio
*vio
= &(uio
->uio_vio
[ix
]);
2679 vio
->vio_base
= const_cast<char*>(bp
.c_str());
2680 vio
->vio_len
= bp
.length();
2681 vio
->vio_u1
= nullptr;
2682 vio
->vio_p1
= nullptr;
2694 write data to file (vector)
/* C API: vectored write.  Per the comment below the operation is not
 * supported and the remainder of the body is ignored.  That remainder wraps
 * every rgw_vio of the uio in a static buffer (no copy), and issues a
 * single RGWPutObjRequest for the handle's relative object name. */
2696 int rgw_writev(struct rgw_fs
*rgw_fs
, struct rgw_file_handle
*fh
,
2697 rgw_uio
*uio
, uint32_t flags
)
2700 // not supported - rest of function is ignored
2703 CephContext
* cct
= static_cast<CephContext
*>(rgw_fs
->rgw
);
2704 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2705 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2707 if (! rgw_fh
->is_file())
2711 for (unsigned int ix
= 0; ix
< uio
->uio_cnt
; ++ix
) {
2712 rgw_vio
*vio
= &(uio
->uio_vio
[ix
]);
2714 buffer::create_static(vio
->vio_len
,
2715 static_cast<char*>(vio
->vio_base
)));
2718 std::string oname
= rgw_fh
->relative_object_name();
2719 RGWPutObjRequest
req(cct
, rgwlib
.get_store()->get_user(fs
->get_user()->user_id
),
2720 rgw_fh
->bucket_name(), oname
, bl
);
2722 int rc
= rgwlib
.get_fe()->execute_req(&req
);
2724 /* XXX update size (in request) */
2732 int rgw_fsync(struct rgw_fs
*rgw_fs
, struct rgw_file_handle
*handle
,
/* C API: commit written data for the range [offset, offset + length) of fh
 * by forwarding to RGWFileHandle::commit() with FLAG_NONE; the flags
 * argument of this wrapper is not passed on. */
2738 int rgw_commit(struct rgw_fs
*rgw_fs
, struct rgw_file_handle
*fh
,
2739 uint64_t offset
, uint64_t length
, uint32_t flags
)
2741 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2743 return rgw_fh
->commit(offset
, length
, RGWFileHandle::FLAG_NONE
);
/* C API: fetch the extended attributes listed in attrs for fh, reporting
 * each through cb(cb_arg), by forwarding to RGWLibFS::getxattrs() and
 * returning its result. */
2750 int rgw_getxattrs(struct rgw_fs
*rgw_fs
, struct rgw_file_handle
*fh
,
2751 rgw_xattrlist
*attrs
, rgw_getxattr_cb cb
, void *cb_arg
,
2754 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2755 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2757 return fs
->getxattrs(rgw_fh
, attrs
, cb
, cb_arg
, flags
);
/* C API: list the extended attributes of fh through cb(cb_arg) by
 * forwarding to RGWLibFS::lsxattrs().  filter_prefix is passed along, but
 * the signature comment marks it as ignored. */
2760 int rgw_lsxattrs(struct rgw_fs
*rgw_fs
, struct rgw_file_handle
*fh
,
2761 rgw_xattrstr
*filter_prefix
/* ignored */,
2762 rgw_getxattr_cb cb
, void *cb_arg
, uint32_t flags
)
2764 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2765 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2767 return fs
->lsxattrs(rgw_fh
, filter_prefix
, cb
, cb_arg
, flags
);
/* C API: store the extended attributes in attrs on fh by forwarding to
 * RGWLibFS::setxattrs() and returning its result. */
2770 int rgw_setxattrs(struct rgw_fs
*rgw_fs
, struct rgw_file_handle
*fh
,
2771 rgw_xattrlist
*attrs
, uint32_t flags
)
2773 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2774 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2776 return fs
->setxattrs(rgw_fh
, attrs
, flags
);
2779 int rgw_rmxattrs(struct rgw_fs
*rgw_fs
, struct rgw_file_handle
*fh
,
2780 rgw_xattrlist
*attrs
, uint32_t flags
)
2782 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2783 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2785 return fs
->rmxattrs(rgw_fh
, attrs
, flags
);