1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab ft=cpp
4 #include "include/compat.h"
5 #include "include/rados/rgw_file.h"
11 #include "rgw_resolve.h"
15 #include "rgw_acl_s3.h"
16 #include "rgw_frontend.h"
17 #include "rgw_request.h"
18 #include "rgw_process.h"
19 #include "rgw_rest_user.h"
20 #include "rgw_rest_s3.h"
21 #include "rgw_os_lib.h"
22 #include "rgw_auth_s3.h"
24 #include "rgw_bucket.h"
27 #include "rgw_lib_frontend.h"
28 #include "rgw_perf_counters.h"
29 #include "common/errno.h"
31 #include "services/svc_zone.h"
35 #define dout_subsys ceph_subsys_rgw
44 const string
RGWFileHandle::root_name
= "/";
46 std::atomic
<uint32_t> RGWLibFS::fs_inst_counter
;
48 uint32_t RGWLibFS::write_completion_interval_s
= 10;
50 ceph::timer
<ceph::mono_clock
> RGWLibFS::write_timer
{
51 ceph::construct_suspended
};
53 inline int valid_fs_bucket_name(const string
& name
) {
54 int rc
= valid_s3_bucket_name(name
, false /* relaxed */);
56 if (name
.size() > 255)
63 inline int valid_fs_object_name(const string
& name
) {
64 int rc
= valid_s3_object_name(name
);
66 if (name
.size() > 1024)
76 std::size_t operator()(const rgw_xattrstr
& att
) const noexcept
{
77 return XXH64(att
.val
, att
.len
, 5882300);
84 bool operator()(const rgw_xattrstr
& lhs
, const rgw_xattrstr
& rhs
) const {
85 return ((lhs
.len
== rhs
.len
) &&
86 (strncmp(lhs
.val
, rhs
.val
, lhs
.len
) == 0));
90 /* well-known attributes */
91 static const std::unordered_set
<
92 rgw_xattrstr
, XattrHash
, XattrEqual
> rgw_exposed_attrs
= {
93 rgw_xattrstr
{const_cast<char*>(RGW_ATTR_ETAG
), sizeof(RGW_ATTR_ETAG
)-1}
96 static inline bool is_exposed_attr(const rgw_xattrstr
& k
) {
97 return (rgw_exposed_attrs
.find(k
) != rgw_exposed_attrs
.end());
100 LookupFHResult
RGWLibFS::stat_bucket(RGWFileHandle
* parent
, const char *path
,
101 RGWLibFS::BucketStats
& bs
,
104 LookupFHResult fhr
{nullptr, 0};
105 std::string bucket_name
{path
};
106 RGWStatBucketRequest
req(cct
, user
->clone(), bucket_name
, bs
);
108 int rc
= rgwlib
.get_fe()->execute_req(&req
);
110 (req
.get_ret() == 0) &&
112 fhr
= lookup_fh(parent
, path
,
113 (flags
& RGWFileHandle::FLAG_LOCKED
)|
114 RGWFileHandle::FLAG_CREATE
|
115 RGWFileHandle::FLAG_BUCKET
);
117 RGWFileHandle
* rgw_fh
= get
<0>(fhr
);
118 if (! (flags
& RGWFileHandle::FLAG_LOCKED
)) {
121 rgw_fh
->set_times(req
.get_ctime());
122 /* restore attributes */
123 auto ux_key
= req
.get_attr(RGW_ATTR_UNIX_KEY1
);
124 auto ux_attrs
= req
.get_attr(RGW_ATTR_UNIX1
);
125 if (ux_key
&& ux_attrs
) {
126 DecodeAttrsResult dar
= rgw_fh
->decode_attrs(ux_key
, ux_attrs
);
127 if (get
<0>(dar
) || get
<1>(dar
)) {
131 if (! (flags
& RGWFileHandle::FLAG_LOCKED
)) {
132 rgw_fh
->mtx
.unlock();
139 LookupFHResult
RGWLibFS::fake_leaf(RGWFileHandle
* parent
,
141 enum rgw_fh_type type
,
142 struct stat
*st
, uint32_t st_mask
,
145 /* synthesize a minimal handle from parent, path, type, and st */
148 flags
|= RGWFileHandle::FLAG_CREATE
;
151 case RGW_FS_TYPE_DIRECTORY
:
152 flags
|= RGWFileHandle::FLAG_DIRECTORY
;
159 LookupFHResult fhr
= lookup_fh(parent
, path
, flags
);
161 RGWFileHandle
* rgw_fh
= get
<0>(fhr
);
163 lock_guard
guard(rgw_fh
->mtx
);
164 if (st_mask
& RGW_SETATTR_SIZE
) {
165 rgw_fh
->set_size(st
->st_size
);
167 if (st_mask
& RGW_SETATTR_MTIME
) {
168 rgw_fh
->set_times(st
->st_mtim
);
173 } /* RGWLibFS::fake_leaf */
175 LookupFHResult
RGWLibFS::stat_leaf(RGWFileHandle
* parent
,
177 enum rgw_fh_type type
,
180 /* find either-of <object_name>, <object_name/>, only one of
181 * which should exist; atomicity? */
184 LookupFHResult fhr
{nullptr, 0};
186 /* XXX the need for two round-trip operations to identify file or
187 * directory leaf objects is unnecessary--the current proposed
188 * mechanism to avoid this is to store leaf object names with an
189 * object locator w/o trailing slash */
191 std::string obj_path
= parent
->format_child_name(path
, false);
193 for (auto ix
: { 0, 1, 2 }) {
198 if (type
== RGW_FS_TYPE_DIRECTORY
)
201 RGWStatObjRequest
req(cct
, user
->clone(),
202 parent
->bucket_name(), obj_path
,
203 RGWStatObjRequest::FLAG_NONE
);
204 int rc
= rgwlib
.get_fe()->execute_req(&req
);
206 (req
.get_ret() == 0)) {
207 fhr
= lookup_fh(parent
, path
, RGWFileHandle::FLAG_CREATE
);
209 RGWFileHandle
* rgw_fh
= get
<0>(fhr
);
210 lock_guard
guard(rgw_fh
->mtx
);
211 rgw_fh
->set_size(req
.get_size());
212 rgw_fh
->set_times(req
.get_mtime());
213 /* restore attributes */
214 auto ux_key
= req
.get_attr(RGW_ATTR_UNIX_KEY1
);
215 auto ux_attrs
= req
.get_attr(RGW_ATTR_UNIX1
);
216 rgw_fh
->set_etag(*(req
.get_attr(RGW_ATTR_ETAG
)));
217 rgw_fh
->set_acls(*(req
.get_attr(RGW_ATTR_ACL
)));
218 if (!(flags
& RGWFileHandle::FLAG_IN_CB
) &&
219 ux_key
&& ux_attrs
) {
220 DecodeAttrsResult dar
= rgw_fh
->decode_attrs(ux_key
, ux_attrs
);
221 if (get
<0>(dar
) || get
<1>(dar
)) {
234 if (type
== RGW_FS_TYPE_FILE
)
238 RGWStatObjRequest
req(cct
, user
->clone(),
239 parent
->bucket_name(), obj_path
,
240 RGWStatObjRequest::FLAG_NONE
);
241 int rc
= rgwlib
.get_fe()->execute_req(&req
);
243 (req
.get_ret() == 0)) {
244 fhr
= lookup_fh(parent
, path
, RGWFileHandle::FLAG_DIRECTORY
);
246 RGWFileHandle
* rgw_fh
= get
<0>(fhr
);
247 lock_guard
guard(rgw_fh
->mtx
);
248 rgw_fh
->set_size(req
.get_size());
249 rgw_fh
->set_times(req
.get_mtime());
250 /* restore attributes */
251 auto ux_key
= req
.get_attr(RGW_ATTR_UNIX_KEY1
);
252 auto ux_attrs
= req
.get_attr(RGW_ATTR_UNIX1
);
253 rgw_fh
->set_etag(*(req
.get_attr(RGW_ATTR_ETAG
)));
254 rgw_fh
->set_acls(*(req
.get_attr(RGW_ATTR_ACL
)));
255 if (!(flags
& RGWFileHandle::FLAG_IN_CB
) &&
256 ux_key
&& ux_attrs
) {
257 DecodeAttrsResult dar
= rgw_fh
->decode_attrs(ux_key
, ux_attrs
);
258 if (get
<0>(dar
) || get
<1>(dar
)) {
269 std::string object_name
{path
};
270 RGWStatLeafRequest
req(cct
, user
->clone(),
271 parent
, object_name
);
272 int rc
= rgwlib
.get_fe()->execute_req(&req
);
274 (req
.get_ret() == 0)) {
276 /* we need rgw object's key name equal to file name, if
278 if ((flags
& RGWFileHandle::FLAG_EXACT_MATCH
) &&
279 !req
.exact_matched
) {
280 lsubdout(get_context(), rgw
, 15)
282 << ": stat leaf not exact match file name = "
286 fhr
= lookup_fh(parent
, path
,
287 RGWFileHandle::FLAG_CREATE
|
289 RGWFileHandle::FLAG_DIRECTORY
:
290 RGWFileHandle::FLAG_NONE
));
291 /* XXX we don't have an object--in general, there need not
292 * be one (just a path segment in some other object). In
293 * actual leaf an object exists, but we'd need another round
294 * trip to get attrs */
296 /* for now use the parent object's mtime */
297 RGWFileHandle
* rgw_fh
= get
<0>(fhr
);
298 lock_guard
guard(rgw_fh
->mtx
);
299 rgw_fh
->set_mtime(parent
->get_mtime());
312 } /* RGWLibFS::stat_leaf */
314 int RGWLibFS::read(RGWFileHandle
* rgw_fh
, uint64_t offset
, size_t length
,
315 size_t* bytes_read
, void* buffer
, uint32_t flags
)
317 if (! rgw_fh
->is_file())
320 if (rgw_fh
->deleted())
323 RGWReadRequest
req(get_context(), user
->clone(), rgw_fh
, offset
, length
, buffer
);
325 int rc
= rgwlib
.get_fe()->execute_req(&req
);
327 ((rc
= req
.get_ret()) == 0)) {
328 lock_guard
guard(rgw_fh
->mtx
);
329 rgw_fh
->set_atime(real_clock::to_timespec(real_clock::now()));
330 *bytes_read
= req
.nread
;
336 int RGWLibFS::readlink(RGWFileHandle
* rgw_fh
, uint64_t offset
, size_t length
,
337 size_t* bytes_read
, void* buffer
, uint32_t flags
)
339 if (! rgw_fh
->is_link())
342 if (rgw_fh
->deleted())
345 RGWReadRequest
req(get_context(), user
->clone(), rgw_fh
, offset
, length
, buffer
);
347 int rc
= rgwlib
.get_fe()->execute_req(&req
);
349 ((rc
= req
.get_ret()) == 0)) {
350 lock_guard(rgw_fh
->mtx
);
351 rgw_fh
->set_atime(real_clock::to_timespec(real_clock::now()));
352 *bytes_read
= req
.nread
;
358 int RGWLibFS::unlink(RGWFileHandle
* rgw_fh
, const char* name
, uint32_t flags
)
362 RGWFileHandle
* parent
= nullptr;
363 RGWFileHandle
* bkt_fh
= nullptr;
365 if (unlikely(flags
& RGWFileHandle::FLAG_UNLINK_THIS
)) {
367 parent
= rgw_fh
->get_parent();
371 LookupFHResult fhr
= lookup_fh(parent
, name
, RGWFileHandle::FLAG_LOCK
);
372 rgw_fh
= get
<0>(fhr
);
376 if (parent
->is_root()) {
377 /* a bucket may have an object storing Unix attributes, check
378 * for and delete it */
380 fhr
= stat_bucket(parent
, name
, bs
, (rgw_fh
) ?
381 RGWFileHandle::FLAG_LOCKED
:
382 RGWFileHandle::FLAG_NONE
);
383 bkt_fh
= get
<0>(fhr
);
384 if (unlikely(! bkt_fh
)) {
385 /* implies !rgw_fh, so also !LOCKED */
389 if (bs
.num_entries
> 1) {
390 unref(bkt_fh
); /* return stat_bucket ref */
391 if (likely(!! rgw_fh
)) { /* return lock and ref from
392 * lookup_fh (or caller in the
394 * RGWFileHandle::FLAG_UNLINK_THIS) */
395 rgw_fh
->mtx
.unlock();
400 /* delete object w/key "<bucket>/" (uxattrs), if any */
402 RGWDeleteObjRequest
req(cct
, user
->clone(), bkt_fh
->bucket_name(), oname
);
403 rc
= rgwlib
.get_fe()->execute_req(&req
);
404 /* don't care if ENOENT */
409 RGWDeleteBucketRequest
req(cct
, user
->clone(), bname
);
410 rc
= rgwlib
.get_fe()->execute_req(&req
);
419 /* XXX for now, perform a hard lookup to deduce the type of
420 * object to be deleted ("foo" vs. "foo/")--also, ensures
421 * atomicity at this endpoint */
422 struct rgw_file_handle
*fh
;
423 rc
= rgw_lookup(get_fs(), parent
->get_fh(), name
, &fh
,
424 nullptr /* st */, 0 /* mask */,
425 RGW_LOOKUP_FLAG_NONE
);
430 rgw_fh
= get_rgwfh(fh
);
431 rgw_fh
->mtx
.lock(); /* LOCKED */
434 std::string oname
= rgw_fh
->relative_object_name();
435 if (rgw_fh
->is_dir()) {
436 /* for the duration of our cache timer, trust positive
438 if (rgw_fh
->has_children()) {
439 rgw_fh
->mtx
.unlock();
445 RGWDeleteObjRequest
req(cct
, user
->clone(), parent
->bucket_name(), oname
);
446 rc
= rgwlib
.get_fe()->execute_req(&req
);
452 /* ENOENT when raced with other s3 gateway */
453 if (! rc
|| rc
== -ENOENT
) {
454 rgw_fh
->flags
|= RGWFileHandle::FLAG_DELETED
;
455 fh_cache
.remove(rgw_fh
->fh
.fh_hk
.object
, rgw_fh
,
456 RGWFileHandle::FHCache::FLAG_LOCK
);
460 real_time t
= real_clock::now();
461 parent
->set_mtime(real_clock::to_timespec(t
));
462 parent
->set_ctime(real_clock::to_timespec(t
));
465 rgw_fh
->mtx
.unlock();
469 } /* RGWLibFS::unlink */
471 int RGWLibFS::rename(RGWFileHandle
* src_fh
, RGWFileHandle
* dst_fh
,
472 const char *_src_name
, const char *_dst_name
)
475 /* XXX initial implementation: try-copy, and delete if copy
480 std::string src_name
{_src_name
};
481 std::string dst_name
{_dst_name
};
484 LookupFHResult fhr
= lookup_fh(src_fh
, _src_name
, RGWFileHandle::FLAG_LOCK
);
485 RGWFileHandle
* rgw_fh
= get
<0>(fhr
);
487 /* should not happen */
489 ldout(get_context(), 0) << __func__
490 << " BUG no such src renaming path="
496 /* forbid renaming of directories (unreasonable at scale) */
497 if (rgw_fh
->is_dir()) {
498 ldout(get_context(), 12) << __func__
499 << " rejecting attempt to rename directory path="
500 << rgw_fh
->full_object_name()
506 /* forbid renaming open files (violates intent, for now) */
507 if (rgw_fh
->is_open()) {
508 ldout(get_context(), 12) << __func__
509 << " rejecting attempt to rename open file path="
510 << rgw_fh
->full_object_name()
516 t
= real_clock::now();
518 for (int ix
: {0, 1}) {
522 RGWCopyObjRequest
req(cct
, user
->clone(), src_fh
, dst_fh
, src_name
, dst_name
);
523 int rc
= rgwlib
.get_fe()->execute_req(&req
);
525 ((rc
= req
.get_ret()) != 0)) {
526 ldout(get_context(), 1)
528 << " rename step 0 failed src="
529 << src_fh
->full_object_name() << " " << src_name
530 << " dst=" << dst_fh
->full_object_name()
536 ldout(get_context(), 12)
538 << " rename step 0 success src="
539 << src_fh
->full_object_name() << " " << src_name
540 << " dst=" << dst_fh
->full_object_name()
544 /* update dst change id */
545 dst_fh
->set_times(t
);
550 rc
= this->unlink(rgw_fh
/* LOCKED */, _src_name
,
551 RGWFileHandle::FLAG_UNLINK_THIS
);
554 ldout(get_context(), 12)
556 << " rename step 1 success src="
557 << src_fh
->full_object_name() << " " << src_name
558 << " dst=" << dst_fh
->full_object_name()
562 /* update src change id */
563 src_fh
->set_times(t
);
565 ldout(get_context(), 1)
567 << " rename step 1 failed src="
568 << src_fh
->full_object_name() << " " << src_name
569 << " dst=" << dst_fh
->full_object_name()
581 rgw_fh
->mtx
.unlock(); /* !LOCKED */
582 unref(rgw_fh
); /* -ref */
586 } /* RGWLibFS::rename */
588 MkObjResult
RGWLibFS::mkdir(RGWFileHandle
* parent
, const char *name
,
589 struct stat
*st
, uint32_t mask
, uint32_t flags
)
592 rgw_file_handle
*lfh
;
594 rc
= rgw_lookup(get_fs(), parent
->get_fh(), name
, &lfh
,
595 nullptr /* st */, 0 /* mask */,
596 RGW_LOOKUP_FLAG_NONE
);
599 rc
= rgw_fh_rele(get_fs(), lfh
, RGW_FH_RELE_FLAG_NONE
);
600 // ignore return code
601 return MkObjResult
{nullptr, -EEXIST
};
604 MkObjResult mkr
{nullptr, -EINVAL
};
606 RGWFileHandle
* rgw_fh
= nullptr;
607 buffer::list ux_key
, ux_attrs
;
609 fhr
= lookup_fh(parent
, name
,
610 RGWFileHandle::FLAG_CREATE
|
611 RGWFileHandle::FLAG_DIRECTORY
|
612 RGWFileHandle::FLAG_LOCK
);
613 rgw_fh
= get
<0>(fhr
);
615 rgw_fh
->create_stat(st
, mask
);
616 rgw_fh
->set_times(real_clock::now());
618 rgw_fh
->encode_attrs(ux_key
, ux_attrs
);
620 rgw_fh
->stat(st
, RGWFileHandle::FLAG_LOCKED
);
621 get
<0>(mkr
) = rgw_fh
;
627 if (parent
->is_root()) {
630 /* enforce S3 name restrictions */
631 rc
= valid_fs_bucket_name(bname
);
633 rgw_fh
->flags
|= RGWFileHandle::FLAG_DELETED
;
634 fh_cache
.remove(rgw_fh
->fh
.fh_hk
.object
, rgw_fh
,
635 RGWFileHandle::FHCache::FLAG_LOCK
);
636 rgw_fh
->mtx
.unlock();
638 get
<0>(mkr
) = nullptr;
643 RGWCreateBucketRequest
req(get_context(), user
->clone(), bname
);
646 req
.emplace_attr(RGW_ATTR_UNIX_KEY1
, std::move(ux_key
));
647 req
.emplace_attr(RGW_ATTR_UNIX1
, std::move(ux_attrs
));
649 rc
= rgwlib
.get_fe()->execute_req(&req
);
652 /* create an object representing the directory */
654 string dir_name
= parent
->format_child_name(name
, true);
656 /* need valid S3 name (characters, length <= 1024, etc) */
657 rc
= valid_fs_object_name(dir_name
);
659 rgw_fh
->flags
|= RGWFileHandle::FLAG_DELETED
;
660 fh_cache
.remove(rgw_fh
->fh
.fh_hk
.object
, rgw_fh
,
661 RGWFileHandle::FHCache::FLAG_LOCK
);
662 rgw_fh
->mtx
.unlock();
664 get
<0>(mkr
) = nullptr;
669 RGWPutObjRequest
req(get_context(), user
->clone(), parent
->bucket_name(), dir_name
, bl
);
672 req
.emplace_attr(RGW_ATTR_UNIX_KEY1
, std::move(ux_key
));
673 req
.emplace_attr(RGW_ATTR_UNIX1
, std::move(ux_attrs
));
675 rc
= rgwlib
.get_fe()->execute_req(&req
);
682 rgw_fh
->flags
|= RGWFileHandle::FLAG_DELETED
;
683 rgw_fh
->mtx
.unlock(); /* !LOCKED */
685 get
<0>(mkr
) = nullptr;
690 real_time t
= real_clock::now();
691 parent
->set_mtime(real_clock::to_timespec(t
));
692 parent
->set_ctime(real_clock::to_timespec(t
));
693 rgw_fh
->mtx
.unlock(); /* !LOCKED */
699 } /* RGWLibFS::mkdir */
701 MkObjResult
RGWLibFS::create(RGWFileHandle
* parent
, const char *name
,
702 struct stat
*st
, uint32_t mask
, uint32_t flags
)
708 rgw_file_handle
*lfh
;
709 rc
= rgw_lookup(get_fs(), parent
->get_fh(), name
, &lfh
,
710 nullptr /* st */, 0 /* mask */,
711 RGW_LOOKUP_FLAG_NONE
);
714 rc
= rgw_fh_rele(get_fs(), lfh
, RGW_FH_RELE_FLAG_NONE
);
715 // ignore return code
716 return MkObjResult
{nullptr, -EEXIST
};
719 /* expand and check name */
720 std::string obj_name
= parent
->format_child_name(name
, false);
721 rc
= valid_fs_object_name(obj_name
);
723 return MkObjResult
{nullptr, rc
};
728 RGWPutObjRequest
req(cct
, user
->clone(), parent
->bucket_name(), obj_name
, bl
);
729 MkObjResult mkr
{nullptr, -EINVAL
};
731 rc
= rgwlib
.get_fe()->execute_req(&req
);
737 LookupFHResult fhr
= lookup_fh(parent
, name
, RGWFileHandle::FLAG_CREATE
|
738 RGWFileHandle::FLAG_LOCK
);
739 RGWFileHandle
* rgw_fh
= get
<0>(fhr
);
741 if (get
<1>(fhr
) & RGWFileHandle::FLAG_CREATE
) {
742 /* fill in stat data */
743 real_time t
= real_clock::now();
744 rgw_fh
->create_stat(st
, mask
);
745 rgw_fh
->set_times(t
);
747 parent
->set_mtime(real_clock::to_timespec(t
));
748 parent
->set_ctime(real_clock::to_timespec(t
));
751 (void) rgw_fh
->stat(st
, RGWFileHandle::FLAG_LOCKED
);
753 rgw_fh
->set_etag(*(req
.get_attr(RGW_ATTR_ETAG
)));
754 rgw_fh
->set_acls(*(req
.get_attr(RGW_ATTR_ACL
)));
756 get
<0>(mkr
) = rgw_fh
;
757 rgw_fh
->mtx
.unlock();
764 /* cases such as quota exceeded are considered failures too */
769 } /* RGWLibFS::create */
771 MkObjResult
RGWLibFS::symlink(RGWFileHandle
* parent
, const char *name
,
772 const char* link_path
, struct stat
*st
, uint32_t mask
, uint32_t flags
)
778 rgw_file_handle
*lfh
;
779 rc
= rgw_lookup(get_fs(), parent
->get_fh(), name
, &lfh
,
780 nullptr /* st */, 0 /* mask */,
781 RGW_LOOKUP_FLAG_NONE
);
784 rc
= rgw_fh_rele(get_fs(), lfh
, RGW_FH_RELE_FLAG_NONE
);
785 // ignore return code
786 return MkObjResult
{nullptr, -EEXIST
};
789 MkObjResult mkr
{nullptr, -EINVAL
};
791 RGWFileHandle
* rgw_fh
= nullptr;
792 buffer::list ux_key
, ux_attrs
;
794 fhr
= lookup_fh(parent
, name
,
795 RGWFileHandle::FLAG_CREATE
|
796 RGWFileHandle::FLAG_SYMBOLIC_LINK
|
797 RGWFileHandle::FLAG_LOCK
);
798 rgw_fh
= get
<0>(fhr
);
800 rgw_fh
->create_stat(st
, mask
);
801 rgw_fh
->set_times(real_clock::now());
803 rgw_fh
->encode_attrs(ux_key
, ux_attrs
);
806 get
<0>(mkr
) = rgw_fh
;
812 /* need valid S3 name (characters, length <= 1024, etc) */
813 rc
= valid_fs_object_name(name
);
815 rgw_fh
->flags
|= RGWFileHandle::FLAG_DELETED
;
816 fh_cache
.remove(rgw_fh
->fh
.fh_hk
.object
, rgw_fh
,
817 RGWFileHandle::FHCache::FLAG_LOCK
);
818 rgw_fh
->mtx
.unlock();
820 get
<0>(mkr
) = nullptr;
825 string obj_name
= std::string(name
);
826 /* create an object representing the symbolic link */
832 buffer::create_static(len
, static_cast<char*>(buffer
)));
836 buffer::copy(link_path
, strlen(link_path
)));
839 RGWPutObjRequest
req(get_context(), user
->clone(), parent
->bucket_name(), obj_name
, bl
);
842 req
.emplace_attr(RGW_ATTR_UNIX_KEY1
, std::move(ux_key
));
843 req
.emplace_attr(RGW_ATTR_UNIX1
, std::move(ux_attrs
));
845 rc
= rgwlib
.get_fe()->execute_req(&req
);
850 rgw_fh
->flags
|= RGWFileHandle::FLAG_DELETED
;
851 rgw_fh
->mtx
.unlock(); /* !LOCKED */
853 get
<0>(mkr
) = nullptr;
858 real_time t
= real_clock::now();
859 parent
->set_mtime(real_clock::to_timespec(t
));
860 parent
->set_ctime(real_clock::to_timespec(t
));
861 rgw_fh
->mtx
.unlock(); /* !LOCKED */
867 } /* RGWLibFS::symlink */
869 int RGWLibFS::getattr(RGWFileHandle
* rgw_fh
, struct stat
* st
)
871 switch(rgw_fh
->fh
.fh_type
) {
872 case RGW_FS_TYPE_FILE
:
874 if (rgw_fh
->deleted())
881 /* if rgw_fh is a directory, mtime will be advanced */
882 return rgw_fh
->stat(st
);
883 } /* RGWLibFS::getattr */
885 int RGWLibFS::setattr(RGWFileHandle
* rgw_fh
, struct stat
* st
, uint32_t mask
,
889 buffer::list ux_key
, ux_attrs
;
890 buffer::list etag
= rgw_fh
->get_etag();
891 buffer::list acls
= rgw_fh
->get_acls();
893 lock_guard
guard(rgw_fh
->mtx
);
895 switch(rgw_fh
->fh
.fh_type
) {
896 case RGW_FS_TYPE_FILE
:
898 if (rgw_fh
->deleted())
906 string obj_name
{rgw_fh
->relative_object_name()};
908 if (rgw_fh
->is_dir() &&
909 (likely(! rgw_fh
->is_bucket()))) {
913 RGWSetAttrsRequest
req(cct
, user
->clone(), rgw_fh
->bucket_name(), obj_name
);
915 rgw_fh
->create_stat(st
, mask
);
916 rgw_fh
->encode_attrs(ux_key
, ux_attrs
);
919 req
.emplace_attr(RGW_ATTR_UNIX_KEY1
, std::move(ux_key
));
920 req
.emplace_attr(RGW_ATTR_UNIX1
, std::move(ux_attrs
));
921 req
.emplace_attr(RGW_ATTR_ETAG
, std::move(etag
));
922 req
.emplace_attr(RGW_ATTR_ACL
, std::move(acls
));
924 rc
= rgwlib
.get_fe()->execute_req(&req
);
928 /* special case: materialize placeholder dir */
930 RGWPutObjRequest
req(get_context(), user
->clone(), rgw_fh
->bucket_name(), obj_name
, bl
);
932 rgw_fh
->encode_attrs(ux_key
, ux_attrs
); /* because std::moved */
935 req
.emplace_attr(RGW_ATTR_UNIX_KEY1
, std::move(ux_key
));
936 req
.emplace_attr(RGW_ATTR_UNIX1
, std::move(ux_attrs
));
938 rc
= rgwlib
.get_fe()->execute_req(&req
);
942 if ((rc
!= 0) || (rc2
!= 0)) {
946 rgw_fh
->set_ctime(real_clock::to_timespec(real_clock::now()));
949 } /* RGWLibFS::setattr */
951 static inline std::string
prefix_xattr_keystr(const rgw_xattrstr
& key
) {
953 keystr
.reserve(sizeof(RGW_ATTR_META_PREFIX
) + key
.len
);
954 keystr
+= string
{RGW_ATTR_META_PREFIX
};
955 keystr
+= string
{key
.val
, key
.len
};
959 static inline std::string_view
unprefix_xattr_keystr(const std::string
& key
)
961 std::string_view svk
{key
};
962 auto pos
= svk
.find(RGW_ATTR_META_PREFIX
);
963 if (pos
== std::string_view::npos
) {
964 return std::string_view
{""};
965 } else if (pos
== 0) {
966 svk
.remove_prefix(sizeof(RGW_ATTR_META_PREFIX
)-1);
971 int RGWLibFS::getxattrs(RGWFileHandle
* rgw_fh
, rgw_xattrlist
*attrs
,
972 rgw_getxattr_cb cb
, void *cb_arg
,
975 /* cannot store on fs_root, should not on buckets? */
976 if ((rgw_fh
->is_bucket()) ||
977 (rgw_fh
->is_root())) {
982 string obj_name
{rgw_fh
->relative_object_name2()};
984 RGWGetAttrsRequest
req(cct
, user
->clone(), rgw_fh
->bucket_name(), obj_name
);
986 for (uint32_t ix
= 0; ix
< attrs
->xattr_cnt
; ++ix
) {
987 auto& xattr
= attrs
->xattrs
[ix
];
989 /* pass exposed attr keys as given, else prefix */
990 std::string k
= is_exposed_attr(xattr
.key
)
991 ? std::string
{xattr
.key
.val
, xattr
.key
.len
}
992 : prefix_xattr_keystr(xattr
.key
);
994 req
.emplace_key(std::move(k
));
997 if (ldlog_p1(get_context(), ceph_subsys_rgw
, 15)) {
998 lsubdout(get_context(), rgw
, 15)
1000 << " get keys for: "
1001 << rgw_fh
->object_name()
1004 for (const auto& attr
: req
.get_attrs()) {
1005 lsubdout(get_context(), rgw
, 15)
1006 << "\tkey: " << attr
.first
<< dendl
;
1010 rc
= rgwlib
.get_fe()->execute_req(&req
);
1011 rc2
= req
.get_ret();
1012 rc3
= ((rc
== 0) && (rc2
== 0)) ? 0 : -EIO
;
1014 /* call back w/xattr data */
1016 const auto& attrs
= req
.get_attrs();
1017 for (const auto& attr
: attrs
) {
1019 if (!attr
.second
.has_value())
1022 const auto& k
= attr
.first
;
1023 const auto& v
= attr
.second
.value();
1025 /* return exposed attr keys as given, else unprefix --
1026 * yes, we could have memoized the exposed check, but
1027 * to be efficient it would need to be saved with
1028 * RGWGetAttrs::attrs, I think */
1029 std::string_view svk
=
1030 is_exposed_attr(rgw_xattrstr
{const_cast<char*>(k
.c_str()),
1031 uint32_t(k
.length())})
1033 : unprefix_xattr_keystr(k
);
1035 /* skip entries not matching prefix */
1039 rgw_xattrstr xattr_k
= { const_cast<char*>(svk
.data()),
1040 uint32_t(svk
.length())};
1041 rgw_xattrstr xattr_v
=
1042 {const_cast<char*>(const_cast<buffer::list
&>(v
).c_str()),
1043 uint32_t(v
.length())};
1044 rgw_xattr xattr
= { xattr_k
, xattr_v
};
1045 rgw_xattrlist xattrlist
= { &xattr
, 1 };
1047 cb(&xattrlist
, cb_arg
, RGW_GETXATTR_FLAG_NONE
);
1052 } /* RGWLibFS::getxattrs */
1054 int RGWLibFS::lsxattrs(
1055 RGWFileHandle
* rgw_fh
, rgw_xattrstr
*filter_prefix
, rgw_getxattr_cb cb
,
1056 void *cb_arg
, uint32_t flags
)
1058 /* cannot store on fs_root, should not on buckets? */
1059 if ((rgw_fh
->is_bucket()) ||
1060 (rgw_fh
->is_root())) {
1065 string obj_name
{rgw_fh
->relative_object_name2()};
1067 RGWGetAttrsRequest
req(cct
, user
->clone(), rgw_fh
->bucket_name(), obj_name
);
1069 rc
= rgwlib
.get_fe()->execute_req(&req
);
1070 rc2
= req
.get_ret();
1071 rc3
= ((rc
== 0) && (rc2
== 0)) ? 0 : -EIO
;
1073 /* call back w/xattr data--check for eof */
1075 const auto& keys
= req
.get_attrs();
1076 for (const auto& k
: keys
) {
1078 /* return exposed attr keys as given, else unprefix */
1079 std::string_view svk
=
1080 is_exposed_attr(rgw_xattrstr
{const_cast<char*>(k
.first
.c_str()),
1081 uint32_t(k
.first
.length())})
1083 : unprefix_xattr_keystr(k
.first
);
1085 /* skip entries not matching prefix */
1089 rgw_xattrstr xattr_k
= { const_cast<char*>(svk
.data()),
1090 uint32_t(svk
.length())};
1091 rgw_xattrstr xattr_v
= { nullptr, 0 };
1092 rgw_xattr xattr
= { xattr_k
, xattr_v
};
1093 rgw_xattrlist xattrlist
= { &xattr
, 1 };
1095 auto cbr
= cb(&xattrlist
, cb_arg
, RGW_LSXATTR_FLAG_NONE
);
1096 if (cbr
& RGW_LSXATTR_FLAG_STOP
)
1102 } /* RGWLibFS::lsxattrs */
1104 int RGWLibFS::setxattrs(RGWFileHandle
* rgw_fh
, rgw_xattrlist
*attrs
,
1107 /* cannot store on fs_root, should not on buckets? */
1108 if ((rgw_fh
->is_bucket()) ||
1109 (rgw_fh
->is_root())) {
1114 string obj_name
{rgw_fh
->relative_object_name2()};
1116 RGWSetAttrsRequest
req(cct
, user
->clone(), rgw_fh
->bucket_name(), obj_name
);
1118 for (uint32_t ix
= 0; ix
< attrs
->xattr_cnt
; ++ix
) {
1119 auto& xattr
= attrs
->xattrs
[ix
];
1120 buffer::list attr_bl
;
1121 /* don't allow storing at RGW_ATTR_META_PREFIX */
1122 if (! (xattr
.key
.len
> 0))
1125 /* reject lexical match with any exposed attr */
1126 if (is_exposed_attr(xattr
.key
))
1129 string k
= prefix_xattr_keystr(xattr
.key
);
1130 attr_bl
.append(xattr
.val
.val
, xattr
.val
.len
);
1131 req
.emplace_attr(k
.c_str(), std::move(attr_bl
));
1134 /* don't send null requests */
1135 if (! (req
.get_attrs().size() > 0)) {
1139 rc
= rgwlib
.get_fe()->execute_req(&req
);
1140 rc2
= req
.get_ret();
1142 return (((rc
== 0) && (rc2
== 0)) ? 0 : -EIO
);
1144 } /* RGWLibFS::setxattrs */
1146 int RGWLibFS::rmxattrs(RGWFileHandle
* rgw_fh
, rgw_xattrlist
* attrs
,
1149 /* cannot store on fs_root, should not on buckets? */
1150 if ((rgw_fh
->is_bucket()) ||
1151 (rgw_fh
->is_root())) {
1156 string obj_name
{rgw_fh
->relative_object_name2()};
1158 RGWRMAttrsRequest
req(cct
, user
->clone(), rgw_fh
->bucket_name(), obj_name
);
1160 for (uint32_t ix
= 0; ix
< attrs
->xattr_cnt
; ++ix
) {
1161 auto& xattr
= attrs
->xattrs
[ix
];
1162 /* don't allow storing at RGW_ATTR_META_PREFIX */
1163 if (! (xattr
.key
.len
> 0)) {
1166 string k
= prefix_xattr_keystr(xattr
.key
);
1167 req
.emplace_key(std::move(k
));
1170 /* don't send null requests */
1171 if (! (req
.get_attrs().size() > 0)) {
1175 rc
= rgwlib
.get_fe()->execute_req(&req
);
1176 rc2
= req
.get_ret();
1178 return (((rc
== 0) && (rc2
== 0)) ? 0 : -EIO
);
1180 } /* RGWLibFS::rmxattrs */
1182 /* called with rgw_fh->mtx held */
1183 void RGWLibFS::update_fh(RGWFileHandle
*rgw_fh
)
1186 string obj_name
{rgw_fh
->relative_object_name()};
1187 buffer::list ux_key
, ux_attrs
;
1189 if (rgw_fh
->is_dir() &&
1190 (likely(! rgw_fh
->is_bucket()))) {
1194 lsubdout(get_context(), rgw
, 17)
1196 << " update old versioned fh : " << obj_name
1199 RGWSetAttrsRequest
req(cct
, user
->clone(), rgw_fh
->bucket_name(), obj_name
);
1201 rgw_fh
->encode_attrs(ux_key
, ux_attrs
);
1203 req
.emplace_attr(RGW_ATTR_UNIX_KEY1
, std::move(ux_key
));
1204 req
.emplace_attr(RGW_ATTR_UNIX1
, std::move(ux_attrs
));
1206 rc
= rgwlib
.get_fe()->execute_req(&req
);
1207 rc2
= req
.get_ret();
1209 if ((rc
!= 0) || (rc2
!= 0)) {
1210 lsubdout(get_context(), rgw
, 17)
1212 << " update fh failed : " << obj_name
1215 } /* RGWLibFS::update_fh */
1217 void RGWLibFS::close()
1219 state
.flags
|= FLAG_CLOSED
;
1225 explicit ObjUnref(RGWLibFS
* _fs
) : fs(_fs
) {}
1226 void operator()(RGWFileHandle
* fh
) const {
1227 lsubdout(fs
->get_context(), rgw
, 5)
1228 << __PRETTY_FUNCTION__
1230 << " before ObjUnref refs=" << fh
->get_refcnt()
1236 /* force cache drain, forces objects to evict */
1237 fh_cache
.drain(ObjUnref(this),
1238 RGWFileHandle::FHCache::FLAG_LOCK
);
1239 rgwlib
.get_fe()->get_process()->unregister_fs(this);
1241 } /* RGWLibFS::close */
1243 inline std::ostream
& operator<<(std::ostream
&os
, fh_key
const &fhk
) {
1244 os
<< "<fh_key: bucket=";
1245 os
<< fhk
.fh_hk
.bucket
;
1247 os
<< fhk
.fh_hk
.object
;
1252 inline std::ostream
& operator<<(std::ostream
&os
, struct timespec
const &ts
) {
1253 os
<< "<timespec: tv_sec=";
1261 std::ostream
& operator<<(std::ostream
&os
, RGWLibFS::event
const &ev
) {
1264 case RGWLibFS::event::type::READDIR
:
1265 os
<< "type=READDIR;";
1268 os
<< "type=UNKNOWN;";
1271 os
<< "fid=" << ev
.fhk
.fh_hk
.bucket
<< ":" << ev
.fhk
.fh_hk
.object
1272 << ";ts=" << ev
.ts
<< ">";
1279 using directory
= RGWFileHandle::directory
;
1281 /* dirent invalidate timeout--basically, the upper-bound on
1282 * inconsistency with the S3 namespace */
1284 = get_context()->_conf
->rgw_nfs_namespace_expire_secs
;
1286 /* max events to gc in one cycle */
1287 uint32_t max_ev
= get_context()->_conf
->rgw_nfs_max_gc
;
1289 struct timespec now
, expire_ts
;
1292 std::deque
<event
> &events
= state
.events
;
1295 (void) clock_gettime(CLOCK_MONOTONIC_COARSE
, &now
);
1296 lsubdout(get_context(), rgw
, 15)
1297 << "GC: top of expire loop"
1299 << " expire_s=" << expire_s
1302 lock_guard
guard(state
.mtx
); /* LOCKED */
1303 lsubdout(get_context(), rgw
, 15)
1305 << " count=" << events
.size()
1308 /* just return if no events */
1309 if (events
.empty()) {
1313 (events
.size() < 500) ? max_ev
: (events
.size() / 4);
1314 for (uint32_t ix
= 0; (ix
< _max_ev
) && (events
.size() > 0); ++ix
) {
1315 event
& ev
= events
.front();
1317 expire_ts
.tv_sec
+= expire_s
;
1318 if (expire_ts
> now
) {
1327 for (auto& ev
: ve
) {
1328 lsubdout(get_context(), rgw
, 15)
1329 << "try-expire ev: " << ev
<< dendl
;
1330 if (likely(ev
.t
== event::type::READDIR
)) {
1331 RGWFileHandle
* rgw_fh
= lookup_handle(ev
.fhk
.fh_hk
);
1332 lsubdout(get_context(), rgw
, 15)
1333 << "ev rgw_fh: " << rgw_fh
<< dendl
;
1335 RGWFileHandle::directory
* d
;
1336 if (unlikely(! rgw_fh
->is_dir())) {
1337 lsubdout(get_context(), rgw
, 0)
1339 << " BUG non-directory found with READDIR event "
1340 << "(" << rgw_fh
->bucket_name() << ","
1341 << rgw_fh
->object_name() << ")"
1345 /* maybe clear state */
1346 d
= get
<directory
>(&rgw_fh
->variant_type
);
1348 struct timespec ev_ts
= ev
.ts
;
1349 lock_guard
guard(rgw_fh
->mtx
);
1350 struct timespec d_last_readdir
= d
->last_readdir
;
1351 if (unlikely(ev_ts
< d_last_readdir
)) {
1352 /* readdir cycle in progress, don't invalidate */
1353 lsubdout(get_context(), rgw
, 15)
1354 << "GC: delay expiration for "
1355 << rgw_fh
->object_name()
1356 << " ev.ts=" << ev_ts
1357 << " last_readdir=" << d_last_readdir
1361 lsubdout(get_context(), rgw
, 15)
1363 << rgw_fh
->object_name()
1365 rgw_fh
->clear_state();
1366 rgw_fh
->invalidate();
1372 } /* event::type::READDIR */
1375 } while (! (stop
|| shutdown
));
1376 } /* RGWLibFS::gc */
// Stream a one-line debug summary of a file handle: its address, a type tag
// chosen from fh_type, the bucket:object pair taken from its hash key, its
// object name and its reference count.
// The const_cast only exists so get_fh() can be called through the const
// reference (presumably get_fh() is a non-const member -- confirm).
1378 std::ostream
& operator<<(std::ostream
&os
,
1379 RGWFileHandle
const &rgw_fh
)
1381 const auto& fhk
= rgw_fh
.get_key();
1382 const auto& fh
= const_cast<RGWFileHandle
&>(rgw_fh
).get_fh();
1383 os
<< "<RGWFileHandle:";
1384 os
<< "addr=" << &rgw_fh
<< ";";
1385 switch (fh
->fh_type
) {
1386 case RGW_FS_TYPE_DIRECTORY
:
1387 os
<< "type=DIRECTORY;";
1389 case RGW_FS_TYPE_FILE
:
1393 os
<< "type=UNKNOWN;";
1396 os
<< "fid=" << fhk
.fh_hk
.bucket
<< ":" << fhk
.fh_hk
.object
<< ";";
1397 os
<< "name=" << rgw_fh
.object_name() << ";";
1398 os
<< "refcnt=" << rgw_fh
.get_refcnt() << ";";
// Destructor. A handle that is still linked into the handle cache removes
// itself, passing FLAG_LOCK because (per the note below) the partition lock is
// not held on this path. It then drops the reference held on the parent,
// unless there is no parent or the parent is the mount handle.
1403 RGWFileHandle::~RGWFileHandle() {
1404 /* !recycle case, handle may STILL be in handle table, BUT
1405 * the partition lock is not held in this path */
1406 if (fh_hook
.is_linked()) {
1407 fs
->fh_cache
.remove(fh
.fh_hk
.object
, this, FHCache::FLAG_LOCK
);
1409 /* cond-unref parent */
1410 if (parent
&& (! parent
->is_mount())) {
1411 /* safe because if parent->unref causes its deletion,
1412 * there are a) by refcnt, no other objects/paths pointing
1413 * to it and b) by the semantics of valid iteration of
1414 * fh_lru (observed, e.g., by cohort_lru<T,...>::drain())
1415 * no unsafe iterators reaching it either--n.b., this constraint
1416 * is binding on code which may in future attempt to e.g.,
1417 * cause the eviction of objects in LRU order */
1418 (void) get_fs()->unref(parent
);
// Build the fh_key for a child entry called `name`. The last key component is
// the string form of the mounting user's id (held in the local `tenant`).
// Two forms are produced: one that uses `name` for both of the first two
// components (the S3-bucket case), and one that keeps this handle's bucket
// hash and derives the second component from make_key_name(name).
1422 fh_key
RGWFileHandle::make_fhk(const std::string
& name
)
1424 std::string tenant
= get_fs()->get_user()->user_id
.to_str();
1426 /* S3 bucket -- assert mount-at-bucket case reaches here */
1427 return fh_key(name
, name
, tenant
);
1429 std::string key_name
= make_key_name(name
.c_str());
1430 return fh_key(fhk
.fh_hk
.bucket
, key_name
.c_str(), tenant
);
// Serialize this handle for storage as object attributes:
//   ux_key1   receives the encoding of an fh_key built from this handle's
//             hash key (fh.fh_hk);
//   ux_attrs1 receives the encoding of the handle itself.
1434 void RGWFileHandle::encode_attrs(ceph::buffer::list
& ux_key1
,
1435 ceph::buffer::list
& ux_attrs1
)
1438 fh_key
fhk(this->fh
.fh_hk
);
1439 encode(fhk
, ux_key1
);
1440 encode(*this, ux_attrs1
);
1441 } /* RGWFileHandle::encode_attrs */
// Restore state from stored attributes: decodes a handle key from *ux_key1
// into fhk and this handle from *ux_attrs1, then special-cases a decoded
// state.version below 2. The DecodeAttrsResult starts out as { false, false }.
// Both buffer pointers are dereferenced directly.
1443 DecodeAttrsResult
RGWFileHandle::decode_attrs(const ceph::buffer::list
* ux_key1
,
1444 const ceph::buffer::list
* ux_attrs1
)
1447 DecodeAttrsResult dar
{ false, false };
1449 auto bl_iter_key1
= ux_key1
->cbegin();
1450 decode(fhk
, bl_iter_key1
);
1453 auto bl_iter_unix1
= ux_attrs1
->cbegin();
1454 decode(*this, bl_iter_unix1
);
1455 if (this->state
.version
< 2) {
1460 } /* RGWFileHandle::decode_attrs */
// LRU reclaim hook. newobj_fac is expected to be an RGWFileHandle::Factory
// (checked with dynamic_cast), and the key it carries must map to the same
// handle-cache partition as this handle. A handle that is still linked is then
// removed from the cache with FLAG_NONE, because the caller already holds the
// partition lock.
1462 bool RGWFileHandle::reclaim(const cohort::lru::ObjectFactory
* newobj_fac
) {
1463 lsubdout(fs
->get_context(), rgw
, 17)
1464 << __func__
<< " " << *this
1466 auto factory
= dynamic_cast<const RGWFileHandle::Factory
*>(newobj_fac
);
1467 if (factory
== nullptr) {
1470 /* make sure the reclaiming object is in the same partition as the new-object factory,
1471 * then we can recycle the object, and replace it with the new object */
1472 if (!fs
->fh_cache
.is_same_partition(factory
->fhk
.fh_hk
.object
, fh
.fh_hk
.object
)) {
1475 /* in the non-delete case, handle may still be in handle table */
1476 if (fh_hook
.is_linked()) {
1477 /* in this case, we are being called from a context which holds
1478 * the partition lock */
1479 fs
->fh_cache
.remove(fh
.fh_hk
.object
, this, FHCache::FLAG_NONE
);
1482 } /* RGWFileHandle::reclaim */
// Report whether this directory handle has any entries by running an
// RGWRMdirCheck request through the library frontend. Non-directories are
// handled by the leading guard. The result is true only when the request
// marked itself valid and reported children.
1484 bool RGWFileHandle::has_children() const
1486 if (unlikely(! is_dir()))
1489 RGWRMdirCheck
req(fs
->get_context(),
1490 rgwlib
.get_store()->get_user(fs
->get_user()->user_id
),
1492 int rc
= rgwlib
.get_fe()->execute_req(&req
);
1494 return req
.valid
&& req
.has_children
;
// Print a readdir_offset, which holds either a uint64_t* (numeric offset) or
// a const char* (name marker); the alternative currently held decides what is
// written to the stream.
1500 std::ostream
& operator<<(std::ostream
&os
,
1501 RGWFileHandle::readdir_offset
const &offset
)
1504 if (unlikely(!! get
<uint64_t*>(&offset
))) {
1505 uint64_t* ioff
= get
<uint64_t*>(offset
);
1509 os
<< get
<const char*>(offset
);
// Enumerate this directory, handing rcb and cb_arg to the listing request.
//   offset - either a name marker (const char*) or a pointer to a numeric
//            offset; a numeric offset of 0 marks the initial call.
//   eof, flags - passed in by the caller; how they are consumed is decided by
//            the request objects.
// last_readdir is stamped under mtx before any request runs. The listing is
// delegated to an RGWListBucketsRequest or an RGWReaddirRequest (presumably
// chosen by whether this handle is the root -- confirm); after either one the
// link count is increased by req.d_count under mtx. Finally a READDIR event
// carrying this handle's key and state.atime is queued on the filesystem while
// holding fs->state.mtx.
1513 int RGWFileHandle::readdir(rgw_readdir_cb rcb
, void *cb_arg
,
1514 readdir_offset offset
,
1515 bool *eof
, uint32_t flags
)
1517 using event
= RGWLibFS::event
;
1520 struct timespec now
;
1521 CephContext
* cct
= fs
->get_context();
1523 lsubdout(cct
, rgw
, 10)
1524 << __func__
<< " readdir called on "
1528 directory
* d
= get
<directory
>(&variant_type
);
1530 (void) clock_gettime(CLOCK_MONOTONIC_COARSE
, &now
); /* !LOCKED */
1531 lock_guard
guard(mtx
);
1532 d
->last_readdir
= now
;
1538 if (likely(!! get
<const char*>(&offset
))) {
1539 mk
= const_cast<char*>(get
<const char*>(offset
));
1542 initial_off
= (*get
<uint64_t*>(offset
) == 0);
1546 RGWListBucketsRequest
req(cct
, rgwlib
.get_store()->get_user(fs
->get_user()->user_id
),
1547 this, rcb
, cb_arg
, offset
);
1548 rc
= rgwlib
.get_fe()->execute_req(&req
);
1550 (void) clock_gettime(CLOCK_MONOTONIC_COARSE
, &now
); /* !LOCKED */
1551 lock_guard
guard(mtx
);
1555 inc_nlink(req
.d_count
);
1559 RGWReaddirRequest
req(cct
, rgwlib
.get_store()->get_user(fs
->get_user()->user_id
),
1560 this, rcb
, cb_arg
, offset
);
1561 rc
= rgwlib
.get_fe()->execute_req(&req
);
1563 (void) clock_gettime(CLOCK_MONOTONIC_COARSE
, &now
); /* !LOCKED */
1564 lock_guard
guard(mtx
);
1568 inc_nlink(req
.d_count
);
1573 event
ev(event::type::READDIR
, get_key(), state
.atime
);
1574 lock_guard
sguard(fs
->state
.mtx
);
1575 fs
->state
.push_event(ev
);
1577 lsubdout(fs
->get_context(), rgw
, 15)
1579 << " final link count=" << state
.nlink
1583 } /* RGWFileHandle::readdir */
// Add `len` bytes from `buffer` at position `off` to this file's in-progress
// write transaction, starting one (an RGWWriteRequest) when none exists.
// Runs under mtx.
//   bytes_written - out: len plus any skipped overlap when rc == 0, else 0.
// A new transaction is only started for an initial write position that passes
// the guard below (partial writes are not supported). For stateless opens a
// completion timer is armed when the transaction starts and pushed back after
// every successful chunk. When starting or continuing the transaction fails,
// the transaction is deleted and write_req is reset to nullptr.
1585 int RGWFileHandle::write(uint64_t off
, size_t len
, size_t *bytes_written
,
1589 using WriteCompletion
= RGWLibFS::WriteCompletion
;
1591 lock_guard
guard(mtx
);
1595 file
* f
= get
<file
>(&variant_type
);
1600 lsubdout(fs
->get_context(), rgw
, 5)
1602 << " write attempted on deleted object "
1603 << this->object_name()
1605 /* zap write transaction, if any */
1607 delete f
->write_req
;
1608 f
->write_req
= nullptr;
1613 if (! f
->write_req
) {
1614 /* guard--we do not support (e.g., COW-backed) partial writes */
1616 lsubdout(fs
->get_context(), rgw
, 5)
1618 << " " << object_name()
1619 << " non-0 initial write position " << off
1620 << " (mounting with -o sync required)"
1626 std::string object_name
= relative_object_name();
1628 new RGWWriteRequest(rgwlib
.get_store(),
1629 rgwlib
.get_store()->get_user(fs
->get_user()->user_id
),
1630 this, bucket_name(), object_name
);
1631 rc
= rgwlib
.get_fe()->start_req(f
->write_req
);
1633 lsubdout(fs
->get_context(), rgw
, 5)
1635 << this->object_name()
1636 << " write start failed " << off
1637 << " (" << rc
<< ")"
1639 /* zap failed write transaction */
1640 delete f
->write_req
;
1641 f
->write_req
= nullptr;
1644 if (stateless_open()) {
1645 /* start write timer */
1646 f
->write_req
->timer_id
=
1647 RGWLibFS::write_timer
.add_event(
1648 std::chrono::seconds(RGWLibFS::write_completion_interval_s
),
1649 WriteCompletion(*this));
// A write that begins before the current write position but reaches it: the
// already-covered prefix (overlap) is skipped and the write resumes at
// real_ofs; the overlap is still counted in *bytes_written on success.
1655 if ((static_cast<off_t
>(off
) < f
->write_req
->real_ofs
) &&
1656 ((f
->write_req
->real_ofs
- off
) <= len
)) {
1657 overlap
= f
->write_req
->real_ofs
- off
;
1658 off
= f
->write_req
->real_ofs
;
1659 buffer
= static_cast<char*>(buffer
) + overlap
;
1667 buffer::create_static(len
, static_cast<char*>(buffer
)));
1670 buffer::copy(static_cast<char*>(buffer
), len
));
1673 f
->write_req
->put_data(off
, bl
);
1674 rc
= f
->write_req
->exec_continue();
1677 size_t min_size
= off
+ len
;
1678 if (min_size
> get_size())
1680 if (stateless_open()) {
1681 /* bump write timer */
// NOTE(review): the re-arm interval is a literal 10s while the timer is armed
// with RGWLibFS::write_completion_interval_s -- confirm they should match.
1682 RGWLibFS::write_timer
.adjust_event(
1683 f
->write_req
->timer_id
, std::chrono::seconds(10));
1686 /* continuation failed (e.g., non-contiguous write position) */
1687 lsubdout(fs
->get_context(), rgw
, 5)
1690 << " failed write at position " << off
1691 << " (fails write transaction) "
1693 /* zap failed write transaction */
1694 delete f
->write_req
;
1695 f
->write_req
= nullptr;
1699 *bytes_written
= (rc
== 0) ? (len
+ overlap
) : 0;
1701 } /* RGWFileHandle::write */
// Complete the pending write transaction, if any: finish the request through
// the frontend, pick up its result with get_ret(), then delete it and reset
// write_req to nullptr. The lock object on mtx is created deferred; locking is
// only considered when FLAG_LOCKED is absent from `flags`, so callers that
// already hold mtx pass FLAG_LOCKED.
1703 int RGWFileHandle::write_finish(uint32_t flags
)
1705 unique_lock guard
{mtx
, std::defer_lock
};
1708 if (! (flags
& FLAG_LOCKED
)) {
1712 file
* f
= get
<file
>(&variant_type
);
1713 if (f
&& (f
->write_req
)) {
1714 lsubdout(fs
->get_context(), rgw
, 10)
1716 << " finishing write trans on " << object_name()
1718 rc
= rgwlib
.get_fe()->finish_req(f
->write_req
);
1720 rc
= f
->write_req
->get_ret();
1722 delete f
->write_req
;
1723 f
->write_req
= nullptr;
1727 } /* RGWFileHandle::write_finish */
// Close the handle: under mtx, finish any pending write transaction (passing
// FLAG_LOCKED since the lock is already held here) and clear the FLAG_OPEN and
// FLAG_STATELESS_OPEN bits.
1729 int RGWFileHandle::close()
1731 lock_guard
guard(mtx
);
1733 int rc
= write_finish(FLAG_LOCKED
);
1735 flags
&= ~FLAG_OPEN
;
1736 flags
&= ~FLAG_STATELESS_OPEN
;
1739 } /* RGWFileHandle::close */
1741 RGWFileHandle::file::~file()
// Reset cached directory state; the effect shown is resetting the directory's
// last_marker to a default-constructed rgw_obj_key.
1746 void RGWFileHandle::clear_state()
1748 directory
* d
= get
<directory
>(&variant_type
);
1751 d
->last_marker
= rgw_obj_key
{};
// Conditionally fast-forward this handle's modification time. The lock on mtx
// is created deferred and is only considered when the caller has not passed
// FLAG_LOCKED. The stored mtime is compared against the current real-clock
// time; the configured rgw_nfs_namespace_expire_secs takes part in that
// comparison.
1755 void RGWFileHandle::advance_mtime(uint32_t flags
) {
1756 /* intended for use on directories, fast-forward mtime so as to
1757 * ensure a new, higher value for the change attribute */
1758 unique_lock
uniq(mtx
, std::defer_lock
);
1759 if (likely(! (flags
& RGWFileHandle::FLAG_LOCKED
))) {
1763 /* advance mtime only if stored mtime is older than the
1764 * configured namespace expiration */
1765 auto now
= real_clock::now();
1766 auto cmptime
= state
.mtime
;
1768 fs
->get_context()->_conf
->rgw_nfs_namespace_expire_secs
;
1769 if (cmptime
< real_clock::to_timespec(now
)) {
1770 /* sets ctime as well as mtime, to avoid masking updates should
1771 * ctime inexplicably hold a higher value */
// Notify the invalidate callback registered on the owning filesystem, if one
// is set, passing the registered argument and this handle's hash key.
1776 void RGWFileHandle::invalidate() {
1777 RGWLibFS
*fs
= get_fs();
1778 if (fs
->invalidate_cb
) {
1779 fs
->invalidate_cb(fs
->invalidate_arg
, get_key().fh_hk
);
// First phase of a streaming object write. Steps shown:
//  - bind the object to its bucket and look up the zone's compression type
//    for the bucket's placement rule;
//  - assert that no DLO manifest / SLO info is present, count the PUT;
//  - reject an empty object, fetch request and system versioning parameters;
//  - set up the aio throttle from rgw_put_obj_min_window_size;
//  - on a versioning-enabled bucket, use the supplied version id or generate
//    a random instance name;
//  - obtain and prepare an atomic writer; `filter` points at it and, when the
//    compression type is not "none", is re-pointed at a compressor stacked on
//    top of it.
1783 int RGWWriteRequest::exec_start() {
1784 struct req_state
* state
= get_state();
1786 /* Object needs a bucket from this point */
1787 state
->object
->set_bucket(state
->bucket
.get());
1789 auto compression_type
=
1790 get_store()->get_zone()->get_params().get_compression_type(
1791 state
->bucket
->get_placement_rule());
1793 /* not obviously supportable */
1794 ceph_assert(! dlo_manifest
);
1795 ceph_assert(! slo_info
);
1797 perfcounter
->inc(l_rgw_put
);
1800 if (state
->object
->empty()) {
1801 ldout(state
->cct
, 0) << __func__
<< " called on empty object" << dendl
;
1805 op_ret
= get_params(null_yield
);
1809 op_ret
= get_system_versioning_params(state
, &olh_epoch
, &version_id
);
1814 /* user-supplied MD5 check skipped (not supplied) */
1815 /* early quota check skipped--we don't have size yet */
1816 /* skipping user-supplied etag--we might have one in future, but
1817 * like data it and other attrs would arrive after open */
1819 aio
.emplace(state
->cct
->_conf
->rgw_put_obj_min_window_size
);
1821 if (state
->bucket
->versioning_enabled()) {
1822 if (!version_id
.empty()) {
1823 state
->object
->set_instance(version_id
);
1825 state
->object
->gen_rand_obj_instance_name();
1826 version_id
= state
->object
->get_instance();
1829 processor
= get_store()->get_atomic_writer(this, state
->yield
, state
->object
->clone(),
1830 state
->bucket_owner
.get_id(), *state
->obj_ctx
,
1831 &state
->dest_placement
, 0, state
->req_id
);
1833 op_ret
= processor
->prepare(state
->yield
);
1835 ldout(state
->cct
, 20) << "processor->prepare() returned ret=" << op_ret
1839 filter
= &*processor
;
1840 if (compression_type
!= "none") {
1841 plugin
= Compressor::create(state
->cct
, compression_type
);
1843 ldout(state
->cct
, 1) << "Cannot load plugin for rgw_compression_type "
1844 << compression_type
<< dendl
;
1846 compressor
.emplace(state
->cct
, plugin
, filter
);
1847 filter
= &*compressor
;
// Middle phase of a streaming object write: consumes the chunk held in
// `data`. Steps shown: a guard on chunk ordering, a quota check against
// real_ofs, feeding the chunk to the running hash, handing it (moved) to the
// filter chain at `ofs`, and adding its length to bytes_written. The chunk
// length is captured before `data` is moved from.
1855 int RGWWriteRequest::exec_continue()
1857 struct req_state
* state
= get_state();
1860 /* check guards (e.g., contig write) */
1862 ldout(state
->cct
, 5)
1863 << " chunks arrived in wrong order"
1864 << " (mounting with -o sync required)"
1869 op_ret
= state
->bucket
->check_quota(this, user_quota
, bucket_quota
, real_ofs
, null_yield
, true);
1870 /* max_size exceed */
1874 size_t len
= data
.length();
1878 hash
.Update((const unsigned char *)data
.c_str(), data
.length());
1879 op_ret
= filter
->process(std::move(data
), ofs
);
1883 bytes_written
+= len
;
1887 } /* exec_continue */
// Final phase of a streaming object write. Steps shown:
//  - remember the handle's current size/ctime/mtime so they can be restored;
//  - record the final object size and flush the filter chain;
//  - re-check quota with the final size;
//  - when the data was actually compressed, store an RGW_ATTR_COMPRESSION
//    attribute describing it;
//  - store the etag and ACL attributes;
//  - update the handle's mtime/ctime/size and store its encoded unix key and
//    attributes;
//  - copy generic attrs, request metadata and the delete-at attribute;
//  - complete the atomic writer, reverting the handle's attribute updates on
//    the revert path, and record the PUT latency.
1889 int RGWWriteRequest::exec_finish()
1891 buffer::list bl
, aclbl
, ux_key
, ux_attrs
;
1892 map
<string
, string
>::iterator iter
;
1893 char calc_md5
[CEPH_CRYPTO_MD5_DIGESTSIZE
* 2 + 1];
1894 unsigned char m
[CEPH_CRYPTO_MD5_DIGESTSIZE
];
1895 struct req_state
* state
= get_state();
1897 size_t osize
= rgw_fh
->get_size();
1898 struct timespec octime
= rgw_fh
->get_ctime();
1899 struct timespec omtime
= rgw_fh
->get_mtime();
1900 real_time appx_t
= real_clock::now();
1902 state
->obj_size
= bytes_written
;
1903 perfcounter
->inc(l_rgw_put_b
, state
->obj_size
);
1905 // flush data in filters
1906 op_ret
= filter
->process({}, state
->obj_size
);
1911 op_ret
= state
->bucket
->check_quota(this, user_quota
, bucket_quota
, state
->obj_size
, null_yield
, true);
1912 /* max_size exceed */
1919 if (compressor
&& compressor
->is_compressed()) {
1921 RGWCompressionInfo cs_info
;
1922 cs_info
.compression_type
= plugin
->get_type_name();
1923 cs_info
.orig_size
= state
->obj_size
;
1924 cs_info
.blocks
= std::move(compressor
->get_compression_blocks());
1925 encode(cs_info
, tmp
);
1926 attrs
[RGW_ATTR_COMPRESSION
] = tmp
;
1927 ldpp_dout(this, 20) << "storing " << RGW_ATTR_COMPRESSION
1928 << " with type=" << cs_info
.compression_type
1929 << ", orig_size=" << cs_info
.orig_size
1930 << ", blocks=" << cs_info
.blocks
.size() << dendl
;
1933 buf_to_hex(m
, CEPH_CRYPTO_MD5_DIGESTSIZE
, calc_md5
);
// size() + 1 also stores the terminating NUL of the etag string
1936 bl
.append(etag
.c_str(), etag
.size() + 1);
1937 emplace_attr(RGW_ATTR_ETAG
, std::move(bl
));
1939 policy
.encode(aclbl
);
1940 emplace_attr(RGW_ATTR_ACL
, std::move(aclbl
));
1943 rgw_fh
->set_mtime(real_clock::to_timespec(appx_t
));
1944 rgw_fh
->set_ctime(real_clock::to_timespec(appx_t
));
1945 rgw_fh
->set_size(bytes_written
);
1946 rgw_fh
->encode_attrs(ux_key
, ux_attrs
);
1948 emplace_attr(RGW_ATTR_UNIX_KEY1
, std::move(ux_key
));
1949 emplace_attr(RGW_ATTR_UNIX1
, std::move(ux_attrs
));
1951 for (iter
= state
->generic_attrs
.begin(); iter
!= state
->generic_attrs
.end();
1953 buffer::list
& attrbl
= attrs
[iter
->first
];
1954 const string
& val
= iter
->second
;
1955 attrbl
.append(val
.c_str(), val
.size() + 1);
1958 op_ret
= rgw_get_request_metadata(this, state
->cct
, state
->info
, attrs
);
1962 encode_delete_at_attr(delete_at
, attrs
);
1964 /* Add a custom metadata to expose the information whether an object
1965 * is an SLO or not. Appending the attribute must be performed AFTER
1966 * processing any input from user in order to prohibit overwriting. */
1967 if (unlikely(!! slo_info
)) {
1968 buffer::list slo_userindicator_bl
;
1970 encode("True", slo_userindicator_bl
);
1971 emplace_attr(RGW_ATTR_SLO_UINDICATOR
, std::move(slo_userindicator_bl
));
1974 op_ret
= processor
->complete(state
->obj_size
, etag
, &mtime
, real_time(), attrs
,
1975 (delete_at
? *delete_at
: real_time()),
1976 if_match
, if_nomatch
, nullptr, nullptr, nullptr,
1979 /* revert attr updates */
1980 rgw_fh
->set_mtime(omtime
);
1981 rgw_fh
->set_ctime(octime
);
1982 rgw_fh
->set_size(osize
);
1986 perfcounter
->tinc(l_rgw_put_lat
, state
->time_elapsed());
1990 } /* namespace rgw */
// Report the library's file-API version through the three out-parameters,
// using the LIBRGW_FILE_VER_MAJOR / _MINOR / _EXTRA constants.
1995 void rgwfile_version(int *major
, int *minor
, int *extra
)
1998 *major
= LIBRGW_FILE_VER_MAJOR
;
2000 *minor
= LIBRGW_FILE_VER_MINOR
;
2002 *extra
= LIBRGW_FILE_VER_EXTRA
;
2006 attach rgw namespace
// Mount entry point: creates an RGWLibFS for the given user id and key pair
// (with `rgw` used as the CephContext), authorizes it against the store, and
// registers it with the frontend process for shared gc. The struct rgw_fs of
// the new filesystem is fetched with get_fs().
2008 int rgw_mount(librgw_t rgw
, const char *uid
, const char *acc_key
,
2009 const char *sec_key
, struct rgw_fs
**rgw_fs
,
2014 /* stash access data for "mount" */
2015 RGWLibFS
* new_fs
= new RGWLibFS(static_cast<CephContext
*>(rgw
), uid
, acc_key
,
2017 ceph_assert(new_fs
);
2019 const DoutPrefix
dp(rgwlib
.get_store()->ctx(), dout_subsys
, "rgw mount: ");
2020 rc
= new_fs
->authorize(&dp
, rgwlib
.get_store());
2026 /* register fs for shared gc */
2027 rgwlib
.get_fe()->get_process()->register_fs(new_fs
);
2029 struct rgw_fs
*fs
= new_fs
->get_fs();
2032 /* XXX we no longer assume "/" is unique, but we aren't tracking the
// Mount entry point with an explicit root path. An empty `root` string selects
// "/" as the root; otherwise `root` itself is used. The rest follows the same
// sequence as rgw_mount: authorize the new RGWLibFS against the store,
// register it for shared gc, and fetch its struct rgw_fs with get_fs().
2040 int rgw_mount2(librgw_t rgw
, const char *uid
, const char *acc_key
,
2041 const char *sec_key
, const char *root
, struct rgw_fs
**rgw_fs
,
2046 /* if the config has no value for path/root, choose "/" */
2047 RGWLibFS
* new_fs
{nullptr};
2049 (!strcmp(root
, ""))) {
2050 /* stash access data for "mount" */
2051 new_fs
= new RGWLibFS(
2052 static_cast<CephContext
*>(rgw
), uid
, acc_key
, sec_key
, "/");
2055 /* stash access data for "mount" */
2056 new_fs
= new RGWLibFS(
2057 static_cast<CephContext
*>(rgw
), uid
, acc_key
, sec_key
, root
);
2060 ceph_assert(new_fs
); /* should we be using ceph_assert? */
2062 const DoutPrefix
dp(rgwlib
.get_store()->ctx(), dout_subsys
, "rgw mount2: ");
2063 rc
= new_fs
->authorize(&dp
, rgwlib
.get_store());
2069 /* register fs for shared gc */
2070 rgwlib
.get_fe()->get_process()->register_fs(new_fs
);
2072 struct rgw_fs
*fs
= new_fs
->get_fs();
2075 /* XXX we no longer assume "/" is unique, but we aren't tracking the
2084 register invalidate callbacks
// Register an invalidate callback and its argument on the filesystem behind
// rgw_fs; forwards to RGWLibFS::register_invalidate and returns its result.
2086 int rgw_register_invalidate(struct rgw_fs
*rgw_fs
, rgw_fh_callback_t cb
,
2087 void *arg
, uint32_t flags
)
2090 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2091 return fs
->register_invalidate(cb
, arg
, flags
);
2095 detach rgw namespace
// Unmount entry point; recovers the RGWLibFS instance from
// rgw_fs->fs_private.
2097 int rgw_umount(struct rgw_fs
*rgw_fs
, uint32_t flags
)
2099 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2105 get filesystem attributes
// Fill *vfs_st from cluster-wide statistics obtained with an
// RGWGetClusterStatReq. Block and fragment size are fixed at 1 MiB
// (1 << CEPH_BLOCK_SHIFT); the KiB totals in `stats` are converted to blocks
// by shifting right by (CEPH_BLOCK_SHIFT - 10). f_files is the cluster object
// count, f_ffree is set to -1, both f_fsid words carry the filesystem id, and
// f_namemax is 4096. A failing request is logged with its errno text.
2107 int rgw_statfs(struct rgw_fs
*rgw_fs
,
2108 struct rgw_file_handle
*parent_fh
,
2109 struct rgw_statvfs
*vfs_st
, uint32_t flags
)
2111 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2112 struct rados_cluster_stat_t stats
;
2114 RGWGetClusterStatReq
req(fs
->get_context(),
2115 rgwlib
.get_store()->get_user(fs
->get_user()->user_id
),
2117 int rc
= rgwlib
.get_fe()->execute_req(&req
);
2119 lderr(fs
->get_context()) << "ERROR: getting total cluster usage"
2120 << cpp_strerror(-rc
) << dendl
;
2124 //Set block size to 1M.
2125 constexpr uint32_t CEPH_BLOCK_SHIFT
= 20;
2126 vfs_st
->f_bsize
= 1 << CEPH_BLOCK_SHIFT
;
2127 vfs_st
->f_frsize
= 1 << CEPH_BLOCK_SHIFT
;
2128 vfs_st
->f_blocks
= stats
.kb
>> (CEPH_BLOCK_SHIFT
- 10);
2129 vfs_st
->f_bfree
= stats
.kb_avail
>> (CEPH_BLOCK_SHIFT
- 10);
2130 vfs_st
->f_bavail
= stats
.kb_avail
>> (CEPH_BLOCK_SHIFT
- 10);
2131 vfs_st
->f_files
= stats
.num_objects
;
2132 vfs_st
->f_ffree
= -1;
2133 vfs_st
->f_fsid
[0] = fs
->get_fsid();
2134 vfs_st
->f_fsid
[1] = fs
->get_fsid();
2136 vfs_st
->f_namemax
= 4096;
2141 generic create -- create an empty regular file
// Create a regular file called `name` under parent_fh via RGWLibFS::create.
// A guard on the parent (root and file parents are tested) precedes the call.
// The new handle is element 0 of the result (nullptr when creation failed);
// its rgw_file_handle is stored in *fh.
2143 int rgw_create(struct rgw_fs
*rgw_fs
, struct rgw_file_handle
*parent_fh
,
2144 const char *name
, struct stat
*st
, uint32_t mask
,
2145 struct rgw_file_handle
**fh
, uint32_t posix_flags
,
2150 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2151 RGWFileHandle
* parent
= get_rgwfh(parent_fh
);
2154 (parent
->is_root()) ||
2155 (parent
->is_file())) {
2160 MkObjResult fhr
= fs
->create(parent
, name
, st
, mask
, flags
);
2161 RGWFileHandle
*nfh
= get
<0>(fhr
); // nullptr if !success
2164 *fh
= nfh
->get_fh();
2170 create a symbolic link
// Create a symbolic link called `name` pointing at link_path under parent_fh
// via RGWLibFS::symlink. A guard on the parent (root and file parents are
// tested) precedes the call. The new handle is element 0 of the result
// (nullptr when creation failed); its rgw_file_handle is stored in *fh.
2172 int rgw_symlink(struct rgw_fs
*rgw_fs
, struct rgw_file_handle
*parent_fh
,
2173 const char *name
, const char *link_path
, struct stat
*st
, uint32_t mask
,
2174 struct rgw_file_handle
**fh
, uint32_t posix_flags
,
2179 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2180 RGWFileHandle
* parent
= get_rgwfh(parent_fh
);
2183 (parent
->is_root()) ||
2184 (parent
->is_file())) {
2189 MkObjResult fhr
= fs
->symlink(parent
, name
, link_path
, st
, mask
, flags
);
2190 RGWFileHandle
*nfh
= get
<0>(fhr
); // nullptr if !success
2193 *fh
= nfh
->get_fh();
2199 create a new directory
// Create a directory called `name` under parent_fh via RGWLibFS::mkdir. The
// new handle is element 0 of the result (nullptr when creation failed); its
// rgw_file_handle is stored in *fh.
2201 int rgw_mkdir(struct rgw_fs
*rgw_fs
,
2202 struct rgw_file_handle
*parent_fh
,
2203 const char *name
, struct stat
*st
, uint32_t mask
,
2204 struct rgw_file_handle
**fh
, uint32_t flags
)
2208 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2209 RGWFileHandle
* parent
= get_rgwfh(parent_fh
);
2216 MkObjResult fhr
= fs
->mkdir(parent
, name
, st
, mask
, flags
);
2217 RGWFileHandle
*nfh
= get
<0>(fhr
); // nullptr if !success
2220 *fh
= nfh
->get_fh();
// Rename src_name in directory `src` to dst_name in directory `dst`; resolves
// both directory handles and forwards to RGWLibFS::rename, returning its
// result.
2228 int rgw_rename(struct rgw_fs
*rgw_fs
,
2229 struct rgw_file_handle
*src
, const char* src_name
,
2230 struct rgw_file_handle
*dst
, const char* dst_name
,
2233 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2235 RGWFileHandle
* src_fh
= get_rgwfh(src
);
2236 RGWFileHandle
* dst_fh
= get_rgwfh(dst
);
2238 return fs
->rename(src_fh
, dst_fh
, src_name
, dst_name
);
2242 remove file or directory
// Remove the entry called `name` from the directory parent_fh; forwards to
// RGWLibFS::unlink and returns its result. `flags` is not passed on.
2244 int rgw_unlink(struct rgw_fs
*rgw_fs
, struct rgw_file_handle
*parent_fh
,
2245 const char *name
, uint32_t flags
)
2247 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2248 RGWFileHandle
* parent
= get_rgwfh(parent_fh
);
2250 return fs
->unlink(parent
, name
);
2254 lookup object by name (POSIX style)
// Look up `path` relative to the directory parent_fh (POSIX-style lookup).
// Cases shown:
//  - parent is the root: ".." and "/" are special-cased; any other name is
//    resolved as a bucket through stat_bucket();
//  - parent is not the root: ".." is special-cased; otherwise the expected
//    handle type comes from fh_type_of(flags) and the leaf is resolved by
//    fake_leaf() (readdir-callback lookups with rgw_nfs_s3_fast_attrs set),
//    by a cached-directory lookup_fh() (directories with fast attrs), or by
//    stat_leaf(). When nothing was found and RGW_LOOKUP_FLAG_CREATE is set, a
//    handle is created with lookup_fh(..., FLAG_CREATE).
// The rgw_file_handle of the resulting handle is fetched with get_fh().
2256 int rgw_lookup(struct rgw_fs
*rgw_fs
,
2257 struct rgw_file_handle
*parent_fh
, const char* path
,
2258 struct rgw_file_handle
**fh
,
2259 struct stat
*st
, uint32_t mask
, uint32_t flags
)
2261 //CephContext* cct = static_cast<CephContext*>(rgw_fs->rgw);
2262 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2264 RGWFileHandle
* parent
= get_rgwfh(parent_fh
);
2266 (! parent
->is_dir())) {
2271 RGWFileHandle
* rgw_fh
;
2274 if (parent
->is_root()) {
2275 /* special: parent lookup--note lack of ref()! */
2276 if (unlikely((strcmp(path
, "..") == 0) ||
2277 (strcmp(path
, "/") == 0))) {
2280 RGWLibFS::BucketStats bstat
;
2281 fhr
= fs
->stat_bucket(parent
, path
, bstat
, RGWFileHandle::FLAG_NONE
);
2282 rgw_fh
= get
<0>(fhr
);
2287 /* special: after readdir--note extra ref()! */
2288 if (unlikely((strcmp(path
, "..") == 0))) {
2290 lsubdout(fs
->get_context(), rgw
, 17)
2291 << __func__
<< " BANG"<< *rgw_fh
2295 enum rgw_fh_type fh_type
= fh_type_of(flags
);
2297 uint32_t sl_flags
= (flags
& RGW_LOOKUP_FLAG_RCB
)
2298 ? RGWFileHandle::FLAG_IN_CB
2299 : RGWFileHandle::FLAG_EXACT_MATCH
;
2301 bool fast_attrs
= fs
->get_context()->_conf
->rgw_nfs_s3_fast_attrs
;
2303 if ((flags
& RGW_LOOKUP_FLAG_RCB
) && fast_attrs
) {
2304 /* FAKE STAT--this should mean, interpolate special
2305 * owner, group, and perms masks */
2306 fhr
= fs
->fake_leaf(parent
, path
, fh_type
, st
, mask
, sl_flags
);
2308 if ((fh_type
== RGW_FS_TYPE_DIRECTORY
) && fast_attrs
) {
2309 /* trust cached dir, if present */
2310 fhr
= fs
->lookup_fh(parent
, path
, RGWFileHandle::FLAG_DIRECTORY
);
2312 rgw_fh
= get
<0>(fhr
);
2316 fhr
= fs
->stat_leaf(parent
, path
, fh_type
, sl_flags
);
2318 if (! get
<0>(fhr
)) {
2319 if (! (flags
& RGW_LOOKUP_FLAG_CREATE
))
2322 fhr
= fs
->lookup_fh(parent
, path
, RGWFileHandle::FLAG_CREATE
);
2324 rgw_fh
= get
<0>(fhr
);
2329 struct rgw_file_handle
*rfh
= rgw_fh
->get_fh();
2336 lookup object by handle (NFS style)
// Look up a handle by its hash key (NFS-style lookup): resolves *fh_hk with
// RGWLibFS::lookup_handle and fetches the rgw_file_handle of the result with
// get_fh().
2338 int rgw_lookup_handle(struct rgw_fs
*rgw_fs
, struct rgw_fh_hk
*fh_hk
,
2339 struct rgw_file_handle
**fh
, uint32_t flags
)
2341 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2343 RGWFileHandle
* rgw_fh
= fs
->lookup_handle(*fh_hk
);
2349 struct rgw_file_handle
*rfh
= rgw_fh
->get_fh();
2356 * release file handle
// Release entry point for a file handle; resolves the filesystem and the
// handle and logs the handle at debug level 17.
2358 int rgw_fh_rele(struct rgw_fs
*rgw_fs
, struct rgw_file_handle
*fh
,
2361 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2362 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2364 lsubdout(fs
->get_context(), rgw
, 17)
2365 << __func__
<< " " << *rgw_fh
2373 get unix attributes for object
// Fetch unix attributes of the object behind `fh` into *st; forwards to
// RGWLibFS::getattr and returns its result. `flags` is not passed on.
2375 int rgw_getattr(struct rgw_fs
*rgw_fs
,
2376 struct rgw_file_handle
*fh
, struct stat
*st
, uint32_t flags
)
2378 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2379 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2381 return fs
->getattr(rgw_fh
, st
);
2385 set unix attributes for object
// Apply the attributes selected by `mask` from *st to the object behind `fh`;
// forwards to RGWLibFS::setattr and returns its result.
2387 int rgw_setattr(struct rgw_fs
*rgw_fs
,
2388 struct rgw_file_handle
*fh
, struct stat
*st
,
2389 uint32_t mask
, uint32_t flags
)
2391 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2392 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2394 return fs
->setattr(rgw_fh
, st
, mask
, flags
);
2400 int rgw_truncate(struct rgw_fs
*rgw_fs
,
2401 struct rgw_file_handle
*fh
, uint64_t size
, uint32_t flags
)
// Open the file behind `fh`. Non-file handles are handled by the guard below;
// for files the call forwards to RGWFileHandle::open(flags) and returns its
// result. posix_flags is not passed on.
2409 int rgw_open(struct rgw_fs
*rgw_fs
,
2410 struct rgw_file_handle
*fh
, uint32_t posix_flags
, uint32_t flags
)
2412 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2415 * need to track specific opens--at least read opens and
2416 * a write open; we need to know when a write open is returned,
2417 * that closes a write transaction
2419 * for now, we will support single-open only, it's preferable to
2420 * anything we can otherwise do without access to the NFS state
2422 if (! rgw_fh
->is_file())
2425 return rgw_fh
->open(flags
);
// Close the handle behind `fh` via RGWFileHandle::close(); the
// RGW_CLOSE_FLAG_RELE bit in `flags` is tested afterwards.
2431 int rgw_close(struct rgw_fs
*rgw_fs
,
2432 struct rgw_file_handle
*fh
, uint32_t flags
)
2434 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2435 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2436 int rc
= rgw_fh
->close(/* XXX */);
2438 if (flags
& RGW_CLOSE_FLAG_RELE
)
// Enumerate the directory parent_fh starting from the numeric offset *offset.
// On an initial call (*offset == 0) with RGW_READDIR_FLAG_DOTDOT set, the "."
// and ".." entries are delivered first with offsets 1 and 2. The enumeration
// itself is done by RGWFileHandle::readdir.
2444 int rgw_readdir(struct rgw_fs
*rgw_fs
,
2445 struct rgw_file_handle
*parent_fh
, uint64_t *offset
,
2446 rgw_readdir_cb rcb
, void *cb_arg
, bool *eof
,
2449 RGWFileHandle
* parent
= get_rgwfh(parent_fh
);
2455 lsubdout(parent
->get_fs()->get_context(), rgw
, 15)
2457 << " offset=" << *offset
2460 if ((*offset
== 0) &&
2461 (flags
& RGW_READDIR_FLAG_DOTDOT
)) {
2462 /* send '.' and '..' with their NFS-defined offsets */
2463 rcb(".", cb_arg
, 1, nullptr, 0, RGW_LOOKUP_FLAG_DIR
);
2464 rcb("..", cb_arg
, 2, nullptr, 0, RGW_LOOKUP_FLAG_DIR
);
2467 int rc
= parent
->readdir(rcb
, cb_arg
, offset
, eof
, flags
);
2471 /* enumeration continuing from name */
// Enumerate the directory parent_fh continuing from the entry called `name`
// (logged as "(nil)" when null). With RGW_READDIR_FLAG_DOTDOT set, "." and
// ".." may be delivered first with offsets 1 and 2. The enumeration itself is
// done by RGWFileHandle::readdir using the name as the offset.
2472 int rgw_readdir2(struct rgw_fs
*rgw_fs
,
2473 struct rgw_file_handle
*parent_fh
, const char *name
,
2474 rgw_readdir_cb rcb
, void *cb_arg
, bool *eof
,
2477 RGWFileHandle
* parent
= get_rgwfh(parent_fh
);
2483 lsubdout(parent
->get_fs()->get_context(), rgw
, 15)
2485 << " offset=" << ((name
) ? name
: "(nil)")
2489 (flags
& RGW_READDIR_FLAG_DOTDOT
)) {
2490 /* send '.' and '..' with their NFS-defined offsets */
2491 rcb(".", cb_arg
, 1, nullptr, 0, RGW_LOOKUP_FLAG_DIR
);
2492 rcb("..", cb_arg
, 2, nullptr, 0, RGW_LOOKUP_FLAG_DIR
);
2495 int rc
= parent
->readdir(rcb
, cb_arg
, name
, eof
, flags
);
2497 } /* rgw_readdir2 */
2499 /* project offset of dirent name */
// Compute the directory offset of the entry called `name` within parent_fh by
// calling RGWFileHandle::offset_of, which writes the result through `offset`.
// `name` is copied into a std::string first, so it must not be null here.
2500 int rgw_dirent_offset(struct rgw_fs
*rgw_fs
,
2501 struct rgw_file_handle
*parent_fh
,
2502 const char *name
, int64_t *offset
,
2505 RGWFileHandle
* parent
= get_rgwfh(parent_fh
);
2510 std::string sname
{name
};
2511 int rc
= parent
->offset_of(sname
, offset
, flags
);
// Read up to `length` bytes at `offset` from the file behind `fh` into
// `buffer`; forwards to RGWLibFS::read, which reports the count through
// bytes_read, and returns its result.
2518 int rgw_read(struct rgw_fs
*rgw_fs
,
2519 struct rgw_file_handle
*fh
, uint64_t offset
,
2520 size_t length
, size_t *bytes_read
, void *buffer
,
2523 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2524 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2526 return fs
->read(rgw_fh
, offset
, length
, bytes_read
, buffer
, flags
);
// Read the target of the symbolic link behind `fh` into `buffer`; forwards to
// RGWLibFS::readlink with the same offset/length/bytes_read contract as
// rgw_read and returns its result.
2532 int rgw_readlink(struct rgw_fs
*rgw_fs
,
2533 struct rgw_file_handle
*fh
, uint64_t offset
,
2534 size_t length
, size_t *bytes_read
, void *buffer
,
2537 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2538 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2540 return fs
->readlink(rgw_fh
, offset
, length
, bytes_read
, buffer
, flags
);
// Write `length` bytes from `buffer` at `offset` to the file behind `fh`.
// Non-file handles are handled by the guard below. A handle that is not yet
// open is opened on the fly when RGW_OPEN_FLAG_V3 is set in `flags`. The data
// is then passed to RGWFileHandle::write, which reports the count through
// bytes_written.
2546 int rgw_write(struct rgw_fs
*rgw_fs
,
2547 struct rgw_file_handle
*fh
, uint64_t offset
,
2548 size_t length
, size_t *bytes_written
, void *buffer
,
2551 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2556 if (! rgw_fh
->is_file())
2559 if (! rgw_fh
->is_open()) {
2560 if (flags
& RGW_OPEN_FLAG_V3
) {
2561 rc
= rgw_fh
->open(flags
);
2568 rc
= rgw_fh
->write(offset
, length
, bytes_written
, buffer
);
2574 read data from file (vector)
// Members of RGWReadV, the holder used by vectored reads: it takes over a
// buffer list (moved from the constructor argument) and keeps a pointer to
// the rgw_vio array that describes it.
2579 struct rgw_vio
* vio
;
// Takes the contents of _bl by move; _vio is stored as-is and not owned here
// as far as these members show.
2582 RGWReadV(buffer::list
& _bl
, rgw_vio
* _vio
) : vio(_vio
) {
2583 bl
= std::move(_bl
);
2586 struct rgw_vio
* get_vio() { return vio
; }
2588 const auto& buffers() { return bl
.buffers(); }
2590 unsigned /* XXX */ length() { return bl
.length(); }
// Release callback for a vectored read: recovers the RGWReadV stored in
// uio->uio_p1 and returns its raw storage with ::operator delete, matching
// the ::operator new allocation made in rgw_readv.
2594 void rgw_readv_rele(struct rgw_uio
*uio
, uint32_t flags
)
2596 RGWReadV
* rdv
= static_cast<RGWReadV
*>(uio
->uio_p1
);
2598 ::operator delete(rdv
);
// Vectored read of the file behind `fh`: runs an RGWGetObjRequest for the
// range described by uio->uio_offset / uio->uio_resid, then wraps the
// resulting buffer list in an RGWReadV placed in raw storage sized for the
// object plus one rgw_vio per buffer. The uio is filled with the buffer
// count, total length, the rgw_vio array and rgw_readv_rele as the release
// callback; each rgw_vio is pointed at one buffer of the list.
2601 int rgw_readv(struct rgw_fs
*rgw_fs
,
2602 struct rgw_file_handle
*fh
, rgw_uio
*uio
, uint32_t flags
)
2605 CephContext
* cct
= static_cast<CephContext
*>(rgw_fs
->rgw
);
2606 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2607 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2609 if (! rgw_fh
->is_file())
2615 RGWGetObjRequest
req(cct
, fs
->get_user(), rgw_fh
->bucket_name(),
2616 rgw_fh
->object_name(), uio
->uio_offset
, uio
->uio_resid
,
2618 req
.do_hexdump
= false;
2620 rc
= rgwlib
.get_fe()->execute_req(&req
);
2623 RGWReadV
* rdv
= static_cast<RGWReadV
*>(
2624 ::operator new(sizeof(RGWReadV
) +
2625 (bl
.buffers().size() * sizeof(struct rgw_vio
))));
// NOTE(review): rdv is an RGWReadV*, so rdv+sizeof(RGWReadV) advances by
// sizeof(RGWReadV) objects rather than bytes; the storage requested above only
// has room for one RGWReadV followed by the rgw_vio array -- verify.
2628 RGWReadV(bl
, reinterpret_cast<rgw_vio
*>(rdv
+sizeof(RGWReadV
)));
2631 uio
->uio_cnt
= rdv
->buffers().size();
2632 uio
->uio_resid
= rdv
->length();
2633 uio
->uio_vio
= rdv
->get_vio();
2634 uio
->uio_rele
= rgw_readv_rele
;
2637 auto& buffers
= rdv
->buffers();
2638 for (auto& bp
: buffers
) {
2639 rgw_vio
*vio
= &(uio
->uio_vio
[ix
]);
2640 vio
->vio_base
= const_cast<char*>(bp
.c_str());
2641 vio
->vio_len
= bp
.length();
2642 vio
->vio_u1
= nullptr;
2643 vio
->vio_p1
= nullptr;
2655 write data to file (vector)
// Vectored write entry point. Per the note at its top the operation is not
// supported and the remaining statements are ignored. Those statements gather
// every rgw_vio of the uio into a buffer list without copying
// (create_static) and submit it as a single RGWPutObjRequest for the handle's
// bucket and relative object name.
2657 int rgw_writev(struct rgw_fs
*rgw_fs
, struct rgw_file_handle
*fh
,
2658 rgw_uio
*uio
, uint32_t flags
)
2661 // not supported - rest of function is ignored
2664 CephContext
* cct
= static_cast<CephContext
*>(rgw_fs
->rgw
);
2665 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2666 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2668 if (! rgw_fh
->is_file())
2672 for (unsigned int ix
= 0; ix
< uio
->uio_cnt
; ++ix
) {
2673 rgw_vio
*vio
= &(uio
->uio_vio
[ix
]);
2675 buffer::create_static(vio
->vio_len
,
2676 static_cast<char*>(vio
->vio_base
)));
2679 std::string oname
= rgw_fh
->relative_object_name();
2680 RGWPutObjRequest
req(cct
, rgwlib
.get_store()->get_user(fs
->get_user()->user_id
),
2681 rgw_fh
->bucket_name(), oname
, bl
);
2683 int rc
= rgwlib
.get_fe()->execute_req(&req
);
2685 /* XXX update size (in request) */
2693 int rgw_fsync(struct rgw_fs
*rgw_fs
, struct rgw_file_handle
*handle
,
// Commit the byte range [offset, offset + length) of the file behind `fh`;
// forwards to RGWFileHandle::commit with FLAG_NONE and returns its result.
// The caller's `flags` is not passed on.
2699 int rgw_commit(struct rgw_fs
*rgw_fs
, struct rgw_file_handle
*fh
,
2700 uint64_t offset
, uint64_t length
, uint32_t flags
)
2702 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2704 return rgw_fh
->commit(offset
, length
, RGWFileHandle::FLAG_NONE
);
// Fetch the extended attributes listed in `attrs` for the object behind `fh`,
// delivering them through cb(cb_arg, ...); forwards to RGWLibFS::getxattrs
// and returns its result.
2711 int rgw_getxattrs(struct rgw_fs
*rgw_fs
, struct rgw_file_handle
*fh
,
2712 rgw_xattrlist
*attrs
, rgw_getxattr_cb cb
, void *cb_arg
,
2715 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2716 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2718 return fs
->getxattrs(rgw_fh
, attrs
, cb
, cb_arg
, flags
);
// List the extended attributes of the object behind `fh`, delivering them
// through cb(cb_arg, ...); forwards to RGWLibFS::lsxattrs and returns its
// result. filter_prefix is passed along but, per the parameter note, ignored.
2721 int rgw_lsxattrs(struct rgw_fs
*rgw_fs
, struct rgw_file_handle
*fh
,
2722 rgw_xattrstr
*filter_prefix
/* ignored */,
2723 rgw_getxattr_cb cb
, void *cb_arg
, uint32_t flags
)
2725 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2726 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2728 return fs
->lsxattrs(rgw_fh
, filter_prefix
, cb
, cb_arg
, flags
);
// Set the extended attributes in `attrs` on the object behind `fh`; forwards
// to RGWLibFS::setxattrs and returns its result.
2731 int rgw_setxattrs(struct rgw_fs
*rgw_fs
, struct rgw_file_handle
*fh
,
2732 rgw_xattrlist
*attrs
, uint32_t flags
)
2734 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2735 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2737 return fs
->setxattrs(rgw_fh
, attrs
, flags
);
// Remove the extended attributes named in `attrs` from the object behind
// `fh`; forwards to RGWLibFS::rmxattrs and returns its result.
2740 int rgw_rmxattrs(struct rgw_fs
*rgw_fs
, struct rgw_file_handle
*fh
,
2741 rgw_xattrlist
*attrs
, uint32_t flags
)
2743 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2744 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2746 return fs
->rmxattrs(rgw_fh
, attrs
, flags
);