1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 #include "include/compat.h"
5 #include "include/rados/rgw_file.h"
11 #include "rgw_rados.h"
12 #include "rgw_resolve.h"
16 #include "rgw_acl_s3.h"
17 #include "rgw_frontend.h"
18 #include "rgw_request.h"
19 #include "rgw_process.h"
20 #include "rgw_rest_user.h"
21 #include "rgw_rest_s3.h"
22 #include "rgw_os_lib.h"
23 #include "rgw_auth_s3.h"
25 #include "rgw_bucket.h"
28 #include "rgw_lib_frontend.h"
29 #include "rgw_perf_counters.h"
30 #include "common/errno.h"
34 #define dout_subsys ceph_subsys_rgw
/* Name given to the root file handle: "/". */
42 const string
RGWFileHandle::root_name
= "/";
/* Out-of-class definition of the static atomic counter
 * RGWLibFS::fs_inst_counter (no explicit initializer here).
 * NOTE(review): presumably counts RGWLibFS instances--confirm at the
 * point of use. */
44 std::atomic
<uint32_t> RGWLibFS::fs_inst_counter
;
/* Static write-completion interval, initialized to 10.
 * NOTE(review): the _s suffix suggests seconds--confirm in the header. */
46 uint32_t RGWLibFS::write_completion_interval_s
= 10;
/* Static timer on ceph::mono_clock, constructed with the
 * ceph::construct_suspended tag.
 * NOTE(review): presumably stays suspended until explicitly started
 * elsewhere--confirm. */
48 ceph::timer
<ceph::mono_clock
> RGWLibFS::write_timer
{
49 ceph::construct_suspended
};
/* File-system level validation of a bucket name.
 *
 * Runs the S3 bucket-name check (valid_s3_bucket_name with relaxed ==
 * false) and additionally tests whether the name is longer than 255
 * characters.
 *
 * NOTE(review): the statements that act on rc and on the length test are
 * not visible in this view; confirm the returned error codes there. */
51 inline int valid_fs_bucket_name(const string
& name
) {
52 int rc
= valid_s3_bucket_name(name
, false /* relaxed */);
/* extra length cap applied on top of the S3 naming rules */
54 if (name
.size() > 255)
/* File-system level validation of an object name.
 *
 * Runs the S3 object-name check (valid_s3_object_name) and additionally
 * tests whether the name is longer than 1024 characters.
 *
 * NOTE(review): the statements that act on rc and on the length test are
 * not visible in this view; confirm the returned error codes there. */
61 inline int valid_fs_object_name(const string
& name
) {
62 int rc
= valid_s3_object_name(name
);
/* extra length cap applied on top of the S3 naming rules */
64 if (name
.size() > 1024)
71 LookupFHResult
RGWLibFS::stat_bucket(RGWFileHandle
* parent
, const char *path
,
72 RGWLibFS::BucketStats
& bs
,
75 LookupFHResult fhr
{nullptr, 0};
76 std::string bucket_name
{path
};
77 RGWStatBucketRequest
req(cct
, get_user(), bucket_name
, bs
);
79 int rc
= rgwlib
.get_fe()->execute_req(&req
);
81 (req
.get_ret() == 0) &&
83 fhr
= lookup_fh(parent
, path
,
84 (flags
& RGWFileHandle::FLAG_LOCKED
)|
85 RGWFileHandle::FLAG_CREATE
|
86 RGWFileHandle::FLAG_BUCKET
);
88 RGWFileHandle
* rgw_fh
= get
<0>(fhr
);
89 if (! (flags
& RGWFileHandle::FLAG_LOCKED
)) {
92 rgw_fh
->set_times(req
.get_ctime());
93 /* restore attributes */
94 auto ux_key
= req
.get_attr(RGW_ATTR_UNIX_KEY1
);
95 auto ux_attrs
= req
.get_attr(RGW_ATTR_UNIX1
);
96 if (ux_key
&& ux_attrs
) {
97 DecodeAttrsResult dar
= rgw_fh
->decode_attrs(ux_key
, ux_attrs
);
98 if (get
<0>(dar
) || get
<1>(dar
)) {
102 if (! (flags
& RGWFileHandle::FLAG_LOCKED
)) {
103 rgw_fh
->mtx
.unlock();
110 LookupFHResult
RGWLibFS::fake_leaf(RGWFileHandle
* parent
,
112 enum rgw_fh_type type
,
113 struct stat
*st
, uint32_t st_mask
,
116 /* synthesize a minimal handle from parent, path, type, and st */
119 flags
|= RGWFileHandle::FLAG_CREATE
;
122 case RGW_FS_TYPE_DIRECTORY
:
123 flags
|= RGWFileHandle::FLAG_DIRECTORY
;
130 LookupFHResult fhr
= lookup_fh(parent
, path
, flags
);
132 RGWFileHandle
* rgw_fh
= get
<0>(fhr
);
134 lock_guard
guard(rgw_fh
->mtx
);
135 if (st_mask
& RGW_SETATTR_SIZE
) {
136 rgw_fh
->set_size(st
->st_size
);
138 if (st_mask
& RGW_SETATTR_MTIME
) {
139 rgw_fh
->set_times(st
->st_mtim
);
144 } /* RGWLibFS::fake_leaf */
146 LookupFHResult
RGWLibFS::stat_leaf(RGWFileHandle
* parent
,
148 enum rgw_fh_type type
,
151 /* find either-of <object_name>, <object_name/>, only one of
152 * which should exist; atomicity? */
155 LookupFHResult fhr
{nullptr, 0};
157 /* XXX the need for two round-trip operations to identify file or
158 * directory leaf objects is unnecessary--the current proposed
159 * mechanism to avoid this is to store leaf object names with an
160 * object locator w/o trailing slash */
162 std::string obj_path
= parent
->format_child_name(path
, false);
164 for (auto ix
: { 0, 1, 2 }) {
169 if (type
== RGW_FS_TYPE_DIRECTORY
)
172 RGWStatObjRequest
req(cct
, get_user(),
173 parent
->bucket_name(), obj_path
,
174 RGWStatObjRequest::FLAG_NONE
);
175 int rc
= rgwlib
.get_fe()->execute_req(&req
);
177 (req
.get_ret() == 0)) {
178 fhr
= lookup_fh(parent
, path
, RGWFileHandle::FLAG_CREATE
);
180 RGWFileHandle
* rgw_fh
= get
<0>(fhr
);
181 lock_guard
guard(rgw_fh
->mtx
);
182 rgw_fh
->set_size(req
.get_size());
183 rgw_fh
->set_times(req
.get_mtime());
184 /* restore attributes */
185 auto ux_key
= req
.get_attr(RGW_ATTR_UNIX_KEY1
);
186 auto ux_attrs
= req
.get_attr(RGW_ATTR_UNIX1
);
187 rgw_fh
->set_etag(*(req
.get_attr(RGW_ATTR_ETAG
)));
188 rgw_fh
->set_acls(*(req
.get_attr(RGW_ATTR_ACL
)));
189 if (ux_key
&& ux_attrs
) {
190 DecodeAttrsResult dar
= rgw_fh
->decode_attrs(ux_key
, ux_attrs
);
191 if (get
<0>(dar
) || get
<1>(dar
)) {
204 if (type
== RGW_FS_TYPE_FILE
)
208 RGWStatObjRequest
req(cct
, get_user(),
209 parent
->bucket_name(), obj_path
,
210 RGWStatObjRequest::FLAG_NONE
);
211 int rc
= rgwlib
.get_fe()->execute_req(&req
);
213 (req
.get_ret() == 0)) {
214 fhr
= lookup_fh(parent
, path
, RGWFileHandle::FLAG_DIRECTORY
);
216 RGWFileHandle
* rgw_fh
= get
<0>(fhr
);
217 lock_guard
guard(rgw_fh
->mtx
);
218 rgw_fh
->set_size(req
.get_size());
219 rgw_fh
->set_times(req
.get_mtime());
220 /* restore attributes */
221 auto ux_key
= req
.get_attr(RGW_ATTR_UNIX_KEY1
);
222 auto ux_attrs
= req
.get_attr(RGW_ATTR_UNIX1
);
223 rgw_fh
->set_etag(*(req
.get_attr(RGW_ATTR_ETAG
)));
224 rgw_fh
->set_acls(*(req
.get_attr(RGW_ATTR_ACL
)));
225 if (ux_key
&& ux_attrs
) {
226 DecodeAttrsResult dar
= rgw_fh
->decode_attrs(ux_key
, ux_attrs
);
227 if (get
<0>(dar
) || get
<1>(dar
)) {
238 std::string object_name
{path
};
239 RGWStatLeafRequest
req(cct
, get_user(), parent
, object_name
);
240 int rc
= rgwlib
.get_fe()->execute_req(&req
);
242 (req
.get_ret() == 0)) {
244 /* we need rgw object's key name equal to file name, if
246 if ((flags
& RGWFileHandle::FLAG_EXACT_MATCH
) &&
247 !req
.exact_matched
) {
248 lsubdout(get_context(), rgw
, 15)
250 << ": stat leaf not exact match file name = "
254 fhr
= lookup_fh(parent
, path
,
255 RGWFileHandle::FLAG_CREATE
|
257 RGWFileHandle::FLAG_DIRECTORY
:
258 RGWFileHandle::FLAG_NONE
));
259 /* XXX we don't have an object--in general, there need not
260 * be one (just a path segment in some other object). In
261 * actual leaf an object exists, but we'd need another round
262 * trip to get attrs */
264 /* for now use the parent object's mtime */
265 RGWFileHandle
* rgw_fh
= get
<0>(fhr
);
266 lock_guard
guard(rgw_fh
->mtx
);
267 rgw_fh
->set_mtime(parent
->get_mtime());
280 } /* RGWLibFS::stat_leaf */
/* Read up to `length` bytes at `offset` from the object behind rgw_fh.
 *
 * The handle is first tested with is_file() and deleted(); the outcome
 * of a failed test is not visible in this view (NOTE(review): confirm
 * the error codes).  An RGWReadRequest is then built from the context,
 * user, handle, offset and length and executed through the library
 * front end.  When the request reports success (req.get_ret() == 0) the
 * handle's atime is set to the current time and *bytes_read receives
 * req.nread, both while rgw_fh->mtx is held. */
282 int RGWLibFS::read(RGWFileHandle
* rgw_fh
, uint64_t offset
, size_t length
,
283 size_t* bytes_read
, void* buffer
, uint32_t flags
)
/* only file handles are accepted here */
285 if (! rgw_fh
->is_file())
/* handle whose object has been marked deleted */
288 if (rgw_fh
->deleted())
291 RGWReadRequest
req(get_context(), get_user(), rgw_fh
, offset
, length
,
294 int rc
= rgwlib
.get_fe()->execute_req(&req
);
296 (req
.get_ret() == 0)) {
/* named guard: rgw_fh->mtx stays held until the end of this scope */
297 lock_guard
guard(rgw_fh
->mtx
);
298 rgw_fh
->set_atime(real_clock::to_timespec(real_clock::now()));
299 *bytes_read
= req
.nread
;
/* Read the content of the symbolic-link object behind rgw_fh.
 *
 * The handle is first tested with is_link() and deleted(); the outcome
 * of a failed test is not visible in this view (NOTE(review): confirm
 * the error codes).  An RGWReadRequest is then built from the context,
 * user, handle, offset and length and executed through the library
 * front end.  When the request reports success (req.get_ret() == 0) the
 * handle's atime is set to the current time and *bytes_read receives
 * req.nread, both while rgw_fh->mtx is held. */
305 int RGWLibFS::readlink(RGWFileHandle
* rgw_fh
, uint64_t offset
, size_t length
,
306 size_t* bytes_read
, void* buffer
, uint32_t flags
)
/* only symbolic-link handles are accepted here */
308 if (! rgw_fh
->is_link())
/* handle whose object has been marked deleted */
311 if (rgw_fh
->deleted())
314 RGWReadRequest
req(get_context(), get_user(), rgw_fh
, offset
, length
,
317 int rc
= rgwlib
.get_fe()->execute_req(&req
);
319 (req
.get_ret() == 0)) {
/* The guard must be a named object: an unnamed lock_guard(...) is a
 * temporary that is destroyed at the end of its own statement, which
 * would leave the updates below unprotected.  Matches read(). */
320 lock_guard guard(rgw_fh
->mtx
);
321 rgw_fh
->set_atime(real_clock::to_timespec(real_clock::now()));
322 *bytes_read
= req
.nread
;
328 int RGWLibFS::unlink(RGWFileHandle
* rgw_fh
, const char* name
, uint32_t flags
)
332 RGWFileHandle
* parent
= nullptr;
333 RGWFileHandle
* bkt_fh
= nullptr;
335 if (unlikely(flags
& RGWFileHandle::FLAG_UNLINK_THIS
)) {
337 parent
= rgw_fh
->get_parent();
341 LookupFHResult fhr
= lookup_fh(parent
, name
, RGWFileHandle::FLAG_LOCK
);
342 rgw_fh
= get
<0>(fhr
);
346 if (parent
->is_root()) {
347 /* a bucket may have an object storing Unix attributes, check
348 * for and delete it */
350 fhr
= stat_bucket(parent
, name
, bs
, (rgw_fh
) ?
351 RGWFileHandle::FLAG_LOCKED
:
352 RGWFileHandle::FLAG_NONE
);
353 bkt_fh
= get
<0>(fhr
);
354 if (unlikely(! bkt_fh
)) {
355 /* implies !rgw_fh, so also !LOCKED */
359 if (bs
.num_entries
> 1) {
360 unref(bkt_fh
); /* return stat_bucket ref */
361 if (likely(!! rgw_fh
)) { /* return lock and ref from
362 * lookup_fh (or caller in the
364 * RGWFileHandle::FLAG_UNLINK_THIS) */
365 rgw_fh
->mtx
.unlock();
370 /* delete object w/key "<bucket>/" (uxattrs), if any */
372 RGWDeleteObjRequest
req(cct
, get_user(), bkt_fh
->bucket_name(), oname
);
373 rc
= rgwlib
.get_fe()->execute_req(&req
);
374 /* don't care if ENOENT */
379 RGWDeleteBucketRequest
req(cct
, get_user(), bname
);
380 rc
= rgwlib
.get_fe()->execute_req(&req
);
389 /* XXX for now, perform a hard lookup to deduce the type of
390 * object to be deleted ("foo" vs. "foo/")--also, ensures
391 * atomicity at this endpoint */
392 struct rgw_file_handle
*fh
;
393 rc
= rgw_lookup(get_fs(), parent
->get_fh(), name
, &fh
,
394 nullptr /* st */, 0 /* mask */,
395 RGW_LOOKUP_FLAG_NONE
);
400 rgw_fh
= get_rgwfh(fh
);
401 rgw_fh
->mtx
.lock(); /* LOCKED */
404 std::string oname
= rgw_fh
->relative_object_name();
405 if (rgw_fh
->is_dir()) {
406 /* for the duration of our cache timer, trust positive
408 if (rgw_fh
->has_children()) {
409 rgw_fh
->mtx
.unlock();
415 RGWDeleteObjRequest
req(cct
, get_user(), parent
->bucket_name(),
417 rc
= rgwlib
.get_fe()->execute_req(&req
);
423 /* ENOENT when raced with other s3 gateway */
424 if (! rc
|| rc
== -ENOENT
) {
425 rgw_fh
->flags
|= RGWFileHandle::FLAG_DELETED
;
426 fh_cache
.remove(rgw_fh
->fh
.fh_hk
.object
, rgw_fh
,
427 RGWFileHandle::FHCache::FLAG_LOCK
);
431 real_time t
= real_clock::now();
432 parent
->set_mtime(real_clock::to_timespec(t
));
433 parent
->set_ctime(real_clock::to_timespec(t
));
436 rgw_fh
->mtx
.unlock();
440 } /* RGWLibFS::unlink */
442 int RGWLibFS::rename(RGWFileHandle
* src_fh
, RGWFileHandle
* dst_fh
,
443 const char *_src_name
, const char *_dst_name
)
446 /* XXX initial implementation: try-copy, and delete if copy
452 std::string src_name
{_src_name
};
453 std::string dst_name
{_dst_name
};
456 LookupFHResult fhr
= lookup_fh(src_fh
, _src_name
, RGWFileHandle::FLAG_LOCK
);
457 RGWFileHandle
* rgw_fh
= get
<0>(fhr
);
459 /* should not happen */
461 ldout(get_context(), 0) << __func__
462 << " BUG no such src renaming path="
468 /* forbid renaming of directories (unreasonable at scale) */
469 if (rgw_fh
->is_dir()) {
470 ldout(get_context(), 12) << __func__
471 << " rejecting attempt to rename directory path="
472 << rgw_fh
->full_object_name()
478 /* forbid renaming open files (violates intent, for now) */
479 if (rgw_fh
->is_open()) {
480 ldout(get_context(), 12) << __func__
481 << " rejecting attempt to rename open file path="
482 << rgw_fh
->full_object_name()
488 t
= real_clock::now();
490 for (int ix
: {0, 1}) {
494 RGWCopyObjRequest
req(cct
, get_user(), src_fh
, dst_fh
, src_name
,
496 int rc
= rgwlib
.get_fe()->execute_req(&req
);
498 ((rc
= req
.get_ret()) != 0)) {
499 ldout(get_context(), 1)
501 << " rename step 0 failed src="
502 << src_fh
->full_object_name() << " " << src_name
503 << " dst=" << dst_fh
->full_object_name()
509 ldout(get_context(), 12)
511 << " rename step 0 success src="
512 << src_fh
->full_object_name() << " " << src_name
513 << " dst=" << dst_fh
->full_object_name()
517 /* update dst change id */
518 dst_fh
->set_times(t
);
523 rc
= this->unlink(rgw_fh
/* LOCKED */, _src_name
,
524 RGWFileHandle::FLAG_UNLINK_THIS
);
527 ldout(get_context(), 12)
529 << " rename step 1 success src="
530 << src_fh
->full_object_name() << " " << src_name
531 << " dst=" << dst_fh
->full_object_name()
535 /* update src change id */
536 src_fh
->set_times(t
);
538 ldout(get_context(), 1)
540 << " rename step 1 failed src="
541 << src_fh
->full_object_name() << " " << src_name
542 << " dst=" << dst_fh
->full_object_name()
554 rgw_fh
->mtx
.unlock(); /* !LOCKED */
555 unref(rgw_fh
); /* -ref */
559 } /* RGWLibFS::rename */
561 MkObjResult
RGWLibFS::mkdir(RGWFileHandle
* parent
, const char *name
,
562 struct stat
*st
, uint32_t mask
, uint32_t flags
)
565 rgw_file_handle
*lfh
;
567 rc
= rgw_lookup(get_fs(), parent
->get_fh(), name
, &lfh
,
568 nullptr /* st */, 0 /* mask */,
569 RGW_LOOKUP_FLAG_NONE
);
572 rc
= rgw_fh_rele(get_fs(), lfh
, RGW_FH_RELE_FLAG_NONE
);
573 return MkObjResult
{nullptr, -EEXIST
};
576 MkObjResult mkr
{nullptr, -EINVAL
};
578 RGWFileHandle
* rgw_fh
= nullptr;
579 buffer::list ux_key
, ux_attrs
;
581 fhr
= lookup_fh(parent
, name
,
582 RGWFileHandle::FLAG_CREATE
|
583 RGWFileHandle::FLAG_DIRECTORY
|
584 RGWFileHandle::FLAG_LOCK
);
585 rgw_fh
= get
<0>(fhr
);
587 rgw_fh
->create_stat(st
, mask
);
588 rgw_fh
->set_times(real_clock::now());
590 rgw_fh
->encode_attrs(ux_key
, ux_attrs
);
592 rgw_fh
->stat(st
, RGWFileHandle::FLAG_LOCKED
);
593 get
<0>(mkr
) = rgw_fh
;
599 if (parent
->is_root()) {
602 /* enforce S3 name restrictions */
603 rc
= valid_fs_bucket_name(bname
);
605 rgw_fh
->flags
|= RGWFileHandle::FLAG_DELETED
;
606 fh_cache
.remove(rgw_fh
->fh
.fh_hk
.object
, rgw_fh
,
607 RGWFileHandle::FHCache::FLAG_LOCK
);
608 rgw_fh
->mtx
.unlock();
610 get
<0>(mkr
) = nullptr;
615 RGWCreateBucketRequest
req(get_context(), get_user(), bname
);
618 req
.emplace_attr(RGW_ATTR_UNIX_KEY1
, std::move(ux_key
));
619 req
.emplace_attr(RGW_ATTR_UNIX1
, std::move(ux_attrs
));
621 rc
= rgwlib
.get_fe()->execute_req(&req
);
624 /* create an object representing the directory */
626 string dir_name
= parent
->format_child_name(name
, true);
628 /* need valid S3 name (characters, length <= 1024, etc) */
629 rc
= valid_fs_object_name(dir_name
);
631 rgw_fh
->flags
|= RGWFileHandle::FLAG_DELETED
;
632 fh_cache
.remove(rgw_fh
->fh
.fh_hk
.object
, rgw_fh
,
633 RGWFileHandle::FHCache::FLAG_LOCK
);
634 rgw_fh
->mtx
.unlock();
636 get
<0>(mkr
) = nullptr;
641 RGWPutObjRequest
req(get_context(), get_user(), parent
->bucket_name(),
645 req
.emplace_attr(RGW_ATTR_UNIX_KEY1
, std::move(ux_key
));
646 req
.emplace_attr(RGW_ATTR_UNIX1
, std::move(ux_attrs
));
648 rc
= rgwlib
.get_fe()->execute_req(&req
);
655 rgw_fh
->flags
|= RGWFileHandle::FLAG_DELETED
;
656 rgw_fh
->mtx
.unlock(); /* !LOCKED */
658 get
<0>(mkr
) = nullptr;
663 real_time t
= real_clock::now();
664 parent
->set_mtime(real_clock::to_timespec(t
));
665 parent
->set_ctime(real_clock::to_timespec(t
));
666 rgw_fh
->mtx
.unlock(); /* !LOCKED */
672 } /* RGWLibFS::mkdir */
674 MkObjResult
RGWLibFS::create(RGWFileHandle
* parent
, const char *name
,
675 struct stat
*st
, uint32_t mask
, uint32_t flags
)
681 rgw_file_handle
*lfh
;
682 rc
= rgw_lookup(get_fs(), parent
->get_fh(), name
, &lfh
,
683 nullptr /* st */, 0 /* mask */,
684 RGW_LOOKUP_FLAG_NONE
);
687 rc
= rgw_fh_rele(get_fs(), lfh
, RGW_FH_RELE_FLAG_NONE
);
688 return MkObjResult
{nullptr, -EEXIST
};
691 /* expand and check name */
692 std::string obj_name
= parent
->format_child_name(name
, false);
693 rc
= valid_fs_object_name(obj_name
);
695 return MkObjResult
{nullptr, rc
};
700 RGWPutObjRequest
req(cct
, get_user(), parent
->bucket_name(), obj_name
, bl
);
701 MkObjResult mkr
{nullptr, -EINVAL
};
703 rc
= rgwlib
.get_fe()->execute_req(&req
);
709 LookupFHResult fhr
= lookup_fh(parent
, name
, RGWFileHandle::FLAG_CREATE
|
710 RGWFileHandle::FLAG_LOCK
);
711 RGWFileHandle
* rgw_fh
= get
<0>(fhr
);
713 if (get
<1>(fhr
) & RGWFileHandle::FLAG_CREATE
) {
714 /* fill in stat data */
715 real_time t
= real_clock::now();
716 rgw_fh
->create_stat(st
, mask
);
717 rgw_fh
->set_times(t
);
719 parent
->set_mtime(real_clock::to_timespec(t
));
720 parent
->set_ctime(real_clock::to_timespec(t
));
723 (void) rgw_fh
->stat(st
, RGWFileHandle::FLAG_LOCKED
);
725 rgw_fh
->set_etag(*(req
.get_attr(RGW_ATTR_ETAG
)));
726 rgw_fh
->set_acls(*(req
.get_attr(RGW_ATTR_ACL
)));
728 get
<0>(mkr
) = rgw_fh
;
729 rgw_fh
->mtx
.unlock();
736 /* case like : quota exceed will be considered as fail too*/
741 } /* RGWLibFS::create */
743 MkObjResult
RGWLibFS::symlink(RGWFileHandle
* parent
, const char *name
,
744 const char* link_path
, struct stat
*st
, uint32_t mask
, uint32_t flags
)
750 rgw_file_handle
*lfh
;
751 rc
= rgw_lookup(get_fs(), parent
->get_fh(), name
, &lfh
,
752 nullptr /* st */, 0 /* mask */,
753 RGW_LOOKUP_FLAG_NONE
);
756 rc
= rgw_fh_rele(get_fs(), lfh
, RGW_FH_RELE_FLAG_NONE
);
757 return MkObjResult
{nullptr, -EEXIST
};
760 MkObjResult mkr
{nullptr, -EINVAL
};
762 RGWFileHandle
* rgw_fh
= nullptr;
763 buffer::list ux_key
, ux_attrs
;
765 fhr
= lookup_fh(parent
, name
,
766 RGWFileHandle::FLAG_CREATE
|
767 RGWFileHandle::FLAG_SYMBOLIC_LINK
|
768 RGWFileHandle::FLAG_LOCK
);
769 rgw_fh
= get
<0>(fhr
);
771 rgw_fh
->create_stat(st
, mask
);
772 rgw_fh
->set_times(real_clock::now());
774 rgw_fh
->encode_attrs(ux_key
, ux_attrs
);
777 get
<0>(mkr
) = rgw_fh
;
783 /* need valid S3 name (characters, length <= 1024, etc) */
784 rc
= valid_fs_object_name(name
);
786 rgw_fh
->flags
|= RGWFileHandle::FLAG_DELETED
;
787 fh_cache
.remove(rgw_fh
->fh
.fh_hk
.object
, rgw_fh
,
788 RGWFileHandle::FHCache::FLAG_LOCK
);
789 rgw_fh
->mtx
.unlock();
791 get
<0>(mkr
) = nullptr;
796 string obj_name
= std::string(name
);
797 /* create an object representing the symbolic link */
803 buffer::create_static(len
, static_cast<char*>(buffer
)));
807 buffer::copy(link_path
, strlen(link_path
)));
810 RGWPutObjRequest
req(get_context(), get_user(), parent
->bucket_name(),
814 req
.emplace_attr(RGW_ATTR_UNIX_KEY1
, std::move(ux_key
));
815 req
.emplace_attr(RGW_ATTR_UNIX1
, std::move(ux_attrs
));
817 rc
= rgwlib
.get_fe()->execute_req(&req
);
822 rgw_fh
->flags
|= RGWFileHandle::FLAG_DELETED
;
823 rgw_fh
->mtx
.unlock(); /* !LOCKED */
825 get
<0>(mkr
) = nullptr;
830 real_time t
= real_clock::now();
831 parent
->set_mtime(real_clock::to_timespec(t
));
832 parent
->set_ctime(real_clock::to_timespec(t
));
833 rgw_fh
->mtx
.unlock(); /* !LOCKED */
839 } /* RGWLibFS::symlink */
/* Fill *st with the attributes of rgw_fh.
 *
 * Switches on the handle's fh_type; for RGW_FS_TYPE_FILE the handle is
 * tested with deleted() (NOTE(review): the result of that test and any
 * other switch cases are not visible in this view).  Otherwise the
 * result of rgw_fh->stat(st) is returned. */
841 int RGWLibFS::getattr(RGWFileHandle
* rgw_fh
, struct stat
* st
)
843 switch(rgw_fh
->fh
.fh_type
) {
844 case RGW_FS_TYPE_FILE
:
846 if (rgw_fh
->deleted())
853 /* if rgw_fh is a directory, mtime will be advanced */
854 return rgw_fh
->stat(st
);
855 } /* RGWLibFS::getattr */
857 int RGWLibFS::setattr(RGWFileHandle
* rgw_fh
, struct stat
* st
, uint32_t mask
,
861 buffer::list ux_key
, ux_attrs
;
862 buffer::list etag
= rgw_fh
->get_etag();
863 buffer::list acls
= rgw_fh
->get_acls();
865 lock_guard
guard(rgw_fh
->mtx
);
867 switch(rgw_fh
->fh
.fh_type
) {
868 case RGW_FS_TYPE_FILE
:
870 if (rgw_fh
->deleted())
878 string obj_name
{rgw_fh
->relative_object_name()};
880 if (rgw_fh
->is_dir() &&
881 (likely(! rgw_fh
->is_bucket()))) {
885 RGWSetAttrsRequest
req(cct
, get_user(), rgw_fh
->bucket_name(), obj_name
);
887 rgw_fh
->create_stat(st
, mask
);
888 rgw_fh
->encode_attrs(ux_key
, ux_attrs
);
891 req
.emplace_attr(RGW_ATTR_UNIX_KEY1
, std::move(ux_key
));
892 req
.emplace_attr(RGW_ATTR_UNIX1
, std::move(ux_attrs
));
893 req
.emplace_attr(RGW_ATTR_ETAG
, std::move(etag
));
894 req
.emplace_attr(RGW_ATTR_ACL
, std::move(acls
));
896 rc
= rgwlib
.get_fe()->execute_req(&req
);
900 /* special case: materialize placeholder dir */
902 RGWPutObjRequest
req(get_context(), get_user(), rgw_fh
->bucket_name(),
905 rgw_fh
->encode_attrs(ux_key
, ux_attrs
); /* because std::moved */
908 req
.emplace_attr(RGW_ATTR_UNIX_KEY1
, std::move(ux_key
));
909 req
.emplace_attr(RGW_ATTR_UNIX1
, std::move(ux_attrs
));
911 rc
= rgwlib
.get_fe()->execute_req(&req
);
915 if ((rc
!= 0) || (rc2
!= 0)) {
919 rgw_fh
->set_ctime(real_clock::to_timespec(real_clock::now()));
922 } /* RGWLibFS::setattr */
/* Re-encode the Unix attributes of rgw_fh and store them on its backing
 * object with an RGWSetAttrsRequest (RGW_ATTR_UNIX_KEY1 and
 * RGW_ATTR_UNIX1).  The object name is the handle's
 * relative_object_name(); directories that are not buckets get separate
 * handling (NOTE(review): that branch body is not visible in this
 * view).  A failure of the request is only logged at level 17. */
924 /* called under rgw_fh->mtx held */
925 void RGWLibFS::update_fh(RGWFileHandle
*rgw_fh
)
928 string obj_name
{rgw_fh
->relative_object_name()};
929 buffer::list ux_key
, ux_attrs
;
/* non-bucket directory: special-cased before the request is built */
931 if (rgw_fh
->is_dir() &&
932 (likely(! rgw_fh
->is_bucket()))) {
936 lsubdout(get_context(), rgw
, 17)
938 << " update old versioned fh : " << obj_name
941 RGWSetAttrsRequest
req(cct
, get_user(), rgw_fh
->bucket_name(), obj_name
);
/* serialize the handle key and the handle state into the two buffers */
943 rgw_fh
->encode_attrs(ux_key
, ux_attrs
);
/* buffers are moved into the request; do not reuse them afterwards */
945 req
.emplace_attr(RGW_ATTR_UNIX_KEY1
, std::move(ux_key
));
946 req
.emplace_attr(RGW_ATTR_UNIX1
, std::move(ux_attrs
));
948 rc
= rgwlib
.get_fe()->execute_req(&req
);
/* either the dispatch result (rc) or the second status (rc2) failing
 * counts as failure */
951 if ((rc
!= 0) || (rc2
!= 0)) {
952 lsubdout(get_context(), rgw
, 17)
954 << " update fh failed : " << obj_name
957 } /* RGWLibFS::update_fh */
959 void RGWLibFS::close()
961 state
.flags
|= FLAG_CLOSED
;
967 explicit ObjUnref(RGWLibFS
* _fs
) : fs(_fs
) {}
968 void operator()(RGWFileHandle
* fh
) const {
969 lsubdout(fs
->get_context(), rgw
, 5)
972 << " before ObjUnref refs=" << fh
->get_refcnt()
978 /* force cache drain, forces objects to evict */
979 fh_cache
.drain(ObjUnref(this),
980 RGWFileHandle::FHCache::FLAG_LOCK
);
981 rgwlib
.get_fe()->get_process()->unregister_fs(this);
983 } /* RGWLibFS::close */
/* Stream a fh_key for logging: writes the "<fh_key: bucket=" prefix,
 * then the bucket and object members of fhk.fh_hk.
 * NOTE(review): the separators and the closing text are not visible in
 * this view. */
985 inline std::ostream
& operator<<(std::ostream
&os
, fh_key
const &fhk
) {
986 os
<< "<fh_key: bucket=";
987 os
<< fhk
.fh_hk
.bucket
;
989 os
<< fhk
.fh_hk
.object
;
994 inline std::ostream
& operator<<(std::ostream
&os
, struct timespec
const &ts
) {
995 os
<< "<timespec: tv_sec=";
1003 std::ostream
& operator<<(std::ostream
&os
, RGWLibFS::event
const &ev
) {
1006 case RGWLibFS::event::type::READDIR
:
1007 os
<< "type=READDIR;";
1010 os
<< "type=UNKNOWN;";
1013 os
<< "fid=" << ev
.fhk
.fh_hk
.bucket
<< ":" << ev
.fhk
.fh_hk
.object
1014 << ";ts=" << ev
.ts
<< ">";
1021 using directory
= RGWFileHandle::directory
;
1023 /* dirent invalidate timeout--basically, the upper-bound on
1024 * inconsistency with the S3 namespace */
1026 = get_context()->_conf
->rgw_nfs_namespace_expire_secs
;
1028 /* max events to gc in one cycle */
1029 uint32_t max_ev
= get_context()->_conf
->rgw_nfs_max_gc
;
1031 struct timespec now
, expire_ts
;
1034 std::deque
<event
> &events
= state
.events
;
1037 (void) clock_gettime(CLOCK_MONOTONIC_COARSE
, &now
);
1038 lsubdout(get_context(), rgw
, 15)
1039 << "GC: top of expire loop"
1041 << " expire_s=" << expire_s
1044 lock_guard
guard(state
.mtx
); /* LOCKED */
1045 lsubdout(get_context(), rgw
, 15)
1047 << " count=" << events
.size()
1050 /* just return if no events */
1051 if (events
.empty()) {
1055 (events
.size() < 500) ? max_ev
: (events
.size() / 4);
1056 for (uint32_t ix
= 0; (ix
< _max_ev
) && (events
.size() > 0); ++ix
) {
1057 event
& ev
= events
.front();
1059 expire_ts
.tv_sec
+= expire_s
;
1060 if (expire_ts
> now
) {
1069 for (auto& ev
: ve
) {
1070 lsubdout(get_context(), rgw
, 15)
1071 << "try-expire ev: " << ev
<< dendl
;
1072 if (likely(ev
.t
== event::type::READDIR
)) {
1073 RGWFileHandle
* rgw_fh
= lookup_handle(ev
.fhk
.fh_hk
);
1074 lsubdout(get_context(), rgw
, 15)
1075 << "ev rgw_fh: " << rgw_fh
<< dendl
;
1077 RGWFileHandle::directory
* d
;
1078 if (unlikely(! rgw_fh
->is_dir())) {
1079 lsubdout(get_context(), rgw
, 0)
1081 << " BUG non-directory found with READDIR event "
1082 << "(" << rgw_fh
->bucket_name() << ","
1083 << rgw_fh
->object_name() << ")"
1087 /* maybe clear state */
1088 d
= get
<directory
>(&rgw_fh
->variant_type
);
1090 struct timespec ev_ts
= ev
.ts
;
1091 lock_guard
guard(rgw_fh
->mtx
);
1092 struct timespec d_last_readdir
= d
->last_readdir
;
1093 if (unlikely(ev_ts
< d_last_readdir
)) {
1094 /* readdir cycle in progress, don't invalidate */
1095 lsubdout(get_context(), rgw
, 15)
1096 << "GC: delay expiration for "
1097 << rgw_fh
->object_name()
1098 << " ev.ts=" << ev_ts
1099 << " last_readdir=" << d_last_readdir
1103 lsubdout(get_context(), rgw
, 15)
1105 << rgw_fh
->object_name()
1107 rgw_fh
->clear_state();
1108 rgw_fh
->invalidate();
1114 } /* event::type::READDIR */
1117 } while (! (stop
|| shutdown
));
1118 } /* RGWLibFS::gc */
1120 std::ostream
& operator<<(std::ostream
&os
,
1121 RGWFileHandle
const &rgw_fh
)
1123 const auto& fhk
= rgw_fh
.get_key();
1124 const auto& fh
= const_cast<RGWFileHandle
&>(rgw_fh
).get_fh();
1125 os
<< "<RGWFileHandle:";
1126 os
<< "addr=" << &rgw_fh
<< ";";
1127 switch (fh
->fh_type
) {
1128 case RGW_FS_TYPE_DIRECTORY
:
1129 os
<< "type=DIRECTORY;";
1131 case RGW_FS_TYPE_FILE
:
1135 os
<< "type=UNKNOWN;";
1138 os
<< "fid=" << fhk
.fh_hk
.bucket
<< ":" << fhk
.fh_hk
.object
<< ";";
1139 os
<< "name=" << rgw_fh
.object_name() << ";";
1140 os
<< "refcnt=" << rgw_fh
.get_refcnt() << ";";
/* Destructor: if the handle is still linked into the handle cache it is
 * removed (with FHCache::FLAG_LOCK), and a reference on the parent is
 * returned through the fs unless the parent is the mount handle. */
1145 RGWFileHandle::~RGWFileHandle() {
1146 /* !recycle case, handle may STILL be in handle table, BUT
1147 * the partition lock is not held in this path */
1148 if (fh_hook
.is_linked()) {
1149 fs
->fh_cache
.remove(fh
.fh_hk
.object
, this, FHCache::FLAG_LOCK
);
1151 /* cond-unref parent */
1152 if (parent
&& (! parent
->is_mount())) {
1153 /* safe because if parent->unref causes its deletion,
1154 * there are a) by refcnt, no other objects/paths pointing
1155 * to it and b) by the semantics of valid iteration of
1156 * fh_lru (observed, e.g., by cohort_lru<T,...>::drain())
1157 * no unsafe iterators reaching it either--n.b., this constraint
1158 * is binding on code which may in future attempt to e.g.,
1159 * cause the eviction of objects in LRU order */
1160 (void) get_fs()->unref(parent
);
/* Build the fh_key for a child called `name` under this handle.
 *
 * The tenant component is the string form of the fs user's user_id.
 * Two forms are produced: a bucket key built from (name, name, tenant),
 * and an object key built from this handle's bucket hash, the expanded
 * key name from make_key_name(name) and the tenant.
 * NOTE(review): the condition selecting between the two returns is not
 * visible in this view. */
1164 fh_key
RGWFileHandle::make_fhk(const std::string
& name
)
1166 std::string tenant
= get_fs()->get_user()->user_id
.to_str();
1168 /* S3 bucket -- assert mount-at-bucket case reaches here */
1169 return fh_key(name
, name
, tenant
);
/* object case: key on the path-qualified name within this bucket */
1171 std::string key_name
= make_key_name(name
.c_str());
1172 return fh_key(fhk
.fh_hk
.bucket
, key_name
.c_str(), tenant
);
/* Serialize this handle for storage as object attributes.
 *
 * ux_key1   receives the encoding of an fh_key built from this->fh.fh_hk.
 * ux_attrs1 receives the encoding of *this.
 * Both buffers are written through encode(); whether existing content
 * is kept depends on encode(), which is not visible here. */
1176 void RGWFileHandle::encode_attrs(ceph::buffer::list
& ux_key1
,
1177 ceph::buffer::list
& ux_attrs1
)
1180 fh_key
fhk(this->fh
.fh_hk
);
1181 encode(fhk
, ux_key1
);
1182 encode(*this, ux_attrs1
);
1183 } /* RGWFileHandle::encode_attrs */
/* Inverse of encode_attrs(): decode a stored key and stored attributes.
 *
 * ux_key1   is decoded into fhk; ux_attrs1 is decoded into *this.
 * Both pointers are dereferenced without a null check in the visible
 * lines, so callers must pass non-null buffers.
 * The result starts as { false, false } and the decoded state version
 * is compared against 2.
 * NOTE(review): how the two result flags are set is not visible in this
 * view--confirm before relying on their meaning. */
1185 DecodeAttrsResult
RGWFileHandle::decode_attrs(const ceph::buffer::list
* ux_key1
,
1186 const ceph::buffer::list
* ux_attrs1
)
1189 DecodeAttrsResult dar
{ false, false };
1191 auto bl_iter_key1
= ux_key1
->cbegin();
1192 decode(fhk
, bl_iter_key1
);
1195 auto bl_iter_unix1
= ux_attrs1
->cbegin();
1196 decode(*this, bl_iter_unix1
);
/* state decoded from an older encoding (version < 2) is special-cased */
1197 if (this->state
.version
< 2) {
1202 } /* RGWFileHandle::decode_attrs */
/* Reclaim hook for this handle: logs the handle at level 17 and, if it
 * is still linked into the handle cache, removes it using
 * FHCache::FLAG_NONE (no locking requested, per the comment below).
 * NOTE(review): the return statement is not visible in this view. */
1204 bool RGWFileHandle::reclaim() {
1205 lsubdout(fs
->get_context(), rgw
, 17)
1206 << __func__
<< " " << *this
1208 /* in the non-delete case, handle may still be in handle table */
1209 if (fh_hook
.is_linked()) {
1210 /* in this case, we are being called from a context which holds
1211 * the partition lock */
1212 fs
->fh_cache
.remove(fh
.fh_hk
.object
, this, FHCache::FLAG_NONE
);
1215 } /* RGWFileHandle::reclaim */
/* Report whether this directory handle has any children.
 *
 * Non-directory handles are handled by the first test (NOTE(review):
 * its result is not visible in this view).  Otherwise an RGWRMdirCheck
 * request is executed, so this call goes through the library front end
 * rather than using cached state.  One visible return yields true only
 * when the request marked itself valid and reported children.
 * NOTE(review): how rc is used is not visible in this view. */
1217 bool RGWFileHandle::has_children() const
1219 if (unlikely(! is_dir()))
1222 RGWRMdirCheck
req(fs
->get_context(), fs
->get_user(), this);
1223 int rc
= rgwlib
.get_fe()->execute_req(&req
);
1225 return req
.valid
&& req
.has_children
;
/* Stream a readdir_offset for logging.
 *
 * The offset is a variant accessed here as either uint64_t* or
 * const char*; the pointer form of get<> is used to test which
 * alternative is held before the value is extracted.
 * NOTE(review): what is written for the uint64_t* alternative is not
 * visible in this view. */
1231 std::ostream
& operator<<(std::ostream
&os
,
1232 RGWFileHandle::readdir_offset
const &offset
)
1235 if (unlikely(!! get
<uint64_t*>(&offset
))) {
1236 uint64_t* ioff
= get
<uint64_t*>(offset
);
/* const char* alternative: streamed directly */
1240 os
<< get
<const char*>(offset
);
/* Enumerate this directory, invoking `rcb` (with `cb_arg`) through one
 * of two requests: RGWListBucketsRequest or RGWReaddirRequest.
 *
 * `offset` is either a name marker (const char*) or a pointer to a
 * numeric offset; a numeric offset of 0 is recorded in initial_off.
 * After each request, under the handle mutex, inc_nlink() is called
 * with the request's d_count.  Finally a READDIR event carrying this
 * handle's key and state.atime is pushed to fs->state under
 * fs->state.mtx.
 *
 * NOTE(review): the branch choosing between the two requests, the use
 * of `eof`/`flags`, and the return value are not visible in this
 * listing. */
1244 int RGWFileHandle::readdir(rgw_readdir_cb rcb
, void *cb_arg
,
1245 readdir_offset offset
,
1246 bool *eof
, uint32_t flags
)
1248 using event
= RGWLibFS::event
;
1251 struct timespec now
;
1252 CephContext
* cct
= fs
->get_context();
1254 lsubdout(cct
, rgw
, 10)
1255 << __func__
<< " readdir called on "
1259 directory
* d
= get
<directory
>(&variant_type
);
/* the clock is sampled before the handle mutex is taken */
1261 (void) clock_gettime(CLOCK_MONOTONIC_COARSE
, &now
); /* !LOCKED */
1262 lock_guard
guard(mtx
);
1263 d
->last_readdir
= now
;
/* name-marker offsets are the expected (likely) case */
1269 if (likely(!! get
<const char*>(&offset
))) {
1270 mk
= const_cast<char*>(get
<const char*>(offset
));
1273 initial_off
= (*get
<uint64_t*>(offset
) == 0);
1277 RGWListBucketsRequest
req(cct
, fs
->get_user(), this, rcb
, cb_arg
,
1279 rc
= rgwlib
.get_fe()->execute_req(&req
);
1281 (void) clock_gettime(CLOCK_MONOTONIC_COARSE
, &now
); /* !LOCKED */
1282 lock_guard
guard(mtx
);
1286 inc_nlink(req
.d_count
);
1290 RGWReaddirRequest
req(cct
, fs
->get_user(), this, rcb
, cb_arg
, offset
);
1291 rc
= rgwlib
.get_fe()->execute_req(&req
);
1293 (void) clock_gettime(CLOCK_MONOTONIC_COARSE
, &now
); /* !LOCKED */
1294 lock_guard
guard(mtx
);
1298 inc_nlink(req
.d_count
);
/* publish a READDIR event under the filesystem state mutex */
1303 event
ev(event::type::READDIR
, get_key(), state
.atime
);
1304 lock_guard
sguard(fs
->state
.mtx
);
1305 fs
->state
.push_event(ev
);
1307 lsubdout(fs
->get_context(), rgw
, 15)
1309 << " final link count=" << state
.nlink
1313 } /* RGWFileHandle::readdir */
/* Append `len` bytes from `buffer` at offset `off` to the write
 * transaction of this file handle, starting one if none is open.
 *
 * Runs under the handle mutex.  Visible behaviour:
 *  - a write on a deleted object is logged and any open transaction
 *    is destroyed;
 *  - with no transaction open, a non-zero initial offset is logged as
 *    unsupported (no partial writes); otherwise an RGWWriteRequest is
 *    created and started, and destroyed again if the start fails;
 *  - for stateless opens a completion timer is armed when the
 *    transaction starts and pushed back after each successful chunk;
 *  - a chunk that begins before the current write position but reaches
 *    it is trimmed so only the new tail is written (`overlap`);
 *  - a failed continuation destroys the transaction;
 *  - *bytes_written is len + overlap on success and 0 otherwise.
 *
 * The timer is armed and bumped with the same interval,
 * RGWLibFS::write_completion_interval_s, so changing that value
 * affects both.
 *
 * NOTE(review): several statements (guards, returns, the size update)
 * are not visible in this listing. */
1315 int RGWFileHandle::write(uint64_t off
, size_t len
, size_t *bytes_written
,
1319 using WriteCompletion
= RGWLibFS::WriteCompletion
;
1321 lock_guard
guard(mtx
);
1325 file
* f
= get
<file
>(&variant_type
);
1330 lsubdout(fs
->get_context(), rgw
, 5)
1332 << " write attempted on deleted object "
1333 << this->object_name()
1335 /* zap write transaction, if any */
1337 delete f
->write_req
;
1338 f
->write_req
= nullptr;
1343 if (! f
->write_req
) {
1344 /* guard--we do not support (e.g., COW-backed) partial writes */
1346 lsubdout(fs
->get_context(), rgw
, 5)
1348 << " " << object_name()
1349 << " non-0 initial write position " << off
1350 << " (mounting with -o sync required)"
1356 std::string object_name
= relative_object_name();
1358 new RGWWriteRequest(fs
->get_context(), fs
->get_user(), this,
1359 bucket_name(), object_name
);
1360 rc
= rgwlib
.get_fe()->start_req(f
->write_req
);
1362 lsubdout(fs
->get_context(), rgw
, 5)
1364 << this->object_name()
1365 << " write start failed " << off
1366 << " (" << rc
<< ")"
1368 /* zap failed write transaction */
1369 delete f
->write_req
;
1370 f
->write_req
= nullptr;
1373 if (stateless_open()) {
1374 /* start write timer */
1375 f
->write_req
->timer_id
=
1376 RGWLibFS::write_timer
.add_event(
1377 std::chrono::seconds(RGWLibFS::write_completion_interval_s
),
1378 WriteCompletion(*this));
/* chunk starts before the current write position but reaches it:
 * skip the already-written prefix */
1384 if ((static_cast<off_t
>(off
) < f
->write_req
->real_ofs
) &&
1385 ((f
->write_req
->real_ofs
- off
) <= len
)) {
1386 overlap
= f
->write_req
->real_ofs
- off
;
1387 off
= f
->write_req
->real_ofs
;
1388 buffer
= static_cast<char*>(buffer
) + overlap
;
1396 buffer::create_static(len
, static_cast<char*>(buffer
)));
1399 buffer::copy(static_cast<char*>(buffer
), len
));
1402 f
->write_req
->put_data(off
, bl
);
1403 rc
= f
->write_req
->exec_continue();
1406 size_t min_size
= off
+ len
;
1407 if (min_size
> get_size())
1409 if (stateless_open()) {
1410 /* bump write timer by the same interval it was armed with */
1411 RGWLibFS::write_timer
.adjust_event(
1412 f
->write_req
->timer_id
, std::chrono::seconds(RGWLibFS::write_completion_interval_s));
1415 /* continuation failed (e.g., non-contiguous write position) */
1416 lsubdout(fs
->get_context(), rgw
, 5)
1419 << " failed write at position " << off
1420 << " (fails write transaction) "
1422 /* zap failed write transaction */
1423 delete f
->write_req
;
1424 f
->write_req
= nullptr;
1428 *bytes_written
= (rc
== 0) ? (len
+ overlap
) : 0;
1430 } /* RGWFileHandle::write */
/* Complete the pending write transaction on this handle, if any.
 * The mutex guard is constructed deferred, and FLAG_LOCKED in `flags`
 * is tested right after (presumably to lock only when the caller does
 * not already hold the mutex -- the lock call itself is not visible).
 * When the handle is a file with a write_req, the request is finished
 * through the rgwlib frontend, rc is taken from the request's
 * get_ret(), and the request is deleted and cleared.
 * NOTE(review): the return statement is not visible in this listing. */
1432 int RGWFileHandle::write_finish(uint32_t flags
)
1434 unique_lock guard
{mtx
, std::defer_lock
};
1437 if (! (flags
& FLAG_LOCKED
)) {
1441 file
* f
= get
<file
>(&variant_type
);
1442 if (f
&& (f
->write_req
)) {
1443 lsubdout(fs
->get_context(), rgw
, 10)
1445 << " finishing write trans on " << object_name()
1447 rc
= rgwlib
.get_fe()->finish_req(f
->write_req
);
1449 rc
= f
->write_req
->get_ret();
1451 delete f
->write_req
;
1452 f
->write_req
= nullptr;
1456 } /* RGWFileHandle::write_finish */
/* Close this handle: under the handle mutex, finish any pending write
 * transaction (write_finish is told the lock is already held via
 * FLAG_LOCKED) and clear FLAG_OPEN and FLAG_STATELESS_OPEN.
 * NOTE(review): the return statement is not visible in this listing. */
1458 int RGWFileHandle::close()
1460 lock_guard
guard(mtx
);
1462 int rc
= write_finish(FLAG_LOCKED
);
1464 flags
&= ~FLAG_OPEN
;
1465 flags
&= ~FLAG_STATELESS_OPEN
;
1468 } /* RGWFileHandle::close */
/* Destructor of the per-file state object.
 * NOTE(review): the body is not visible in this listing. */
1470 RGWFileHandle::file::~file()
/* Reset cached directory enumeration state: the directory's
 * last_marker is replaced with a default-constructed rgw_obj_key.
 * NOTE(review): any other fields reset here, and the null check on
 * `d`, are not visible in this listing. */
1475 void RGWFileHandle::clear_state()
1477 directory
* d
= get
<directory
>(&variant_type
);
1480 d
->last_marker
= rgw_obj_key
{};
/* Fast-forward this handle's mtime (and, per the comment below, ctime).
 * The mutex guard is constructed deferred and FLAG_LOCKED in `flags`
 * is tested right after (the lock call itself is not visible).  The
 * stored mtime is compared against real_clock::now(), and
 * rgw_nfs_namespace_expire_secs is read from the configuration.
 * NOTE(review): how the expiry seconds are combined with cmptime and
 * the actual time update are not visible in this listing. */
1484 void RGWFileHandle::advance_mtime(uint32_t flags
) {
1485 /* intended for use on directories, fast-forward mtime so as to
1486 * ensure a new, higher value for the change attribute */
1487 unique_lock
uniq(mtx
, std::defer_lock
);
1488 if (likely(! (flags
& RGWFileHandle::FLAG_LOCKED
))) {
1492 /* advance mtime only if stored mtime is older than the
1493 * configured namespace expiration */
1494 auto now
= real_clock::now();
1495 auto cmptime
= state
.mtime
;
1497 fs
->get_context()->_conf
->rgw_nfs_namespace_expire_secs
;
1498 if (cmptime
< real_clock::to_timespec(now
)) {
1499 /* sets ctime as well as mtime, to avoid masking updates should
1500 * ctime inexplicably hold a higher value */
/* Notify the registered invalidate callback, if one is set on the
 * owning RGWLibFS, passing the registered argument and this handle's
 * hash key (get_key().fh_hk). */
1505 void RGWFileHandle::invalidate() {
1506 RGWLibFS
*fs
= get_fs();
1507 if (fs
->invalidate_cb
) {
1508 fs
->invalidate_cb(fs
->invalidate_arg
, get_key().fh_hk
);
/* First phase of a streaming write: set up the put-object pipeline.
 *
 * Visible steps: look up the zone's compression type for the bucket's
 * placement rule; assert that neither a DLO manifest nor SLO info is
 * present; bump the l_rgw_put counter; log when the object name is
 * empty; fetch request params and system versioning params; create
 * the aio throttle from rgw_put_obj_min_window_size; for versioned
 * buckets pick or generate an object instance; construct and
 * prepare() the processor; and, when the compression type is not
 * "none", load a compressor plugin and insert a compressing filter in
 * front of the processor.
 *
 * NOTE(review): error checks on op_ret, the failure path when the
 * plugin cannot be loaded, and the return value are not visible in
 * this listing. */
1512 int RGWWriteRequest::exec_start() {
1513 struct req_state
* s
= get_state();
1515 auto compression_type
=
1516 get_store()->svc
.zone
->get_zone_params().get_compression_type(
1517 s
->bucket_info
.placement_rule
);
1519 /* not obviously supportable */
1520 ceph_assert(! dlo_manifest
);
1521 ceph_assert(! slo_info
);
1523 perfcounter
->inc(l_rgw_put
);
1525 rgw_obj obj
{s
->bucket
, s
->object
};
1527 if (s
->object
.empty()) {
1528 ldout(s
->cct
, 0) << __func__
<< " called on empty object" << dendl
;
1532 op_ret
= get_params();
1536 op_ret
= get_system_versioning_params(s
, &olh_epoch
, &version_id
);
1541 /* user-supplied MD5 check skipped (not supplied) */
1542 /* early quota check skipped--we don't have size yet */
1543 /* skipping user-supplied etag--we might have one in future, but
1544 * like data it and other attrs would arrive after open */
1546 aio
.emplace(s
->cct
->_conf
->rgw_put_obj_min_window_size
);
/* versioned bucket: use the supplied instance, else generate one */
1548 if (s
->bucket_info
.versioning_enabled()) {
1549 if (!version_id
.empty()) {
1550 obj
.key
.set_instance(version_id
);
1552 get_store()->gen_rand_obj_instance_name(&obj
);
1553 version_id
= obj
.key
.instance
;
1556 processor
.emplace(&*aio
, get_store(), s
->bucket_info
,
1558 s
->bucket_owner
.get_id(),
1559 *static_cast<RGWObjectCtx
*>(s
->obj_ctx
),
1560 obj
, olh_epoch
, s
->req_id
);
1562 op_ret
= processor
->prepare();
1564 ldout(s
->cct
, 20) << "processor->prepare() returned ret=" << op_ret
/* data flows through `filter`; start with the processor itself */
1568 filter
= &*processor
;
1569 if (compression_type
!= "none") {
1570 plugin
= Compressor::create(s
->cct
, compression_type
);
1572 ldout(s
->cct
, 1) << "Cannot load plugin for rgw_compression_type "
1573 << compression_type
<< dendl
;
1575 compressor
.emplace(s
->cct
, plugin
, filter
);
1576 filter
= &*compressor
;
/* Middle phase of a streaming write: push the staged `data` chunk
 * through the filter pipeline.
 *
 * Visible steps: a contiguity guard (its log text reports chunks
 * arriving in the wrong order), a quota check against real_ofs, an
 * update of the running hash over the chunk, filter->process() at
 * `ofs`, and accumulation of the chunk length into bytes_written.
 *
 * NOTE(review): the guard condition, the op_ret checks and the return
 * value are not visible in this listing. */
1584 int RGWWriteRequest::exec_continue()
1586 struct req_state
* s
= get_state();
1589 /* check guards (e.g., contig write) */
1592 << " chunks arrived in wrong order"
1593 << " (mounting with -o sync required)"
1598 op_ret
= get_store()->check_quota(s
->bucket_owner
.get_id(), s
->bucket
,
1599 user_quota
, bucket_quota
, real_ofs
, true);
1600 /* max_size exceed */
/* capture the length before `data` is moved into the filter */
1604 size_t len
= data
.length();
1608 hash
.Update((const unsigned char *)data
.c_str(), data
.length());
1609 op_ret
= filter
->process(std::move(data
), ofs
);
1613 bytes_written
+= len
;
1617 } /* exec_continue */
/* Final phase of a streaming write: flush the pipeline, build the
 * object attributes and complete the put.
 *
 * Visible steps: snapshot the handle's size/ctime/mtime so they can be
 * restored; record bytes_written as the object size and bump
 * l_rgw_put_b; flush the filters with an empty buffer; re-check quota
 * and bucket shards; store compression info when the compressor
 * actually compressed; hex-encode the MD5 and store it as the ETag;
 * encode the ACL policy; stamp the handle with the new mtime/ctime and
 * size and store its encoded unix key and attrs; copy generic attrs
 * and request metadata; encode delete_at; call processor->complete();
 * record the put latency.
 *
 * NOTE(review): the op_ret checks, the condition guarding the
 * attribute revert, and the return value are not visible in this
 * listing. */
1619 int RGWWriteRequest::exec_finish()
1621 buffer::list bl
, aclbl
, ux_key
, ux_attrs
;
1622 map
<string
, string
>::iterator iter
;
1623 char calc_md5
[CEPH_CRYPTO_MD5_DIGESTSIZE
* 2 + 1];
1624 unsigned char m
[CEPH_CRYPTO_MD5_DIGESTSIZE
];
1625 struct req_state
* s
= get_state();
/* previous handle attributes, kept for the revert path below */
1627 size_t osize
= rgw_fh
->get_size();
1628 struct timespec octime
= rgw_fh
->get_ctime();
1629 struct timespec omtime
= rgw_fh
->get_mtime();
1630 real_time appx_t
= real_clock::now();
1632 s
->obj_size
= bytes_written
;
1633 perfcounter
->inc(l_rgw_put_b
, s
->obj_size
);
1635 // flush data in filters
1636 op_ret
= filter
->process({}, s
->obj_size
);
1641 op_ret
= get_store()->check_quota(s
->bucket_owner
.get_id(), s
->bucket
,
1642 user_quota
, bucket_quota
, s
->obj_size
, true);
1643 /* max_size exceed */
1648 op_ret
= get_store()->check_bucket_shards(s
->bucket_info
, s
->bucket
,
1656 if (compressor
&& compressor
->is_compressed()) {
1658 RGWCompressionInfo cs_info
;
1659 cs_info
.compression_type
= plugin
->get_type_name();
1660 cs_info
.orig_size
= s
->obj_size
;
1661 cs_info
.blocks
= std::move(compressor
->get_compression_blocks());
1662 encode(cs_info
, tmp
);
1663 attrs
[RGW_ATTR_COMPRESSION
] = tmp
;
1664 ldout(s
->cct
, 20) << "storing " << RGW_ATTR_COMPRESSION
1665 << " with type=" << cs_info
.compression_type
1666 << ", orig_size=" << cs_info
.orig_size
1667 << ", blocks=" << cs_info
.blocks
.size() << dendl
;
1670 buf_to_hex(m
, CEPH_CRYPTO_MD5_DIGESTSIZE
, calc_md5
);
/* the ETag attribute is stored including its terminating NUL */
1673 bl
.append(etag
.c_str(), etag
.size() + 1);
1674 emplace_attr(RGW_ATTR_ETAG
, std::move(bl
));
1676 policy
.encode(aclbl
);
1677 emplace_attr(RGW_ATTR_ACL
, std::move(aclbl
));
/* update the handle first so the encoded unix attrs carry new values */
1680 rgw_fh
->set_mtime(real_clock::to_timespec(appx_t
));
1681 rgw_fh
->set_ctime(real_clock::to_timespec(appx_t
));
1682 rgw_fh
->set_size(bytes_written
);
1683 rgw_fh
->encode_attrs(ux_key
, ux_attrs
);
1685 emplace_attr(RGW_ATTR_UNIX_KEY1
, std::move(ux_key
));
1686 emplace_attr(RGW_ATTR_UNIX1
, std::move(ux_attrs
));
1688 for (iter
= s
->generic_attrs
.begin(); iter
!= s
->generic_attrs
.end();
1690 buffer::list
& attrbl
= attrs
[iter
->first
];
1691 const string
& val
= iter
->second
;
1692 attrbl
.append(val
.c_str(), val
.size() + 1);
1695 op_ret
= rgw_get_request_metadata(s
->cct
, s
->info
, attrs
);
1699 encode_delete_at_attr(delete_at
, attrs
);
1701 /* Add a custom metadata to expose the information whether an object
1702 * is an SLO or not. Appending the attribute must be performed AFTER
1703 * processing any input from user in order to prohibit overwriting. */
1704 if (unlikely(!! slo_info
)) {
1705 buffer::list slo_userindicator_bl
;
1707 encode("True", slo_userindicator_bl
);
1708 emplace_attr(RGW_ATTR_SLO_UINDICATOR
, std::move(slo_userindicator_bl
));
1711 op_ret
= processor
->complete(s
->obj_size
, etag
, &mtime
, real_time(), attrs
,
1712 (delete_at
? *delete_at
: real_time()),
1713 if_match
, if_nomatch
, nullptr, nullptr, nullptr);
1715 /* revert attr updates */
1716 rgw_fh
->set_mtime(omtime
);
1717 rgw_fh
->set_ctime(octime
);
1718 rgw_fh
->set_size(osize
);
1722 perfcounter
->tinc(l_rgw_put_lat
, s
->time_elapsed());
1726 } /* namespace rgw */
/* Report the library version through the three out-parameters, using
 * the LIBRGW_FILE_VER_{MAJOR,MINOR,EXTRA} constants.
 * NOTE(review): whether each pointer is null-checked before the store
 * is not visible in this listing. */
1731 void rgwfile_version(int *major
, int *minor
, int *extra
)
1734 *major
= LIBRGW_FILE_VER_MAJOR
;
1736 *minor
= LIBRGW_FILE_VER_MINOR
;
1738 *extra
= LIBRGW_FILE_VER_EXTRA
;
1742 attach rgw namespace
/* Create an RGWLibFS for the given credentials, authorize it against
 * the rgwlib store, register it with the frontend process for shared
 * gc, and obtain its struct rgw_fs.
 * NOTE(review): the remaining constructor arguments, the handling of
 * an authorize() failure, the store into *rgw_fs and the return value
 * are not visible in this listing. */
1744 int rgw_mount(librgw_t rgw
, const char *uid
, const char *acc_key
,
1745 const char *sec_key
, struct rgw_fs
**rgw_fs
,
1750 /* stash access data for "mount" */
1751 RGWLibFS
* new_fs
= new RGWLibFS(static_cast<CephContext
*>(rgw
), uid
, acc_key
,
1753 ceph_assert(new_fs
);
1755 rc
= new_fs
->authorize(rgwlib
.get_store());
1761 /* register fs for shared gc */
1762 rgwlib
.get_fe()->get_process()->register_fs(new_fs
);
1764 struct rgw_fs
*fs
= new_fs
->get_fs();
1767 /* XXX we no longer assume "/" is unique, but we aren't tracking the
/* Variant of rgw_mount taking an additional `root` argument.  The
 * visible steps match rgw_mount: construct an RGWLibFS, authorize it,
 * register it for shared gc, and obtain its struct rgw_fs.
 * NOTE(review): how `root` is passed on, the handling of an
 * authorize() failure, and the return value are not visible in this
 * listing. */
1775 int rgw_mount2(librgw_t rgw
, const char *uid
, const char *acc_key
,
1776 const char *sec_key
, const char *root
, struct rgw_fs
**rgw_fs
,
1781 /* stash access data for "mount" */
1782 RGWLibFS
* new_fs
= new RGWLibFS(static_cast<CephContext
*>(rgw
), uid
, acc_key
,
1784 ceph_assert(new_fs
);
1786 rc
= new_fs
->authorize(rgwlib
.get_store());
1792 /* register fs for shared gc */
1793 rgwlib
.get_fe()->get_process()->register_fs(new_fs
);
1795 struct rgw_fs
*fs
= new_fs
->get_fs();
1798 /* XXX we no longer assume "/" is unique, but we aren't tracking the
1807 register invalidate callbacks
/* C API shim: recover the RGWLibFS from rgw_fs->fs_private and forward
 * cb/arg/flags to its register_invalidate(), returning that result. */
1809 int rgw_register_invalidate(struct rgw_fs
*rgw_fs
, rgw_fh_callback_t cb
,
1810 void *arg
, uint32_t flags
)
1813 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
1814 return fs
->register_invalidate(cb
, arg
, flags
);
1818 detach rgw namespace
/* C API shim for unmounting: recovers the RGWLibFS from
 * rgw_fs->fs_private.
 * NOTE(review): the teardown calls and return value are not visible in
 * this listing. */
1820 int rgw_umount(struct rgw_fs
*rgw_fs
, uint32_t flags
)
1822 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
1828 get filesystem attributes
/* Fill *vfs_st with filesystem statistics derived from cluster-wide
 * usage, obtained with an RGWGetClusterStatReq.
 *
 * Sizes are reported in 1 MiB blocks: the cluster figures are in KiB
 * (stats.kb / stats.kb_avail), so they are shifted right by
 * (CEPH_BLOCK_SHIFT - 10).  f_files is the cluster object count,
 * f_ffree is -1, both f_fsid words carry fs->get_fsid(), and
 * f_namemax is 4096.
 *
 * The error log puts ": " between the message and the strerror text
 * so the two do not run together.
 *
 * NOTE(review): the rc check around the error log and the return
 * value are not visible in this listing. */
1830 int rgw_statfs(struct rgw_fs
*rgw_fs
,
1831 struct rgw_file_handle
*parent_fh
,
1832 struct rgw_statvfs
*vfs_st
, uint32_t flags
)
1834 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
1835 struct rados_cluster_stat_t stats
;
1837 RGWGetClusterStatReq
req(fs
->get_context(), fs
->get_user(), stats
);
1838 int rc
= rgwlib
.get_fe()->execute_req(&req
);
1840 lderr(fs
->get_context()) << "ERROR: getting total cluster usage: "
1841 << cpp_strerror(-rc
) << dendl
;
1845 //Set block size to 1M.
1846 constexpr uint32_t CEPH_BLOCK_SHIFT
= 20;
1847 vfs_st
->f_bsize
= 1 << CEPH_BLOCK_SHIFT
;
1848 vfs_st
->f_frsize
= 1 << CEPH_BLOCK_SHIFT
;
1849 vfs_st
->f_blocks
= stats
.kb
>> (CEPH_BLOCK_SHIFT
- 10);
1850 vfs_st
->f_bfree
= stats
.kb_avail
>> (CEPH_BLOCK_SHIFT
- 10);
1851 vfs_st
->f_bavail
= stats
.kb_avail
>> (CEPH_BLOCK_SHIFT
- 10);
1852 vfs_st
->f_files
= stats
.num_objects
;
1853 vfs_st
->f_ffree
= -1;
1854 vfs_st
->f_fsid
[0] = fs
->get_fsid();
1855 vfs_st
->f_fsid
[1] = fs
->get_fsid();
1857 vfs_st
->f_namemax
= 4096;
1862 generic create -- create an empty regular file
/* C API shim: create `name` under parent_fh via RGWLibFS::create() and
 * hand back the new handle through *fh.
 * A guard tests parent->is_root() and parent->is_file() before the
 * create call.
 * NOTE(review): the rest of the guard, what it returns, the null check
 * on the new handle, and the return value are not visible in this
 * listing. */
1864 int rgw_create(struct rgw_fs
*rgw_fs
, struct rgw_file_handle
*parent_fh
,
1865 const char *name
, struct stat
*st
, uint32_t mask
,
1866 struct rgw_file_handle
**fh
, uint32_t posix_flags
,
1871 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
1872 RGWFileHandle
* parent
= get_rgwfh(parent_fh
);
1875 (parent
->is_root()) ||
1876 (parent
->is_file())) {
1881 MkObjResult fhr
= fs
->create(parent
, name
, st
, mask
, flags
);
1882 RGWFileHandle
*nfh
= get
<0>(fhr
); // nullptr if !success
1885 *fh
= nfh
->get_fh();
1891 create a symbolic link
/* C API shim: create symbolic link `name` -> `link_path` under
 * parent_fh via RGWLibFS::symlink() and hand back the new handle
 * through *fh.  A guard tests parent->is_root() and parent->is_file()
 * before the call.
 * NOTE(review): the rest of the guard, what it returns, the null check
 * on the new handle, and the return value are not visible in this
 * listing. */
1893 int rgw_symlink(struct rgw_fs
*rgw_fs
, struct rgw_file_handle
*parent_fh
,
1894 const char *name
, const char *link_path
, struct stat
*st
, uint32_t mask
,
1895 struct rgw_file_handle
**fh
, uint32_t posix_flags
,
1900 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
1901 RGWFileHandle
* parent
= get_rgwfh(parent_fh
);
1904 (parent
->is_root()) ||
1905 (parent
->is_file())) {
1910 MkObjResult fhr
= fs
->symlink(parent
, name
, link_path
, st
, mask
, flags
);
1911 RGWFileHandle
*nfh
= get
<0>(fhr
); // nullptr if !success
1914 *fh
= nfh
->get_fh();
1920 create a new directory
/* C API shim: create directory `name` under parent_fh via
 * RGWLibFS::mkdir() and hand back the new handle through *fh.
 * NOTE(review): argument validation, the null check on the new handle
 * and the return value are not visible in this listing. */
1922 int rgw_mkdir(struct rgw_fs
*rgw_fs
,
1923 struct rgw_file_handle
*parent_fh
,
1924 const char *name
, struct stat
*st
, uint32_t mask
,
1925 struct rgw_file_handle
**fh
, uint32_t flags
)
1929 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
1930 RGWFileHandle
* parent
= get_rgwfh(parent_fh
);
1937 MkObjResult fhr
= fs
->mkdir(parent
, name
, st
, mask
, flags
);
1938 RGWFileHandle
*nfh
= get
<0>(fhr
); // nullptr if !success
1941 *fh
= nfh
->get_fh();
/* C API shim: rename src_name (under handle `src`) to dst_name (under
 * handle `dst`) by forwarding to RGWLibFS::rename() and returning its
 * result. */
1949 int rgw_rename(struct rgw_fs
*rgw_fs
,
1950 struct rgw_file_handle
*src
, const char* src_name
,
1951 struct rgw_file_handle
*dst
, const char* dst_name
,
1954 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
1956 RGWFileHandle
* src_fh
= get_rgwfh(src
);
1957 RGWFileHandle
* dst_fh
= get_rgwfh(dst
);
1959 return fs
->rename(src_fh
, dst_fh
, src_name
, dst_name
);
1963 remove file or directory
/* C API shim: remove `name` under parent_fh by forwarding to
 * RGWLibFS::unlink() and returning its result.  `flags` is not passed
 * on in the visible call. */
1965 int rgw_unlink(struct rgw_fs
*rgw_fs
, struct rgw_file_handle
*parent_fh
,
1966 const char *name
, uint32_t flags
)
1968 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
1969 RGWFileHandle
* parent
= get_rgwfh(parent_fh
);
1971 return fs
->unlink(parent
, name
);
1975 lookup object by name (POSIX style)
/* C API: look up `path` relative to parent_fh and produce a handle.
 *
 * Visible structure:
 *  - a guard involving !parent->is_dir();
 *  - when the parent is the root: ".." and "/" are treated as a parent
 *    lookup, anything else goes through stat_bucket();
 *  - otherwise: ".." is special-cased; else the handle type is derived
 *    from `flags`, and the lookup uses fake_leaf() (readdir-callback
 *    lookups with rgw_nfs_s3_fast_attrs), a cached directory via
 *    lookup_fh() (directory type with fast attrs), or stat_leaf().
 *    RGW_LOOKUP_FLAG_RCB selects FLAG_NONE for the stat, otherwise
 *    FLAG_EXACT_MATCH.  When stat_leaf() finds nothing,
 *    RGW_LOOKUP_FLAG_CREATE is tested and lookup_fh() is called with
 *    FLAG_CREATE;
 *  - the resulting RGWFileHandle is converted with get_fh().
 *
 * NOTE(review): the branch nesting, the error returns, the use of
 * st/mask outside fake_leaf, and the store into *fh are not visible in
 * this listing. */
1977 int rgw_lookup(struct rgw_fs
*rgw_fs
,
1978 struct rgw_file_handle
*parent_fh
, const char* path
,
1979 struct rgw_file_handle
**fh
,
1980 struct stat
*st
, uint32_t mask
, uint32_t flags
)
1982 //CephContext* cct = static_cast<CephContext*>(rgw_fs->rgw);
1983 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
1985 RGWFileHandle
* parent
= get_rgwfh(parent_fh
);
1987 (! parent
->is_dir())) {
1992 RGWFileHandle
* rgw_fh
;
1995 if (parent
->is_root()) {
1996 /* special: parent lookup--note lack of ref()! */
1997 if (unlikely((strcmp(path
, "..") == 0) ||
1998 (strcmp(path
, "/") == 0))) {
2001 RGWLibFS::BucketStats bstat
;
2002 fhr
= fs
->stat_bucket(parent
, path
, bstat
, RGWFileHandle::FLAG_NONE
);
2003 rgw_fh
= get
<0>(fhr
);
2008 /* special: after readdir--note extra ref()! */
2009 if (unlikely((strcmp(path
, "..") == 0))) {
2011 lsubdout(fs
->get_context(), rgw
, 17)
2012 << __func__
<< " BANG"<< *rgw_fh
2016 enum rgw_fh_type fh_type
= fh_type_of(flags
);
2018 uint32_t sl_flags
= (flags
& RGW_LOOKUP_FLAG_RCB
)
2019 ? RGWFileHandle::FLAG_NONE
2020 : RGWFileHandle::FLAG_EXACT_MATCH
;
2022 bool fast_attrs
= fs
->get_context()->_conf
->rgw_nfs_s3_fast_attrs
;
2024 if ((flags
& RGW_LOOKUP_FLAG_RCB
) && fast_attrs
) {
2025 /* FAKE STAT--this should mean, interpolate special
2026 * owner, group, and perms masks */
2027 fhr
= fs
->fake_leaf(parent
, path
, fh_type
, st
, mask
, sl_flags
);
2029 if ((fh_type
== RGW_FS_TYPE_DIRECTORY
) && fast_attrs
) {
2030 /* trust cached dir, if present */
2031 fhr
= fs
->lookup_fh(parent
, path
, RGWFileHandle::FLAG_DIRECTORY
);
2033 rgw_fh
= get
<0>(fhr
);
2037 fhr
= fs
->stat_leaf(parent
, path
, fh_type
, sl_flags
);
2039 if (! get
<0>(fhr
)) {
2040 if (! (flags
& RGW_LOOKUP_FLAG_CREATE
))
2043 fhr
= fs
->lookup_fh(parent
, path
, RGWFileHandle::FLAG_CREATE
);
2045 rgw_fh
= get
<0>(fhr
);
2050 struct rgw_file_handle
*rfh
= rgw_fh
->get_fh();
2057 lookup object by handle (NFS style)
/* C API shim: resolve a handle by its hash key via
 * RGWLibFS::lookup_handle(*fh_hk) and convert it with get_fh().
 * NOTE(review): the not-found path, the store into *fh and the return
 * value are not visible in this listing. */
2059 int rgw_lookup_handle(struct rgw_fs
*rgw_fs
, struct rgw_fh_hk
*fh_hk
,
2060 struct rgw_file_handle
**fh
, uint32_t flags
)
2062 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2064 RGWFileHandle
* rgw_fh
= fs
->lookup_handle(*fh_hk
);
2070 struct rgw_file_handle
*rfh
= rgw_fh
->get_fh();
2077 * release file handle
/* C API shim for releasing a file handle.  The visible part resolves
 * the RGWLibFS and RGWFileHandle and logs the handle at level 17.
 * NOTE(review): the actual release call and the return value are not
 * visible in this listing. */
2079 int rgw_fh_rele(struct rgw_fs
*rgw_fs
, struct rgw_file_handle
*fh
,
2082 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2083 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2085 lsubdout(fs
->get_context(), rgw
, 17)
2086 << __func__
<< " " << *rgw_fh
2094 get unix attributes for object
/* C API shim: fill *st for the given handle by forwarding to
 * RGWLibFS::getattr() and returning its result.  `flags` is not passed
 * on in the visible call. */
2096 int rgw_getattr(struct rgw_fs
*rgw_fs
,
2097 struct rgw_file_handle
*fh
, struct stat
*st
, uint32_t flags
)
2099 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2100 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2102 return fs
->getattr(rgw_fh
, st
);
2106 set unix attributes for object
/* C API shim: apply the fields of *st selected by `mask` to the given
 * handle by forwarding to RGWLibFS::setattr() and returning its
 * result. */
2108 int rgw_setattr(struct rgw_fs
*rgw_fs
,
2109 struct rgw_file_handle
*fh
, struct stat
*st
,
2110 uint32_t mask
, uint32_t flags
)
2112 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2113 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2115 return fs
->setattr(rgw_fh
, st
, mask
, flags
);
/* C API entry point for truncating a file to `size`.
 * NOTE(review): the body is not visible in this listing. */
2121 int rgw_truncate(struct rgw_fs
*rgw_fs
,
2122 struct rgw_file_handle
*fh
, uint64_t size
, uint32_t flags
)
/* C API shim: open a file handle by forwarding `flags` to
 * RGWFileHandle::open().  Non-file handles are tested for first.
 * `posix_flags` is not used in the visible code.
 * NOTE(review): the value returned for non-file handles is not visible
 * in this listing. */
2130 int rgw_open(struct rgw_fs
*rgw_fs
,
2131 struct rgw_file_handle
*fh
, uint32_t posix_flags
, uint32_t flags
)
2133 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2136 * need to track specific opens--at least read opens and
2137 * a write open; we need to know when a write open is returned,
2138 * that closes a write transaction
2140 * for now, we will support single-open only, it's preferable to
2141 * anything we can otherwise do without access to the NFS state
2143 if (! rgw_fh
->is_file())
2146 return rgw_fh
->open(flags
);
/* C API shim: close a file handle via RGWFileHandle::close(), then
 * test RGW_CLOSE_FLAG_RELE in `flags`.
 * NOTE(review): the action taken when RGW_CLOSE_FLAG_RELE is set and
 * the return value are not visible in this listing. */
2152 int rgw_close(struct rgw_fs
*rgw_fs
,
2153 struct rgw_file_handle
*fh
, uint32_t flags
)
2155 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2156 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2157 int rc
= rgw_fh
->close(/* XXX */);
2159 if (flags
& RGW_CLOSE_FLAG_RELE
)
/* C API: enumerate a directory starting from numeric *offset.
 * When *offset is 0 and RGW_READDIR_FLAG_DOTDOT is set, "." and ".."
 * are delivered to `rcb` first with offsets 1 and 2; the enumeration
 * itself is RGWFileHandle::readdir() with the offset pointer.
 * NOTE(review): argument validation and the return value are not
 * visible in this listing. */
2165 int rgw_readdir(struct rgw_fs
*rgw_fs
,
2166 struct rgw_file_handle
*parent_fh
, uint64_t *offset
,
2167 rgw_readdir_cb rcb
, void *cb_arg
, bool *eof
,
2170 RGWFileHandle
* parent
= get_rgwfh(parent_fh
);
2176 lsubdout(parent
->get_fs()->get_context(), rgw
, 15)
2178 << " offset=" << *offset
2181 if ((*offset
== 0) &&
2182 (flags
& RGW_READDIR_FLAG_DOTDOT
)) {
2183 /* send '.' and '..' with their NFS-defined offsets */
2184 rcb(".", cb_arg
, 1, nullptr, 0, RGW_LOOKUP_FLAG_DIR
);
2185 rcb("..", cb_arg
, 2, nullptr, 0, RGW_LOOKUP_FLAG_DIR
);
2188 int rc
= parent
->readdir(rcb
, cb_arg
, offset
, eof
, flags
);
2192 /* enumeration continuing from name */
/* C API: enumerate a directory continuing from entry `name` instead of
 * a numeric offset.  `name` may be null (it is logged as "(nil)").
 * With RGW_READDIR_FLAG_DOTDOT set, "." and ".." are delivered to
 * `rcb` with offsets 1 and 2; the enumeration itself is
 * RGWFileHandle::readdir() with the name marker.
 * NOTE(review): the first half of the dot-entry condition, argument
 * validation and the return value are not visible in this listing. */
2193 int rgw_readdir2(struct rgw_fs
*rgw_fs
,
2194 struct rgw_file_handle
*parent_fh
, const char *name
,
2195 rgw_readdir_cb rcb
, void *cb_arg
, bool *eof
,
2198 RGWFileHandle
* parent
= get_rgwfh(parent_fh
);
2204 lsubdout(parent
->get_fs()->get_context(), rgw
, 15)
2206 << " offset=" << ((name
) ? name
: "(nil)")
2210 (flags
& RGW_READDIR_FLAG_DOTDOT
)) {
2211 /* send '.' and '..' with their NFS-defined offsets */
2212 rcb(".", cb_arg
, 1, nullptr, 0, RGW_LOOKUP_FLAG_DIR
);
2213 rcb("..", cb_arg
, 2, nullptr, 0, RGW_LOOKUP_FLAG_DIR
);
2216 int rc
= parent
->readdir(rcb
, cb_arg
, name
, eof
, flags
);
2218 } /* rgw_readdir2 */
2220 /* project offset of dirent name */
/* C API shim: compute the directory offset of entry `name` under
 * parent_fh by forwarding to RGWFileHandle::offset_of(), which writes
 * through `offset`.  `name` is copied into a std::string, so it must
 * be a valid C string by that point.
 * NOTE(review): argument validation and the return value are not
 * visible in this listing. */
2221 int rgw_dirent_offset(struct rgw_fs
*rgw_fs
,
2222 struct rgw_file_handle
*parent_fh
,
2223 const char *name
, int64_t *offset
,
2226 RGWFileHandle
* parent
= get_rgwfh(parent_fh
);
2231 std::string sname
{name
};
2232 int rc
= parent
->offset_of(sname
, offset
, flags
);
/* C API shim: read up to `length` bytes at `offset` from the given
 * handle into `buffer` by forwarding to RGWLibFS::read() and returning
 * its result; the byte count is reported through *bytes_read. */
2239 int rgw_read(struct rgw_fs
*rgw_fs
,
2240 struct rgw_file_handle
*fh
, uint64_t offset
,
2241 size_t length
, size_t *bytes_read
, void *buffer
,
2244 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2245 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2247 return fs
->read(rgw_fh
, offset
, length
, bytes_read
, buffer
, flags
);
/* C API shim: read the target of a symbolic link into `buffer` by
 * forwarding to RGWLibFS::readlink() and returning its result; the
 * byte count is reported through *bytes_read. */
2253 int rgw_readlink(struct rgw_fs
*rgw_fs
,
2254 struct rgw_file_handle
*fh
, uint64_t offset
,
2255 size_t length
, size_t *bytes_read
, void *buffer
,
2258 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2259 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2261 return fs
->readlink(rgw_fh
, offset
, length
, bytes_read
, buffer
, flags
);
/* C API: write `length` bytes from `buffer` at `offset` through
 * RGWFileHandle::write().  Non-file handles are tested for first.  If
 * the handle is not open and RGW_OPEN_FLAG_V3 is set in `flags`, the
 * handle is opened on the fly with those flags.
 * NOTE(review): the value returned for non-file handles, the handling
 * of a failed open or a missing V3 flag, and the return value are not
 * visible in this listing. */
2267 int rgw_write(struct rgw_fs
*rgw_fs
,
2268 struct rgw_file_handle
*fh
, uint64_t offset
,
2269 size_t length
, size_t *bytes_written
, void *buffer
,
2272 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2277 if (! rgw_fh
->is_file())
2280 if (! rgw_fh
->is_open()) {
2281 if (flags
& RGW_OPEN_FLAG_V3
) {
2282 rc
= rgw_fh
->open(flags
);
2289 rc
= rgw_fh
->write(offset
, length
, bytes_written
, buffer
);
2295 read data from file (vector)
/* Interior of the RGWReadV helper (the class header is not visible in
 * this listing).  It holds a pointer to an rgw_vio array and exposes
 * the buffers and total length of a buffer list member `bl`.
 * NOTE(review): how the constructor takes over `_bl` is not visible. */
2300 struct rgw_vio
* vio
;
2303 RGWReadV(buffer::list
& _bl
, rgw_vio
* _vio
) : vio(_vio
) {
2307 struct rgw_vio
* get_vio() { return vio
; }
2309 const auto& buffers() { return bl
.buffers(); }
2311 unsigned /* XXX */ length() { return bl
.length(); }
/* Release callback installed in uio->uio_rele by rgw_readv: recovers
 * the RGWReadV stored in uio->uio_p1 and frees its storage with
 * ::operator delete, matching the raw ::operator new in rgw_readv.
 * NOTE(review): whether the destructor is invoked before the storage
 * is freed is not visible in this listing. */
2315 void rgw_readv_rele(struct rgw_uio
*uio
, uint32_t flags
)
2317 RGWReadV
* rdv
= static_cast<RGWReadV
*>(uio
->uio_p1
);
2319 ::operator delete(rdv
);
/* C API: vectored read.  Executes an RGWGetObjRequest for the range
 * [uio->uio_offset, uio->uio_offset + uio->uio_resid) and exposes the
 * resulting buffer list to the caller as an array of rgw_vio entries.
 *
 * A single raw allocation holds an RGWReadV object followed directly
 * by one rgw_vio per buffer.  The vio array therefore starts at
 * `rdv + 1`, one whole RGWReadV past the start of the allocation.
 * Pointer arithmetic on RGWReadV* is scaled by sizeof(RGWReadV), so
 * adding sizeof(RGWReadV) to the pointer would land far outside the
 * allocation.
 *
 * The allocation is released through uio->uio_rele (rgw_readv_rele).
 *
 * NOTE(review): the value returned for non-file handles, the rc
 * checks, the placement-new expression head, the store into
 * uio->uio_p1 and the return value are not visible in this listing. */
2322 int rgw_readv(struct rgw_fs
*rgw_fs
,
2323 struct rgw_file_handle
*fh
, rgw_uio
*uio
, uint32_t flags
)
2326 CephContext
* cct
= static_cast<CephContext
*>(rgw_fs
->rgw
);
2327 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2328 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2330 if (! rgw_fh
->is_file())
2336 RGWGetObjRequest
req(cct
, fs
->get_user(), rgw_fh
->bucket_name(),
2337 rgw_fh
->object_name(), uio
->uio_offset
, uio
->uio_resid
,
2339 req
.do_hexdump
= false;
2341 rc
= rgwlib
.get_fe()->execute_req(&req
);
/* one block: the RGWReadV header plus one rgw_vio per buffer */
2344 RGWReadV
* rdv
= static_cast<RGWReadV
*>(
2345 ::operator new(sizeof(RGWReadV
) +
2346 (bl
.buffers().size() * sizeof(struct rgw_vio
))));
/* vio array begins immediately after the RGWReadV object */
2349 RGWReadV(bl
, reinterpret_cast<rgw_vio
*>(rdv
+ 1)));
2352 uio
->uio_cnt
= rdv
->buffers().size();
2353 uio
->uio_resid
= rdv
->length();
2354 uio
->uio_vio
= rdv
->get_vio();
2355 uio
->uio_rele
= rgw_readv_rele
;
2358 auto& buffers
= rdv
->buffers();
2359 for (auto& bp
: buffers
) {
2360 rgw_vio
*vio
= &(uio
->uio_vio
[ix
]);
2361 vio
->vio_base
= const_cast<char*>(bp
.c_str());
2362 vio
->vio_len
= bp
.length();
2363 vio
->vio_u1
= nullptr;
2364 vio
->vio_p1
= nullptr;
2376 write data to file (vector)
/* C API: vectored write.  Each of the uio->uio_cnt rgw_vio entries is
 * wrapped with buffer::create_static (vio_len bytes at vio_base), and
 * an RGWPutObjRequest for the handle's bucket and relative object name
 * is executed through the rgwlib frontend.
 * Non-file handles are tested for first.
 * NOTE(review): the value returned for non-file handles, how the
 * static buffers are attached to the request, and the return value are
 * not visible in this listing. */
2378 int rgw_writev(struct rgw_fs
*rgw_fs
, struct rgw_file_handle
*fh
,
2379 rgw_uio
*uio
, uint32_t flags
)
2384 CephContext
* cct
= static_cast<CephContext
*>(rgw_fs
->rgw
);
2385 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2386 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2388 if (! rgw_fh
->is_file())
2392 for (unsigned int ix
= 0; ix
< uio
->uio_cnt
; ++ix
) {
2393 rgw_vio
*vio
= &(uio
->uio_vio
[ix
]);
2395 buffer::create_static(vio
->vio_len
,
2396 static_cast<char*>(vio
->vio_base
)));
2399 std::string oname
= rgw_fh
->relative_object_name();
2400 RGWPutObjRequest
req(cct
, fs
->get_user(), rgw_fh
->bucket_name(),
2403 int rc
= rgwlib
.get_fe()->execute_req(&req
);
2405 /* XXX update size (in request) */
/* C API entry point for syncing a file handle.
 * NOTE(review): the remaining parameters and the body are not visible
 * in this listing. */
2413 int rgw_fsync(struct rgw_fs
*rgw_fs
, struct rgw_file_handle
*handle
,
2419 int rgw_commit(struct rgw_fs
*rgw_fs
, struct rgw_file_handle
*fh
,
2420 uint64_t offset
, uint64_t length
, uint32_t flags
)
2422 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2424 return rgw_fh
->commit(offset
, length
, RGWFileHandle::FLAG_NONE
);