1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 #include "include/compat.h"
5 #include "include/rados/rgw_file.h"
11 #include "rgw_rados.h"
12 #include "rgw_resolve.h"
16 #include "rgw_acl_s3.h"
17 #include "rgw_frontend.h"
18 #include "rgw_request.h"
19 #include "rgw_process.h"
20 #include "rgw_rest_user.h"
21 #include "rgw_rest_s3.h"
22 #include "rgw_os_lib.h"
23 #include "rgw_auth_s3.h"
25 #include "rgw_bucket.h"
27 #include "rgw_lib_frontend.h"
28 #include "common/errno.h"
32 #define dout_subsys ceph_subsys_rgw
40 const string
RGWFileHandle::root_name
= "/";
42 std::atomic
<uint32_t> RGWLibFS::fs_inst_counter
;
44 uint32_t RGWLibFS::write_completion_interval_s
= 10;
46 ceph::timer
<ceph::mono_clock
> RGWLibFS::write_timer
{
47 ceph::construct_suspended
};
49 inline int valid_fs_bucket_name(const string
& name
) {
50 int rc
= valid_s3_bucket_name(name
, false /* relaxed */);
52 if (name
.size() > 255)
59 inline int valid_fs_object_name(const string
& name
) {
60 int rc
= valid_s3_object_name(name
);
62 if (name
.size() > 1024)
69 LookupFHResult
RGWLibFS::stat_bucket(RGWFileHandle
* parent
, const char *path
,
70 RGWLibFS::BucketStats
& bs
,
73 LookupFHResult fhr
{nullptr, 0};
74 std::string bucket_name
{path
};
75 RGWStatBucketRequest
req(cct
, get_user(), bucket_name
, bs
);
77 int rc
= rgwlib
.get_fe()->execute_req(&req
);
79 (req
.get_ret() == 0) &&
81 fhr
= lookup_fh(parent
, path
,
82 (flags
& RGWFileHandle::FLAG_LOCKED
)|
83 RGWFileHandle::FLAG_CREATE
|
84 RGWFileHandle::FLAG_BUCKET
);
86 RGWFileHandle
* rgw_fh
= get
<0>(fhr
);
87 if (! (flags
& RGWFileHandle::FLAG_LOCKED
)) {
90 rgw_fh
->set_times(req
.get_ctime());
91 /* restore attributes */
92 auto ux_key
= req
.get_attr(RGW_ATTR_UNIX_KEY1
);
93 auto ux_attrs
= req
.get_attr(RGW_ATTR_UNIX1
);
94 if (ux_key
&& ux_attrs
) {
95 DecodeAttrsResult dar
= rgw_fh
->decode_attrs(ux_key
, ux_attrs
);
96 if (get
<0>(dar
) || get
<1>(dar
)) {
100 if (! (flags
& RGWFileHandle::FLAG_LOCKED
)) {
101 rgw_fh
->mtx
.unlock();
108 LookupFHResult
RGWLibFS::stat_leaf(RGWFileHandle
* parent
,
110 enum rgw_fh_type type
,
113 /* find either-of <object_name>, <object_name/>, only one of
114 * which should exist; atomicity? */
117 LookupFHResult fhr
{nullptr, 0};
119 /* XXX the need for two round-trip operations to identify file or
120 * directory leaf objects is unnecessary--the current proposed
121 * mechanism to avoid this is to store leaf object names with an
122 * object locator w/o trailing slash */
124 std::string obj_path
= parent
->format_child_name(path
, false);
126 for (auto ix
: { 0, 1, 2 }) {
131 if (type
== RGW_FS_TYPE_DIRECTORY
)
134 RGWStatObjRequest
req(cct
, get_user(),
135 parent
->bucket_name(), obj_path
,
136 RGWStatObjRequest::FLAG_NONE
);
137 int rc
= rgwlib
.get_fe()->execute_req(&req
);
139 (req
.get_ret() == 0)) {
140 fhr
= lookup_fh(parent
, path
, RGWFileHandle::FLAG_CREATE
);
142 RGWFileHandle
* rgw_fh
= get
<0>(fhr
);
143 lock_guard
guard(rgw_fh
->mtx
);
144 rgw_fh
->set_size(req
.get_size());
145 rgw_fh
->set_times(req
.get_mtime());
146 /* restore attributes */
147 auto ux_key
= req
.get_attr(RGW_ATTR_UNIX_KEY1
);
148 auto ux_attrs
= req
.get_attr(RGW_ATTR_UNIX1
);
149 if (ux_key
&& ux_attrs
) {
150 DecodeAttrsResult dar
= rgw_fh
->decode_attrs(ux_key
, ux_attrs
);
151 if (get
<0>(dar
) || get
<1>(dar
)) {
164 if (type
== RGW_FS_TYPE_FILE
)
168 RGWStatObjRequest
req(cct
, get_user(),
169 parent
->bucket_name(), obj_path
,
170 RGWStatObjRequest::FLAG_NONE
);
171 int rc
= rgwlib
.get_fe()->execute_req(&req
);
173 (req
.get_ret() == 0)) {
174 fhr
= lookup_fh(parent
, path
, RGWFileHandle::FLAG_DIRECTORY
);
176 RGWFileHandle
* rgw_fh
= get
<0>(fhr
);
177 lock_guard
guard(rgw_fh
->mtx
);
178 rgw_fh
->set_size(req
.get_size());
179 rgw_fh
->set_times(req
.get_mtime());
180 /* restore attributes */
181 auto ux_key
= req
.get_attr(RGW_ATTR_UNIX_KEY1
);
182 auto ux_attrs
= req
.get_attr(RGW_ATTR_UNIX1
);
183 if (ux_key
&& ux_attrs
) {
184 DecodeAttrsResult dar
= rgw_fh
->decode_attrs(ux_key
, ux_attrs
);
185 if (get
<0>(dar
) || get
<1>(dar
)) {
196 std::string object_name
{path
};
197 RGWStatLeafRequest
req(cct
, get_user(), parent
, object_name
);
198 int rc
= rgwlib
.get_fe()->execute_req(&req
);
200 (req
.get_ret() == 0)) {
202 /* we need rgw object's key name equal to file name, if
204 if ((flags
& RGWFileHandle::FLAG_EXACT_MATCH
) &&
205 !req
.exact_matched
) {
206 lsubdout(get_context(), rgw
, 15)
208 << ": stat leaf not exact match file name = "
212 fhr
= lookup_fh(parent
, path
,
213 RGWFileHandle::FLAG_CREATE
|
215 RGWFileHandle::FLAG_DIRECTORY
:
216 RGWFileHandle::FLAG_NONE
));
217 /* XXX we don't have an object--in general, there need not
218 * be one (just a path segment in some other object). For an
219 * actual leaf, an object exists, but we'd need another round
220 * trip to get attrs */
222 /* for now use the parent object's mtime */
223 RGWFileHandle
* rgw_fh
= get
<0>(fhr
);
224 lock_guard
guard(rgw_fh
->mtx
);
225 rgw_fh
->set_mtime(parent
->get_mtime());
238 } /* RGWLibFS::stat_leaf */
240 int RGWLibFS::read(RGWFileHandle
* rgw_fh
, uint64_t offset
, size_t length
,
241 size_t* bytes_read
, void* buffer
, uint32_t flags
)
243 if (! rgw_fh
->is_file())
246 if (rgw_fh
->deleted())
249 RGWReadRequest
req(get_context(), get_user(), rgw_fh
, offset
, length
,
252 int rc
= rgwlib
.get_fe()->execute_req(&req
);
254 (req
.get_ret() == 0)) {
255 lock_guard(rgw_fh
->mtx
);
256 rgw_fh
->set_atime(real_clock::to_timespec(real_clock::now()));
257 *bytes_read
= req
.nread
;
263 int RGWLibFS::unlink(RGWFileHandle
* rgw_fh
, const char* name
, uint32_t flags
)
267 RGWFileHandle
* parent
= nullptr;
268 RGWFileHandle
* bkt_fh
= nullptr;
270 if (unlikely(flags
& RGWFileHandle::FLAG_UNLINK_THIS
)) {
272 parent
= rgw_fh
->get_parent();
276 LookupFHResult fhr
= lookup_fh(parent
, name
, RGWFileHandle::FLAG_LOCK
);
277 rgw_fh
= get
<0>(fhr
);
281 if (parent
->is_root()) {
282 /* a bucket may have an object storing Unix attributes, check
283 * for and delete it */
285 fhr
= stat_bucket(parent
, name
, bs
, (rgw_fh
) ?
286 RGWFileHandle::FLAG_LOCKED
:
287 RGWFileHandle::FLAG_NONE
);
288 bkt_fh
= get
<0>(fhr
);
289 if (unlikely(! bkt_fh
)) {
290 /* implies !rgw_fh, so also !LOCKED */
294 if (bs
.num_entries
> 1) {
295 unref(bkt_fh
); /* return stat_bucket ref */
296 if (likely(!! rgw_fh
)) { /* return lock and ref from
297 * lookup_fh (or caller in the
299 * RGWFileHandle::FLAG_UNLINK_THIS) */
300 rgw_fh
->mtx
.unlock();
305 /* delete object w/key "<bucket>/" (uxattrs), if any */
307 RGWDeleteObjRequest
req(cct
, get_user(), bkt_fh
->bucket_name(), oname
);
308 rc
= rgwlib
.get_fe()->execute_req(&req
);
309 /* don't care if ENOENT */
314 RGWDeleteBucketRequest
req(cct
, get_user(), bname
);
315 rc
= rgwlib
.get_fe()->execute_req(&req
);
324 /* XXX for now, perform a hard lookup to deduce the type of
325 * object to be deleted ("foo" vs. "foo/")--also, ensures
326 * atomicity at this endpoint */
327 struct rgw_file_handle
*fh
;
328 rc
= rgw_lookup(get_fs(), parent
->get_fh(), name
, &fh
,
329 RGW_LOOKUP_FLAG_NONE
);
334 rgw_fh
= get_rgwfh(fh
);
335 rgw_fh
->mtx
.lock(); /* LOCKED */
338 std::string oname
= rgw_fh
->relative_object_name();
339 if (rgw_fh
->is_dir()) {
340 /* for the duration of our cache timer, trust positive
342 if (rgw_fh
->has_children()) {
343 rgw_fh
->mtx
.unlock();
349 RGWDeleteObjRequest
req(cct
, get_user(), parent
->bucket_name(),
351 rc
= rgwlib
.get_fe()->execute_req(&req
);
357 /* ENOENT when raced with other s3 gateway */
358 if (! rc
|| rc
== -ENOENT
) {
359 rgw_fh
->flags
|= RGWFileHandle::FLAG_DELETED
;
360 fh_cache
.remove(rgw_fh
->fh
.fh_hk
.object
, rgw_fh
,
361 RGWFileHandle::FHCache::FLAG_LOCK
);
365 real_time t
= real_clock::now();
366 parent
->set_mtime(real_clock::to_timespec(t
));
367 parent
->set_ctime(real_clock::to_timespec(t
));
370 rgw_fh
->mtx
.unlock();
374 } /* RGWLibFS::unlink */
376 int RGWLibFS::rename(RGWFileHandle
* src_fh
, RGWFileHandle
* dst_fh
,
377 const char *_src_name
, const char *_dst_name
)
380 /* XXX initial implementation: try-copy, and delete if copy
386 std::string src_name
{_src_name
};
387 std::string dst_name
{_dst_name
};
390 LookupFHResult fhr
= lookup_fh(src_fh
, _src_name
, RGWFileHandle::FLAG_LOCK
);
391 RGWFileHandle
* rgw_fh
= get
<0>(fhr
);
393 /* should not happen */
395 ldout(get_context(), 0) << __func__
396 << " BUG no such src renaming path="
402 /* forbid renaming of directories (unreasonable at scale) */
403 if (rgw_fh
->is_dir()) {
404 ldout(get_context(), 12) << __func__
405 << " rejecting attempt to rename directory path="
406 << rgw_fh
->full_object_name()
412 /* forbid renaming open files (violates intent, for now) */
413 if (rgw_fh
->is_open()) {
414 ldout(get_context(), 12) << __func__
415 << " rejecting attempt to rename open file path="
416 << rgw_fh
->full_object_name()
422 t
= real_clock::now();
424 for (int ix
: {0, 1}) {
428 RGWCopyObjRequest
req(cct
, get_user(), src_fh
, dst_fh
, src_name
,
430 int rc
= rgwlib
.get_fe()->execute_req(&req
);
432 ((rc
= req
.get_ret()) != 0)) {
433 ldout(get_context(), 1)
435 << " rename step 0 failed src="
436 << src_fh
->full_object_name() << " " << src_name
437 << " dst=" << dst_fh
->full_object_name()
443 ldout(get_context(), 12)
445 << " rename step 0 success src="
446 << src_fh
->full_object_name() << " " << src_name
447 << " dst=" << dst_fh
->full_object_name()
451 /* update dst change id */
452 dst_fh
->set_times(t
);
457 rc
= this->unlink(rgw_fh
/* LOCKED */, _src_name
,
458 RGWFileHandle::FLAG_UNLINK_THIS
);
461 ldout(get_context(), 12)
463 << " rename step 1 success src="
464 << src_fh
->full_object_name() << " " << src_name
465 << " dst=" << dst_fh
->full_object_name()
469 /* update src change id */
470 src_fh
->set_times(t
);
472 ldout(get_context(), 1)
474 << " rename step 1 failed src="
475 << src_fh
->full_object_name() << " " << src_name
476 << " dst=" << dst_fh
->full_object_name()
488 rgw_fh
->mtx
.unlock(); /* !LOCKED */
489 unref(rgw_fh
); /* -ref */
493 } /* RGWLibFS::rename */
495 MkObjResult
RGWLibFS::mkdir(RGWFileHandle
* parent
, const char *name
,
496 struct stat
*st
, uint32_t mask
, uint32_t flags
)
499 rgw_file_handle
*lfh
;
501 rc
= rgw_lookup(get_fs(), parent
->get_fh(), name
, &lfh
,
502 RGW_LOOKUP_FLAG_NONE
);
505 rc
= rgw_fh_rele(get_fs(), lfh
, RGW_FH_RELE_FLAG_NONE
);
506 return MkObjResult
{nullptr, -EEXIST
};
509 MkObjResult mkr
{nullptr, -EINVAL
};
511 RGWFileHandle
* rgw_fh
= nullptr;
512 buffer::list ux_key
, ux_attrs
;
514 fhr
= lookup_fh(parent
, name
,
515 RGWFileHandle::FLAG_CREATE
|
516 RGWFileHandle::FLAG_DIRECTORY
|
517 RGWFileHandle::FLAG_LOCK
);
518 rgw_fh
= get
<0>(fhr
);
520 rgw_fh
->create_stat(st
, mask
);
521 rgw_fh
->set_times(real_clock::now());
523 rgw_fh
->encode_attrs(ux_key
, ux_attrs
);
526 get
<0>(mkr
) = rgw_fh
;
532 if (parent
->is_root()) {
535 /* enforce S3 name restrictions */
536 rc
= valid_fs_bucket_name(bname
);
538 rgw_fh
->flags
|= RGWFileHandle::FLAG_DELETED
;
539 fh_cache
.remove(rgw_fh
->fh
.fh_hk
.object
, rgw_fh
,
540 RGWFileHandle::FHCache::FLAG_LOCK
);
541 rgw_fh
->mtx
.unlock();
543 get
<0>(mkr
) = nullptr;
548 RGWCreateBucketRequest
req(get_context(), get_user(), bname
);
551 req
.emplace_attr(RGW_ATTR_UNIX_KEY1
, std::move(ux_key
));
552 req
.emplace_attr(RGW_ATTR_UNIX1
, std::move(ux_attrs
));
554 rc
= rgwlib
.get_fe()->execute_req(&req
);
557 /* create an object representing the directory */
559 string dir_name
= parent
->format_child_name(name
, true);
561 /* need valid S3 name (characters, length <= 1024, etc) */
562 rc
= valid_fs_object_name(dir_name
);
564 rgw_fh
->flags
|= RGWFileHandle::FLAG_DELETED
;
565 fh_cache
.remove(rgw_fh
->fh
.fh_hk
.object
, rgw_fh
,
566 RGWFileHandle::FHCache::FLAG_LOCK
);
567 rgw_fh
->mtx
.unlock();
569 get
<0>(mkr
) = nullptr;
574 RGWPutObjRequest
req(get_context(), get_user(), parent
->bucket_name(),
578 req
.emplace_attr(RGW_ATTR_UNIX_KEY1
, std::move(ux_key
));
579 req
.emplace_attr(RGW_ATTR_UNIX1
, std::move(ux_attrs
));
581 rc
= rgwlib
.get_fe()->execute_req(&req
);
588 rgw_fh
->flags
|= RGWFileHandle::FLAG_DELETED
;
589 rgw_fh
->mtx
.unlock(); /* !LOCKED */
591 get
<0>(mkr
) = nullptr;
596 real_time t
= real_clock::now();
597 parent
->set_mtime(real_clock::to_timespec(t
));
598 parent
->set_ctime(real_clock::to_timespec(t
));
599 rgw_fh
->mtx
.unlock(); /* !LOCKED */
605 } /* RGWLibFS::mkdir */
607 MkObjResult
RGWLibFS::create(RGWFileHandle
* parent
, const char *name
,
608 struct stat
*st
, uint32_t mask
, uint32_t flags
)
614 rgw_file_handle
*lfh
;
615 rc
= rgw_lookup(get_fs(), parent
->get_fh(), name
, &lfh
,
616 RGW_LOOKUP_FLAG_NONE
);
619 rc
= rgw_fh_rele(get_fs(), lfh
, RGW_FH_RELE_FLAG_NONE
);
620 return MkObjResult
{nullptr, -EEXIST
};
623 /* expand and check name */
624 std::string obj_name
= parent
->format_child_name(name
, false);
625 rc
= valid_fs_object_name(obj_name
);
627 return MkObjResult
{nullptr, rc
};
632 RGWPutObjRequest
req(cct
, get_user(), parent
->bucket_name(), obj_name
, bl
);
633 MkObjResult mkr
{nullptr, -EINVAL
};
635 rc
= rgwlib
.get_fe()->execute_req(&req
);
641 LookupFHResult fhr
= lookup_fh(parent
, name
, RGWFileHandle::FLAG_CREATE
|
642 RGWFileHandle::FLAG_LOCK
);
643 RGWFileHandle
* rgw_fh
= get
<0>(fhr
);
645 if (get
<1>(fhr
) & RGWFileHandle::FLAG_CREATE
) {
646 /* fill in stat data */
647 real_time t
= real_clock::now();
648 rgw_fh
->create_stat(st
, mask
);
649 rgw_fh
->set_times(t
);
651 parent
->set_mtime(real_clock::to_timespec(t
));
652 parent
->set_ctime(real_clock::to_timespec(t
));
655 (void) rgw_fh
->stat(st
);
656 get
<0>(mkr
) = rgw_fh
;
657 rgw_fh
->mtx
.unlock();
665 } /* RGWLibFS::create */
667 int RGWLibFS::getattr(RGWFileHandle
* rgw_fh
, struct stat
* st
)
669 switch(rgw_fh
->fh
.fh_type
) {
670 case RGW_FS_TYPE_FILE
:
672 if (rgw_fh
->deleted())
680 return rgw_fh
->stat(st
);
681 } /* RGWLibFS::getattr */
683 int RGWLibFS::setattr(RGWFileHandle
* rgw_fh
, struct stat
* st
, uint32_t mask
,
687 buffer::list ux_key
, ux_attrs
;
689 lock_guard
guard(rgw_fh
->mtx
);
691 switch(rgw_fh
->fh
.fh_type
) {
692 case RGW_FS_TYPE_FILE
:
694 if (rgw_fh
->deleted())
702 string obj_name
{rgw_fh
->relative_object_name()};
704 if (rgw_fh
->is_dir() &&
705 (likely(! rgw_fh
->is_bucket()))) {
709 RGWSetAttrsRequest
req(cct
, get_user(), rgw_fh
->bucket_name(), obj_name
);
711 rgw_fh
->create_stat(st
, mask
);
712 rgw_fh
->encode_attrs(ux_key
, ux_attrs
);
715 req
.emplace_attr(RGW_ATTR_UNIX_KEY1
, std::move(ux_key
));
716 req
.emplace_attr(RGW_ATTR_UNIX1
, std::move(ux_attrs
));
718 rc
= rgwlib
.get_fe()->execute_req(&req
);
722 /* special case: materialize placeholder dir */
724 RGWPutObjRequest
req(get_context(), get_user(), rgw_fh
->bucket_name(),
727 rgw_fh
->encode_attrs(ux_key
, ux_attrs
); /* because std::moved */
730 req
.emplace_attr(RGW_ATTR_UNIX_KEY1
, std::move(ux_key
));
731 req
.emplace_attr(RGW_ATTR_UNIX1
, std::move(ux_attrs
));
733 rc
= rgwlib
.get_fe()->execute_req(&req
);
737 if ((rc
!= 0) || (rc2
!= 0)) {
741 rgw_fh
->set_ctime(real_clock::to_timespec(real_clock::now()));
744 } /* RGWLibFS::setattr */
746 /* called under rgw_fh->mtx held */
747 void RGWLibFS::update_fh(RGWFileHandle
*rgw_fh
)
750 string obj_name
{rgw_fh
->relative_object_name()};
751 buffer::list ux_key
, ux_attrs
;
753 if (rgw_fh
->is_dir() &&
754 (likely(! rgw_fh
->is_bucket()))) {
758 lsubdout(get_context(), rgw
, 17)
760 << " update old versioned fh : " << obj_name
763 RGWSetAttrsRequest
req(cct
, get_user(), rgw_fh
->bucket_name(), obj_name
);
765 rgw_fh
->encode_attrs(ux_key
, ux_attrs
);
767 req
.emplace_attr(RGW_ATTR_UNIX_KEY1
, std::move(ux_key
));
768 req
.emplace_attr(RGW_ATTR_UNIX1
, std::move(ux_attrs
));
770 rc
= rgwlib
.get_fe()->execute_req(&req
);
773 if ((rc
!= 0) || (rc2
!= 0)) {
774 lsubdout(get_context(), rgw
, 17)
776 << " update fh failed : " << obj_name
779 } /* RGWLibFS::update_fh */
781 void RGWLibFS::close()
783 state
.flags
|= FLAG_CLOSED
;
789 ObjUnref(RGWLibFS
* _fs
) : fs(_fs
) {}
790 void operator()(RGWFileHandle
* fh
) const {
791 lsubdout(fs
->get_context(), rgw
, 5)
794 << " before ObjUnref refs=" << fh
->get_refcnt()
800 /* force cache drain, forces objects to evict */
801 fh_cache
.drain(ObjUnref(this),
802 RGWFileHandle::FHCache::FLAG_LOCK
);
803 rgwlib
.get_fe()->get_process()->unregister_fs(this);
805 } /* RGWLibFS::close */
807 inline std::ostream
& operator<<(std::ostream
&os
, struct timespec
const &ts
) {
808 os
<< "<timespec: tv_sec=";
816 std::ostream
& operator<<(std::ostream
&os
, RGWLibFS::event
const &ev
) {
819 case RGWLibFS::event::type::READDIR
:
820 os
<< "type=READDIR;";
823 os
<< "type=UNKNOWN;";
826 os
<< "fid=" << ev
.fhk
.fh_hk
.bucket
<< ":" << ev
.fhk
.fh_hk
.object
827 << ";ts=" << ev
.ts
<< ">";
834 using directory
= RGWFileHandle::directory
;
836 /* dirent invalidate timeout--basically, the upper-bound on
837 * inconsistency with the S3 namespace */
839 = get_context()->_conf
->rgw_nfs_namespace_expire_secs
;
841 /* max events to gc in one cycle */
842 uint32_t max_ev
= get_context()->_conf
->rgw_nfs_max_gc
;
844 struct timespec now
, expire_ts
;
847 std::deque
<event
> &events
= state
.events
;
850 (void) clock_gettime(CLOCK_MONOTONIC_COARSE
, &now
);
851 lsubdout(get_context(), rgw
, 15)
852 << "GC: top of expire loop"
854 << " expire_s=" << expire_s
857 lock_guard
guard(state
.mtx
); /* LOCKED */
858 /* just return if no events */
859 if (events
.empty()) {
863 (events
.size() < 500) ? max_ev
: (events
.size() / 4);
864 for (uint32_t ix
= 0; (ix
< _max_ev
) && (events
.size() > 0); ++ix
) {
865 event
& ev
= events
.front();
867 expire_ts
.tv_sec
+= expire_s
;
868 if (expire_ts
> now
) {
877 for (auto& ev
: ve
) {
878 lsubdout(get_context(), rgw
, 15)
879 << "try-expire ev: " << ev
<< dendl
;
880 if (likely(ev
.t
== event::type::READDIR
)) {
881 RGWFileHandle
* rgw_fh
= lookup_handle(ev
.fhk
.fh_hk
);
882 lsubdout(get_context(), rgw
, 15)
883 << "ev rgw_fh: " << rgw_fh
<< dendl
;
885 RGWFileHandle::directory
* d
;
886 if (unlikely(! rgw_fh
->is_dir())) {
887 lsubdout(get_context(), rgw
, 0)
889 << " BUG non-directory found with READDIR event "
890 << "(" << rgw_fh
->bucket_name() << ","
891 << rgw_fh
->object_name() << ")"
895 /* maybe clear state */
896 d
= get
<directory
>(&rgw_fh
->variant_type
);
898 struct timespec ev_ts
= ev
.ts
;
899 lock_guard
guard(rgw_fh
->mtx
);
900 struct timespec d_last_readdir
= d
->last_readdir
;
901 if (unlikely(ev_ts
< d_last_readdir
)) {
902 /* readdir cycle in progress, don't invalidate */
903 lsubdout(get_context(), rgw
, 15)
904 << "GC: delay expiration for "
905 << rgw_fh
->object_name()
906 << " ev.ts=" << ev_ts
907 << " last_readdir=" << d_last_readdir
911 lsubdout(get_context(), rgw
, 15)
913 << rgw_fh
->object_name()
915 rgw_fh
->clear_state();
916 rgw_fh
->invalidate();
922 } /* event::type::READDIR */
925 } while (! (stop
|| shutdown
));
928 std::ostream
& operator<<(std::ostream
&os
,
929 RGWFileHandle
const &rgw_fh
)
931 const auto& fhk
= rgw_fh
.get_key();
932 const auto& fh
= const_cast<RGWFileHandle
&>(rgw_fh
).get_fh();
933 os
<< "<RGWFileHandle:";
934 os
<< "addr=" << &rgw_fh
<< ";";
935 switch (fh
->fh_type
) {
936 case RGW_FS_TYPE_DIRECTORY
:
937 os
<< "type=DIRECTORY;";
939 case RGW_FS_TYPE_FILE
:
943 os
<< "type=UNKNOWN;";
946 os
<< "fid=" << fhk
.fh_hk
.bucket
<< ":" << fhk
.fh_hk
.object
<< ";";
947 os
<< "name=" << rgw_fh
.object_name() << ";";
948 os
<< "refcnt=" << rgw_fh
.get_refcnt() << ";";
953 RGWFileHandle::~RGWFileHandle() {
954 /* !recycle case, handle may STILL be in handle table, BUT
955 * the partition lock is not held in this path */
956 if (fh_hook
.is_linked()) {
957 fs
->fh_cache
.remove(fh
.fh_hk
.object
, this, FHCache::FLAG_LOCK
);
959 /* cond-unref parent */
960 if (parent
&& (! parent
->is_mount())) {
961 /* safe because if parent->unref causes its deletion,
962 * there are a) by refcnt, no other objects/paths pointing
963 * to it and b) by the semantics of valid iteration of
964 * fh_lru (observed, e.g., by cohort_lru<T,...>::drain())
965 * no unsafe iterators reaching it either--n.b., this constraint
966 * is binding on code which may in future attempt to e.g.,
967 * cause the eviction of objects in LRU order */
968 (void) get_fs()->unref(parent
);
972 void RGWFileHandle::encode_attrs(ceph::buffer::list
& ux_key1
,
973 ceph::buffer::list
& ux_attrs1
)
975 fh_key
fhk(this->fh
.fh_hk
);
976 rgw::encode(fhk
, ux_key1
);
977 rgw::encode(*this, ux_attrs1
);
978 } /* RGWFileHandle::encode_attrs */
980 DecodeAttrsResult
RGWFileHandle::decode_attrs(const ceph::buffer::list
* ux_key1
,
981 const ceph::buffer::list
* ux_attrs1
)
983 DecodeAttrsResult dar
{ false, false };
985 auto bl_iter_key1
= const_cast<buffer::list
*>(ux_key1
)->begin();
986 rgw::decode(fhk
, bl_iter_key1
);
987 if (fhk
.version
>= 2) {
988 assert(this->fh
.fh_hk
== fhk
.fh_hk
);
993 auto bl_iter_unix1
= const_cast<buffer::list
*>(ux_attrs1
)->begin();
994 rgw::decode(*this, bl_iter_unix1
);
995 if (this->state
.version
< 2) {
1000 } /* RGWFileHandle::decode_attrs */
1002 bool RGWFileHandle::reclaim() {
1003 lsubdout(fs
->get_context(), rgw
, 17)
1004 << __func__
<< " " << *this
1006 /* in the non-delete case, handle may still be in handle table */
1007 if (fh_hook
.is_linked()) {
1008 /* in this case, we are being called from a context which holds
1009 * the partition lock */
1010 fs
->fh_cache
.remove(fh
.fh_hk
.object
, this, FHCache::FLAG_NONE
);
1013 } /* RGWFileHandle::reclaim */
1015 bool RGWFileHandle::has_children() const
1017 if (unlikely(! is_dir()))
1020 RGWRMdirCheck
req(fs
->get_context(), fs
->get_user(), this);
1021 int rc
= rgwlib
.get_fe()->execute_req(&req
);
1023 return req
.valid
&& req
.has_children
;
1029 std::ostream
& operator<<(std::ostream
&os
,
1030 RGWFileHandle::readdir_offset
const &offset
)
1033 if (unlikely(!! get
<uint64_t*>(&offset
))) {
1034 uint64_t* ioff
= get
<uint64_t*>(offset
);
1038 os
<< get
<const char*>(offset
);
1042 int RGWFileHandle::readdir(rgw_readdir_cb rcb
, void *cb_arg
,
1043 readdir_offset offset
,
1044 bool *eof
, uint32_t flags
)
1046 using event
= RGWLibFS::event
;
1049 struct timespec now
;
1050 CephContext
* cct
= fs
->get_context();
1052 directory
* d
= get
<directory
>(&variant_type
);
1054 (void) clock_gettime(CLOCK_MONOTONIC_COARSE
, &now
); /* !LOCKED */
1055 lock_guard
guard(mtx
);
1056 d
->last_readdir
= now
;
1060 if (likely(!! get
<const char*>(&offset
))) {
1061 initial_off
= ! get
<const char*>(offset
);
1063 initial_off
= (*get
<uint64_t*>(offset
) == 0);
1067 RGWListBucketsRequest
req(cct
, fs
->get_user(), this, rcb
, cb_arg
,
1069 rc
= rgwlib
.get_fe()->execute_req(&req
);
1071 (void) clock_gettime(CLOCK_MONOTONIC_COARSE
, &now
); /* !LOCKED */
1072 lock_guard
guard(mtx
);
1076 inc_nlink(req
.d_count
);
1078 event
ev(event::type::READDIR
, get_key(), state
.atime
);
1079 lock_guard
sguard(fs
->state
.mtx
);
1080 fs
->state
.push_event(ev
);
1083 RGWReaddirRequest
req(cct
, fs
->get_user(), this, rcb
, cb_arg
, offset
);
1084 rc
= rgwlib
.get_fe()->execute_req(&req
);
1086 (void) clock_gettime(CLOCK_MONOTONIC_COARSE
, &now
); /* !LOCKED */
1087 lock_guard
guard(mtx
);
1091 inc_nlink(req
.d_count
);
1093 event
ev(event::type::READDIR
, get_key(), state
.atime
);
1094 lock_guard
sguard(fs
->state
.mtx
);
1095 fs
->state
.push_event(ev
);
1099 lsubdout(fs
->get_context(), rgw
, 15)
1101 << " final link count=" << state
.nlink
1105 } /* RGWFileHandle::readdir */
1107 int RGWFileHandle::write(uint64_t off
, size_t len
, size_t *bytes_written
,
1111 using WriteCompletion
= RGWLibFS::WriteCompletion
;
1113 lock_guard
guard(mtx
);
1117 file
* f
= get
<file
>(&variant_type
);
1122 lsubdout(fs
->get_context(), rgw
, 5)
1124 << " write attempted on deleted object "
1125 << this->object_name()
1127 /* zap write transaction, if any */
1129 delete f
->write_req
;
1130 f
->write_req
= nullptr;
1135 if (! f
->write_req
) {
1136 /* guard--we do not support (e.g., COW-backed) partial writes */
1138 lsubdout(fs
->get_context(), rgw
, 5)
1140 << " " << object_name()
1141 << " non-0 initial write position " << off
1147 std::string object_name
= relative_object_name();
1149 new RGWWriteRequest(fs
->get_context(), fs
->get_user(), this,
1150 bucket_name(), object_name
);
1151 rc
= rgwlib
.get_fe()->start_req(f
->write_req
);
1153 lsubdout(fs
->get_context(), rgw
, 5)
1155 << this->object_name()
1156 << " write start failed " << off
1157 << " (" << rc
<< ")"
1159 /* zap failed write transaction */
1160 delete f
->write_req
;
1161 f
->write_req
= nullptr;
1164 if (stateless_open()) {
1165 /* start write timer */
1166 f
->write_req
->timer_id
=
1167 RGWLibFS::write_timer
.add_event(
1168 std::chrono::seconds(RGWLibFS::write_completion_interval_s
),
1169 WriteCompletion(*this));
1175 if ((static_cast<off_t
>(off
) < f
->write_req
->real_ofs
) &&
1176 ((f
->write_req
->real_ofs
- off
) <= len
)) {
1177 overlap
= f
->write_req
->real_ofs
- off
;
1178 off
= f
->write_req
->real_ofs
;
1179 buffer
= static_cast<char*>(buffer
) + overlap
;
1187 buffer::create_static(len
, static_cast<char*>(buffer
)));
1190 buffer::copy(static_cast<char*>(buffer
), len
));
1193 f
->write_req
->put_data(off
, bl
);
1194 rc
= f
->write_req
->exec_continue();
1197 size_t min_size
= off
+ len
;
1198 if (min_size
> get_size())
1200 if (stateless_open()) {
1201 /* bump write timer */
1202 RGWLibFS::write_timer
.adjust_event(
1203 f
->write_req
->timer_id
, std::chrono::seconds(10));
1206 /* continuation failed (e.g., non-contiguous write position) */
1207 lsubdout(fs
->get_context(), rgw
, 5)
1210 << " failed write at position " << off
1211 << " (fails write transaction) "
1213 /* zap failed write transaction */
1214 delete f
->write_req
;
1215 f
->write_req
= nullptr;
1219 *bytes_written
= (rc
== 0) ? (len
+ overlap
) : 0;
1221 } /* RGWFileHandle::write */
1223 int RGWFileHandle::write_finish(uint32_t flags
)
1225 unique_lock guard
{mtx
, std::defer_lock
};
1228 if (! (flags
& FLAG_LOCKED
)) {
1232 file
* f
= get
<file
>(&variant_type
);
1233 if (f
&& (f
->write_req
)) {
1234 lsubdout(fs
->get_context(), rgw
, 10)
1236 << " finishing write trans on " << object_name()
1238 rc
= rgwlib
.get_fe()->finish_req(f
->write_req
);
1240 rc
= f
->write_req
->get_ret();
1242 delete f
->write_req
;
1243 f
->write_req
= nullptr;
1247 } /* RGWFileHandle::write_finish */
1249 int RGWFileHandle::close()
1251 lock_guard
guard(mtx
);
1253 int rc
= write_finish(FLAG_LOCKED
);
1255 flags
&= ~FLAG_OPEN
;
1256 flags
&= ~FLAG_STATELESS_OPEN
;
1259 } /* RGWFileHandle::close */
1261 RGWFileHandle::file::~file()
1266 void RGWFileHandle::clear_state()
1268 directory
* d
= get
<directory
>(&variant_type
);
1271 d
->last_marker
= rgw_obj_key
{};
1275 void RGWFileHandle::invalidate() {
1276 RGWLibFS
*fs
= get_fs();
1277 if (fs
->invalidate_cb
) {
1278 fs
->invalidate_cb(fs
->invalidate_arg
, get_key().fh_hk
);
1282 int RGWWriteRequest::exec_start() {
1283 struct req_state
* s
= get_state();
1285 auto compression_type
=
1286 get_store()->get_zone_params().get_compression_type(
1287 s
->bucket_info
.placement_rule
);
1289 /* not obviously supportable */
1290 assert(! dlo_manifest
);
1293 perfcounter
->inc(l_rgw_put
);
1296 if (s
->object
.empty()) {
1297 ldout(s
->cct
, 0) << __func__
<< " called on empty object" << dendl
;
1301 op_ret
= get_params();
1305 op_ret
= get_system_versioning_params(s
, &olh_epoch
, &version_id
);
1310 /* user-supplied MD5 check skipped (not supplied) */
1311 /* early quota check skipped--we don't have size yet */
1312 /* skipping user-supplied etag--we might have one in future, but
1313 * like data it and other attrs would arrive after open */
1314 processor
= select_processor(*static_cast<RGWObjectCtx
*>(s
->obj_ctx
),
1316 op_ret
= processor
->prepare(get_store(), NULL
);
1318 ldout(s
->cct
, 20) << "processor->prepare() returned ret=" << op_ret
1324 if (compression_type
!= "none") {
1325 plugin
= Compressor::create(s
->cct
, compression_type
);
1327 ldout(s
->cct
, 1) << "Cannot load plugin for rgw_compression_type "
1328 << compression_type
<< dendl
;
1330 compressor
.emplace(s
->cct
, plugin
, filter
);
1331 filter
= &*compressor
;
1339 int RGWWriteRequest::exec_continue()
1341 struct req_state
* s
= get_state();
1344 /* check guards (e.g., contig write) */
1348 size_t len
= data
.length();
1352 /* XXX we are currently synchronous--supplied data buffers cannot
1353 * be used after the caller returns */
1354 bool need_to_wait
= true;
1355 bufferlist orig_data
;
1360 hash
.Update((const byte
*)data
.c_str(), data
.length());
1361 op_ret
= put_data_and_throttle(filter
, data
, ofs
, need_to_wait
);
1363 if (!need_to_wait
|| op_ret
!= -EEXIST
) {
1364 ldout(s
->cct
, 20) << "processor->thottle_data() returned ret="
1369 ldout(s
->cct
, 5) << "NOTICE: processor->throttle_data() returned -EEXIST, need to restart write" << dendl
;
1371 /* restore original data */
1372 data
.swap(orig_data
);
1374 /* restart processing with different oid suffix */
1375 dispose_processor(processor
);
1376 processor
= select_processor(*static_cast<RGWObjectCtx
*>(s
->obj_ctx
),
1382 gen_rand_alphanumeric(get_store()->ctx(), buf
, sizeof(buf
) - 1);
1383 oid_rand
.append(buf
);
1385 op_ret
= processor
->prepare(get_store(), &oid_rand
);
1387 ldout(s
->cct
, 0) << "ERROR: processor->prepare() returned "
1392 /* restore compression filter, if any */
1394 compressor
.emplace(s
->cct
, plugin
, filter
);
1395 filter
= &*compressor
;
1398 op_ret
= put_data_and_throttle(filter
, data
, ofs
, false);
1403 bytes_written
+= len
;
1407 } /* exec_continue */
1409 int RGWWriteRequest::exec_finish()
1411 buffer::list bl
, aclbl
, ux_key
, ux_attrs
;
1412 map
<string
, string
>::iterator iter
;
1413 char calc_md5
[CEPH_CRYPTO_MD5_DIGESTSIZE
* 2 + 1];
1414 unsigned char m
[CEPH_CRYPTO_MD5_DIGESTSIZE
];
1415 struct req_state
* s
= get_state();
1417 size_t osize
= rgw_fh
->get_size();
1418 struct timespec octime
= rgw_fh
->get_ctime();
1419 struct timespec omtime
= rgw_fh
->get_mtime();
1420 real_time appx_t
= real_clock::now();
1422 s
->obj_size
= bytes_written
;
1423 perfcounter
->inc(l_rgw_put_b
, s
->obj_size
);
1425 op_ret
= get_store()->check_quota(s
->bucket_owner
.get_id(), s
->bucket
,
1426 user_quota
, bucket_quota
, s
->obj_size
);
1431 op_ret
= get_store()->check_bucket_shards(s
->bucket_info
, s
->bucket
,
1439 if (compressor
&& compressor
->is_compressed()) {
1441 RGWCompressionInfo cs_info
;
1442 cs_info
.compression_type
= plugin
->get_type_name();
1443 cs_info
.orig_size
= s
->obj_size
;
1444 cs_info
.blocks
= std::move(compressor
->get_compression_blocks());
1445 ::encode(cs_info
, tmp
);
1446 attrs
[RGW_ATTR_COMPRESSION
] = tmp
;
1447 ldout(s
->cct
, 20) << "storing " << RGW_ATTR_COMPRESSION
1448 << " with type=" << cs_info
.compression_type
1449 << ", orig_size=" << cs_info
.orig_size
1450 << ", blocks=" << cs_info
.blocks
.size() << dendl
;
1453 buf_to_hex(m
, CEPH_CRYPTO_MD5_DIGESTSIZE
, calc_md5
);
1456 bl
.append(etag
.c_str(), etag
.size() + 1);
1457 emplace_attr(RGW_ATTR_ETAG
, std::move(bl
));
1459 policy
.encode(aclbl
);
1460 emplace_attr(RGW_ATTR_ACL
, std::move(aclbl
));
1463 rgw_fh
->set_mtime(real_clock::to_timespec(appx_t
));
1464 rgw_fh
->set_ctime(real_clock::to_timespec(appx_t
));
1465 rgw_fh
->set_size(bytes_written
);
1466 rgw_fh
->encode_attrs(ux_key
, ux_attrs
);
1468 emplace_attr(RGW_ATTR_UNIX_KEY1
, std::move(ux_key
));
1469 emplace_attr(RGW_ATTR_UNIX1
, std::move(ux_attrs
));
1471 for (iter
= s
->generic_attrs
.begin(); iter
!= s
->generic_attrs
.end();
1473 buffer::list
& attrbl
= attrs
[iter
->first
];
1474 const string
& val
= iter
->second
;
1475 attrbl
.append(val
.c_str(), val
.size() + 1);
1478 op_ret
= rgw_get_request_metadata(s
->cct
, s
->info
, attrs
);
1482 encode_delete_at_attr(delete_at
, attrs
);
1484 /* Add custom metadata to expose the information whether an object
1485 * is an SLO or not. Appending the attribute must be performed AFTER
1486 * processing any input from user in order to prohibit overwriting. */
1487 if (unlikely(!! slo_info
)) {
1488 buffer::list slo_userindicator_bl
;
1489 ::encode("True", slo_userindicator_bl
);
1490 emplace_attr(RGW_ATTR_SLO_UINDICATOR
, std::move(slo_userindicator_bl
));
1493 op_ret
= processor
->complete(s
->obj_size
, etag
, &mtime
, real_time(), attrs
,
1494 (delete_at
? *delete_at
: real_time()),
1495 if_match
, if_nomatch
);
1497 /* revert attr updates */
1498 rgw_fh
->set_mtime(omtime
);
1499 rgw_fh
->set_ctime(octime
);
1500 rgw_fh
->set_size(osize
);
1504 dispose_processor(processor
);
1505 perfcounter
->tinc(l_rgw_put_lat
,
1506 (ceph_clock_now() - s
->time
));
1510 } /* namespace rgw */
1515 void rgwfile_version(int *major
, int *minor
, int *extra
)
1518 *major
= LIBRGW_FILE_VER_MAJOR
;
1520 *minor
= LIBRGW_FILE_VER_MINOR
;
1522 *extra
= LIBRGW_FILE_VER_EXTRA
;
1526 attach rgw namespace
// rgw_mount: attach an RGW namespace for the supplied credentials.
// Visible steps: allocate an RGWLibFS from the librgw handle (cast to
// CephContext*), uid and acc_key; call authorize() against
// rgwlib.get_store(); register the new fs with the frontend process for
// shared gc; fetch the struct rgw_fs via get_fs().
// NOTE(review): the remaining constructor arguments, the declaration of rc
// and every error path are not visible in this extract.
// NOTE(review): new_fs comes from a raw new -- confirm that a failed
// authorize() releases it before returning.
1528 int rgw_mount(librgw_t rgw
, const char *uid
, const char *acc_key
,
1529 const char *sec_key
, struct rgw_fs
**rgw_fs
,
1534 /* stash access data for "mount" */
1535 RGWLibFS
* new_fs
= new RGWLibFS(static_cast<CephContext
*>(rgw
), uid
, acc_key
,
// authorize() result is kept in rc; how rc is acted on is not visible here
1539 rc
= new_fs
->authorize(rgwlib
.get_store());
1545 /* register fs for shared gc */
1546 rgwlib
.get_fe()->get_process()->register_fs(new_fs
);
// presumably handed back through the rgw_fs out-parameter -- TODO confirm
1548 struct rgw_fs
*fs
= new_fs
->get_fs();
1551 /* XXX we no longer assume "/" is unique, but we aren't tracking the
// rgw_mount2: variant of rgw_mount that additionally takes a root argument.
// The visible statements mirror rgw_mount: allocate an RGWLibFS, call
// authorize() against rgwlib.get_store(), register the fs with the frontend
// process for shared gc, then fetch the struct rgw_fs via get_fs().
// NOTE(review): how root is consumed is not visible in this extract; the
// constructor call is cut after acc_key.
// NOTE(review): new_fs comes from a raw new -- confirm that a failed
// authorize() releases it before returning.
1559 int rgw_mount2(librgw_t rgw
, const char *uid
, const char *acc_key
,
1560 const char *sec_key
, const char *root
, struct rgw_fs
**rgw_fs
,
1565 /* stash access data for "mount" */
1566 RGWLibFS
* new_fs
= new RGWLibFS(static_cast<CephContext
*>(rgw
), uid
, acc_key
,
// authorize() result is kept in rc; how rc is acted on is not visible here
1570 rc
= new_fs
->authorize(rgwlib
.get_store());
1576 /* register fs for shared gc */
1577 rgwlib
.get_fe()->get_process()->register_fs(new_fs
);
// presumably handed back through the rgw_fs out-parameter -- TODO confirm
1579 struct rgw_fs
*fs
= new_fs
->get_fs();
1582 /* XXX we no longer assume "/" is unique, but we aren't tracking the
1591 register invalidate callbacks
1593 int rgw_register_invalidate(struct rgw_fs
*rgw_fs
, rgw_fh_callback_t cb
,
1594 void *arg
, uint32_t flags
)
1597 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
1598 return fs
->register_invalidate(cb
, arg
, flags
);
1602 detach rgw namespace
// rgw_umount: detach an RGW namespace.
// The only visible statement recovers the RGWLibFS stored in
// rgw_fs->fs_private; what is then done with it is not visible in this
// extract.
1604 int rgw_umount(struct rgw_fs
*rgw_fs
, uint32_t flags
)
1606 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
1612 get filesystem attributes
1614 int rgw_statfs(struct rgw_fs
*rgw_fs
,
1615 struct rgw_file_handle
*parent_fh
,
1616 struct rgw_statvfs
*vfs_st
, uint32_t flags
)
1618 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
1619 struct rados_cluster_stat_t stats
;
1621 RGWGetClusterStatReq
req(fs
->get_context(), fs
->get_user(), stats
);
1622 int rc
= rgwlib
.get_fe()->execute_req(&req
);
1624 lderr(fs
->get_context()) << "ERROR: getting total cluster usage"
1625 << cpp_strerror(-rc
) << dendl
;
1629 //Set block size to 1M.
1630 constexpr uint32_t CEPH_BLOCK_SHIFT
= 20;
1631 vfs_st
->f_bsize
= 1 << CEPH_BLOCK_SHIFT
;
1632 vfs_st
->f_frsize
= 1 << CEPH_BLOCK_SHIFT
;
1633 vfs_st
->f_blocks
= stats
.kb
>> (CEPH_BLOCK_SHIFT
- 10);
1634 vfs_st
->f_bfree
= stats
.kb_avail
>> (CEPH_BLOCK_SHIFT
- 10);
1635 vfs_st
->f_bavail
= stats
.kb_avail
>> (CEPH_BLOCK_SHIFT
- 10);
1636 vfs_st
->f_files
= stats
.num_objects
;
1637 vfs_st
->f_ffree
= -1;
1638 vfs_st
->f_fsid
[0] = fs
->get_fsid();
1639 vfs_st
->f_fsid
[1] = fs
->get_fsid();
1641 vfs_st
->f_namemax
= 4096;
1646 generic create -- create an empty regular file
// rgw_create: create an empty regular file named name under parent_fh.
// Visible flow: resolve the RGWLibFS and the parent RGWFileHandle, test the
// parent (the condition includes is_root() and is_file()), call
// RGWLibFS::create(), and store the new handle in *fh via get_fh().
// NOTE(review): posix_flags does not appear in any visible statement; the
// create() call is given flags.
// NOTE(review): the guard around the *fh store and the return statements
// are not visible in this extract.
1648 int rgw_create(struct rgw_fs
*rgw_fs
, struct rgw_file_handle
*parent_fh
,
1649 const char *name
, struct stat
*st
, uint32_t mask
,
1650 struct rgw_file_handle
**fh
, uint32_t posix_flags
,
1655 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
1656 RGWFileHandle
* parent
= get_rgwfh(parent_fh
);
// tail of the parent test: a root or file parent is part of the condition
1659 (parent
->is_root()) ||
1660 (parent
->is_file())) {
1665 MkObjResult fhr
= fs
->create(parent
, name
, st
, mask
, flags
);
1666 RGWFileHandle
*nfh
= get
<0>(fhr
); // nullptr if !success
1669 *fh
= nfh
->get_fh();
1675 create a new directory
// rgw_mkdir: create a new directory named name under parent_fh.
// Visible flow: resolve the RGWLibFS and the parent RGWFileHandle, call
// RGWLibFS::mkdir(), and store the new handle in *fh via get_fh().
// NOTE(review): the parent validation, the guard around the *fh store and
// the return statements are not visible in this extract.
1677 int rgw_mkdir(struct rgw_fs
*rgw_fs
,
1678 struct rgw_file_handle
*parent_fh
,
1679 const char *name
, struct stat
*st
, uint32_t mask
,
1680 struct rgw_file_handle
**fh
, uint32_t flags
)
1684 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
1685 RGWFileHandle
* parent
= get_rgwfh(parent_fh
);
1692 MkObjResult fhr
= fs
->mkdir(parent
, name
, st
, mask
, flags
);
1693 RGWFileHandle
*nfh
= get
<0>(fhr
); // nullptr if !success
1696 *fh
= nfh
->get_fh();
1704 int rgw_rename(struct rgw_fs
*rgw_fs
,
1705 struct rgw_file_handle
*src
, const char* src_name
,
1706 struct rgw_file_handle
*dst
, const char* dst_name
,
1709 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
1711 RGWFileHandle
* src_fh
= get_rgwfh(src
);
1712 RGWFileHandle
* dst_fh
= get_rgwfh(dst
);
1714 return fs
->rename(src_fh
, dst_fh
, src_name
, dst_name
);
1718 remove file or directory
1720 int rgw_unlink(struct rgw_fs
*rgw_fs
, struct rgw_file_handle
*parent_fh
,
1721 const char *name
, uint32_t flags
)
1723 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
1724 RGWFileHandle
* parent
= get_rgwfh(parent_fh
);
1726 return fs
->unlink(parent
, name
);
1730 lookup object by name (POSIX style)
// rgw_lookup: resolve path relative to parent_fh (POSIX style lookup).
// Visible flow:
//  - the parent is tested with is_dir();
//  - when the parent is the root, ".." and "/" are treated specially,
//    otherwise path is resolved with stat_bucket();
//  - for any other parent, ".." is treated specially, otherwise the name is
//    resolved with stat_leaf(); when that finds nothing the lookup continues
//    with lookup_fh(FLAG_CREATE), gated on RGW_LOOKUP_FLAG_CREATE;
//  - the resulting handle is converted with get_fh().
// NOTE(review): the bodies of the special cases, the failure returns and the
// final store into *fh are not visible in this extract.
1732 int rgw_lookup(struct rgw_fs
*rgw_fs
,
1733 struct rgw_file_handle
*parent_fh
, const char* path
,
1734 struct rgw_file_handle
**fh
, uint32_t flags
)
1736 //CephContext* cct = static_cast<CephContext*>(rgw_fs->rgw);
1737 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
1739 RGWFileHandle
* parent
= get_rgwfh(parent_fh
);
1741 (! parent
->is_dir())) {
1746 RGWFileHandle
* rgw_fh
;
1749 if (parent
->is_root()) {
1750 /* special: parent lookup--note lack of ref()! */
1751 if (unlikely((strcmp(path
, "..") == 0) ||
1752 (strcmp(path
, "/") == 0))) {
// under the root, path is resolved as a bucket
1755 RGWLibFS::BucketStats bstat
;
1756 fhr
= fs
->stat_bucket(parent
, path
, bstat
, RGWFileHandle::FLAG_NONE
);
1757 rgw_fh
= get
<0>(fhr
);
1762 /* special: after readdir--note extra ref()! */
1763 if (unlikely((strcmp(path
, "..") == 0))) {
// NOTE(review): "BANG" reads like leftover debug output and is written with
// no separator after the function name -- confirm it is still wanted
1765 lsubdout(fs
->get_context(), rgw
, 17)
1766 << __func__
<< "BANG"<< *rgw_fh
1770 /* lookup in a readdir callback */
1771 enum rgw_fh_type fh_type
= fh_type_of(flags
);
// exact-match is requested unless RGW_LOOKUP_FLAG_RCB is set
1773 uint32_t sl_flags
= (flags
& RGW_LOOKUP_FLAG_RCB
)
1774 ? RGWFileHandle::FLAG_NONE
1775 : RGWFileHandle::FLAG_EXACT_MATCH
;
1777 fhr
= fs
->stat_leaf(parent
, path
, fh_type
, sl_flags
);
1778 if (! get
<0>(fhr
)) {
1779 if (! (flags
& RGW_LOOKUP_FLAG_CREATE
))
1782 fhr
= fs
->lookup_fh(parent
, path
, RGWFileHandle::FLAG_CREATE
);
1784 rgw_fh
= get
<0>(fhr
);
1788 struct rgw_file_handle
*rfh
= rgw_fh
->get_fh();
1795 lookup object by handle (NFS style)
// rgw_lookup_handle: resolve a handle from its rgw_fh_hk key (NFS style).
// Visible flow: RGWLibFS::lookup_handle() is called with *fh_hk and the
// result is converted with get_fh().
// NOTE(review): the handling of a failed lookup and the store into *fh are
// not visible in this extract.
1797 int rgw_lookup_handle(struct rgw_fs
*rgw_fs
, struct rgw_fh_hk
*fh_hk
,
1798 struct rgw_file_handle
**fh
, uint32_t flags
)
1800 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
1802 RGWFileHandle
* rgw_fh
= fs
->lookup_handle(*fh_hk
);
1808 struct rgw_file_handle
*rfh
= rgw_fh
->get_fh();
1815 * release file handle
// rgw_fh_rele: release a file handle.
// Visible statements resolve the RGWLibFS and the RGWFileHandle and emit a
// level-17 debug line with the function name and the handle.
// NOTE(review): the statement that actually drops the reference is not
// visible in this extract.
1817 int rgw_fh_rele(struct rgw_fs
*rgw_fs
, struct rgw_file_handle
*fh
,
1820 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
1821 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
1823 lsubdout(fs
->get_context(), rgw
, 17)
1824 << __func__
<< " " << *rgw_fh
1832 get unix attributes for object
1834 int rgw_getattr(struct rgw_fs
*rgw_fs
,
1835 struct rgw_file_handle
*fh
, struct stat
*st
, uint32_t flags
)
1837 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
1838 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
1840 return fs
->getattr(rgw_fh
, st
);
1844 set unix attributes for object
1846 int rgw_setattr(struct rgw_fs
*rgw_fs
,
1847 struct rgw_file_handle
*fh
, struct stat
*st
,
1848 uint32_t mask
, uint32_t flags
)
1850 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
1851 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
1853 return fs
->setattr(rgw_fh
, st
, mask
, flags
);
// rgw_truncate: entry point taking a handle, a size and flags.
// NOTE(review): only the signature is visible in this extract; the body
// cannot be described from here.
1859 int rgw_truncate(struct rgw_fs
*rgw_fs
,
1860 struct rgw_file_handle
*fh
, uint64_t size
, uint32_t flags
)
// rgw_open: open a file handle.
// Visible flow: resolve the RGWFileHandle, test is_file(), then return the
// result of RGWFileHandle::open(flags).
// NOTE(review): posix_flags does not appear in any visible statement; open()
// is given flags.
// NOTE(review): the value returned for a non-file handle is not visible in
// this extract.
1868 int rgw_open(struct rgw_fs
*rgw_fs
,
1869 struct rgw_file_handle
*fh
, uint32_t posix_flags
, uint32_t flags
)
1871 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
1874 * need to track specific opens--at least read opens and
1875 * a write open; we need to know when a write open is returned,
1876 * that closes a write transaction
1878 * for now, we will support single-open only, it's preferable to
1879 * anything we can otherwise do without access to the NFS state
1881 if (! rgw_fh
->is_file())
1884 return rgw_fh
->open(flags
);
// rgw_close: close a file handle.
// Visible flow: resolve the RGWLibFS and the RGWFileHandle, call
// RGWFileHandle::close() keeping its result in rc, then test
// RGW_CLOSE_FLAG_RELE in flags.
// NOTE(review): the action taken when RGW_CLOSE_FLAG_RELE is set and the
// final return are not visible in this extract.
1890 int rgw_close(struct rgw_fs
*rgw_fs
,
1891 struct rgw_file_handle
*fh
, uint32_t flags
)
1893 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
1894 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
1895 int rc
= rgw_fh
->close(/* XXX */);
1897 if (flags
& RGW_CLOSE_FLAG_RELE
)
// rgw_readdir: enumerate parent_fh starting at *offset, calling rcb per entry.
// Visible flow: resolve the parent RGWFileHandle, log the offset at level 15,
// and when *offset is 0 and RGW_READDIR_FLAG_DOTDOT is set emit "." and ".."
// with offsets 1 and 2; the enumeration itself is delegated to
// RGWFileHandle::readdir().
// NOTE(review): the results of the two rcb() calls for "." and ".." are not
// examined in the visible lines.
// NOTE(review): parent validation and the final return are not visible in
// this extract.
1903 int rgw_readdir(struct rgw_fs
*rgw_fs
,
1904 struct rgw_file_handle
*parent_fh
, uint64_t *offset
,
1905 rgw_readdir_cb rcb
, void *cb_arg
, bool *eof
,
1908 RGWFileHandle
* parent
= get_rgwfh(parent_fh
);
1914 lsubdout(parent
->get_fs()->get_context(), rgw
, 15)
1916 << " offset=" << *offset
1919 if ((*offset
== 0) &&
1920 (flags
& RGW_READDIR_FLAG_DOTDOT
)) {
1921 /* send '.' and '..' with their NFS-defined offsets */
1922 rcb(".", cb_arg
, 1, RGW_LOOKUP_FLAG_DIR
);
1923 rcb("..", cb_arg
, 2, RGW_LOOKUP_FLAG_DIR
);
1926 int rc
= parent
->readdir(rcb
, cb_arg
, offset
, eof
, flags
);
1930 /* enumeration continuing from name */
// rgw_readdir2: enumerate parent_fh continuing from a name instead of a
// numeric offset.
// Visible flow: resolve the parent RGWFileHandle, log the start name at
// level 15 (a null name is printed as "(nil)"), optionally emit "." and ".."
// with offsets 1 and 2 when RGW_READDIR_FLAG_DOTDOT is set, then delegate to
// RGWFileHandle::readdir() with the name.
// NOTE(review): the log line labels the value " offset=" although it prints
// the name.
// NOTE(review): the first half of the dot-entry condition, parent validation
// and the final return are not visible in this extract.
1931 int rgw_readdir2(struct rgw_fs
*rgw_fs
,
1932 struct rgw_file_handle
*parent_fh
, const char *name
,
1933 rgw_readdir_cb rcb
, void *cb_arg
, bool *eof
,
1936 RGWFileHandle
* parent
= get_rgwfh(parent_fh
);
1942 lsubdout(parent
->get_fs()->get_context(), rgw
, 15)
1944 << " offset=" << ((name
) ? name
: "(nil)")
1948 (flags
& RGW_READDIR_FLAG_DOTDOT
)) {
1949 /* send '.' and '..' with their NFS-defined offsets */
1950 rcb(".", cb_arg
, 1, RGW_LOOKUP_FLAG_DIR
);
1951 rcb("..", cb_arg
, 2, RGW_LOOKUP_FLAG_DIR
);
1954 int rc
= parent
->readdir(rcb
, cb_arg
, name
, eof
, flags
);
1956 } /* rgw_readdir2 */
1958 /* project offset of dirent name */
1959 int rgw_dirent_offset(struct rgw_fs
*rgw_fs
,
1960 struct rgw_file_handle
*parent_fh
,
1961 const char *name
, int64_t *offset
,
1964 RGWFileHandle
* parent
= get_rgwfh(parent_fh
);
1969 std::string sname
{name
};
1970 int rc
= parent
->offset_of(sname
, offset
, flags
);
1977 int rgw_read(struct rgw_fs
*rgw_fs
,
1978 struct rgw_file_handle
*fh
, uint64_t offset
,
1979 size_t length
, size_t *bytes_read
, void *buffer
,
1982 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
1983 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
1985 return fs
->read(rgw_fh
, offset
, length
, bytes_read
, buffer
, flags
);
// rgw_write: write length bytes from buffer to fh at offset.
// Visible flow: resolve the RGWFileHandle and test is_file(); when the
// handle is not open and RGW_OPEN_FLAG_V3 is set in flags, open it with
// open(flags); finally delegate to RGWFileHandle::write() with the result
// kept in rc.
// NOTE(review): the returns for a non-file handle, for a failed open and for
// a closed handle without RGW_OPEN_FLAG_V3 are not visible in this extract.
1991 int rgw_write(struct rgw_fs
*rgw_fs
,
1992 struct rgw_file_handle
*fh
, uint64_t offset
,
1993 size_t length
, size_t *bytes_written
, void *buffer
,
1996 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2001 if (! rgw_fh
->is_file())
2004 if (! rgw_fh
->is_open()) {
// implicit open is only attempted when RGW_OPEN_FLAG_V3 is set
2005 if (flags
& RGW_OPEN_FLAG_V3
) {
2006 rc
= rgw_fh
->open(flags
);
2013 rc
= rgw_fh
->write(offset
, length
, bytes_written
, buffer
);
2019 read data from file (vector)
// Members of the vector-read helper constructed as RGWReadV below.
// vio points at the rgw_vio array supplied to the constructor; buffers()
// and length() forward to a buffer list member named bl.
// NOTE(review): the class header, the declaration of bl and the constructor
// body (what is done with _bl) are not visible in this extract.
2024 struct rgw_vio
* vio
;
2027 RGWReadV(buffer::list
& _bl
, rgw_vio
* _vio
) : vio(_vio
) {
// accessor for the rgw_vio array pointer given at construction
2031 struct rgw_vio
* get_vio() { return vio
; }
// underlying buffer pointers of bl
2033 const std::list
<buffer::ptr
>& buffers() { return bl
.buffers(); }
// total length of bl, narrowed to unsigned
2035 unsigned /* XXX */ length() { return bl
.length(); }
// rgw_readv_rele: release callback for a vector read.
// Recovers the RGWReadV stored in uio->uio_p1 and frees its storage with
// ::operator delete, which pairs with the raw ::operator new allocation in
// rgw_readv.
// NOTE(review): whether the destructor is invoked before the storage is
// freed is not visible in this extract.
2039 void rgw_readv_rele(struct rgw_uio
*uio
, uint32_t flags
)
2041 RGWReadV
* rdv
= static_cast<RGWReadV
*>(uio
->uio_p1
);
2043 ::operator delete(rdv
);
// rgw_readv: vector read of uio->uio_resid bytes at uio->uio_offset from fh.
// Visible flow: require a file handle, run a RGWGetObjRequest through the
// library frontend, allocate one raw block sized for an RGWReadV plus one
// rgw_vio per buffer, construct the RGWReadV in it, publish count, residual
// length, vio array and release callback through uio, then fill one rgw_vio
// per buffer with its base pointer and length.
// NOTE(review): the declarations of rc, bl and ix, the success test around
// the allocation and the return statements are not visible in this extract.
2046 int rgw_readv(struct rgw_fs
*rgw_fs
,
2047 struct rgw_file_handle
*fh
, rgw_uio
*uio
, uint32_t flags
)
2050 CephContext
* cct
= static_cast<CephContext
*>(rgw_fs
->rgw
);
2051 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2052 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2054 if (! rgw_fh
->is_file())
2060 RGWGetObjRequest
req(cct
, fs
->get_user(), rgw_fh
->bucket_name(),
2061 rgw_fh
->object_name(), uio
->uio_offset
, uio
->uio_resid
,
2063 req
.do_hexdump
= false;
2065 rc
= rgwlib
.get_fe()->execute_req(&req
);
// single raw allocation: the RGWReadV header followed by the rgw_vio array
2068 RGWReadV
* rdv
= static_cast<RGWReadV
*>(
2069 ::operator new(sizeof(RGWReadV
) +
2070 (bl
.buffers().size() * sizeof(struct rgw_vio
))));
// NOTE(review): rdv is RGWReadV*, so rdv+sizeof(RGWReadV) advances by
// sizeof(RGWReadV) elements, not bytes; the resulting address lies beyond
// the block allocated above. The trailing array starts at rdv + 1.
2073 RGWReadV(bl
, reinterpret_cast<rgw_vio
*>(rdv
+sizeof(RGWReadV
)));
2076 uio
->uio_cnt
= rdv
->buffers().size();
2077 uio
->uio_resid
= rdv
->length();
2078 uio
->uio_vio
= rdv
->get_vio();
2079 uio
->uio_rele
= rgw_readv_rele
;
// describe every buffer to the caller without copying the data
2082 auto& buffers
= rdv
->buffers();
2083 for (auto& bp
: buffers
) {
2084 rgw_vio
*vio
= &(uio
->uio_vio
[ix
]);
2085 vio
->vio_base
= const_cast<char*>(bp
.c_str());
2086 vio
->vio_len
= bp
.length();
2087 vio
->vio_u1
= nullptr;
2088 vio
->vio_p1
= nullptr;
2100 write data to file (vector)
// rgw_writev: vector write of the buffers described by uio to fh.
// Visible flow: require a file handle, wrap each rgw_vio in a static buffer
// (buffer::create_static over vio_base and vio_len, no copy in the visible
// call), then run a RGWPutObjRequest through the library frontend.
// NOTE(review): the buffer list the static buffers are appended to, the
// remaining request arguments and the return statements are not visible in
// this extract; oname is computed but its use is not visible either.
2102 int rgw_writev(struct rgw_fs
*rgw_fs
, struct rgw_file_handle
*fh
,
2103 rgw_uio
*uio
, uint32_t flags
)
2108 CephContext
* cct
= static_cast<CephContext
*>(rgw_fs
->rgw
);
2109 RGWLibFS
*fs
= static_cast<RGWLibFS
*>(rgw_fs
->fs_private
);
2110 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2112 if (! rgw_fh
->is_file())
2116 for (unsigned int ix
= 0; ix
< uio
->uio_cnt
; ++ix
) {
2117 rgw_vio
*vio
= &(uio
->uio_vio
[ix
]);
2119 buffer::create_static(vio
->vio_len
,
2120 static_cast<char*>(vio
->vio_base
)));
2123 std::string oname
= rgw_fh
->relative_object_name();
2124 RGWPutObjRequest
req(cct
, fs
->get_user(), rgw_fh
->bucket_name(),
2127 int rc
= rgwlib
.get_fe()->execute_req(&req
);
2129 /* XXX update size (in request) */
// rgw_fsync: entry point taking a mount and a file handle.
// NOTE(review): only the first line of the signature is visible in this
// extract; the remaining parameters and the body cannot be described from
// here.
2137 int rgw_fsync(struct rgw_fs
*rgw_fs
, struct rgw_file_handle
*handle
,
2143 int rgw_commit(struct rgw_fs
*rgw_fs
, struct rgw_file_handle
*fh
,
2144 uint64_t offset
, uint64_t length
, uint32_t flags
)
2146 RGWFileHandle
* rgw_fh
= get_rgwfh(fh
);
2148 return rgw_fh
->commit(offset
, length
, RGWFileHandle::FLAG_NONE
);