1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
7 #include "include/rados/rgw_file.h"
22 #include <boost/intrusive_ptr.hpp>
23 #include <boost/range/adaptor/reversed.hpp>
24 #include <boost/container/flat_map.hpp>
25 #include <boost/variant.hpp>
26 #include <boost/utility/string_ref.hpp>
27 #include <boost/optional.hpp>
29 #include "include/buffer.h"
30 #include "common/cohort_lru.h"
31 #include "common/ceph_timer.h"
32 #include "rgw_common.h"
36 #include "rgw_token.h"
37 #include "rgw_compression.h"
41 * ASSERT_H somehow not defined after all the above (which bring
42 * in common/debug.h [e.g., dout])
44 #include "include/assert.h"
47 #define RGW_RWXMODE (S_IRWXU | S_IRWXG | S_IRWXO)
49 #define RGW_RWMODE (RGW_RWXMODE & \
50 ~(S_IXUSR | S_IXGRP | S_IXOTH))
56 static inline void ignore(T
&&) {}
59 namespace bi
= boost::intrusive
;
63 class RGWWriteRequest
;
/* Strict "earlier than" ordering for timespec values.
 *
 * Seconds are compared first; nanoseconds only break ties between values
 * that share the same tv_sec.  Returns false when both values are equal. */
static inline bool operator <(const struct timespec& lhs,
			      const struct timespec& rhs) {
  if (lhs.tv_sec == rhs.tv_sec)
    return lhs.tv_nsec < rhs.tv_nsec;
  return lhs.tv_sec < rhs.tv_sec;
}
/* Equality for timespec values: true only when both the seconds and the
 * nanoseconds fields match. */
static inline bool operator ==(const struct timespec& lhs,
			       const struct timespec& rhs) {
  return ((lhs.tv_sec == rhs.tv_sec) &&
	  (lhs.tv_nsec == rhs.tv_nsec));
}
81 * The current 64-bit, non-cryptographic hash used here is intended
82 * for prototyping only.
84 * However, the invariant being prototyped is that objects be
85 * identifiable by their hash components alone. We believe this can
86 * be legitimately implemented using 128-bit hash values for bucket and
87 * object components, together with a cluster-resident cryptographic
88 * key. Since an MD5 digest (or a truncated SHA-1 digest) is 128 bits and the (fast),
89 * non-cryptographic CityHash128 hash algorithm takes a 128-bit seed,
90 * speculatively we could use that for the final hash computations.
97 static constexpr uint64_t seed
= 8675309;
99 fh_key() : version(0) {}
101 fh_key(const rgw_fh_hk
& _hk
)
102 : fh_hk(_hk
), version(0) {
106 fh_key(const uint64_t bk
, const uint64_t ok
)
112 fh_key(const uint64_t bk
, const char *_o
)
115 fh_hk
.object
= XXH64(_o
, ::strlen(_o
), seed
);
118 fh_key(const std::string
& _b
, const std::string
& _o
)
120 fh_hk
.bucket
= XXH64(_b
.c_str(), _o
.length(), seed
);
121 fh_hk
.object
= XXH64(_o
.c_str(), _o
.length(), seed
);
124 void encode(buffer::list
& bl
) const {
125 ENCODE_START(2, 1, bl
);
126 ::encode(fh_hk
.bucket
, bl
);
127 ::encode(fh_hk
.object
, bl
);
128 ::encode((uint32_t)2, bl
);
132 void decode(bufferlist::iterator
& bl
) {
134 ::decode(fh_hk
.bucket
, bl
);
135 ::decode(fh_hk
.object
, bl
);
137 ::decode(version
, bl
);
143 WRITE_CLASS_ENCODER(fh_key
);
/* Strict ordering for fh_key values.
 * The bucket hash is compared first; the object hash is consulted only when
 * both bucket hashes are equal.  Only fh_hk.bucket and fh_hk.object take
 * part in the comparison. */
145 inline bool operator<(const fh_key
& lhs
, const fh_key
& rhs
)
147 return ((lhs
.fh_hk
.bucket
< rhs
.fh_hk
.bucket
) ||
148 ((lhs
.fh_hk
.bucket
== rhs
.fh_hk
.bucket
) &&
149 (lhs
.fh_hk
.object
< rhs
.fh_hk
.object
)));
152 inline bool operator>(const fh_key
& lhs
, const fh_key
& rhs
)
/* Equality for fh_key values: true when both the bucket hash and the object
 * hash match.  No other member is compared here. */
157 inline bool operator==(const fh_key
& lhs
, const fh_key
& rhs
)
159 return ((lhs
.fh_hk
.bucket
== rhs
.fh_hk
.bucket
) &&
160 (lhs
.fh_hk
.object
== rhs
.fh_hk
.object
));
/* Inequality for fh_key values, defined as the negation of operator==. */
163 inline bool operator!=(const fh_key
& lhs
, const fh_key
& rhs
)
165 return !(lhs
== rhs
);
/* Less-than-or-equal for fh_key values, composed from the operator< and
 * operator== overloads for fh_key. */
168 inline bool operator<=(const fh_key
& lhs
, const fh_key
& rhs
)
170 return (lhs
< rhs
) || (lhs
== rhs
);
173 using boost::variant
;
174 using boost::container::flat_map
;
176 class RGWFileHandle
: public cohort::lru::Object
178 struct rgw_file_handle fh
;
182 RGWFileHandle
* bucket
;
183 RGWFileHandle
* parent
;
184 /* const */ std::string name
; /* XXX file or bucket name */
185 /* const */ fh_key fhk
;
187 using lock_guard
= std::lock_guard
<std::mutex
>;
188 using unique_lock
= std::unique_lock
<std::mutex
>;
190 /* TODO: keeping just the last marker is sufficient for
191 * nfs-ganesha 2.4.5; in the near future, nfs-ganesha will
192 * be able to hint the name of the next dirent required,
193 * from which we can directly synthesize a RADOS marker.
194 * using marker_cache_t = flat_map<uint64_t, rgw_obj_key>;
201 uint32_t owner_uid
; /* XXX need Unix attr */
202 uint32_t owner_gid
; /* XXX need Unix attr */
204 struct timespec ctime
;
205 struct timespec mtime
;
206 struct timespec atime
;
207 State() : dev(0), size(0), nlink(1), owner_uid(0), owner_gid(0),
208 ctime
{0,0}, mtime
{0,0}, atime
{0,0} {}
212 RGWWriteRequest
* write_req
;
213 file() : write_req(nullptr) {}
219 static constexpr uint32_t FLAG_NONE
= 0x0000;
222 rgw_obj_key last_marker
;
223 struct timespec last_readdir
;
225 directory() : flags(FLAG_NONE
), last_readdir
{0,0} {}
230 boost::variant
<file
, directory
> variant_type
;
236 const static std::string root_name
;
238 static constexpr uint16_t MAX_DEPTH
= 256;
240 static constexpr uint32_t FLAG_NONE
= 0x0000;
241 static constexpr uint32_t FLAG_OPEN
= 0x0001;
242 static constexpr uint32_t FLAG_ROOT
= 0x0002;
243 static constexpr uint32_t FLAG_CREATE
= 0x0004;
244 static constexpr uint32_t FLAG_CREATING
= 0x0008;
245 static constexpr uint32_t FLAG_DIRECTORY
= 0x0010;
246 static constexpr uint32_t FLAG_BUCKET
= 0x0020;
247 static constexpr uint32_t FLAG_LOCK
= 0x0040;
248 static constexpr uint32_t FLAG_DELETED
= 0x0080;
249 static constexpr uint32_t FLAG_UNLINK_THIS
= 0x0100;
250 static constexpr uint32_t FLAG_LOCKED
= 0x0200;
251 static constexpr uint32_t FLAG_STATELESS_OPEN
= 0x0400;
252 static constexpr uint32_t FLAG_EXACT_MATCH
= 0x0800;
254 #define CREATE_FLAGS(x) \
255 ((x) & ~(RGWFileHandle::FLAG_CREATE|RGWFileHandle::FLAG_LOCK))
257 friend class RGWLibFS
;
260 RGWFileHandle(RGWLibFS
* _fs
, uint32_t fs_inst
)
261 : fs(_fs
), bucket(nullptr), parent(nullptr), variant_type
{directory()},
262 depth(0), flags(FLAG_ROOT
)
265 fh
.fh_type
= RGW_FS_TYPE_DIRECTORY
;
266 variant_type
= directory();
269 state
.unix_mode
= RGW_RWXMODE
|S_IFDIR
;
270 /* pointer to self */
271 fh
.fh_private
= this;
274 void init_rootfs(std::string
& fsid
, const std::string
& object_name
) {
276 fh
.fh_hk
.bucket
= XXH64(fsid
.c_str(), fsid
.length(), fh_key::seed
);
277 fh
.fh_hk
.object
= XXH64(object_name
.c_str(), object_name
.length(),
284 RGWFileHandle(RGWLibFS
* fs
, uint32_t fs_inst
, RGWFileHandle
* _parent
,
285 const fh_key
& _fhk
, std::string
& _name
, uint32_t _flags
)
286 : fs(fs
), bucket(nullptr), parent(_parent
), name(std::move(_name
)),
287 fhk(_fhk
), flags(_flags
) {
289 if (parent
->is_root()) {
290 fh
.fh_type
= RGW_FS_TYPE_DIRECTORY
;
291 variant_type
= directory();
292 flags
|= FLAG_BUCKET
;
294 bucket
= parent
->is_bucket() ? parent
296 if (flags
& FLAG_DIRECTORY
) {
297 fh
.fh_type
= RGW_FS_TYPE_DIRECTORY
;
298 variant_type
= directory();
300 fh
.fh_type
= RGW_FS_TYPE_FILE
;
301 variant_type
= file();
305 depth
= parent
->depth
+ 1;
307 /* save constant fhk */
308 fh
.fh_hk
= fhk
.fh_hk
; /* XXX redundant in fh_hk */
313 switch (fh
.fh_type
) {
314 case RGW_FS_TYPE_DIRECTORY
:
315 state
.unix_mode
= RGW_RWXMODE
|S_IFDIR
;
317 case RGW_FS_TYPE_FILE
:
318 state
.unix_mode
= RGW_RWMODE
|S_IFREG
;
323 /* pointer to self */
324 fh
.fh_private
= this;
327 const fh_key
& get_key() const {
331 directory
* get_directory() {
332 return get
<directory
>(&variant_type
);
335 size_t get_size() const { return state
.size
; }
337 const char* stype() {
338 return is_dir() ? "DIR" : "FILE";
341 uint16_t get_depth() const { return depth
; }
343 struct rgw_file_handle
* get_fh() { return &fh
; }
345 RGWLibFS
* get_fs() { return fs
; }
347 RGWFileHandle
* get_parent() { return parent
; }
349 uint32_t get_owner_uid() const { return state
.owner_uid
; }
350 uint32_t get_owner_gid() const { return state
.owner_gid
; }
352 struct timespec
get_ctime() const { return state
.ctime
; }
353 struct timespec
get_mtime() const { return state
.mtime
; }
355 void create_stat(struct stat
* st
, uint32_t mask
) {
356 if (mask
& RGW_SETATTR_UID
)
357 state
.owner_uid
= st
->st_uid
;
359 if (mask
& RGW_SETATTR_GID
)
360 state
.owner_gid
= st
->st_gid
;
362 if (mask
& RGW_SETATTR_MODE
) {
363 switch (fh
.fh_type
) {
364 case RGW_FS_TYPE_DIRECTORY
:
365 state
.unix_mode
= st
->st_mode
|S_IFDIR
;
367 case RGW_FS_TYPE_FILE
:
368 state
.unix_mode
= st
->st_mode
|S_IFREG
;
374 if (mask
& RGW_SETATTR_ATIME
)
375 state
.atime
= st
->st_atim
;
376 if (mask
& RGW_SETATTR_MTIME
)
377 state
.mtime
= st
->st_mtim
;
378 if (mask
& RGW_SETATTR_CTIME
)
379 state
.ctime
= st
->st_ctim
;
382 int stat(struct stat
* st
) {
383 /* partial Unix attrs */
384 memset(st
, 0, sizeof(struct stat
));
385 st
->st_dev
= state
.dev
;
386 st
->st_ino
= fh
.fh_hk
.object
; // XXX
388 st
->st_uid
= state
.owner_uid
;
389 st
->st_gid
= state
.owner_gid
;
391 st
->st_mode
= state
.unix_mode
;
393 #ifdef HAVE_STAT_ST_MTIMESPEC_TV_NSEC
394 st
->st_atimespec
= state
.atime
;
395 st
->st_mtimespec
= state
.mtime
;
396 st
->st_ctimespec
= state
.ctime
;
398 st
->st_atim
= state
.atime
;
399 st
->st_mtim
= state
.mtime
;
400 st
->st_ctim
= state
.ctime
;
403 switch (fh
.fh_type
) {
404 case RGW_FS_TYPE_DIRECTORY
:
405 st
->st_nlink
= state
.nlink
;
407 case RGW_FS_TYPE_FILE
:
409 st
->st_blksize
= 4096;
410 st
->st_size
= state
.size
;
411 st
->st_blocks
= (state
.size
) / 512;
419 const std::string
& bucket_name() const {
424 return bucket
->object_name();
427 const std::string
& object_name() const { return name
; }
429 std::string
full_object_name(bool omit_bucket
= false) const {
431 std::vector
<const std::string
*> segments
;
433 const RGWFileHandle
* tfh
= this;
434 while (tfh
&& !tfh
->is_root() && !(tfh
->is_bucket() && omit_bucket
)) {
435 segments
.push_back(&tfh
->object_name());
436 reserve
+= (1 + tfh
->object_name().length());
440 path
.reserve(reserve
);
441 for (auto& s
: boost::adaptors::reverse(segments
)) {
445 if (!omit_bucket
&& (path
.front() != '/')) // pretty-print
454 inline std::string
relative_object_name() const {
455 return full_object_name(true /* omit_bucket */);
458 inline std::string
format_child_name(const std::string
& cbasename
,
460 std::string child_name
{relative_object_name()};
461 if ((child_name
.size() > 0) &&
462 (child_name
.back() != '/'))
464 child_name
+= cbasename
;
470 inline std::string
make_key_name(const char *name
) const {
471 std::string key_name
{full_object_name()};
472 if (key_name
.length() > 0)
478 fh_key
make_fhk(const std::string
& name
) const {
480 return fh_key(fhk
.fh_hk
.object
, name
.c_str());
482 std::string key_name
= make_key_name(name
.c_str());
483 return fh_key(fhk
.fh_hk
.bucket
, key_name
.c_str());
487 void add_marker(uint64_t off
, const rgw_obj_key
& marker
,
490 directory
* d
= get
<directory
>(&variant_type
);
492 unique_lock
guard(mtx
);
493 d
->last_marker
= marker
;
497 const rgw_obj_key
* find_marker(uint64_t off
) const {
500 const directory
* d
= get
<directory
>(&variant_type
);
502 return &d
->last_marker
;
508 int offset_of(const std::string
& name
, int64_t *offset
, uint32_t flags
) {
509 if (unlikely(! is_dir())) {
512 *offset
= XXH64(name
.c_str(), name
.length(), fh_key::seed
);
/* Handle state predicates.
 * is_open, is_root, is_bucket, creating, deleted and stateless_open each
 * test a single FLAG_* bit in `flags`; the masked value is converted to
 * bool by the return type.
 * is_file and is_dir compare fh.fh_type against the RGW_FS_TYPE_* values.
 * is_object is defined purely as "not a bucket", so it is true for any
 * handle whose FLAG_BUCKET bit is clear. */
516 bool is_open() const { return flags
& FLAG_OPEN
; }
517 bool is_root() const { return flags
& FLAG_ROOT
; }
518 bool is_bucket() const { return flags
& FLAG_BUCKET
; }
519 bool is_object() const { return !is_bucket(); }
520 bool is_file() const { return (fh
.fh_type
== RGW_FS_TYPE_FILE
); }
521 bool is_dir() const { return (fh
.fh_type
== RGW_FS_TYPE_DIRECTORY
); }
522 bool creating() const { return flags
& FLAG_CREATING
; }
523 bool deleted() const { return flags
& FLAG_DELETED
; }
524 bool stateless_open() const { return flags
& FLAG_STATELESS_OPEN
; }
525 bool has_children() const;
527 int open(uint32_t gsh_flags
) {
528 lock_guard
guard(mtx
);
530 if (gsh_flags
& RGW_OPEN_FLAG_V3
) {
531 flags
|= FLAG_STATELESS_OPEN
;
539 int readdir(rgw_readdir_cb rcb
, void *cb_arg
, uint64_t *offset
, bool *eof
,
541 int write(uint64_t off
, size_t len
, size_t *nbytes
, void *buffer
);
543 int commit(uint64_t offset
, uint64_t length
, uint32_t flags
) {
544 /* NFS3 and NFSv4 COMMIT implementation
545 * the current atomic update strategy doesn't actually permit
546 * clients to read-stable until either CLOSE (NFSv4+) or the
547 * expiration of the active write timer (NFS3). In the
548 * interim, the client may send an arbitrary number of COMMIT
549 * operations which must return a success result */
553 int write_finish(uint32_t flags
= FLAG_NONE
);
556 void open_for_create() {
557 lock_guard
guard(mtx
);
558 flags
|= FLAG_CREATING
;
561 void clear_creating() {
562 lock_guard
guard(mtx
);
563 flags
&= ~FLAG_CREATING
;
566 void inc_nlink(const uint64_t n
) {
570 void set_nlink(const uint64_t n
) {
574 void set_size(const size_t size
) {
/* Set all three timestamps of this handle from a single time point.
 * t is converted once with real_clock::to_timespec and stored as ctime;
 * mtime and atime are then copied from that ctime value, so all three end
 * up identical. */
578 void set_times(real_time t
) {
579 state
.ctime
= real_clock::to_timespec(t
);
580 state
.mtime
= state
.ctime
;
581 state
.atime
= state
.ctime
;
584 void set_ctime(const struct timespec
&ts
) {
588 void set_mtime(const struct timespec
&ts
) {
592 void set_atime(const struct timespec
&ts
) {
596 void encode(buffer::list
& bl
) const {
597 ENCODE_START(1, 1, bl
);
598 ::encode(uint32_t(fh
.fh_type
), bl
);
599 ::encode(state
.dev
, bl
);
600 ::encode(state
.size
, bl
);
601 ::encode(state
.nlink
, bl
);
602 ::encode(state
.owner_uid
, bl
);
603 ::encode(state
.owner_gid
, bl
);
604 ::encode(state
.unix_mode
, bl
);
605 for (const auto& t
: { state
.ctime
, state
.mtime
, state
.atime
}) {
606 ::encode(real_clock::from_timespec(t
), bl
);
611 void decode(bufferlist::iterator
& bl
) {
614 ::decode(fh_type
, bl
);
615 assert(fh
.fh_type
== fh_type
);
616 ::decode(state
.dev
, bl
);
617 ::decode(state
.size
, bl
);
618 ::decode(state
.nlink
, bl
);
619 ::decode(state
.owner_uid
, bl
);
620 ::decode(state
.owner_gid
, bl
);
621 ::decode(state
.unix_mode
, bl
);
622 ceph::real_time enc_time
;
623 for (auto t
: { &(state
.ctime
), &(state
.mtime
), &(state
.atime
) }) {
624 ::decode(enc_time
, bl
);
625 *t
= real_clock::to_timespec(enc_time
);
630 void encode_attrs(ceph::buffer::list
& ux_key1
,
631 ceph::buffer::list
& ux_attrs1
);
633 bool decode_attrs(const ceph::buffer::list
* ux_key1
,
634 const ceph::buffer::list
* ux_attrs1
);
638 bool reclaim() override
;
640 typedef cohort::lru::LRU
<std::mutex
> FhLRU
;
644 // for internal ordering
645 bool operator()(const RGWFileHandle
& lhs
, const RGWFileHandle
& rhs
) const
646 { return (lhs
.get_key() < rhs
.get_key()); }
648 // for external search by fh_key
649 bool operator()(const fh_key
& k
, const RGWFileHandle
& fh
) const
650 { return k
< fh
.get_key(); }
652 bool operator()(const RGWFileHandle
& fh
, const fh_key
& k
) const
653 { return fh
.get_key() < k
; }
658 bool operator()(const RGWFileHandle
& lhs
, const RGWFileHandle
& rhs
) const
659 { return (lhs
.get_key() == rhs
.get_key()); }
661 bool operator()(const fh_key
& k
, const RGWFileHandle
& fh
) const
662 { return k
== fh
.get_key(); }
664 bool operator()(const RGWFileHandle
& fh
, const fh_key
& k
) const
665 { return fh
.get_key() == k
; }
668 typedef bi::link_mode
<bi::safe_link
> link_mode
; /* XXX normal */
669 #if defined(FHCACHE_AVL)
670 typedef bi::avl_set_member_hook
<link_mode
> tree_hook_type
;
673 typedef bi::set_member_hook
<link_mode
> tree_hook_type
;
675 tree_hook_type fh_hook
;
677 typedef bi::member_hook
<
678 RGWFileHandle
, tree_hook_type
, &RGWFileHandle::fh_hook
> FhHook
;
680 #if defined(FHCACHE_AVL)
681 typedef bi::avltree
<RGWFileHandle
, bi::compare
<FhLT
>, FhHook
> FHTree
;
683 typedef bi::rbtree
<RGWFileHandle
, bi::compare
<FhLT
>, FhHook
> FhTree
;
685 typedef cohort::lru::TreeX
<RGWFileHandle
, FhTree
, FhLT
, FhEQ
, fh_key
,
688 ~RGWFileHandle() override
;
690 friend std::ostream
& operator<<(std::ostream
&os
,
691 RGWFileHandle
const &rgw_fh
);
693 class Factory
: public cohort::lru::ObjectFactory
698 RGWFileHandle
* parent
;
705 Factory(RGWLibFS
* fs
, uint32_t fs_inst
, RGWFileHandle
* parent
,
706 const fh_key
& fhk
, std::string
& name
, uint32_t flags
)
707 : fs(fs
), fs_inst(fs_inst
), parent(parent
), fhk(fhk
), name(name
),
710 void recycle (cohort::lru::Object
* o
) override
{
711 /* re-use an existing object */
712 o
->~Object(); // call lru::Object virtual dtor
714 new (o
) RGWFileHandle(fs
, fs_inst
, parent
, fhk
, name
, flags
);
717 cohort::lru::Object
* alloc() override
{
718 return new RGWFileHandle(fs
, fs_inst
, parent
, fhk
, name
, flags
);
722 }; /* RGWFileHandle */
724 WRITE_CLASS_ENCODER(RGWFileHandle
);
/* Recover the RGWFileHandle behind a public rgw_file_handle.
 * The fh_private member is converted back with static_cast; the
 * RGWFileHandle constructors in this file store `this` in fh.fh_private.
 * fh is dereferenced without a null check. */
726 static inline RGWFileHandle
* get_rgwfh(struct rgw_file_handle
* fh
) {
727 return static_cast<RGWFileHandle
*>(fh
->fh_private
);
730 static inline enum rgw_fh_type
fh_type_of(uint32_t flags
) {
731 enum rgw_fh_type fh_type
;
732 switch(flags
& RGW_LOOKUP_TYPE_FLAGS
)
734 case RGW_LOOKUP_FLAG_DIR
:
735 fh_type
= RGW_FS_TYPE_DIRECTORY
;
737 case RGW_LOOKUP_FLAG_FILE
:
738 fh_type
= RGW_FS_TYPE_FILE
;
741 fh_type
= RGW_FS_TYPE_NIL
;
746 typedef std::tuple
<RGWFileHandle
*, uint32_t> LookupFHResult
;
747 typedef std::tuple
<RGWFileHandle
*, int> MkObjResult
;
753 RGWFileHandle root_fh
;
754 rgw_fh_callback_t invalidate_cb
;
755 void *invalidate_arg
;
758 mutable std::atomic
<uint64_t> refcnt
;
760 RGWFileHandle::FHCache fh_cache
;
761 RGWFileHandle::FhLRU fh_lru
;
763 std::string uid
; // should match user.user_id, iiuc
766 RGWAccessKey key
; // XXXX acc_key
768 static std::atomic
<uint32_t> fs_inst_counter
;
770 static uint32_t write_completion_interval_s
;
773 using lock_guard
= std::lock_guard
<std::mutex
>;
774 using unique_lock
= std::unique_lock
<std::mutex
>;
778 enum class type
: uint8_t { READDIR
} ;
782 event(type t
, const fh_key
& k
, const struct timespec
& ts
)
783 : t(t
), fhk(k
), ts(ts
) {}
786 friend std::ostream
& operator<<(std::ostream
&os
,
787 RGWLibFS::event
const &ev
);
789 using event_vector
= /* boost::small_vector<event, 16> */
792 struct WriteCompletion
794 RGWFileHandle
& rgw_fh
;
796 WriteCompletion(RGWFileHandle
& _fh
) : rgw_fh(_fh
) {
797 rgw_fh
.get_fs()->ref(&rgw_fh
);
801 rgw_fh
.close(); /* will finish in-progress write */
802 rgw_fh
.get_fs()->unref(&rgw_fh
);
806 static ceph::timer
<ceph::mono_clock
> write_timer
;
810 std::atomic
<uint32_t> flags
;
811 std::deque
<event
> events
;
813 State() : flags(0) {}
815 void push_event(const event
& ev
) {
816 events
.push_back(ev
);
820 uint32_t new_inst() {
821 return ++fs_inst_counter
;
824 friend class RGWFileHandle
;
825 friend class RGWLibProcess
;
829 static constexpr uint32_t FLAG_NONE
= 0x0000;
830 static constexpr uint32_t FLAG_CLOSED
= 0x0001;
835 real_time creation_time
;
836 uint64_t num_entries
;
839 RGWLibFS(CephContext
* _cct
, const char *_uid
, const char *_user_id
,
841 : cct(_cct
), root_fh(this, new_inst()), invalidate_cb(nullptr),
842 invalidate_arg(nullptr), shutdown(false), refcnt(1),
843 fh_cache(cct
->_conf
->rgw_nfs_fhcache_partitions
,
844 cct
->_conf
->rgw_nfs_fhcache_size
),
845 fh_lru(cct
->_conf
->rgw_nfs_lru_lanes
,
846 cct
->_conf
->rgw_nfs_lru_lane_hiwat
),
847 uid(_uid
), key(_user_id
, _key
) {
849 /* no bucket may be named rgw_fs_inst-(.*) */
850 fsid
= RGWFileHandle::root_name
+ "rgw_fs_inst-" +
851 std::to_string(get_inst());
853 root_fh
.init_rootfs(fsid
/* bucket */, RGWFileHandle::root_name
);
855 /* pointer to self */
856 fs
.fs_private
= this;
858 /* expose public root fh */
859 fs
.root_fh
= root_fh
.get_fh();
862 friend void intrusive_ptr_add_ref(const RGWLibFS
* fs
) {
863 fs
->refcnt
.fetch_add(1, std::memory_order_relaxed
);
866 friend void intrusive_ptr_release(const RGWLibFS
* fs
) {
867 if (fs
->refcnt
.fetch_sub(1, std::memory_order_release
) == 0) {
868 std::atomic_thread_fence(std::memory_order_acquire
);
874 intrusive_ptr_add_ref(this);
879 intrusive_ptr_release(this);
882 void stop() { shutdown
= true; }
884 void release_evict(RGWFileHandle
* fh
) {
885 /* remove from cache, releases sentinel ref */
886 fh_cache
.remove(fh
->fh
.fh_hk
.object
, fh
,
887 RGWFileHandle::FHCache::FLAG_LOCK
);
888 /* release call-path ref */
889 (void) fh_lru
.unref(fh
, cohort::lru::FLAG_NONE
);
892 int authorize(RGWRados
* store
) {
893 int ret
= rgw_get_user_info_by_access_key(store
, key
.id
, user
);
895 RGWAccessKey
* key0
= user
.get_key0();
897 (key0
->key
!= key
.key
))
900 return -ERR_USER_SUSPENDED
;
902 /* try external authenticators (ldap for now) */
903 rgw::LDAPHelper
* ldh
= rgwlib
.get_ldh(); /* !nullptr */
905 /* boost filters and/or string_ref may throw on invalid input */
907 token
= rgw::from_base64(key
.id
);
909 token
= std::string("");
911 if (token
.valid() && (ldh
->auth(token
.id
, token
.key
) == 0)) {
912 /* try to store user if it doesn't already exist */
913 if (rgw_get_user_info_by_uid(store
, token
.id
, user
) < 0) {
914 int ret
= rgw_store_user_info(store
, user
, NULL
, NULL
, real_time(),
917 lsubdout(get_context(), rgw
, 10)
918 << "NOTICE: failed to store new user's info: ret=" << ret
927 int register_invalidate(rgw_fh_callback_t cb
, void *arg
, uint32_t flags
) {
929 invalidate_arg
= arg
;
933 /* find RGWFileHandle by id */
934 LookupFHResult
lookup_fh(const fh_key
& fhk
,
935 const uint32_t flags
= RGWFileHandle::FLAG_NONE
) {
938 // cast uint32_t(RGWFileHandle::FLAG_NONE) due to strictness of Clang
939 // the cast transfers an lvalue into an rvalue in the ctor
940 // check the commit message for the full details
941 LookupFHResult fhr
{ nullptr, uint32_t(RGWFileHandle::FLAG_NONE
) };
943 RGWFileHandle::FHCache::Latch lat
;
944 bool fh_locked
= flags
& RGWFileHandle::FLAG_LOCKED
;
948 fh_cache
.find_latch(fhk
.fh_hk
.object
/* partition selector*/,
949 fhk
/* key */, lat
/* serializer */,
950 RGWFileHandle::FHCache::FLAG_LOCK
);
953 if (likely(! fh_locked
))
954 fh
->mtx
.lock(); // XXX !RAII because may-return-LOCKED
955 /* need initial ref from LRU (fast path) */
956 if (! fh_lru
.ref(fh
, cohort::lru::FLAG_INITIAL
)) {
958 if (likely(! fh_locked
))
960 goto retry
; /* !LATCHED */
962 /* LATCHED, LOCKED */
963 if (! (flags
& RGWFileHandle::FLAG_LOCK
))
964 fh
->mtx
.unlock(); /* ! LOCKED */
966 lat
.lock
->unlock(); /* !LATCHED */
969 lsubdout(get_context(), rgw
, 17)
970 << __func__
<< " 1 " << *fh
974 } /* lookup_fh(const fh_key&) */
976 /* find or create an RGWFileHandle */
977 LookupFHResult
lookup_fh(RGWFileHandle
* parent
, const char *name
,
978 const uint32_t flags
= RGWFileHandle::FLAG_NONE
) {
981 // cast uint32_t(RGWFileHandle::FLAG_NONE) due to strictness of Clang
982 // the cast transfers an lvalue into an rvalue in the ctor
983 // check the commit message for the full details
984 LookupFHResult fhr
{ nullptr, uint32_t(RGWFileHandle::FLAG_NONE
) };
986 /* mount is stale? */
987 if (state
.flags
& FLAG_CLOSED
)
990 RGWFileHandle::FHCache::Latch lat
;
991 bool fh_locked
= flags
& RGWFileHandle::FLAG_LOCKED
;
993 std::string obj_name
{name
};
994 std::string key_name
{parent
->make_key_name(name
)};
996 lsubdout(get_context(), rgw
, 10)
997 << __func__
<< " lookup called on "
998 << parent
->object_name() << " for " << key_name
999 << " (" << obj_name
<< ")"
1002 fh_key fhk
= parent
->make_fhk(obj_name
);
1006 fh_cache
.find_latch(fhk
.fh_hk
.object
/* partition selector*/,
1007 fhk
/* key */, lat
/* serializer */,
1008 RGWFileHandle::FHCache::FLAG_LOCK
);
1011 if (likely(! fh_locked
))
1012 fh
->mtx
.lock(); // XXX !RAII because may-return-LOCKED
1013 if (fh
->flags
& RGWFileHandle::FLAG_DELETED
) {
1014 /* for now, delay briefly and retry */
1016 if (likely(! fh_locked
))
1018 std::this_thread::sleep_for(std::chrono::milliseconds(20));
1019 goto retry
; /* !LATCHED */
1021 /* need initial ref from LRU (fast path) */
1022 if (! fh_lru
.ref(fh
, cohort::lru::FLAG_INITIAL
)) {
1024 if (likely(! fh_locked
))
1026 goto retry
; /* !LATCHED */
1028 /* LATCHED, LOCKED */
1029 if (! (flags
& RGWFileHandle::FLAG_LOCK
))
1030 if (likely(! fh_locked
))
1031 fh
->mtx
.unlock(); /* ! LOCKED */
1033 /* make or re-use handle */
1034 RGWFileHandle::Factory
prototype(this, get_inst(), parent
, fhk
,
1035 obj_name
, CREATE_FLAGS(flags
));
1036 fh
= static_cast<RGWFileHandle
*>(
1037 fh_lru
.insert(&prototype
,
1038 cohort::lru::Edge::MRU
,
1039 cohort::lru::FLAG_INITIAL
));
1041 /* lock fh (LATCHED) */
1042 if (flags
& RGWFileHandle::FLAG_LOCK
)
1044 /* inserts, releasing latch */
1045 fh_cache
.insert_latched(fh
, lat
, RGWFileHandle::FHCache::FLAG_UNLOCK
);
1046 get
<1>(fhr
) |= RGWFileHandle::FLAG_CREATE
;
1047 /* ref parent (non-initial ref cannot fail on valid object) */
1048 if (! parent
->is_root()) {
1049 (void) fh_lru
.ref(parent
, cohort::lru::FLAG_NONE
);
1051 goto out
; /* !LATCHED */
1054 goto retry
; /* !LATCHED */
1057 lat
.lock
->unlock(); /* !LATCHED */
1061 lsubdout(get_context(), rgw
, 17)
1062 << __func__
<< " 2 " << *fh
1066 } /* lookup_fh(RGWFileHandle*, const char *, const uint32_t) */
1068 inline void unref(RGWFileHandle
* fh
) {
1069 if (likely(! fh
->is_root())) {
1070 (void) fh_lru
.unref(fh
, cohort::lru::FLAG_NONE
);
1074 inline RGWFileHandle
* ref(RGWFileHandle
* fh
) {
1075 if (likely(! fh
->is_root())) {
1076 fh_lru
.ref(fh
, cohort::lru::FLAG_NONE
);
1081 int getattr(RGWFileHandle
* rgw_fh
, struct stat
* st
);
1083 int setattr(RGWFileHandle
* rgw_fh
, struct stat
* st
, uint32_t mask
,
1086 void update_fhk(RGWFileHandle
*rgw_fh
);
1089 LookupFHResult
stat_bucket(RGWFileHandle
* parent
, const char *path
,
1090 RGWLibFS::BucketStats
& bs
,
1093 LookupFHResult
stat_leaf(RGWFileHandle
* parent
, const char *path
,
1094 enum rgw_fh_type type
= RGW_FS_TYPE_NIL
,
1095 uint32_t flags
= RGWFileHandle::FLAG_NONE
);
1097 int read(RGWFileHandle
* rgw_fh
, uint64_t offset
, size_t length
,
1098 size_t* bytes_read
, void* buffer
, uint32_t flags
);
1100 int rename(RGWFileHandle
* old_fh
, RGWFileHandle
* new_fh
,
1101 const char *old_name
, const char *new_name
);
1103 MkObjResult
create(RGWFileHandle
* parent
, const char *name
, struct stat
*st
,
1104 uint32_t mask
, uint32_t flags
);
1106 MkObjResult
mkdir(RGWFileHandle
* parent
, const char *name
, struct stat
*st
,
1107 uint32_t mask
, uint32_t flags
);
1109 int unlink(RGWFileHandle
* rgw_fh
, const char *name
,
1110 uint32_t flags
= FLAG_NONE
);
1112 /* find existing RGWFileHandle */
1113 RGWFileHandle
* lookup_handle(struct rgw_fh_hk fh_hk
) {
1115 if (state
.flags
& FLAG_CLOSED
)
1118 RGWFileHandle::FHCache::Latch lat
;
1123 fh_cache
.find_latch(fhk
.fh_hk
.object
/* partition selector*/,
1124 fhk
/* key */, lat
/* serializer */,
1125 RGWFileHandle::FHCache::FLAG_LOCK
);
1128 lsubdout(get_context(), rgw
, 0)
1129 << __func__
<< " handle lookup failed <"
1130 << fhk
.fh_hk
.bucket
<< "," << fhk
.fh_hk
.object
<< ">"
1131 << "(need persistent handles)"
1136 if (fh
->flags
& RGWFileHandle::FLAG_DELETED
) {
1137 /* for now, delay briefly and retry */
1139 fh
->mtx
.unlock(); /* !LOCKED */
1140 std::this_thread::sleep_for(std::chrono::milliseconds(20));
1141 goto retry
; /* !LATCHED */
1143 if (! fh_lru
.ref(fh
, cohort::lru::FLAG_INITIAL
)) {
1146 goto retry
; /* !LATCHED */
1149 fh
->mtx
.unlock(); /* !LOCKED */
1151 lat
.lock
->unlock(); /* !LATCHED */
1153 /* special case: lookup root_fh */
1155 if (unlikely(fh_hk
== root_fh
.fh
.fh_hk
)) {
1163 CephContext
* get_context() {
1167 struct rgw_fs
* get_fs() { return &fs
; }
1169 uint32_t get_inst() { return root_fh
.state
.dev
; }
1171 RGWUserInfo
* get_user() { return &user
; }
1177 static inline std::string
make_uri(const std::string
& bucket_name
,
1178 const std::string
& object_name
) {
1179 std::string
uri("/");
1180 uri
.reserve(bucket_name
.length() + object_name
.length() + 2);
1188 read directory content (buckets)
1191 class RGWListBucketsRequest
: public RGWLibRequest
,
1192 public RGWListBuckets
/* RGWOp */
1195 RGWFileHandle
* rgw_fh
;
1202 RGWListBucketsRequest(CephContext
* _cct
, RGWUserInfo
*_user
,
1203 RGWFileHandle
* _rgw_fh
, rgw_readdir_cb _rcb
,
1204 void* _cb_arg
, uint64_t* _offset
)
1205 : RGWLibRequest(_cct
, _user
), rgw_fh(_rgw_fh
), offset(_offset
),
1206 cb_arg(_cb_arg
), rcb(_rcb
), ix(0), d_count(0) {
1207 const auto& mk
= rgw_fh
->find_marker(*offset
);
1214 bool only_bucket() override
{ return false; }
1216 int op_init() override
{
1217 // assign store, s, and dialect_handler
1218 RGWObjectCtx
* rados_ctx
1219 = static_cast<RGWObjectCtx
*>(get_state()->obj_ctx
);
1220 // framework promises to call op_init after parent init
1222 RGWOp::init(rados_ctx
->store
, get_state(), this);
1223 op
= this; // assign self as op: REQUIRED
1227 int header_init() override
{
1228 struct req_state
* s
= get_state();
1229 s
->info
.method
= "GET";
1232 /* XXX derp derp derp */
1233 s
->relative_uri
= "/";
1234 s
->info
.request_uri
= "/"; // XXX
1235 s
->info
.effective_uri
= "/";
1236 s
->info
.request_params
= "";
1237 s
->info
.domain
= ""; /* XXX ? */
1245 int get_params() override
{
1246 limit
= -1; /* no limit */
1250 void send_response_begin(bool has_buckets
) override
{
1254 void send_response_data(RGWUserBuckets
& buckets
) override
{
1257 map
<string
, RGWBucketEnt
>& m
= buckets
.get_buckets();
1258 for (const auto& iter
: m
) {
1259 boost::string_ref marker
{iter
.first
};
1260 const RGWBucketEnt
& ent
= iter
.second
;
1261 if (! this->operator()(ent
.bucket
.name
, marker
)) {
1262 /* caller cannot accept more */
1263 lsubdout(cct
, rgw
, 5) << "ListBuckets rcb failed"
1264 << " dirent=" << ent
.bucket
.name
1265 << " call count=" << ix
1271 } /* send_response_data */
1273 void send_response_end() override
{
1277 int operator()(const boost::string_ref
& name
,
1278 const boost::string_ref
& marker
) {
1279 uint64_t off
= XXH64(name
.data(), name
.length(), fh_key::seed
);
1281 /* update traversal cache */
1282 rgw_fh
->add_marker(off
, rgw_obj_key
{marker
.data(), ""},
1283 RGW_FS_TYPE_DIRECTORY
);
1285 return rcb(name
.data(), cb_arg
, off
, RGW_LOOKUP_FLAG_DIR
);
1289 lsubdout(cct
, rgw
, 15) << "READDIR offset: " << *offset
1290 << " is_truncated: " << is_truncated
1292 return !is_truncated
;
1295 }; /* RGWListBucketsRequest */
1298 read directory content (bucket objects)
// RGWReaddirRequest: directory-listing request built on RGWLibRequest and
// RGWListBucket. Entries found by the listing are reported one at a time to
// the caller-supplied rgw_readdir_cb through operator().
1301 class RGWReaddirRequest
: public RGWLibRequest
,
1302 public RGWListBucket
/* RGWOp */
1305 RGWFileHandle
* rgw_fh
;
// ctor: stores the directory handle, the callback and its opaque argument,
// and the pointer to the caller offset; ix and d_count start at zero.
1312 RGWReaddirRequest(CephContext
* _cct
, RGWUserInfo
*_user
,
1313 RGWFileHandle
* _rgw_fh
, rgw_readdir_cb _rcb
,
1314 void* _cb_arg
, uint64_t* _offset
)
1315 : RGWLibRequest(_cct
, _user
), rgw_fh(_rgw_fh
), offset(_offset
),
1316 cb_arg(_cb_arg
), rcb(_rcb
), ix(0), d_count(0) {
// look up the marker the handle recorded for the offset the caller passed in
1317 const auto& mk
= rgw_fh
->find_marker(*offset
);
1321 default_max
= 1000; // XXX was being omitted
1325 bool only_bucket() override
{ return true; }
// op_init: take the RGWObjectCtx from the request state, initialise the
// RGWOp base with its store, and register this object as the op.
1327 int op_init() override
{
1328 // assign store, s, and dialect_handler
1329 RGWObjectCtx
* rados_ctx
1330 = static_cast<RGWObjectCtx
*>(get_state()->obj_ctx
);
1331 // framework promises to call op_init after parent init
1333 RGWOp::init(rados_ctx
->store
, get_state(), this);
1334 op
= this; // assign self as op: REQUIRED
// header_init: describe the request as a GET on the bucket root URI
// (slash + bucket name + slash) in the request state.
1338 int header_init() override
{
1339 struct req_state
* s
= get_state();
1340 s
->info
.method
= "GET";
1343 /* XXX derp derp derp */
1344 std::string uri
= "/" + rgw_fh
->bucket_name() + "/";
1345 s
->relative_uri
= uri
;
1346 s
->info
.request_uri
= uri
; // XXX
1347 s
->info
.effective_uri
= uri
;
1348 s
->info
.request_params
= "";
1349 s
->info
.domain
= ""; /* XXX ? */
// the listing prefix is the relative object name of the directory handle
1354 prefix
= rgw_fh
->relative_object_name();
1355 if (prefix
.length() > 0)
// operator(): report one directory entry. The short name is hashed with
// XXH64 (seeded by fh_key::seed) to form the cookie, the (cookie, marker,
// type) triple is recorded on the handle, and the callback is invoked with
// a DIR or FILE lookup flag chosen from type.
1362 int operator()(const boost::string_ref name
, const rgw_obj_key
& marker
,
1365 assert(name
.length() > 0); // XXX
1367 /* hash offset of name in parent (short name) for NFS readdir cookie */
1368 uint64_t off
= XXH64(name
.data(), name
.length(), fh_key::seed
);
1370 /* update traversal cache */
1371 rgw_fh
->add_marker(off
, marker
, type
);
// name.data() is handed to the callback as a plain pointer; the callers
// below only ever trim a prefix from the string_ref before calling here
1373 return rcb(name
.data(), cb_arg
, off
,
1374 (type
== RGW_FS_TYPE_DIRECTORY
) ?
1375 RGW_LOOKUP_FLAG_DIR
:
1376 RGW_LOOKUP_FLAG_FILE
);
1379 int get_params() override
{
// send_response: walk the listed objects, then the common prefixes, and
// pass each one to operator() as a file or a directory respectively.
1384 void send_response() override
{
1385 struct req_state
* s
= get_state();
1386 for (const auto& iter
: objs
) {
1388 boost::string_ref sref
{iter
.key
.name
};
1390 lsubdout(cct
, rgw
, 15) << "readdir objects prefix: " << prefix
1391 << " obj: " << sref
<< dendl
;
// keep only the path component that follows the last slash
1393 size_t last_del
= sref
.find_last_of('/');
1394 if (last_del
!= string::npos
)
1395 sref
.remove_prefix(last_del
+1);
1397 /* leaf directory? */
1401 lsubdout(cct
, rgw
, 15) << "RGWReaddirRequest "
1403 << "list uri=" << s
->relative_uri
<< " "
1404 << " prefix=" << prefix
<< " "
1405 << " obj path=" << iter
.key
.name
1406 << " (" << sref
<< ")" << ""
// a false result from operator() is handled as the caller being full
1409 if(! this->operator()(sref
, next_marker
, RGW_FS_TYPE_FILE
)) {
1410 /* caller cannot accept more */
1411 lsubdout(cct
, rgw
, 5) << "readdir rcb failed"
1412 << " dirent=" << sref
.data()
1413 << " call count=" << ix
// second pass: common prefixes are reported as directories
1419 for (auto& iter
: common_prefixes
) {
1421 lsubdout(cct
, rgw
, 15) << "readdir common prefixes prefix: " << prefix
1422 << " iter first: " << iter
.first
1423 << " iter second: " << iter
.second
1426 /* XXX aieee--I have seen this case! */
1427 if (iter
.first
== "/")
1430 /* it's safest to modify the element in place--a suffix-modifying
1431 * string_ref operation is problematic since ULP rgw_file callers
1432 * will ultimately need a c-string */
// NOTE(review): this edits the key of a common_prefixes element through
// const_cast; confirm the container is not consulted by key afterwards
1433 if (iter
.first
.back() == '/')
1434 const_cast<std::string
&>(iter
.first
).pop_back();
1436 boost::string_ref sref
{iter
.first
};
// same trimming as for objects: drop everything up to the last slash
1438 size_t last_del
= sref
.find_last_of('/');
1439 if (last_del
!= string::npos
)
1440 sref
.remove_prefix(last_del
+1);
1442 lsubdout(cct
, rgw
, 15) << "RGWReaddirRequest "
1444 << "list uri=" << s
->relative_uri
<< " "
1445 << " prefix=" << prefix
<< " "
1446 << " cpref=" << sref
// NOTE(review): unlike the object loop, the result of operator() is not
// checked on this visible line -- confirm that is intended
1449 this->operator()(sref
, next_marker
, RGW_FS_TYPE_DIRECTORY
);
1454 virtual void send_versioned_response() {
1459 lsubdout(cct
, rgw
, 15) << "READDIR offset: " << *offset
1460 << " next marker: " << next_marker
1461 << " is_truncated: " << is_truncated
// yields true once the listing is no longer truncated
1463 return !is_truncated
;
1466 }; /* RGWReaddirRequest */
1469 dir has-children predicate (bucket objects)
// RGWRMdirCheck: has-children predicate for a directory handle. It lists the
// bucket under the prefix of the handle and sets has_children when the
// listing shows something other than the directory itself.
1472 class RGWRMdirCheck
: public RGWLibRequest
,
1473 public RGWListBucket
/* RGWOp */
1476 const RGWFileHandle
* rgw_fh
;
// ctor: keeps the handle; valid and has_children both start out false
1480 RGWRMdirCheck (CephContext
* _cct
, RGWUserInfo
*_user
,
1481 const RGWFileHandle
* _rgw_fh
)
1482 : RGWLibRequest(_cct
, _user
), rgw_fh(_rgw_fh
), valid(false),
1483 has_children(false) {
1488 bool only_bucket() override
{ return true; }
// op_init: take the RGWObjectCtx from the request state, initialise the
// RGWOp base with its store, and register this object as the op.
1490 int op_init() override
{
1491 // assign store, s, and dialect_handler
1492 RGWObjectCtx
* rados_ctx
1493 = static_cast<RGWObjectCtx
*>(get_state()->obj_ctx
);
1494 // framework promises to call op_init after parent init
1496 RGWOp::init(rados_ctx
->store
, get_state(), this);
1497 op
= this; // assign self as op: REQUIRED
// header_init: describe the request as a GET on the bucket root URI
1501 int header_init() override
{
1502 struct req_state
* s
= get_state();
1503 s
->info
.method
= "GET";
1506 std::string uri
= "/" + rgw_fh
->bucket_name() + "/";
1507 s
->relative_uri
= uri
;
1508 s
->info
.request_uri
= uri
;
1509 s
->info
.effective_uri
= uri
;
1510 s
->info
.request_params
= "";
1511 s
->info
.domain
= ""; /* XXX ? */
// the listing prefix is the relative object name of the directory handle
1515 prefix
= rgw_fh
->relative_object_name();
1516 if (prefix
.length() > 0)
1523 int get_params() override
{
// send_response: decide has_children from the listing results
1528 void send_response() override
{
// more than one object, or a first object whose name is not the prefix
// itself, is treated as a child (part of this condition is not visible)
1530 if ((objs
.size() > 1) ||
1532 (objs
.front().key
.name
!= prefix
))) {
1533 has_children
= true;
1536 for (auto& iter
: common_prefixes
) {
1537 /* readdir never produces a name for this case */
// presumably the bare-slash prefix is skipped here -- the statement that
// follows the test is not visible; verify
1538 if (iter
.first
== "/")
1540 has_children
= true;
1545 virtual void send_versioned_response() {
1549 }; /* RGWRMdirCheck */
// RGWCreateBucketRequest: bucket-creation request built on RGWLibRequest and
// RGWCreateBucket.
1555 class RGWCreateBucketRequest
: public RGWLibRequest
,
1556 public RGWCreateBucket
/* RGWOp */
// held by reference: the string passed to the ctor must outlive the request
1559 const std::string
& bucket_name
;
1561 RGWCreateBucketRequest(CephContext
* _cct
, RGWUserInfo
*_user
,
1562 std::string
& _bname
)
1563 : RGWLibRequest(_cct
, _user
), bucket_name(_bname
) {
1567 bool only_bucket() override
{ return false; }
1569 int read_permissions(RGWOp
* op_obj
) override
{
1570 /* we ARE a 'create bucket' request (cf. rgw_rest.cc, ll. 1305-6) */
// op_init: take the RGWObjectCtx from the request state, initialise the
// RGWOp base with its store, and register this object as the op.
1574 int op_init() override
{
1575 // assign store, s, and dialect_handler
1576 RGWObjectCtx
* rados_ctx
1577 = static_cast<RGWObjectCtx
*>(get_state()->obj_ctx
);
1578 // framework promises to call op_init after parent init
1580 RGWOp::init(rados_ctx
->store
, get_state(), this);
1581 op
= this; // assign self as op: REQUIRED
// header_init: describe the request as a PUT on slash + bucket name
1585 int header_init() override
{
1587 struct req_state
* s
= get_state();
1588 s
->info
.method
= "PUT";
1591 string uri
= "/" + bucket_name
;
1592 /* XXX derp derp derp */
1593 s
->relative_uri
= uri
;
1594 s
->info
.request_uri
= uri
; // XXX
1595 s
->info
.effective_uri
= uri
;
1596 s
->info
.request_params
= "";
1597 s
->info
.domain
= ""; /* XXX ? */
// get_params: build a canned S3 ACL from the owner, bucket owner and
// canned_acl already present in the request state.
1605 int get_params() override
{
1606 struct req_state
* s
= get_state();
1607 RGWAccessControlPolicy_S3
s3policy(s
->cct
);
1608 /* we don't have (any) headers, so just create canned ACLs */
1609 int ret
= s3policy
.create_canned(s
->owner
, s
->bucket_owner
, s
->canned_acl
);
1614 void send_response() override
{
1615 /* TODO: something (maybe) */
1617 }; /* RGWCreateBucketRequest */
// RGWDeleteBucketRequest: bucket-removal request built on RGWLibRequest and
// RGWDeleteBucket.
1623 class RGWDeleteBucketRequest
: public RGWLibRequest
,
1624 public RGWDeleteBucket
/* RGWOp */
// held by reference: the string passed to the ctor must outlive the request
1627 const std::string
& bucket_name
;
1629 RGWDeleteBucketRequest(CephContext
* _cct
, RGWUserInfo
*_user
,
1630 std::string
& _bname
)
1631 : RGWLibRequest(_cct
, _user
), bucket_name(_bname
) {
1635 bool only_bucket() override
{ return true; }
// op_init: take the RGWObjectCtx from the request state, initialise the
// RGWOp base with its store, and register this object as the op.
1637 int op_init() override
{
1638 // assign store, s, and dialect_handler
1639 RGWObjectCtx
* rados_ctx
1640 = static_cast<RGWObjectCtx
*>(get_state()->obj_ctx
);
1641 // framework promises to call op_init after parent init
1643 RGWOp::init(rados_ctx
->store
, get_state(), this);
1644 op
= this; // assign self as op: REQUIRED
// header_init: describe the request as a DELETE on slash + bucket name
1648 int header_init() override
{
1650 struct req_state
* s
= get_state();
1651 s
->info
.method
= "DELETE";
1654 string uri
= "/" + bucket_name
;
1655 /* XXX derp derp derp */
1656 s
->relative_uri
= uri
;
1657 s
->info
.request_uri
= uri
; // XXX
1658 s
->info
.effective_uri
= uri
;
1659 s
->info
.request_params
= "";
1660 s
->info
.domain
= ""; /* XXX ? */
// nothing is sent back for this request
1668 void send_response() override
{}
1670 }; /* RGWDeleteBucketRequest */
// RGWPutObjRequest: object put built on RGWLibRequest and RGWPutObj, with
// the payload supplied up front as a buffer::list.
1675 class RGWPutObjRequest
: public RGWLibRequest
,
1676 public RGWPutObj
/* RGWOp */
// names and payload are held by reference and must outlive the request
1679 const std::string
& bucket_name
;
1680 const std::string
& obj_name
;
1681 buffer::list
& bl
; /* XXX */
1682 size_t bytes_written
;
1684 RGWPutObjRequest(CephContext
* _cct
, RGWUserInfo
*_user
,
1685 const std::string
& _bname
, const std::string
& _oname
,
1687 : RGWLibRequest(_cct
, _user
), bucket_name(_bname
), obj_name(_oname
),
1688 bl(_bl
), bytes_written(0) {
1692 bool only_bucket() override
{ return true; }
// op_init: take the RGWObjectCtx from the request state, initialise the
// RGWOp base with its store, register this object as the op, then check
// the object name with valid_s3_object_name.
1694 int op_init() override
{
1695 // assign store, s, and dialect_handler
1696 RGWObjectCtx
* rados_ctx
1697 = static_cast<RGWObjectCtx
*>(get_state()->obj_ctx
);
1698 // framework promises to call op_init after parent init
1700 RGWOp::init(rados_ctx
->store
, get_state(), this);
1701 op
= this; // assign self as op: REQUIRED
1703 int rc
= valid_s3_object_name(obj_name
);
// header_init: describe the request as a PUT on the URI that make_uri
// builds from the bucket and object names.
1710 int header_init() override
{
1712 struct req_state
* s
= get_state();
1713 s
->info
.method
= "PUT";
1716 /* XXX derp derp derp */
1717 std::string uri
= make_uri(bucket_name
, obj_name
);
1718 s
->relative_uri
= uri
;
1719 s
->info
.request_uri
= uri
; // XXX
1720 s
->info
.effective_uri
= uri
;
1721 s
->info
.request_params
= "";
1722 s
->info
.domain
= ""; /* XXX ? */
1724 /* XXX required in RGWOp::execute() */
1725 s
->content_length
= bl
.length();
// get_params: build a canned S3 ACL from the owner, bucket owner and
// canned_acl already present in the request state.
1733 int get_params() override
{
1734 struct req_state
* s
= get_state();
1735 RGWAccessControlPolicy_S3
s3policy(s
->cct
);
1736 /* we don't have (any) headers, so just create canned ACLs */
1737 int ret
= s3policy
.create_canned(s
->owner
, s
->bucket_owner
, s
->canned_acl
);
// get_data: accounts the length of _bl into bytes_written (the statement
// that fills _bl is not visible here)
1742 int get_data(buffer::list
& _bl
) override
{
1743 /* XXX for now, use sharing semantics */
1745 uint32_t len
= _bl
.length();
1746 bytes_written
+= len
;
1750 void send_response() override
{}
// verify_params: reject payloads longer than the rgw_max_put_size setting
1752 int verify_params() override
{
1753 if (bl
.length() > cct
->_conf
->rgw_max_put_size
)
1754 return -ERR_TOO_LARGE
;
1758 }; /* RGWPutObjRequest */
// RGWReadRequest: ranged object read built on RGWLibRequest and RGWGetObj.
// Data delivered by the op is copied into the caller buffer (ulp_buffer).
1764 class RGWReadRequest
: public RGWLibRequest
,
1765 public RGWGetObj
/* RGWOp */
1768 RGWFileHandle
* rgw_fh
;
1771 size_t read_resid
; /* initialize to len, <= sizeof(ulp_buffer) */
1772 bool do_hexdump
= false;
// ctor: nread starts at zero and read_resid at len; the range fields of the
// RGWGetObj base are filled in directly from off and len.
1774 RGWReadRequest(CephContext
* _cct
, RGWUserInfo
*_user
,
1775 RGWFileHandle
* _rgw_fh
, uint64_t off
, uint64_t len
,
1777 : RGWLibRequest(_cct
, _user
), rgw_fh(_rgw_fh
), ulp_buffer(_ulp_buffer
),
1778 nread(0), read_resid(len
) {
1781 /* fixup RGWGetObj (already know range parameters) */
1782 RGWGetObj::range_parsed
= true;
1783 RGWGetObj::get_data
= true; // XXX
1784 RGWGetObj::partial_content
= true;
1785 RGWGetObj::ofs
= off
;
// NOTE(review): if RGWGetObj::end is an inclusive offset this range is one
// byte longer than len; the copy below is still bounded by read_resid --
// confirm
1786 RGWGetObj::end
= off
+ len
;
1789 bool only_bucket() override
{ return false; }
// op_init: take the RGWObjectCtx from the request state, initialise the
// RGWOp base with its store, and register this object as the op.
1791 int op_init() override
{
1792 // assign store, s, and dialect_handler
1793 RGWObjectCtx
* rados_ctx
1794 = static_cast<RGWObjectCtx
*>(get_state()->obj_ctx
);
1795 // framework promises to call op_init after parent init
1797 RGWOp::init(rados_ctx
->store
, get_state(), this);
1798 op
= this; // assign self as op: REQUIRED
// header_init: describe the request as a GET on the URI that make_uri
// builds from the bucket name and relative object name of the handle.
1802 int header_init() override
{
1804 struct req_state
* s
= get_state();
1805 s
->info
.method
= "GET";
1808 /* XXX derp derp derp */
1809 s
->relative_uri
= make_uri(rgw_fh
->bucket_name(),
1810 rgw_fh
->relative_object_name());
1811 s
->info
.request_uri
= s
->relative_uri
; // XXX
1812 s
->info
.effective_uri
= s
->relative_uri
;
1813 s
->info
.request_params
= "";
1814 s
->info
.domain
= ""; /* XXX ? */
1822 int get_params() override
{
// send_response_data: copy the delivered buffers into ulp_buffer starting
// at position nread, skipping bl_off leading bytes and never copying more
// than read_resid bytes.
1826 int send_response_data(ceph::buffer::list
& bl
, off_t bl_off
,
1827 off_t bl_len
) override
{
1829 for (auto& bp
: bl
.buffers()) {
1830 /* if for some reason bl_off indicates the start-of-data is not at
1831 * the current buffer::ptr, skip it and account */
1832 if (bl_off
> bp
.length()) {
1833 bl_off
-= bp
.length();
1836 /* read no more than read_resid */
1837 bytes
= std::min(read_resid
, size_t(bp
.length()-bl_off
));
1838 memcpy(static_cast<char*>(ulp_buffer
)+nread
, bp
.c_str()+bl_off
, bytes
);
1839 read_resid
-= bytes
; /* reduce read_resid by bytes read */
1842 /* stop if we have no residual ulp_buffer */
1849 int send_response_data_error() override
{
1850 /* S3 implementation just sends nothing--there is no side effect
1851 * to simulate here */
1855 }; /* RGWReadRequest */
// RGWDeleteObjRequest: object-removal request built on RGWLibRequest and
// RGWDeleteObj.
1861 class RGWDeleteObjRequest
: public RGWLibRequest
,
1862 public RGWDeleteObj
/* RGWOp */
// both names are held by reference and must outlive the request
1865 const std::string
& bucket_name
;
1866 const std::string
& obj_name
;
1868 RGWDeleteObjRequest(CephContext
* _cct
, RGWUserInfo
*_user
,
1869 const std::string
& _bname
, const std::string
& _oname
)
1870 : RGWLibRequest(_cct
, _user
), bucket_name(_bname
), obj_name(_oname
) {
1874 bool only_bucket() override
{ return true; }
// op_init: take the RGWObjectCtx from the request state, initialise the
// RGWOp base with its store, and register this object as the op.
1876 int op_init() override
{
1877 // assign store, s, and dialect_handler
1878 RGWObjectCtx
* rados_ctx
1879 = static_cast<RGWObjectCtx
*>(get_state()->obj_ctx
);
1880 // framework promises to call op_init after parent init
1882 RGWOp::init(rados_ctx
->store
, get_state(), this);
1883 op
= this; // assign self as op: REQUIRED
// header_init: describe the request as a DELETE on the URI that make_uri
// builds from the bucket and object names.
1887 int header_init() override
{
1889 struct req_state
* s
= get_state();
1890 s
->info
.method
= "DELETE";
1893 /* XXX derp derp derp */
1894 std::string uri
= make_uri(bucket_name
, obj_name
);
1895 s
->relative_uri
= uri
;
1896 s
->info
.request_uri
= uri
; // XXX
1897 s
->info
.effective_uri
= uri
;
1898 s
->info
.request_params
= "";
1899 s
->info
.domain
= ""; /* XXX ? */
// nothing is sent back for this request
1907 void send_response() override
{}
1909 }; /* RGWDeleteObjRequest */
// RGWStatObjRequest: object stat built on RGWLibRequest and RGWGetObj. The
// base op is run with get_data switched off; size, times and attributes are
// then exposed through the accessors below.
1911 class RGWStatObjRequest
: public RGWLibRequest
,
1912 public RGWGetObj
/* RGWOp */
// both names are held by reference and must outlive the request
1915 const std::string
& bucket_name
;
1916 const std::string
& obj_name
;
1920 static constexpr uint32_t FLAG_NONE
= 0x000;
1922 RGWStatObjRequest(CephContext
* _cct
, RGWUserInfo
*_user
,
1923 const std::string
& _bname
, const std::string
& _oname
,
1925 : RGWLibRequest(_cct
, _user
), bucket_name(_bname
), obj_name(_oname
),
1926 _size(0), flags(_flags
) {
1929 /* fixup RGWGetObj (already know range parameters) */
1930 RGWGetObj::range_parsed
= true;
1931 RGWGetObj::get_data
= false; // XXX
1932 RGWGetObj::partial_content
= true;
1934 RGWGetObj::end
= UINT64_MAX
;
// op identity reported for this request
1937 const string
name() override
{ return "stat_obj"; }
1938 RGWOpType
get_type() override
{ return RGW_OP_STAT_OBJ
; }
1940 real_time
get_mtime() const {
1945 uint64_t get_size() { return _size
; }
// ctime and mtime both report mod_time
1946 real_time
ctime() { return mod_time
; } // XXX
1947 real_time
mtime() { return mod_time
; }
1948 std::map
<string
, bufferlist
>& get_attrs() { return attrs
; }
// get_attr: pointer to the stored value for key k, or nullptr when absent
1950 buffer::list
* get_attr(const std::string
& k
) {
1951 auto iter
= attrs
.find(k
);
1952 return (iter
!= attrs
.end()) ? &(iter
->second
) : nullptr;
1955 bool only_bucket() override
{ return false; }
// op_init: take the RGWObjectCtx from the request state, initialise the
// RGWOp base with its store, and register this object as the op.
1957 int op_init() override
{
1958 // assign store, s, and dialect_handler
1959 RGWObjectCtx
* rados_ctx
1960 = static_cast<RGWObjectCtx
*>(get_state()->obj_ctx
);
1961 // framework promises to call op_init after parent init
1963 RGWOp::init(rados_ctx
->store
, get_state(), this);
1964 op
= this; // assign self as op: REQUIRED
// header_init: describe the request as a GET on the URI that make_uri
// builds from the bucket and object names.
1968 int header_init() override
{
1970 struct req_state
* s
= get_state();
1971 s
->info
.method
= "GET";
1974 /* XXX derp derp derp */
1975 s
->relative_uri
= make_uri(bucket_name
, obj_name
);
1976 s
->info
.request_uri
= s
->relative_uri
; // XXX
1977 s
->info
.effective_uri
= s
->relative_uri
;
1978 s
->info
.request_params
= "";
1979 s
->info
.domain
= ""; /* XXX ? */
1987 int get_params() override
{
1991 int send_response_data(ceph::buffer::list
& _bl
, off_t s_off
,
1992 off_t e_off
) override
{
1994 /* XXX save attrs? */
1998 int send_response_data_error() override
{
// execute: run the base op, then capture obj_size from the request state
2003 void execute() override
{
2004 RGWGetObj::execute();
2005 _size
= get_state()->obj_size
;
2008 }; /* RGWStatObjRequest */
// RGWStatBucketRequest: bucket stat built on RGWLibRequest and RGWStatBucket.
// Results are written into the caller-owned RGWLibFS::BucketStats and the
// bucket attributes are kept in attrs.
2010 class RGWStatBucketRequest
: public RGWLibRequest
,
2011 public RGWStatBucket
/* RGWOp */
2015 std::map
<std::string
, buffer::list
> attrs
;
// held by reference: the stats object must outlive the request
2016 RGWLibFS::BucketStats
& bs
;
2018 RGWStatBucketRequest(CephContext
* _cct
, RGWUserInfo
*_user
,
2019 const std::string
& _path
,
2020 RGWLibFS::BucketStats
& _stats
)
2021 : RGWLibRequest(_cct
, _user
), bs(_stats
) {
// get_attr: pointer to the stored value for key k, or nullptr when absent
2026 buffer::list
* get_attr(const std::string
& k
) {
2027 auto iter
= attrs
.find(k
);
2028 return (iter
!= attrs
.end()) ? &(iter
->second
) : nullptr;
2031 real_time
get_ctime() const {
2032 return bucket
.creation_time
;
2035 bool only_bucket() override
{ return false; }
// op_init: take the RGWObjectCtx from the request state, initialise the
// RGWOp base with its store, and register this object as the op.
2037 int op_init() override
{
2038 // assign store, s, and dialect_handler
2039 RGWObjectCtx
* rados_ctx
2040 = static_cast<RGWObjectCtx
*>(get_state()->obj_ctx
);
2041 // framework promises to call op_init after parent init
2043 RGWOp::init(rados_ctx
->store
, get_state(), this);
2044 op
= this; // assign self as op: REQUIRED
// header_init: describe the request as a GET; every URI field is set from
// uri, whose declaration is not visible in this excerpt.
2048 int header_init() override
{
2050 struct req_state
* s
= get_state();
2051 s
->info
.method
= "GET";
2054 /* XXX derp derp derp */
2055 s
->relative_uri
= uri
;
2056 s
->info
.request_uri
= uri
; // XXX
2057 s
->info
.effective_uri
= uri
;
2058 s
->info
.request_params
= "";
2059 s
->info
.domain
= ""; /* XXX ? */
2067 virtual int get_params() {
// send_response: take the creation time from bucket_info, copy the size and
// entry counters into bs, and swap the bucket attributes out of the request
// state into attrs.
2071 void send_response() override
{
2072 bucket
.creation_time
= get_state()->bucket_info
.creation_time
;
2073 bs
.size
= bucket
.size
;
2074 bs
.size_rounded
= bucket
.size_rounded
;
2075 bs
.creation_time
= bucket
.creation_time
;
2076 bs
.num_entries
= bucket
.count
;
2077 std::swap(attrs
, get_state()->bucket_attrs
);
// true when the stat produced a non-empty bucket name
2081 return (bucket
.bucket
.name
.length() > 0);
2084 }; /* RGWStatBucketRequest */
// RGWStatLeafRequest: looks for the leaf name path under a directory handle
// by listing the bucket with the prefix of the handle; the outcome is kept
// in the matched, is_dir and exact_matched flags.
2086 class RGWStatLeafRequest
: public RGWLibRequest
,
2087 public RGWListBucket
/* RGWOp */
2090 RGWFileHandle
* rgw_fh
;
// ctor: all three result flags start out false
2096 RGWStatLeafRequest(CephContext
* _cct
, RGWUserInfo
*_user
,
2097 RGWFileHandle
* _rgw_fh
, const std::string
& _path
)
2098 : RGWLibRequest(_cct
, _user
), rgw_fh(_rgw_fh
), path(_path
),
2099 matched(false), is_dir(false), exact_matched(false) {
// NOTE(review): the trailing comment describes a logical maximum of two
// entries while the value is 1000 -- confirm which one is intended
2100 default_max
= 1000; // logical max {"foo", "foo/"}
2104 bool only_bucket() override
{ return true; }
// op_init: take the RGWObjectCtx from the request state, initialise the
// RGWOp base with its store, and register this object as the op.
2106 int op_init() override
{
2107 // assign store, s, and dialect_handler
2108 RGWObjectCtx
* rados_ctx
2109 = static_cast<RGWObjectCtx
*>(get_state()->obj_ctx
);
2110 // framework promises to call op_init after parent init
2112 RGWOp::init(rados_ctx
->store
, get_state(), this);
2113 op
= this; // assign self as op: REQUIRED
// header_init: describe the request as a GET on the bucket root URI
2117 int header_init() override
{
2119 struct req_state
* s
= get_state();
2120 s
->info
.method
= "GET";
2123 /* XXX derp derp derp */
2124 std::string uri
= "/" + rgw_fh
->bucket_name() + "/";
2125 s
->relative_uri
= uri
;
2126 s
->info
.request_uri
= uri
; // XXX
2127 s
->info
.effective_uri
= uri
;
2128 s
->info
.request_params
= "";
2129 s
->info
.domain
= ""; /* XXX ? */
// the listing prefix starts from the relative object name of the handle
2134 prefix
= rgw_fh
->relative_object_name();
2135 if (prefix
.length() > 0)
2143 int get_params() override
{
// send_response: examine listed objects first, then common prefixes, and
// log each candidate next to the target path.
2148 void send_response() override
{
2149 struct req_state
* s
= get_state();
2151 for (const auto& iter
: objs
) {
2152 auto& name
= iter
.key
.name
;
2153 lsubdout(cct
, rgw
, 15) << "RGWStatLeafRequest "
2155 << "list uri=" << s
->relative_uri
<< " "
2156 << " prefix=" << prefix
<< " "
2157 << " obj path=" << name
<< ""
2158 << " target = " << path
<< ""
2160 /* XXX is there a missing match-dir case (trailing '/')? */
2163 exact_matched
= true;
2167 for (auto& iter
: common_prefixes
) {
2168 auto& name
= iter
.first
;
2169 lsubdout(cct
, rgw
, 15) << "RGWStatLeafRequest "
2171 << "list uri=" << s
->relative_uri
<< " "
2172 << " prefix=" << prefix
<< " "
2173 << " pref path=" << name
<< " (not chomped)"
2174 << " target = " << path
<< ""
2182 virtual void send_versioned_response() {
2185 }; /* RGWStatLeafRequest */
// RGWWriteRequest: multi-step object write built on RGWLibContinuedReq and
// RGWPutObj. Data arrives through put_data; exec_start, exec_continue and
// exec_finish are declared here and defined elsewhere.
2191 class RGWWriteRequest
: public RGWLibContinuedReq
,
2192 public RGWPutObj
/* RGWOp */
// both names are held by reference and must outlive the request
2195 const std::string
& bucket_name
;
2196 const std::string
& obj_name
;
2197 RGWFileHandle
* rgw_fh
;
2198 RGWPutObjProcessor
* processor
;
2199 RGWPutObjDataProcessor
* filter
;
2200 boost::optional
<RGWPutObj_Compress
> compressor
;
2201 CompressorRef plugin
;
2206 size_t bytes_written
;
// ctor: pointers start null, counters at zero and flags false; unlike the
// other request types, header_init and init_from_header are invoked here.
2210 RGWWriteRequest(CephContext
* _cct
, RGWUserInfo
*_user
, RGWFileHandle
* _fh
,
2211 const std::string
& _bname
, const std::string
& _oname
)
2212 : RGWLibContinuedReq(_cct
, _user
), bucket_name(_bname
), obj_name(_oname
),
2213 rgw_fh(_fh
), processor(nullptr), filter(nullptr), real_ofs(0),
2214 bytes_written(0), multipart(false), eio(false) {
2216 int ret
= header_init();
2218 ret
= init_from_header(get_state());
2223 bool only_bucket() override
{ return true; }
// op_init: take the RGWObjectCtx from the request state, initialise the
// RGWOp base with its store, and register this object as the op.
2225 int op_init() override
{
2226 // assign store, s, and dialect_handler
2227 RGWObjectCtx
* rados_ctx
2228 = static_cast<RGWObjectCtx
*>(get_state()->obj_ctx
);
2229 // framework promises to call op_init after parent init
2231 RGWOp::init(rados_ctx
->store
, get_state(), this);
2232 op
= this; // assign self as op: REQUIRED
// header_init: describe the request as a PUT on the URI that make_uri
// builds from the bucket and object names.
2236 int header_init() override
{
2238 struct req_state
* s
= get_state();
2239 s
->info
.method
= "PUT";
2242 /* XXX derp derp derp */
2243 std::string uri
= make_uri(bucket_name
, obj_name
);
2244 s
->relative_uri
= uri
;
2245 s
->info
.request_uri
= uri
; // XXX
2246 s
->info
.effective_uri
= uri
;
2247 s
->info
.request_params
= "";
2248 s
->info
.domain
= ""; /* XXX ? */
// select_processor: allocate an atomic put processor sized by the
// rgw_obj_stripe_size setting and hand it the olh epoch and version id.
// The local named processor shadows the member of the same name.
2256 RGWPutObjProcessor
*select_processor(RGWObjectCtx
& obj_ctx
,
2257 bool *is_multipart
) override
{
2258 struct req_state
* s
= get_state();
2259 uint64_t part_size
= s
->cct
->_conf
->rgw_obj_stripe_size
;
2260 RGWPutObjProcessor_Atomic
*processor
=
2261 new RGWPutObjProcessor_Atomic(obj_ctx
, s
->bucket_info
, s
->bucket
,
2262 s
->object
.name
, part_size
, s
->req_id
,
2263 s
->bucket_info
.versioning_enabled());
2264 processor
->set_olh_epoch(olh_epoch
);
2265 processor
->set_version_id(version_id
);
// get_params: build a canned S3 ACL from the owner, bucket owner and
// canned_acl already present in the request state.
2269 int get_params() override
{
2270 struct req_state
* s
= get_state();
2271 RGWAccessControlPolicy_S3
s3policy(s
->cct
);
2272 /* we don't have (any) headers, so just create canned ACLs */
2273 int ret
= s3policy
.create_canned(s
->owner
, s
->bucket_owner
, s
->canned_acl
);
// get_data: accounts the length of the pending data member into
// bytes_written (the statement that fills _bl is not visible here)
2278 int get_data(buffer::list
& _bl
) override
{
2279 /* XXX for now, use sharing semantics */
2280 uint32_t len
= data
.length();
2282 bytes_written
+= len
;
// put_data: accept the next chunk at offset off. off is compared with the
// running real_ofs first (the body of that test is not visible here), then
// real_ofs advances by the data length and ofs records off.
2286 void put_data(off_t off
, buffer::list
& _bl
) {
2287 if (off
!= real_ofs
) {
2291 real_ofs
+= data
.length();
2292 ofs
= off
; /* consumed in exec_continue() */
2295 int exec_start() override
;
2296 int exec_continue() override
;
2297 int exec_finish() override
;
2299 void send_response() override
{}
2301 int verify_params() override
{
2304 }; /* RGWWriteRequest */
// RGWCopyObjRequest: object copy built on RGWLibRequest and RGWCopyObj.
// Source and destination are each given as a parent handle plus a child
// name.
2309 class RGWCopyObjRequest
: public RGWLibRequest
,
2310 public RGWCopyObj
/* RGWOp */
2313 RGWFileHandle
* src_parent
;
2314 RGWFileHandle
* dst_parent
;
// both names are held by reference and must outlive the request
2315 const std::string
& src_name
;
2316 const std::string
& dst_name
;
2318 RGWCopyObjRequest(CephContext
* _cct
, RGWUserInfo
*_user
,
2319 RGWFileHandle
* _src_parent
, RGWFileHandle
* _dst_parent
,
2320 const std::string
& _src_name
, const std::string
& _dst_name
)
2321 : RGWLibRequest(_cct
, _user
), src_parent(_src_parent
),
2322 dst_parent(_dst_parent
), src_name(_src_name
), dst_name(_dst_name
) {
2323 /* all requests have this */
2326 /* allow this request to replace selected attrs */
2327 attrs_mod
= RGWRados::ATTRSMOD_MERGE
;
2330 bool only_bucket() override
{ return true; }
// op_init: take the RGWObjectCtx from the request state, initialise the
// RGWOp base with its store, and register this object as the op.
2332 int op_init() override
{
2333 // assign store, s, and dialect_handler
2334 RGWObjectCtx
* rados_ctx
2335 = static_cast<RGWObjectCtx
*>(get_state()->obj_ctx
);
2336 // framework promises to call op_init after parent init
2338 RGWOp::init(rados_ctx
->store
, get_state(), this);
2339 op
= this; // assign self as op: REQUIRED
// header_init: fill in the source and destination bucket and object names
// from the two parent handles, validate the destination object name, and
// attach an encoded fh_key for the destination as the RGW_ATTR_UNIX_KEY1
// attribute.
2344 int header_init() override
{
2346 struct req_state
* s
= get_state();
2347 s
->info
.method
= "PUT"; // XXX check
2350 src_bucket_name
= src_parent
->bucket_name();
2351 // need s->src_bucket_name?
2352 src_object
.name
= src_parent
->format_child_name(src_name
, false);
2353 // need s->src_object?
2355 dest_bucket_name
= dst_parent
->bucket_name();
2356 // need s->bucket.name?
2357 dest_object
= dst_parent
->format_child_name(dst_name
, false);
2358 // need s->object_name?
2360 int rc
= valid_s3_object_name(dest_object
);
2364 /* XXX and fixup key attr (could optimize w/string ref and
2366 buffer::list ux_key
;
2367 fh_key fhk
= dst_parent
->make_fhk(dst_name
);
2368 rgw::encode(fhk
, ux_key
);
2369 emplace_attr(RGW_ATTR_UNIX_KEY1
, std::move(ux_key
));
2371 #if 0 /* XXX needed? */
2372 s
->relative_uri
= uri
;
2373 s
->info
.request_uri
= uri
; // XXX
2374 s
->info
.effective_uri
= uri
;
2375 s
->info
.request_params
= "";
2376 s
->info
.domain
= ""; /* XXX ? */
// get_params: build a canned S3 ACL from the request state and use it as
// the destination policy.
2385 int get_params() override
{
2386 struct req_state
* s
= get_state();
2387 RGWAccessControlPolicy_S3
s3policy(s
->cct
);
2388 /* we don't have (any) headers, so just create canned ACLs */
2389 int ret
= s3policy
.create_canned(s
->owner
, s
->bucket_owner
, s
->canned_acl
);
2390 dest_policy
= s3policy
;
// neither the final nor the partial response sends anything
2394 void send_response() override
{}
2395 void send_partial_response(off_t ofs
) override
{}
2397 }; /* RGWCopyObjRequest */
// RGWSetAttrsRequest: attribute-update request built on RGWLibRequest and
// RGWSetAttrs.
2399 class RGWSetAttrsRequest
: public RGWLibRequest
,
2400 public RGWSetAttrs
/* RGWOp */
// both names are held by reference and must outlive the request
2403 const std::string
& bucket_name
;
2404 const std::string
& obj_name
;
2406 RGWSetAttrsRequest(CephContext
* _cct
, RGWUserInfo
*_user
,
2407 const std::string
& _bname
, const std::string
& _oname
)
2408 : RGWLibRequest(_cct
, _user
), bucket_name(_bname
), obj_name(_oname
) {
2412 bool only_bucket() override
{ return false; }
// op_init: take the RGWObjectCtx from the request state, initialise the
// RGWOp base with its store, and register this object as the op.
2414 int op_init() override
{
2415 // assign store, s, and dialect_handler
2416 RGWObjectCtx
* rados_ctx
2417 = static_cast<RGWObjectCtx
*>(get_state()->obj_ctx
);
2418 // framework promises to call op_init after parent init
2420 RGWOp::init(rados_ctx
->store
, get_state(), this);
2421 op
= this; // assign self as op: REQUIRED
// header_init: describe the request as a PUT on the URI that make_uri
// builds from the bucket and object names.
2425 int header_init() override
{
2427 struct req_state
* s
= get_state();
2428 s
->info
.method
= "PUT";
2431 /* XXX derp derp derp */
2432 std::string uri
= make_uri(bucket_name
, obj_name
);
2433 s
->relative_uri
= uri
;
2434 s
->info
.request_uri
= uri
; // XXX
2435 s
->info
.effective_uri
= uri
;
2436 s
->info
.request_params
= "";
2437 s
->info
.domain
= ""; /* XXX ? */
2445 int get_params() override
{
// nothing is sent back for this request
2449 void send_response() override
{}
2451 }; /* RGWSetAttrsRequest */
2453 } /* namespace rgw */
2455 #endif /* RGW_FILE_H */