1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
7 #include "include/rados/rgw_file.h"
22 #include <boost/intrusive_ptr.hpp>
23 #include <boost/range/adaptor/reversed.hpp>
24 #include <boost/container/flat_map.hpp>
25 #include <boost/variant.hpp>
26 #include <boost/utility/string_ref.hpp>
27 #include <boost/optional.hpp>
29 #include "include/buffer.h"
30 #include "common/cohort_lru.h"
31 #include "common/ceph_timer.h"
32 #include "rgw_common.h"
36 #include "rgw_token.h"
37 #include "rgw_compression.h"
41 * ASSERT_H somehow not defined after all the above (which bring
42 * in common/debug.h [e.g., dout])
44 #include "include/assert.h"
47 #define RGW_RWXMODE (S_IRWXU | S_IRWXG | S_IRWXO)
49 #define RGW_RWMODE (RGW_RWXMODE & \
50 ~(S_IXUSR | S_IXGRP | S_IXOTH))
56 static inline void ignore(T
&&) {}
59 namespace bi
= boost::intrusive
;
63 class RGWWriteRequest
;
/**
 * Ordering for timespec values.
 *
 * Seconds are compared first; nanoseconds only break a tie in seconds.
 *
 * @param lhs left operand
 * @param rhs right operand
 * @return true when lhs denotes an earlier instant than rhs
 */
static inline bool operator <(const struct timespec& lhs,
			      const struct timespec& rhs) {
  /* differing seconds decide the result on their own */
  if (lhs.tv_sec != rhs.tv_sec)
    return lhs.tv_sec < rhs.tv_sec;
  return lhs.tv_nsec < rhs.tv_nsec;
}
/**
 * Equality for timespec values.
 *
 * @param lhs left operand
 * @param rhs right operand
 * @return true when both the seconds and the nanoseconds fields match
 */
static inline bool operator ==(const struct timespec& lhs,
			       const struct timespec& rhs) {
  if (lhs.tv_sec != rhs.tv_sec)
    return false;
  return lhs.tv_nsec == rhs.tv_nsec;
}
/*
 * The current 64-bit, non-cryptographic hash used here is intended
 * for prototyping only.
 *
 * However, the invariant being prototyped is that objects be
 * identifiable by their hash components alone.  We believe this can
 * be legitimately implemented using 128-bit hash values for bucket and
 * object components, together with a cluster-resident cryptographic
 * key.  Since an MD5 or SHA-1 key is 128 bits and the (fast),
 * non-cryptographic CityHash128 hash algorithm takes a 128-bit seed,
 * speculatively we could use that for the final hash computations.
 */
97 static constexpr uint64_t seed
= 8675309;
99 fh_key() : version(0) {}
101 fh_key(const rgw_fh_hk
& _hk
)
102 : fh_hk(_hk
), version(0) {
106 fh_key(const uint64_t bk
, const uint64_t ok
)
112 fh_key(const uint64_t bk
, const char *_o
)
115 fh_hk
.object
= XXH64(_o
, ::strlen(_o
), seed
);
118 fh_key(const std::string
& _b
, const std::string
& _o
)
120 fh_hk
.bucket
= XXH64(_b
.c_str(), _o
.length(), seed
);
121 fh_hk
.object
= XXH64(_o
.c_str(), _o
.length(), seed
);
124 void encode(buffer::list
& bl
) const {
125 ENCODE_START(2, 1, bl
);
126 ::encode(fh_hk
.bucket
, bl
);
127 ::encode(fh_hk
.object
, bl
);
128 ::encode((uint32_t)2, bl
);
132 void decode(bufferlist::iterator
& bl
) {
134 ::decode(fh_hk
.bucket
, bl
);
135 ::decode(fh_hk
.object
, bl
);
137 ::decode(version
, bl
);
143 WRITE_CLASS_ENCODER(fh_key
);
145 inline bool operator<(const fh_key
& lhs
, const fh_key
& rhs
)
147 return ((lhs
.fh_hk
.bucket
< rhs
.fh_hk
.bucket
) ||
148 ((lhs
.fh_hk
.bucket
== rhs
.fh_hk
.bucket
) &&
149 (lhs
.fh_hk
.object
< rhs
.fh_hk
.object
)));
152 inline bool operator>(const fh_key
& lhs
, const fh_key
& rhs
)
157 inline bool operator==(const fh_key
& lhs
, const fh_key
& rhs
)
159 return ((lhs
.fh_hk
.bucket
== rhs
.fh_hk
.bucket
) &&
160 (lhs
.fh_hk
.object
== rhs
.fh_hk
.object
));
163 inline bool operator!=(const fh_key
& lhs
, const fh_key
& rhs
)
165 return !(lhs
== rhs
);
168 inline bool operator<=(const fh_key
& lhs
, const fh_key
& rhs
)
170 return (lhs
< rhs
) || (lhs
== rhs
);
173 using boost::variant
;
174 using boost::container::flat_map
;
176 typedef std::tuple
<bool, bool> DecodeAttrsResult
;
178 class RGWFileHandle
: public cohort::lru::Object
180 struct rgw_file_handle fh
;
184 RGWFileHandle
* bucket
;
185 RGWFileHandle
* parent
;
186 /* const */ std::string name
; /* XXX file or bucket name */
187 /* const */ fh_key fhk
;
189 using lock_guard
= std::lock_guard
<std::mutex
>;
190 using unique_lock
= std::unique_lock
<std::mutex
>;
192 /* TODO: keeping just the last marker is sufficient for
193 * nfs-ganesha 2.4.5; in the near future, nfs-ganesha will
194 * be able to hint the name of the next dirent required,
195 * from which we can directly synthesize a RADOS marker.
196 * using marker_cache_t = flat_map<uint64_t, rgw_obj_key>;
203 uint32_t owner_uid
; /* XXX need Unix attr */
204 uint32_t owner_gid
; /* XXX need Unix attr */
206 struct timespec ctime
;
207 struct timespec mtime
;
208 struct timespec atime
;
210 State() : dev(0), size(0), nlink(1), owner_uid(0), owner_gid(0),
211 ctime
{0,0}, mtime
{0,0}, atime
{0,0}, version(0) {}
215 RGWWriteRequest
* write_req
;
216 file() : write_req(nullptr) {}
222 static constexpr uint32_t FLAG_NONE
= 0x0000;
225 rgw_obj_key last_marker
;
226 struct timespec last_readdir
;
228 directory() : flags(FLAG_NONE
), last_readdir
{0,0} {}
233 boost::variant
<file
, directory
> variant_type
;
239 const static std::string root_name
;
241 static constexpr uint16_t MAX_DEPTH
= 256;
243 static constexpr uint32_t FLAG_NONE
= 0x0000;
244 static constexpr uint32_t FLAG_OPEN
= 0x0001;
245 static constexpr uint32_t FLAG_ROOT
= 0x0002;
246 static constexpr uint32_t FLAG_CREATE
= 0x0004;
247 static constexpr uint32_t FLAG_CREATING
= 0x0008;
248 static constexpr uint32_t FLAG_DIRECTORY
= 0x0010;
249 static constexpr uint32_t FLAG_BUCKET
= 0x0020;
250 static constexpr uint32_t FLAG_LOCK
= 0x0040;
251 static constexpr uint32_t FLAG_DELETED
= 0x0080;
252 static constexpr uint32_t FLAG_UNLINK_THIS
= 0x0100;
253 static constexpr uint32_t FLAG_LOCKED
= 0x0200;
254 static constexpr uint32_t FLAG_STATELESS_OPEN
= 0x0400;
255 static constexpr uint32_t FLAG_EXACT_MATCH
= 0x0800;
256 static constexpr uint32_t FLAG_MOUNT
= 0x1000;
258 #define CREATE_FLAGS(x) \
259 ((x) & ~(RGWFileHandle::FLAG_CREATE|RGWFileHandle::FLAG_LOCK))
261 friend class RGWLibFS
;
264 RGWFileHandle(RGWLibFS
* _fs
)
265 : fs(_fs
), bucket(nullptr), parent(nullptr), variant_type
{directory()},
266 depth(0), flags(FLAG_NONE
)
269 fh
.fh_type
= RGW_FS_TYPE_DIRECTORY
;
270 variant_type
= directory();
272 state
.unix_mode
= RGW_RWXMODE
|S_IFDIR
;
273 /* pointer to self */
274 fh
.fh_private
= this;
277 uint64_t init_fsid(std::string
& uid
) {
278 return XXH64(uid
.c_str(), uid
.length(), fh_key::seed
);
281 void init_rootfs(std::string
& fsid
, const std::string
& object_name
,
284 fh
.fh_hk
.bucket
= XXH64(fsid
.c_str(), fsid
.length(), fh_key::seed
);
285 fh
.fh_hk
.object
= XXH64(object_name
.c_str(), object_name
.length(),
290 state
.dev
= init_fsid(fsid
);
293 flags
|= RGWFileHandle::FLAG_BUCKET
| RGWFileHandle::FLAG_MOUNT
;
297 flags
|= RGWFileHandle::FLAG_ROOT
| RGWFileHandle::FLAG_MOUNT
;
302 RGWFileHandle(RGWLibFS
* _fs
, RGWFileHandle
* _parent
,
303 const fh_key
& _fhk
, std::string
& _name
, uint32_t _flags
)
304 : fs(_fs
), bucket(nullptr), parent(_parent
), name(std::move(_name
)),
305 fhk(_fhk
), flags(_flags
) {
307 if (parent
->is_root()) {
308 fh
.fh_type
= RGW_FS_TYPE_DIRECTORY
;
309 variant_type
= directory();
310 flags
|= FLAG_BUCKET
;
312 bucket
= parent
->is_bucket() ? parent
314 if (flags
& FLAG_DIRECTORY
) {
315 fh
.fh_type
= RGW_FS_TYPE_DIRECTORY
;
316 variant_type
= directory();
318 fh
.fh_type
= RGW_FS_TYPE_FILE
;
319 variant_type
= file();
323 depth
= parent
->depth
+ 1;
325 /* save constant fhk */
326 fh
.fh_hk
= fhk
.fh_hk
; /* XXX redundant in fh_hk */
328 /* inherits parent's fsid */
329 state
.dev
= parent
->state
.dev
;
331 switch (fh
.fh_type
) {
332 case RGW_FS_TYPE_DIRECTORY
:
333 state
.unix_mode
= RGW_RWXMODE
|S_IFDIR
;
335 case RGW_FS_TYPE_FILE
:
336 state
.unix_mode
= RGW_RWMODE
|S_IFREG
;
341 /* pointer to self */
342 fh
.fh_private
= this;
345 const fh_key
& get_key() const {
349 directory
* get_directory() {
350 return get
<directory
>(&variant_type
);
353 size_t get_size() const { return state
.size
; }
355 const char* stype() {
356 return is_dir() ? "DIR" : "FILE";
359 uint16_t get_depth() const { return depth
; }
361 struct rgw_file_handle
* get_fh() { return &fh
; }
363 RGWLibFS
* get_fs() { return fs
; }
365 RGWFileHandle
* get_parent() { return parent
; }
367 uint32_t get_owner_uid() const { return state
.owner_uid
; }
368 uint32_t get_owner_gid() const { return state
.owner_gid
; }
370 struct timespec
get_ctime() const { return state
.ctime
; }
371 struct timespec
get_mtime() const { return state
.mtime
; }
373 void create_stat(struct stat
* st
, uint32_t mask
) {
374 if (mask
& RGW_SETATTR_UID
)
375 state
.owner_uid
= st
->st_uid
;
377 if (mask
& RGW_SETATTR_GID
)
378 state
.owner_gid
= st
->st_gid
;
380 if (mask
& RGW_SETATTR_MODE
) {
381 switch (fh
.fh_type
) {
382 case RGW_FS_TYPE_DIRECTORY
:
383 state
.unix_mode
= st
->st_mode
|S_IFDIR
;
385 case RGW_FS_TYPE_FILE
:
386 state
.unix_mode
= st
->st_mode
|S_IFREG
;
392 if (mask
& RGW_SETATTR_ATIME
)
393 state
.atime
= st
->st_atim
;
394 if (mask
& RGW_SETATTR_MTIME
)
395 state
.mtime
= st
->st_mtim
;
396 if (mask
& RGW_SETATTR_CTIME
)
397 state
.ctime
= st
->st_ctim
;
400 int stat(struct stat
* st
) {
401 /* partial Unix attrs */
402 memset(st
, 0, sizeof(struct stat
));
403 st
->st_dev
= state
.dev
;
404 st
->st_ino
= fh
.fh_hk
.object
; // XXX
406 st
->st_uid
= state
.owner_uid
;
407 st
->st_gid
= state
.owner_gid
;
409 st
->st_mode
= state
.unix_mode
;
411 #ifdef HAVE_STAT_ST_MTIMESPEC_TV_NSEC
412 st
->st_atimespec
= state
.atime
;
413 st
->st_mtimespec
= state
.mtime
;
414 st
->st_ctimespec
= state
.ctime
;
416 st
->st_atim
= state
.atime
;
417 st
->st_mtim
= state
.mtime
;
418 st
->st_ctim
= state
.ctime
;
421 switch (fh
.fh_type
) {
422 case RGW_FS_TYPE_DIRECTORY
:
423 st
->st_nlink
= state
.nlink
;
425 case RGW_FS_TYPE_FILE
:
427 st
->st_blksize
= 4096;
428 st
->st_size
= state
.size
;
429 st
->st_blocks
= (state
.size
) / 512;
437 const std::string
& bucket_name() const {
442 return bucket
->object_name();
445 const std::string
& object_name() const { return name
; }
447 std::string
full_object_name(bool omit_bucket
= false) const {
449 std::vector
<const std::string
*> segments
;
451 const RGWFileHandle
* tfh
= this;
452 while (tfh
&& !tfh
->is_root() && !(tfh
->is_bucket() && omit_bucket
)) {
453 segments
.push_back(&tfh
->object_name());
454 reserve
+= (1 + tfh
->object_name().length());
458 path
.reserve(reserve
);
459 for (auto& s
: boost::adaptors::reverse(segments
)) {
463 if (!omit_bucket
&& (path
.front() != '/')) // pretty-print
472 inline std::string
relative_object_name() const {
473 return full_object_name(true /* omit_bucket */);
476 inline std::string
format_child_name(const std::string
& cbasename
,
478 std::string child_name
{relative_object_name()};
479 if ((child_name
.size() > 0) &&
480 (child_name
.back() != '/'))
482 child_name
+= cbasename
;
488 inline std::string
make_key_name(const char *name
) const {
489 std::string key_name
{full_object_name()};
490 if (key_name
.length() > 0)
496 fh_key
make_fhk(const std::string
& name
) const {
498 return fh_key(fhk
.fh_hk
.object
, name
.c_str());
500 std::string key_name
= make_key_name(name
.c_str());
501 return fh_key(fhk
.fh_hk
.bucket
, key_name
.c_str());
505 void add_marker(uint64_t off
, const rgw_obj_key
& marker
,
508 directory
* d
= get
<directory
>(&variant_type
);
510 unique_lock
guard(mtx
);
511 d
->last_marker
= marker
;
515 const rgw_obj_key
* find_marker(uint64_t off
) const {
518 const directory
* d
= get
<directory
>(&variant_type
);
520 return &d
->last_marker
;
526 int offset_of(const std::string
& name
, int64_t *offset
, uint32_t flags
) {
527 if (unlikely(! is_dir())) {
530 *offset
= XXH64(name
.c_str(), name
.length(), fh_key::seed
);
534 bool is_open() const { return flags
& FLAG_OPEN
; }
535 bool is_root() const { return flags
& FLAG_ROOT
; }
536 bool is_mount() const { return flags
& FLAG_MOUNT
; }
537 bool is_bucket() const { return flags
& FLAG_BUCKET
; }
538 bool is_object() const { return !is_bucket(); }
539 bool is_file() const { return (fh
.fh_type
== RGW_FS_TYPE_FILE
); }
540 bool is_dir() const { return (fh
.fh_type
== RGW_FS_TYPE_DIRECTORY
); }
541 bool creating() const { return flags
& FLAG_CREATING
; }
542 bool deleted() const { return flags
& FLAG_DELETED
; }
543 bool stateless_open() const { return flags
& FLAG_STATELESS_OPEN
; }
544 bool has_children() const;
546 int open(uint32_t gsh_flags
) {
547 lock_guard
guard(mtx
);
549 if (gsh_flags
& RGW_OPEN_FLAG_V3
) {
550 flags
|= FLAG_STATELESS_OPEN
;
558 typedef boost::variant
<uint64_t*, const char*> readdir_offset
;
560 int readdir(rgw_readdir_cb rcb
, void *cb_arg
, readdir_offset offset
,
561 bool *eof
, uint32_t flags
);
563 int write(uint64_t off
, size_t len
, size_t *nbytes
, void *buffer
);
565 int commit(uint64_t offset
, uint64_t length
, uint32_t flags
) {
566 /* NFS3 and NFSv4 COMMIT implementation
567 * the current atomic update strategy doesn't actually permit
568 * clients to read-stable until either CLOSE (NFSv4+) or the
569 * expiration of the active write timer (NFS3). In the
570 * interim, the client may send an arbitrary number of COMMIT
571 * operations which must return a success result */
575 int write_finish(uint32_t flags
= FLAG_NONE
);
578 void open_for_create() {
579 lock_guard
guard(mtx
);
580 flags
|= FLAG_CREATING
;
583 void clear_creating() {
584 lock_guard
guard(mtx
);
585 flags
&= ~FLAG_CREATING
;
588 void inc_nlink(const uint64_t n
) {
592 void set_nlink(const uint64_t n
) {
596 void set_size(const size_t size
) {
600 void set_times(real_time t
) {
601 state
.ctime
= real_clock::to_timespec(t
);
602 state
.mtime
= state
.ctime
;
603 state
.atime
= state
.ctime
;
606 void set_ctime(const struct timespec
&ts
) {
610 void set_mtime(const struct timespec
&ts
) {
614 void set_atime(const struct timespec
&ts
) {
618 void encode(buffer::list
& bl
) const {
619 ENCODE_START(2, 1, bl
);
620 ::encode(uint32_t(fh
.fh_type
), bl
);
621 ::encode(state
.dev
, bl
);
622 ::encode(state
.size
, bl
);
623 ::encode(state
.nlink
, bl
);
624 ::encode(state
.owner_uid
, bl
);
625 ::encode(state
.owner_gid
, bl
);
626 ::encode(state
.unix_mode
, bl
);
627 for (const auto& t
: { state
.ctime
, state
.mtime
, state
.atime
}) {
628 ::encode(real_clock::from_timespec(t
), bl
);
630 ::encode((uint32_t)2, bl
);
634 void decode(bufferlist::iterator
& bl
) {
637 ::decode(fh_type
, bl
);
638 assert(fh
.fh_type
== fh_type
);
639 ::decode(state
.dev
, bl
);
640 ::decode(state
.size
, bl
);
641 ::decode(state
.nlink
, bl
);
642 ::decode(state
.owner_uid
, bl
);
643 ::decode(state
.owner_gid
, bl
);
644 ::decode(state
.unix_mode
, bl
);
645 ceph::real_time enc_time
;
646 for (auto t
: { &(state
.ctime
), &(state
.mtime
), &(state
.atime
) }) {
647 ::decode(enc_time
, bl
);
648 *t
= real_clock::to_timespec(enc_time
);
651 ::decode(state
.version
, bl
);
656 void encode_attrs(ceph::buffer::list
& ux_key1
,
657 ceph::buffer::list
& ux_attrs1
);
659 DecodeAttrsResult
decode_attrs(const ceph::buffer::list
* ux_key1
,
660 const ceph::buffer::list
* ux_attrs1
);
664 bool reclaim() override
;
666 typedef cohort::lru::LRU
<std::mutex
> FhLRU
;
670 // for internal ordering
671 bool operator()(const RGWFileHandle
& lhs
, const RGWFileHandle
& rhs
) const
672 { return (lhs
.get_key() < rhs
.get_key()); }
674 // for external search by fh_key
675 bool operator()(const fh_key
& k
, const RGWFileHandle
& fh
) const
676 { return k
< fh
.get_key(); }
678 bool operator()(const RGWFileHandle
& fh
, const fh_key
& k
) const
679 { return fh
.get_key() < k
; }
684 bool operator()(const RGWFileHandle
& lhs
, const RGWFileHandle
& rhs
) const
685 { return (lhs
.get_key() == rhs
.get_key()); }
687 bool operator()(const fh_key
& k
, const RGWFileHandle
& fh
) const
688 { return k
== fh
.get_key(); }
690 bool operator()(const RGWFileHandle
& fh
, const fh_key
& k
) const
691 { return fh
.get_key() == k
; }
694 typedef bi::link_mode
<bi::safe_link
> link_mode
; /* XXX normal */
695 #if defined(FHCACHE_AVL)
696 typedef bi::avl_set_member_hook
<link_mode
> tree_hook_type
;
699 typedef bi::set_member_hook
<link_mode
> tree_hook_type
;
701 tree_hook_type fh_hook
;
703 typedef bi::member_hook
<
704 RGWFileHandle
, tree_hook_type
, &RGWFileHandle::fh_hook
> FhHook
;
706 #if defined(FHCACHE_AVL)
707 typedef bi::avltree
<RGWFileHandle
, bi::compare
<FhLT
>, FhHook
> FHTree
;
709 typedef bi::rbtree
<RGWFileHandle
, bi::compare
<FhLT
>, FhHook
> FhTree
;
711 typedef cohort::lru::TreeX
<RGWFileHandle
, FhTree
, FhLT
, FhEQ
, fh_key
,
714 ~RGWFileHandle() override
;
716 friend std::ostream
& operator<<(std::ostream
&os
,
717 RGWFileHandle
const &rgw_fh
);
719 class Factory
: public cohort::lru::ObjectFactory
723 RGWFileHandle
* parent
;
730 Factory(RGWLibFS
* _fs
, RGWFileHandle
* _parent
,
731 const fh_key
& _fhk
, std::string
& _name
, uint32_t _flags
)
732 : fs(_fs
), parent(_parent
), fhk(_fhk
), name(_name
),
735 void recycle (cohort::lru::Object
* o
) override
{
736 /* re-use an existing object */
737 o
->~Object(); // call lru::Object virtual dtor
739 new (o
) RGWFileHandle(fs
, parent
, fhk
, name
, flags
);
742 cohort::lru::Object
* alloc() override
{
743 return new RGWFileHandle(fs
, parent
, fhk
, name
, flags
);
747 }; /* RGWFileHandle */
749 WRITE_CLASS_ENCODER(RGWFileHandle
);
751 static inline RGWFileHandle
* get_rgwfh(struct rgw_file_handle
* fh
) {
752 return static_cast<RGWFileHandle
*>(fh
->fh_private
);
755 static inline enum rgw_fh_type
fh_type_of(uint32_t flags
) {
756 enum rgw_fh_type fh_type
;
757 switch(flags
& RGW_LOOKUP_TYPE_FLAGS
)
759 case RGW_LOOKUP_FLAG_DIR
:
760 fh_type
= RGW_FS_TYPE_DIRECTORY
;
762 case RGW_LOOKUP_FLAG_FILE
:
763 fh_type
= RGW_FS_TYPE_FILE
;
766 fh_type
= RGW_FS_TYPE_NIL
;
771 typedef std::tuple
<RGWFileHandle
*, uint32_t> LookupFHResult
;
772 typedef std::tuple
<RGWFileHandle
*, int> MkObjResult
;
778 RGWFileHandle root_fh
;
779 rgw_fh_callback_t invalidate_cb
;
780 void *invalidate_arg
;
783 mutable std::atomic
<uint64_t> refcnt
;
785 RGWFileHandle::FHCache fh_cache
;
786 RGWFileHandle::FhLRU fh_lru
;
788 std::string uid
; // should match user.user_id, iiuc
791 RGWAccessKey key
; // XXXX acc_key
793 static std::atomic
<uint32_t> fs_inst_counter
;
795 static uint32_t write_completion_interval_s
;
797 using lock_guard
= std::lock_guard
<std::mutex
>;
798 using unique_lock
= std::unique_lock
<std::mutex
>;
802 enum class type
: uint8_t { READDIR
} ;
806 event(type t
, const fh_key
& k
, const struct timespec
& ts
)
807 : t(t
), fhk(k
), ts(ts
) {}
810 friend std::ostream
& operator<<(std::ostream
&os
,
811 RGWLibFS::event
const &ev
);
813 using event_vector
= /* boost::small_vector<event, 16> */
816 struct WriteCompletion
818 RGWFileHandle
& rgw_fh
;
820 WriteCompletion(RGWFileHandle
& _fh
) : rgw_fh(_fh
) {
821 rgw_fh
.get_fs()->ref(&rgw_fh
);
825 rgw_fh
.close(); /* will finish in-progress write */
826 rgw_fh
.get_fs()->unref(&rgw_fh
);
830 static ceph::timer
<ceph::mono_clock
> write_timer
;
834 std::atomic
<uint32_t> flags
;
835 std::deque
<event
> events
;
837 State() : flags(0) {}
839 void push_event(const event
& ev
) {
840 events
.push_back(ev
);
844 uint32_t new_inst() {
845 return ++fs_inst_counter
;
848 friend class RGWFileHandle
;
849 friend class RGWLibProcess
;
853 static constexpr uint32_t FLAG_NONE
= 0x0000;
854 static constexpr uint32_t FLAG_CLOSED
= 0x0001;
859 real_time creation_time
;
860 uint64_t num_entries
;
863 RGWLibFS(CephContext
* _cct
, const char *_uid
, const char *_user_id
,
864 const char* _key
, const char *root
)
865 : cct(_cct
), root_fh(this), invalidate_cb(nullptr),
866 invalidate_arg(nullptr), shutdown(false), refcnt(1),
867 fh_cache(cct
->_conf
->rgw_nfs_fhcache_partitions
,
868 cct
->_conf
->rgw_nfs_fhcache_size
),
869 fh_lru(cct
->_conf
->rgw_nfs_lru_lanes
,
870 cct
->_conf
->rgw_nfs_lru_lane_hiwat
),
871 uid(_uid
), key(_user_id
, _key
) {
873 if (!root
|| !strcmp(root
, "/")) {
874 root_fh
.init_rootfs(uid
, RGWFileHandle::root_name
, false);
876 root_fh
.init_rootfs(uid
, root
, true);
879 /* pointer to self */
880 fs
.fs_private
= this;
882 /* expose public root fh */
883 fs
.root_fh
= root_fh
.get_fh();
888 friend void intrusive_ptr_add_ref(const RGWLibFS
* fs
) {
889 fs
->refcnt
.fetch_add(1, std::memory_order_relaxed
);
892 friend void intrusive_ptr_release(const RGWLibFS
* fs
) {
893 if (fs
->refcnt
.fetch_sub(1, std::memory_order_release
) == 0) {
894 std::atomic_thread_fence(std::memory_order_acquire
);
900 intrusive_ptr_add_ref(this);
905 intrusive_ptr_release(this);
908 void stop() { shutdown
= true; }
910 void release_evict(RGWFileHandle
* fh
) {
911 /* remove from cache, releases sentinel ref */
912 fh_cache
.remove(fh
->fh
.fh_hk
.object
, fh
,
913 RGWFileHandle::FHCache::FLAG_LOCK
);
914 /* release call-path ref */
915 (void) fh_lru
.unref(fh
, cohort::lru::FLAG_NONE
);
918 int authorize(RGWRados
* store
) {
919 int ret
= rgw_get_user_info_by_access_key(store
, key
.id
, user
);
921 RGWAccessKey
* key0
= user
.get_key0();
923 (key0
->key
!= key
.key
))
926 return -ERR_USER_SUSPENDED
;
928 /* try external authenticators (ldap for now) */
929 rgw::LDAPHelper
* ldh
= rgwlib
.get_ldh(); /* !nullptr */
931 /* boost filters and/or string_ref may throw on invalid input */
933 token
= rgw::from_base64(key
.id
);
935 token
= std::string("");
937 if (token
.valid() && (ldh
->auth(token
.id
, token
.key
) == 0)) {
938 /* try to store user if it doesn't already exist */
939 if (rgw_get_user_info_by_uid(store
, token
.id
, user
) < 0) {
940 int ret
= rgw_store_user_info(store
, user
, NULL
, NULL
, real_time(),
943 lsubdout(get_context(), rgw
, 10)
944 << "NOTICE: failed to store new user's info: ret=" << ret
953 int register_invalidate(rgw_fh_callback_t cb
, void *arg
, uint32_t flags
) {
955 invalidate_arg
= arg
;
959 /* find RGWFileHandle by id */
960 LookupFHResult
lookup_fh(const fh_key
& fhk
,
961 const uint32_t flags
= RGWFileHandle::FLAG_NONE
) {
964 // cast int32_t(RGWFileHandle::FLAG_NONE) due to strictness of Clang
965 // the cast transfers a lvalue into a rvalue in the ctor
966 // check the commit message for the full details
967 LookupFHResult fhr
{ nullptr, uint32_t(RGWFileHandle::FLAG_NONE
) };
969 RGWFileHandle::FHCache::Latch lat
;
970 bool fh_locked
= flags
& RGWFileHandle::FLAG_LOCKED
;
974 fh_cache
.find_latch(fhk
.fh_hk
.object
/* partition selector*/,
975 fhk
/* key */, lat
/* serializer */,
976 RGWFileHandle::FHCache::FLAG_LOCK
);
979 if (likely(! fh_locked
))
980 fh
->mtx
.lock(); // XXX !RAII because may-return-LOCKED
981 /* need initial ref from LRU (fast path) */
982 if (! fh_lru
.ref(fh
, cohort::lru::FLAG_INITIAL
)) {
984 if (likely(! fh_locked
))
986 goto retry
; /* !LATCHED */
988 /* LATCHED, LOCKED */
989 if (! (flags
& RGWFileHandle::FLAG_LOCK
))
990 fh
->mtx
.unlock(); /* ! LOCKED */
992 lat
.lock
->unlock(); /* !LATCHED */
995 lsubdout(get_context(), rgw
, 17)
996 << __func__
<< " 1 " << *fh
1000 } /* lookup_fh(const fh_key&) */
1002 /* find or create an RGWFileHandle */
1003 LookupFHResult
lookup_fh(RGWFileHandle
* parent
, const char *name
,
1004 const uint32_t flags
= RGWFileHandle::FLAG_NONE
) {
1007 // cast int32_t(RGWFileHandle::FLAG_NONE) due to strictness of Clang
1008 // the cast transfers a lvalue into a rvalue in the ctor
1009 // check the commit message for the full details
1010 LookupFHResult fhr
{ nullptr, uint32_t(RGWFileHandle::FLAG_NONE
) };
1012 /* mount is stale? */
1013 if (state
.flags
& FLAG_CLOSED
)
1016 RGWFileHandle::FHCache::Latch lat
;
1017 bool fh_locked
= flags
& RGWFileHandle::FLAG_LOCKED
;
1019 std::string obj_name
{name
};
1020 std::string key_name
{parent
->make_key_name(name
)};
1022 lsubdout(get_context(), rgw
, 10)
1023 << __func__
<< " lookup called on "
1024 << parent
->object_name() << " for " << key_name
1025 << " (" << obj_name
<< ")"
1028 fh_key fhk
= parent
->make_fhk(obj_name
);
1032 fh_cache
.find_latch(fhk
.fh_hk
.object
/* partition selector*/,
1033 fhk
/* key */, lat
/* serializer */,
1034 RGWFileHandle::FHCache::FLAG_LOCK
);
1037 if (likely(! fh_locked
))
1038 fh
->mtx
.lock(); // XXX !RAII because may-return-LOCKED
1039 if (fh
->flags
& RGWFileHandle::FLAG_DELETED
) {
1040 /* for now, delay briefly and retry */
1042 if (likely(! fh_locked
))
1044 std::this_thread::sleep_for(std::chrono::milliseconds(20));
1045 goto retry
; /* !LATCHED */
1047 /* need initial ref from LRU (fast path) */
1048 if (! fh_lru
.ref(fh
, cohort::lru::FLAG_INITIAL
)) {
1050 if (likely(! fh_locked
))
1052 goto retry
; /* !LATCHED */
1054 /* LATCHED, LOCKED */
1055 if (! (flags
& RGWFileHandle::FLAG_LOCK
))
1056 if (likely(! fh_locked
))
1057 fh
->mtx
.unlock(); /* ! LOCKED */
1059 /* make or re-use handle */
1060 RGWFileHandle::Factory
prototype(this, parent
, fhk
,
1061 obj_name
, CREATE_FLAGS(flags
));
1062 uint32_t iflags
{cohort::lru::FLAG_INITIAL
};
1063 fh
= static_cast<RGWFileHandle
*>(
1064 fh_lru
.insert(&prototype
,
1065 cohort::lru::Edge::MRU
,
1068 /* lock fh (LATCHED) */
1069 if (flags
& RGWFileHandle::FLAG_LOCK
)
1071 if (likely(! (iflags
& cohort::lru::FLAG_RECYCLE
))) {
1072 /* inserts at cached insert iterator, releasing latch */
1073 fh_cache
.insert_latched(
1074 fh
, lat
, RGWFileHandle::FHCache::FLAG_UNLOCK
);
1076 /* recycle step invalidates Latch */
1078 fhk
.fh_hk
.object
, fh
, RGWFileHandle::FHCache::FLAG_NONE
);
1079 lat
.lock
->unlock(); /* !LATCHED */
1081 get
<1>(fhr
) |= RGWFileHandle::FLAG_CREATE
;
1082 /* ref parent (non-initial ref cannot fail on valid object) */
1083 if (! parent
->is_mount()) {
1084 (void) fh_lru
.ref(parent
, cohort::lru::FLAG_NONE
);
1086 goto out
; /* !LATCHED */
1089 goto retry
; /* !LATCHED */
1092 lat
.lock
->unlock(); /* !LATCHED */
1096 lsubdout(get_context(), rgw
, 17)
1097 << __func__
<< " 2 " << *fh
1101 } /* lookup_fh(RGWFileHandle*, const char *, const uint32_t) */
1103 inline void unref(RGWFileHandle
* fh
) {
1104 if (likely(! fh
->is_mount())) {
1105 (void) fh_lru
.unref(fh
, cohort::lru::FLAG_NONE
);
1109 inline RGWFileHandle
* ref(RGWFileHandle
* fh
) {
1110 if (likely(! fh
->is_mount())) {
1111 fh_lru
.ref(fh
, cohort::lru::FLAG_NONE
);
1116 int getattr(RGWFileHandle
* rgw_fh
, struct stat
* st
);
1118 int setattr(RGWFileHandle
* rgw_fh
, struct stat
* st
, uint32_t mask
,
1121 void update_fh(RGWFileHandle
*rgw_fh
);
1123 LookupFHResult
stat_bucket(RGWFileHandle
* parent
, const char *path
,
1124 RGWLibFS::BucketStats
& bs
,
1127 LookupFHResult
stat_leaf(RGWFileHandle
* parent
, const char *path
,
1128 enum rgw_fh_type type
= RGW_FS_TYPE_NIL
,
1129 uint32_t flags
= RGWFileHandle::FLAG_NONE
);
1131 int read(RGWFileHandle
* rgw_fh
, uint64_t offset
, size_t length
,
1132 size_t* bytes_read
, void* buffer
, uint32_t flags
);
1134 int rename(RGWFileHandle
* old_fh
, RGWFileHandle
* new_fh
,
1135 const char *old_name
, const char *new_name
);
1137 MkObjResult
create(RGWFileHandle
* parent
, const char *name
, struct stat
*st
,
1138 uint32_t mask
, uint32_t flags
);
1140 MkObjResult
mkdir(RGWFileHandle
* parent
, const char *name
, struct stat
*st
,
1141 uint32_t mask
, uint32_t flags
);
1143 int unlink(RGWFileHandle
* rgw_fh
, const char *name
,
1144 uint32_t flags
= FLAG_NONE
);
1146 /* find existing RGWFileHandle */
1147 RGWFileHandle
* lookup_handle(struct rgw_fh_hk fh_hk
) {
1149 if (state
.flags
& FLAG_CLOSED
)
1152 RGWFileHandle::FHCache::Latch lat
;
1157 fh_cache
.find_latch(fhk
.fh_hk
.object
/* partition selector*/,
1158 fhk
/* key */, lat
/* serializer */,
1159 RGWFileHandle::FHCache::FLAG_LOCK
);
1162 lsubdout(get_context(), rgw
, 0)
1163 << __func__
<< " handle lookup failed <"
1164 << fhk
.fh_hk
.bucket
<< "," << fhk
.fh_hk
.object
<< ">"
1165 << "(need persistent handles)"
1170 if (fh
->flags
& RGWFileHandle::FLAG_DELETED
) {
1171 /* for now, delay briefly and retry */
1173 fh
->mtx
.unlock(); /* !LOCKED */
1174 std::this_thread::sleep_for(std::chrono::milliseconds(20));
1175 goto retry
; /* !LATCHED */
1177 if (! fh_lru
.ref(fh
, cohort::lru::FLAG_INITIAL
)) {
1180 goto retry
; /* !LATCHED */
1183 fh
->mtx
.unlock(); /* !LOCKED */
1185 lat
.lock
->unlock(); /* !LATCHED */
1187 /* special case: lookup root_fh */
1189 if (unlikely(fh_hk
== root_fh
.fh
.fh_hk
)) {
1197 CephContext
* get_context() {
1201 struct rgw_fs
* get_fs() { return &fs
; }
1203 uint64_t get_fsid() { return root_fh
.state
.dev
; }
1205 RGWUserInfo
* get_user() { return &user
; }
1211 static inline std::string
make_uri(const std::string
& bucket_name
,
1212 const std::string
& object_name
) {
1213 std::string
uri("/");
1214 uri
.reserve(bucket_name
.length() + object_name
.length() + 2);
1222 read directory content (buckets)
1225 class RGWListBucketsRequest
: public RGWLibRequest
,
1226 public RGWListBuckets
/* RGWOp */
1229 RGWFileHandle
* rgw_fh
;
1230 RGWFileHandle::readdir_offset offset
;
1237 RGWListBucketsRequest(CephContext
* _cct
, RGWUserInfo
*_user
,
1238 RGWFileHandle
* _rgw_fh
, rgw_readdir_cb _rcb
,
1239 void* _cb_arg
, RGWFileHandle::readdir_offset
& _offset
)
1240 : RGWLibRequest(_cct
, _user
), rgw_fh(_rgw_fh
), offset(_offset
),
1241 cb_arg(_cb_arg
), rcb(_rcb
), ioff(nullptr), ix(0), d_count(0) {
1245 if (unlikely(!! get
<uint64_t*>(&offset
))) {
1246 ioff
= get
<uint64_t*>(offset
);
1247 const auto& mk
= rgw_fh
->find_marker(*ioff
);
1252 const char* mk
= get
<const char*>(offset
);
1260 bool only_bucket() override
{ return false; }
1262 int op_init() override
{
1263 // assign store, s, and dialect_handler
1264 RGWObjectCtx
* rados_ctx
1265 = static_cast<RGWObjectCtx
*>(get_state()->obj_ctx
);
1266 // framework promises to call op_init after parent init
1268 RGWOp::init(rados_ctx
->store
, get_state(), this);
1269 op
= this; // assign self as op: REQUIRED
1273 int header_init() override
{
1274 struct req_state
* s
= get_state();
1275 s
->info
.method
= "GET";
1278 /* XXX derp derp derp */
1279 s
->relative_uri
= "/";
1280 s
->info
.request_uri
= "/"; // XXX
1281 s
->info
.effective_uri
= "/";
1282 s
->info
.request_params
= "";
1283 s
->info
.domain
= ""; /* XXX ? */
1291 int get_params() override
{
1292 limit
= -1; /* no limit */
1296 void send_response_begin(bool has_buckets
) override
{
1300 void send_response_data(RGWUserBuckets
& buckets
) override
{
1303 map
<string
, RGWBucketEnt
>& m
= buckets
.get_buckets();
1304 for (const auto& iter
: m
) {
1305 boost::string_ref marker
{iter
.first
};
1306 const RGWBucketEnt
& ent
= iter
.second
;
1307 if (! this->operator()(ent
.bucket
.name
, marker
)) {
1308 /* caller cannot accept more */
1309 lsubdout(cct
, rgw
, 5) << "ListBuckets rcb failed"
1310 << " dirent=" << ent
.bucket
.name
1311 << " call count=" << ix
1317 } /* send_response_data */
1319 void send_response_end() override
{
1323 int operator()(const boost::string_ref
& name
,
1324 const boost::string_ref
& marker
) {
1325 uint64_t off
= XXH64(name
.data(), name
.length(), fh_key::seed
);
1329 /* update traversal cache */
1330 rgw_fh
->add_marker(off
, rgw_obj_key
{marker
.data(), ""},
1331 RGW_FS_TYPE_DIRECTORY
);
1333 return rcb(name
.data(), cb_arg
, off
, RGW_LOOKUP_FLAG_DIR
);
1337 lsubdout(cct
, rgw
, 15) << "READDIR offset: " << offset
1338 << " is_truncated: " << is_truncated
1340 return !is_truncated
;
1343 }; /* RGWListBucketsRequest */
1346 read directory content (bucket objects)
// RGWReaddirRequest: an RGWLibRequest that is also an RGWListBucket op and
// hands every listed entry to a caller-supplied rgw_readdir_cb.
//
// NOTE(review): this listing is lossy -- the embedded original line numbers
// skip, so braces, access specifiers, some member declarations and some
// statements are not visible.  The comments below describe only what the
// visible lines show.
1349 class RGWReaddirRequest
: public RGWLibRequest
,
1350 public RGWListBucket
/* RGWOp */
// rgw_fh: the handle whose contents are listed.
// offset: value the constructor reads either as a uint64_t* or as a
// const char* (see get<>() calls below).
1353 RGWFileHandle
* rgw_fh
;
1354 RGWFileHandle::readdir_offset offset
;
// Constructor.  Stores the handle, the callback (rcb) and its opaque
// argument (cb_arg), zeroes the counters ix and d_count, then inspects
// offset: when get<uint64_t*>(&offset) is non-null, ioff is taken from it
// and a marker is looked up with rgw_fh->find_marker(*ioff); the other
// visible path reads offset as a const char*.  A marker key is also built
// from rgw_fh->relative_object_name(), and default_max is set to 1000.
// The branch structure joining these statements is not visible here.
1361 RGWReaddirRequest(CephContext
* _cct
, RGWUserInfo
*_user
,
1362 RGWFileHandle
* _rgw_fh
, rgw_readdir_cb _rcb
,
1363 void* _cb_arg
, RGWFileHandle::readdir_offset
& _offset
)
1364 : RGWLibRequest(_cct
, _user
), rgw_fh(_rgw_fh
), offset(_offset
),
1365 cb_arg(_cb_arg
), rcb(_rcb
), ioff(nullptr), ix(0), d_count(0) {
1369 if (unlikely(!! get
<uint64_t*>(&offset
))) {
1370 ioff
= get
<uint64_t*>(offset
);
1371 const auto& mk
= rgw_fh
->find_marker(*ioff
);
1376 const char* mk
= get
<const char*>(offset
);
1378 std::string tmark
{rgw_fh
->relative_object_name()};
1381 marker
= rgw_obj_key
{std::move(tmark
), "", ""};
1385 default_max
= 1000; // XXX was being omitted
// Always reports true for this request type.
1389 bool only_bucket() override
{ return true; }
// op_init: initializes the RGWOp base with the store taken from the
// request state's RGWObjectCtx and registers this object as its own op.
1391 int op_init() override
{
1392 // assign store, s, and dialect_handler
1393 RGWObjectCtx
* rados_ctx
1394 = static_cast<RGWObjectCtx
*>(get_state()->obj_ctx
);
1395 // framework promises to call op_init after parent init
1397 RGWOp::init(rados_ctx
->store
, get_state(), this);
1398 op
= this; // assign self as op: REQUIRED
// header_init: fills the request state as a "GET" on "/<bucket>/" and sets
// the listing prefix to the handle's relative object name.  What happens
// when the prefix is non-empty is not visible in this listing.
1402 int header_init() override
{
1403 struct req_state
* s
= get_state();
1404 s
->info
.method
= "GET";
1407 /* XXX derp derp derp */
1408 std::string uri
= "/" + rgw_fh
->bucket_name() + "/";
1409 s
->relative_uri
= uri
;
1410 s
->info
.request_uri
= uri
; // XXX
1411 s
->info
.effective_uri
= uri
;
1412 s
->info
.request_params
= "";
1413 s
->info
.domain
= ""; /* XXX ? */
1418 prefix
= rgw_fh
->relative_object_name();
1419 if (prefix
.length() > 0)
// operator(): reports one directory entry.  Asserts the name is non-empty,
// hashes it with XXH64 (seed fh_key::seed) to form the offset passed to the
// callback, records (off, marker, type) through rgw_fh->add_marker, and
// returns whatever rcb returns.  The flag passed to rcb is
// RGW_LOOKUP_FLAG_DIR when type == RGW_FS_TYPE_DIRECTORY, otherwise
// RGW_LOOKUP_FLAG_FILE.  The declaration of the `type` parameter is on a
// line that is not visible here.
1426 int operator()(const boost::string_ref name
, const rgw_obj_key
& marker
,
1429 assert(name
.length() > 0); // XXX
1431 /* hash offset of name in parent (short name) for NFS readdir cookie */
1432 uint64_t off
= XXH64(name
.data(), name
.length(), fh_key::seed
);
1433 if (unlikely(!! ioff
)) {
1436 /* update traversal cache */
1437 rgw_fh
->add_marker(off
, marker
, type
);
1439 return rcb(name
.data(), cb_arg
, off
,
1440 (type
== RGW_FS_TYPE_DIRECTORY
) ?
1441 RGW_LOOKUP_FLAG_DIR
:
1442 RGW_LOOKUP_FLAG_FILE
);
// get_params: body is not visible in this listing.
1445 int get_params() override
{
// send_response: walks the listing results.  For each entry in objs the
// key name is trimmed to the part after its last '/', and the entry is
// reported as RGW_FS_TYPE_FILE; a false result from operator() is logged
// as "readdir rcb failed".  For each entry in common_prefixes a trailing
// '/' is removed from the key in place (via const_cast), the name is
// trimmed the same way, and the entry is reported as
// RGW_FS_TYPE_DIRECTORY.  The handling of the "/" prefix case and of the
// failure path is on lines not visible here.
1450 void send_response() override
{
1451 struct req_state
* s
= get_state();
1452 for (const auto& iter
: objs
) {
1454 boost::string_ref sref
{iter
.key
.name
};
1456 lsubdout(cct
, rgw
, 15) << "readdir objects prefix: " << prefix
1457 << " obj: " << sref
<< dendl
;
1459 size_t last_del
= sref
.find_last_of('/');
1460 if (last_del
!= string::npos
)
1461 sref
.remove_prefix(last_del
+1);
1463 /* leaf directory? */
1467 lsubdout(cct
, rgw
, 15) << "RGWReaddirRequest "
1469 << "list uri=" << s
->relative_uri
<< " "
1470 << " prefix=" << prefix
<< " "
1471 << " obj path=" << iter
.key
.name
1472 << " (" << sref
<< ")" << ""
1475 if(! this->operator()(sref
, next_marker
, RGW_FS_TYPE_FILE
)) {
1476 /* caller cannot accept more */
1477 lsubdout(cct
, rgw
, 5) << "readdir rcb failed"
1478 << " dirent=" << sref
.data()
1479 << " call count=" << ix
1485 for (auto& iter
: common_prefixes
) {
1487 lsubdout(cct
, rgw
, 15) << "readdir common prefixes prefix: " << prefix
1488 << " iter first: " << iter
.first
1489 << " iter second: " << iter
.second
1492 /* XXX aieee--I have seen this case! */
1493 if (iter
.first
== "/")
1496 /* it's safest to modify the element in place--a suffix-modifying
1497 * string_ref operation is problematic since ULP rgw_file callers
1498 * will ultimately need a c-string */
1499 if (iter
.first
.back() == '/')
1500 const_cast<std::string
&>(iter
.first
).pop_back();
1502 boost::string_ref sref
{iter
.first
};
1504 size_t last_del
= sref
.find_last_of('/');
1505 if (last_del
!= string::npos
)
1506 sref
.remove_prefix(last_del
+1);
1508 lsubdout(cct
, rgw
, 15) << "RGWReaddirRequest "
1510 << "list uri=" << s
->relative_uri
<< " "
1511 << " prefix=" << prefix
<< " "
1512 << " cpref=" << sref
1515 this->operator()(sref
, next_marker
, RGW_FS_TYPE_DIRECTORY
);
// send_versioned_response: body is not visible in this listing.
1520 virtual void send_versioned_response() {
// NOTE(review): the signature of the member that owns the next statements
// is on a line not visible here.  The visible body logs offset,
// next_marker and is_truncated, then returns !is_truncated.
1525 lsubdout(cct
, rgw
, 15) << "READDIR offset: " << offset
1526 << " next marker: " << next_marker
1527 << " is_truncated: " << is_truncated
1529 return !is_truncated
;
1532 }; /* RGWReaddirRequest */
1535 dir has-children predicate (bucket objects)
// RGWRMdirCheck: an RGWLibRequest / RGWListBucket op that lists under the
// handle's relative object name and records in has_children whether the
// listing shows anything beyond the directory object itself.
//
// NOTE(review): this listing is lossy -- the embedded original line numbers
// skip, so braces, some member declarations and some statements are not
// visible.  The comments describe only the visible lines.
1538 class RGWRMdirCheck
: public RGWLibRequest
,
1539 public RGWListBucket
/* RGWOp */
// Handle of the directory being checked (read-only).
1542 const RGWFileHandle
* rgw_fh
;
// Constructor: stores the handle; valid and has_children start false.
1546 RGWRMdirCheck (CephContext
* _cct
, RGWUserInfo
*_user
,
1547 const RGWFileHandle
* _rgw_fh
)
1548 : RGWLibRequest(_cct
, _user
), rgw_fh(_rgw_fh
), valid(false),
1549 has_children(false) {
// Always reports true for this request type.
1554 bool only_bucket() override
{ return true; }
// op_init: initializes the RGWOp base with the store taken from the
// request state's RGWObjectCtx and registers this object as its own op.
1556 int op_init() override
{
1557 // assign store, s, and dialect_handler
1558 RGWObjectCtx
* rados_ctx
1559 = static_cast<RGWObjectCtx
*>(get_state()->obj_ctx
);
1560 // framework promises to call op_init after parent init
1562 RGWOp::init(rados_ctx
->store
, get_state(), this);
1563 op
= this; // assign self as op: REQUIRED
// header_init: fills the request state as a "GET" on "/<bucket>/" and sets
// the listing prefix to the handle's relative object name.  What happens
// when the prefix is non-empty is not visible in this listing.
1567 int header_init() override
{
1568 struct req_state
* s
= get_state();
1569 s
->info
.method
= "GET";
1572 std::string uri
= "/" + rgw_fh
->bucket_name() + "/";
1573 s
->relative_uri
= uri
;
1574 s
->info
.request_uri
= uri
;
1575 s
->info
.effective_uri
= uri
;
1576 s
->info
.request_params
= "";
1577 s
->info
.domain
= ""; /* XXX ? */
1581 prefix
= rgw_fh
->relative_object_name();
1582 if (prefix
.length() > 0)
// get_params: body is not visible in this listing.
1589 int get_params() override
{
// send_response: sets has_children when objs holds more than one entry or
// when the first entry's key name differs from prefix (one operand of that
// condition is on a line not visible here), and also while walking
// common_prefixes.  The action taken for a "/" prefix is not visible.
1594 void send_response() override
{
1596 if ((objs
.size() > 1) ||
1598 (objs
.front().key
.name
!= prefix
))) {
1599 has_children
= true;
1602 for (auto& iter
: common_prefixes
) {
1603 /* readdir never produces a name for this case */
1604 if (iter
.first
== "/")
1606 has_children
= true;
// send_versioned_response: body is not visible in this listing.
1611 virtual void send_versioned_response() {
1615 }; /* RGWRMdirCheck */
// RGWCreateBucketRequest: an RGWLibRequest that is also an RGWCreateBucket
// op, issued as a "PUT" on "/<bucket_name>".
//
// NOTE(review): this listing is lossy -- the embedded original line numbers
// skip, so braces, return statements and possibly other statements are not
// visible.  The comments describe only the visible lines.
1621 class RGWCreateBucketRequest
: public RGWLibRequest
,
1622 public RGWCreateBucket
/* RGWOp */
// NOTE(review): held by reference, so the string passed to the constructor
// must outlive this request.
1625 const std::string
& bucket_name
;
// Constructor: binds bucket_name to the caller's string.
1627 RGWCreateBucketRequest(CephContext
* _cct
, RGWUserInfo
*_user
,
1628 std::string
& _bname
)
1629 : RGWLibRequest(_cct
, _user
), bucket_name(_bname
) {
// Always reports false for this request type.
1633 bool only_bucket() override
{ return false; }
// read_permissions: body is not visible in this listing beyond the comment.
1635 int read_permissions(RGWOp
* op_obj
) override
{
1636 /* we ARE a 'create bucket' request (cf. rgw_rest.cc, ll. 1305-6) */
// op_init: initializes the RGWOp base with the store taken from the
// request state's RGWObjectCtx and registers this object as its own op.
1640 int op_init() override
{
1641 // assign store, s, and dialect_handler
1642 RGWObjectCtx
* rados_ctx
1643 = static_cast<RGWObjectCtx
*>(get_state()->obj_ctx
);
1644 // framework promises to call op_init after parent init
1646 RGWOp::init(rados_ctx
->store
, get_state(), this);
1647 op
= this; // assign self as op: REQUIRED
// header_init: fills the request state as a "PUT" whose relative, request
// and effective URIs are all "/" + bucket_name; params and domain are empty.
1651 int header_init() override
{
1653 struct req_state
* s
= get_state();
1654 s
->info
.method
= "PUT";
1657 string uri
= "/" + bucket_name
;
1658 /* XXX derp derp derp */
1659 s
->relative_uri
= uri
;
1660 s
->info
.request_uri
= uri
; // XXX
1661 s
->info
.effective_uri
= uri
;
1662 s
->info
.request_params
= "";
1663 s
->info
.domain
= ""; /* XXX ? */
// get_params: builds a canned S3 ACL policy from the request state's owner,
// bucket_owner and canned_acl.  How ret and s3policy are used afterwards is
// on lines not visible here.
1671 int get_params() override
{
1672 struct req_state
* s
= get_state();
1673 RGWAccessControlPolicy_S3
s3policy(s
->cct
);
1674 /* we don't have (any) headers, so just create canned ACLs */
1675 int ret
= s3policy
.create_canned(s
->owner
, s
->bucket_owner
, s
->canned_acl
);
// send_response: nothing visible besides the TODO.
1680 void send_response() override
{
1681 /* TODO: something (maybe) */
1683 }; /* RGWCreateBucketRequest */
// RGWDeleteBucketRequest: an RGWLibRequest that is also an RGWDeleteBucket
// op, issued as a "DELETE" on "/<bucket_name>".
//
// NOTE(review): this listing is lossy -- the embedded original line numbers
// skip, so braces, return statements and possibly other statements are not
// visible.  The comments describe only the visible lines.
1689 class RGWDeleteBucketRequest
: public RGWLibRequest
,
1690 public RGWDeleteBucket
/* RGWOp */
// NOTE(review): held by reference, so the string passed to the constructor
// must outlive this request.
1693 const std::string
& bucket_name
;
// Constructor: binds bucket_name to the caller's string.
1695 RGWDeleteBucketRequest(CephContext
* _cct
, RGWUserInfo
*_user
,
1696 std::string
& _bname
)
1697 : RGWLibRequest(_cct
, _user
), bucket_name(_bname
) {
// Always reports true for this request type.
1701 bool only_bucket() override
{ return true; }
// op_init: initializes the RGWOp base with the store taken from the
// request state's RGWObjectCtx and registers this object as its own op.
1703 int op_init() override
{
1704 // assign store, s, and dialect_handler
1705 RGWObjectCtx
* rados_ctx
1706 = static_cast<RGWObjectCtx
*>(get_state()->obj_ctx
);
1707 // framework promises to call op_init after parent init
1709 RGWOp::init(rados_ctx
->store
, get_state(), this);
1710 op
= this; // assign self as op: REQUIRED
// header_init: fills the request state as a "DELETE" whose relative,
// request and effective URIs are all "/" + bucket_name.
1714 int header_init() override
{
1716 struct req_state
* s
= get_state();
1717 s
->info
.method
= "DELETE";
1720 string uri
= "/" + bucket_name
;
1721 /* XXX derp derp derp */
1722 s
->relative_uri
= uri
;
1723 s
->info
.request_uri
= uri
; // XXX
1724 s
->info
.effective_uri
= uri
;
1725 s
->info
.request_params
= "";
1726 s
->info
.domain
= ""; /* XXX ? */
// send_response: intentionally empty.
1734 void send_response() override
{}
1736 }; /* RGWDeleteBucketRequest */
// RGWPutObjRequest: an RGWLibRequest that is also an RGWPutObj op; it
// issues a "PUT" for bucket_name/obj_name whose content length is taken
// from the caller's buffer::list.
//
// NOTE(review): this listing is lossy -- the embedded original line numbers
// skip, so braces, return statements, one constructor parameter line and
// possibly other statements are not visible.  The comments describe only
// the visible lines.
1741 class RGWPutObjRequest
: public RGWLibRequest
,
1742 public RGWPutObj
/* RGWOp */
// NOTE(review): all three are references; the referenced objects must
// outlive this request.
1745 const std::string
& bucket_name
;
1746 const std::string
& obj_name
;
1747 buffer::list
& bl
; /* XXX */
// Running total updated in get_data().
1748 size_t bytes_written
;
// Constructor: binds the name and buffer references and zeroes
// bytes_written.  The parameter that supplies _bl is declared on a line
// not visible here.
1750 RGWPutObjRequest(CephContext
* _cct
, RGWUserInfo
*_user
,
1751 const std::string
& _bname
, const std::string
& _oname
,
1753 : RGWLibRequest(_cct
, _user
), bucket_name(_bname
), obj_name(_oname
),
1754 bl(_bl
), bytes_written(0) {
// Always reports true for this request type.
1758 bool only_bucket() override
{ return true; }
// op_init: initializes the RGWOp base, registers this object as its own
// op, then validates obj_name with valid_s3_object_name().  How rc is used
// is on lines not visible here.
1760 int op_init() override
{
1761 // assign store, s, and dialect_handler
1762 RGWObjectCtx
* rados_ctx
1763 = static_cast<RGWObjectCtx
*>(get_state()->obj_ctx
);
1764 // framework promises to call op_init after parent init
1766 RGWOp::init(rados_ctx
->store
, get_state(), this);
1767 op
= this; // assign self as op: REQUIRED
1769 int rc
= valid_s3_object_name(obj_name
);
// header_init: fills the request state as a "PUT" on
// make_uri(bucket_name, obj_name) and sets content_length to bl.length().
1776 int header_init() override
{
1778 struct req_state
* s
= get_state();
1779 s
->info
.method
= "PUT";
1782 /* XXX derp derp derp */
1783 std::string uri
= make_uri(bucket_name
, obj_name
);
1784 s
->relative_uri
= uri
;
1785 s
->info
.request_uri
= uri
; // XXX
1786 s
->info
.effective_uri
= uri
;
1787 s
->info
.request_params
= "";
1788 s
->info
.domain
= ""; /* XXX ? */
1790 /* XXX required in RGWOp::execute() */
1791 s
->content_length
= bl
.length();
// get_params: builds a canned S3 ACL policy from the request state's owner,
// bucket_owner and canned_acl.  How ret and s3policy are used afterwards is
// on lines not visible here.
1799 int get_params() override
{
1800 struct req_state
* s
= get_state();
1801 RGWAccessControlPolicy_S3
s3policy(s
->cct
);
1802 /* we don't have (any) headers, so just create canned ACLs */
1803 int ret
= s3policy
.create_canned(s
->owner
, s
->bucket_owner
, s
->canned_acl
);
// get_data: adds _bl.length() to bytes_written.  The statement that fills
// _bl and the return value are on lines not visible here.
1808 int get_data(buffer::list
& _bl
) override
{
1809 /* XXX for now, use sharing semantics */
1811 uint32_t len
= _bl
.length();
1812 bytes_written
+= len
;
// send_response: intentionally empty.
1816 void send_response() override
{}
// verify_params: rejects with -ERR_TOO_LARGE when bl is longer than the
// configured rgw_max_put_size.  The success return is not visible here.
1818 int verify_params() override
{
1819 if (bl
.length() > cct
->_conf
->rgw_max_put_size
)
1820 return -ERR_TOO_LARGE
;
1824 }; /* RGWPutObjRequest */
// RGWReadRequest: an RGWLibRequest that is also an RGWGetObj op; it reads
// a byte range of the object behind rgw_fh and copies the returned data
// into a caller-supplied buffer (ulp_buffer).
//
// NOTE(review): this listing is lossy -- the embedded original line numbers
// skip, so braces, return statements, some member declarations (e.g.
// ulp_buffer, nread), one constructor parameter line and possibly other
// statements are not visible.  The comments describe only the visible lines.
1830 class RGWReadRequest
: public RGWLibRequest
,
1831 public RGWGetObj
/* RGWOp */
// Handle of the object being read.
1834 RGWFileHandle
* rgw_fh
;
// Bytes still allowed to be copied into ulp_buffer.
1837 size_t read_resid
; /* initialize to len, <= sizeof(ulp_buffer) */
1838 bool do_hexdump
= false;
// Constructor: stores the handle and output buffer, zeroes nread, sets
// read_resid to len, and pre-populates the RGWGetObj range fields so no
// range parsing is needed.
// NOTE(review): end is set to off + len; if RGWGetObj treats end as an
// inclusive offset this asks for one byte more than len -- confirm.
1840 RGWReadRequest(CephContext
* _cct
, RGWUserInfo
*_user
,
1841 RGWFileHandle
* _rgw_fh
, uint64_t off
, uint64_t len
,
1843 : RGWLibRequest(_cct
, _user
), rgw_fh(_rgw_fh
), ulp_buffer(_ulp_buffer
),
1844 nread(0), read_resid(len
) {
1847 /* fixup RGWGetObj (already know range parameters) */
1848 RGWGetObj::range_parsed
= true;
1849 RGWGetObj::get_data
= true; // XXX
1850 RGWGetObj::partial_content
= true;
1851 RGWGetObj::ofs
= off
;
1852 RGWGetObj::end
= off
+ len
;
// Always reports false for this request type.
1855 bool only_bucket() override
{ return false; }
// op_init: initializes the RGWOp base with the store taken from the
// request state's RGWObjectCtx and registers this object as its own op.
1857 int op_init() override
{
1858 // assign store, s, and dialect_handler
1859 RGWObjectCtx
* rados_ctx
1860 = static_cast<RGWObjectCtx
*>(get_state()->obj_ctx
);
1861 // framework promises to call op_init after parent init
1863 RGWOp::init(rados_ctx
->store
, get_state(), this);
1864 op
= this; // assign self as op: REQUIRED
// header_init: fills the request state as a "GET" on the URI built from
// the handle's bucket name and relative object name.
1868 int header_init() override
{
1870 struct req_state
* s
= get_state();
1871 s
->info
.method
= "GET";
1874 /* XXX derp derp derp */
1875 s
->relative_uri
= make_uri(rgw_fh
->bucket_name(),
1876 rgw_fh
->relative_object_name());
1877 s
->info
.request_uri
= s
->relative_uri
; // XXX
1878 s
->info
.effective_uri
= s
->relative_uri
;
1879 s
->info
.request_params
= "";
1880 s
->info
.domain
= ""; /* XXX ? */
// get_params: body is not visible in this listing.
1888 int get_params() override
{
// send_response_data: walks the buffers of bl.  A buffer is skipped (and
// bl_off reduced by its length) while bl_off exceeds the buffer length;
// otherwise min(read_resid, remaining bytes in the buffer) bytes are copied
// to ulp_buffer + nread and read_resid is reduced.  The updates of nread
// and bl_off after the copy, and the loop exit, are on lines not visible
// here.
// NOTE(review): bl_len is not referenced in the visible lines -- confirm
// whether it should bound the copy.
1892 int send_response_data(ceph::buffer::list
& bl
, off_t bl_off
,
1893 off_t bl_len
) override
{
1895 for (auto& bp
: bl
.buffers()) {
1896 /* if for some reason bl_off indicates the start-of-data is not at
1897 * the current buffer::ptr, skip it and account */
1898 if (bl_off
> bp
.length()) {
1899 bl_off
-= bp
.length();
1902 /* read no more than read_resid */
1903 bytes
= std::min(read_resid
, size_t(bp
.length()-bl_off
));
1904 memcpy(static_cast<char*>(ulp_buffer
)+nread
, bp
.c_str()+bl_off
, bytes
);
1905 read_resid
-= bytes
; /* reduce read_resid by bytes read */
1908 /* stop if we have no residual ulp_buffer */
// send_response_data_error: body is not visible beyond the comment.
1915 int send_response_data_error() override
{
1916 /* S3 implementation just sends nothing--there is no side effect
1917 * to simulate here */
1921 }; /* RGWReadRequest */
// RGWDeleteObjRequest: an RGWLibRequest that is also an RGWDeleteObj op,
// issued as a "DELETE" on make_uri(bucket_name, obj_name).
//
// NOTE(review): this listing is lossy -- the embedded original line numbers
// skip, so braces, return statements and possibly other statements are not
// visible.  The comments describe only the visible lines.
1927 class RGWDeleteObjRequest
: public RGWLibRequest
,
1928 public RGWDeleteObj
/* RGWOp */
// NOTE(review): both are references; the caller's strings must outlive
// this request.
1931 const std::string
& bucket_name
;
1932 const std::string
& obj_name
;
// Constructor: binds the two name references.
1934 RGWDeleteObjRequest(CephContext
* _cct
, RGWUserInfo
*_user
,
1935 const std::string
& _bname
, const std::string
& _oname
)
1936 : RGWLibRequest(_cct
, _user
), bucket_name(_bname
), obj_name(_oname
) {
// Always reports true for this request type.
1940 bool only_bucket() override
{ return true; }
// op_init: initializes the RGWOp base with the store taken from the
// request state's RGWObjectCtx and registers this object as its own op.
1942 int op_init() override
{
1943 // assign store, s, and dialect_handler
1944 RGWObjectCtx
* rados_ctx
1945 = static_cast<RGWObjectCtx
*>(get_state()->obj_ctx
);
1946 // framework promises to call op_init after parent init
1948 RGWOp::init(rados_ctx
->store
, get_state(), this);
1949 op
= this; // assign self as op: REQUIRED
// header_init: fills the request state as a "DELETE" whose relative,
// request and effective URIs are make_uri(bucket_name, obj_name).
1953 int header_init() override
{
1955 struct req_state
* s
= get_state();
1956 s
->info
.method
= "DELETE";
1959 /* XXX derp derp derp */
1960 std::string uri
= make_uri(bucket_name
, obj_name
);
1961 s
->relative_uri
= uri
;
1962 s
->info
.request_uri
= uri
; // XXX
1963 s
->info
.effective_uri
= uri
;
1964 s
->info
.request_params
= "";
1965 s
->info
.domain
= ""; /* XXX ? */
// send_response: intentionally empty.
1973 void send_response() override
{}
1975 }; /* RGWDeleteObjRequest */
// RGWStatObjRequest: an RGWLibRequest that is also an RGWGetObj op with
// data retrieval switched off (get_data = false); after execute() it
// exposes the object's size, modification time and attributes.
//
// NOTE(review): this listing is lossy -- the embedded original line numbers
// skip, so braces, return statements, some member declarations (e.g.
// _size, flags), one constructor parameter line and possibly other
// statements are not visible.  The comments describe only the visible lines.
1977 class RGWStatObjRequest
: public RGWLibRequest
,
1978 public RGWGetObj
/* RGWOp */
// NOTE(review): both are references; the caller's strings must outlive
// this request.
1981 const std::string
& bucket_name
;
1982 const std::string
& obj_name
;
1986 static constexpr uint32_t FLAG_NONE
= 0x000;
// Constructor: binds the name references, zeroes _size, stores the flags,
// and pre-populates the RGWGetObj range fields: no data is fetched and end
// is UINT64_MAX.  The parameter that supplies _flags is declared on a line
// not visible here.
1988 RGWStatObjRequest(CephContext
* _cct
, RGWUserInfo
*_user
,
1989 const std::string
& _bname
, const std::string
& _oname
,
1991 : RGWLibRequest(_cct
, _user
), bucket_name(_bname
), obj_name(_oname
),
1992 _size(0), flags(_flags
) {
1995 /* fixup RGWGetObj (already know range parameters) */
1996 RGWGetObj::range_parsed
= true;
1997 RGWGetObj::get_data
= false; // XXX
1998 RGWGetObj::partial_content
= true;
2000 RGWGetObj::end
= UINT64_MAX
;
// Op identification: reports the name "stat_obj" and type RGW_OP_STAT_OBJ.
2003 const string
name() override
{ return "stat_obj"; }
2004 RGWOpType
get_type() override
{ return RGW_OP_STAT_OBJ
; }
// get_mtime: body is not visible in this listing.
2006 real_time
get_mtime() const {
// Accessors.  ctime() and mtime() both return mod_time.
2011 uint64_t get_size() { return _size
; }
2012 real_time
ctime() { return mod_time
; } // XXX
2013 real_time
mtime() { return mod_time
; }
2014 std::map
<string
, bufferlist
>& get_attrs() { return attrs
; }
// get_attr: returns a pointer to the attribute stored under key k, or
// nullptr when attrs has no such key.
2016 buffer::list
* get_attr(const std::string
& k
) {
2017 auto iter
= attrs
.find(k
);
2018 return (iter
!= attrs
.end()) ? &(iter
->second
) : nullptr;
// Always reports false for this request type.
2021 bool only_bucket() override
{ return false; }
// op_init: initializes the RGWOp base with the store taken from the
// request state's RGWObjectCtx and registers this object as its own op.
2023 int op_init() override
{
2024 // assign store, s, and dialect_handler
2025 RGWObjectCtx
* rados_ctx
2026 = static_cast<RGWObjectCtx
*>(get_state()->obj_ctx
);
2027 // framework promises to call op_init after parent init
2029 RGWOp::init(rados_ctx
->store
, get_state(), this);
2030 op
= this; // assign self as op: REQUIRED
// header_init: fills the request state as a "GET" on
// make_uri(bucket_name, obj_name).
2034 int header_init() override
{
2036 struct req_state
* s
= get_state();
2037 s
->info
.method
= "GET";
2040 /* XXX derp derp derp */
2041 s
->relative_uri
= make_uri(bucket_name
, obj_name
);
2042 s
->info
.request_uri
= s
->relative_uri
; // XXX
2043 s
->info
.effective_uri
= s
->relative_uri
;
2044 s
->info
.request_params
= "";
2045 s
->info
.domain
= ""; /* XXX ? */
// get_params: body is not visible in this listing.
2053 int get_params() override
{
// send_response_data: body is not visible beyond the comment.
2057 int send_response_data(ceph::buffer::list
& _bl
, off_t s_off
,
2058 off_t e_off
) override
{
2060 /* XXX save attrs? */
// send_response_data_error: body is not visible in this listing.
2064 int send_response_data_error() override
{
// execute: runs the base RGWGetObj::execute() and then captures the object
// size from the request state into _size.
2069 void execute() override
{
2070 RGWGetObj::execute();
2071 _size
= get_state()->obj_size
;
2074 }; /* RGWStatObjRequest */
// RGWStatBucketRequest: an RGWLibRequest that is also an RGWStatBucket op;
// send_response() copies the bucket's statistics into the caller-supplied
// RGWLibFS::BucketStats and takes over the bucket attributes.
//
// NOTE(review): this listing is lossy -- the embedded original line numbers
// skip, so braces, return statements, some member declarations (e.g. uri)
// and possibly other statements are not visible.  The comments describe
// only the visible lines.
2076 class RGWStatBucketRequest
: public RGWLibRequest
,
2077 public RGWStatBucket
/* RGWOp */
// attrs: filled by swap in send_response().
// bs: reference to the caller's stats object; it must outlive this request.
2081 std::map
<std::string
, buffer::list
> attrs
;
2082 RGWLibFS::BucketStats
& bs
;
// Constructor: binds bs to the caller's stats object.  How _path is used
// is on lines not visible here.
2084 RGWStatBucketRequest(CephContext
* _cct
, RGWUserInfo
*_user
,
2085 const std::string
& _path
,
2086 RGWLibFS::BucketStats
& _stats
)
2087 : RGWLibRequest(_cct
, _user
), bs(_stats
) {
// get_attr: returns a pointer to the attribute stored under key k, or
// nullptr when attrs has no such key.
2092 buffer::list
* get_attr(const std::string
& k
) {
2093 auto iter
= attrs
.find(k
);
2094 return (iter
!= attrs
.end()) ? &(iter
->second
) : nullptr;
// get_ctime: returns the creation time recorded in bucket.
2097 real_time
get_ctime() const {
2098 return bucket
.creation_time
;
// Always reports false for this request type.
2101 bool only_bucket() override
{ return false; }
// op_init: initializes the RGWOp base with the store taken from the
// request state's RGWObjectCtx and registers this object as its own op.
2103 int op_init() override
{
2104 // assign store, s, and dialect_handler
2105 RGWObjectCtx
* rados_ctx
2106 = static_cast<RGWObjectCtx
*>(get_state()->obj_ctx
);
2107 // framework promises to call op_init after parent init
2109 RGWOp::init(rados_ctx
->store
, get_state(), this);
2110 op
= this; // assign self as op: REQUIRED
// header_init: fills the request state as a "GET" whose relative, request
// and effective URIs are all uri (declared on a line not visible here).
2114 int header_init() override
{
2116 struct req_state
* s
= get_state();
2117 s
->info
.method
= "GET";
2120 /* XXX derp derp derp */
2121 s
->relative_uri
= uri
;
2122 s
->info
.request_uri
= uri
; // XXX
2123 s
->info
.effective_uri
= uri
;
2124 s
->info
.request_params
= "";
2125 s
->info
.domain
= ""; /* XXX ? */
// get_params: body is not visible in this listing.
2133 virtual int get_params() {
// send_response: takes the creation time from the request state's
// bucket_info, copies size, size_rounded, creation_time and count into bs,
// and swaps the request state's bucket_attrs into attrs.
2137 void send_response() override
{
2138 bucket
.creation_time
= get_state()->bucket_info
.creation_time
;
2139 bs
.size
= bucket
.size
;
2140 bs
.size_rounded
= bucket
.size_rounded
;
2141 bs
.creation_time
= bucket
.creation_time
;
2142 bs
.num_entries
= bucket
.count
;
2143 std::swap(attrs
, get_state()->bucket_attrs
);
// NOTE(review): the signature of the member that owns the next statement is
// on a line not visible here; it returns whether bucket.bucket.name is
// non-empty.
2147 return (bucket
.bucket
.name
.length() > 0);
2150 }; /* RGWStatBucketRequest */
// RGWStatLeafRequest: an RGWLibRequest that is also an RGWListBucket op;
// it lists under the handle's relative object name and compares the
// results against a target path, recording the outcome in matched, is_dir
// and exact_matched.
//
// NOTE(review): this listing is lossy -- the embedded original line numbers
// skip, so braces, some member declarations, the comparison conditions in
// send_response() and other statements are not visible.  The comments
// describe only the visible lines.
2152 class RGWStatLeafRequest
: public RGWLibRequest
,
2153 public RGWListBucket
/* RGWOp */
// Handle under which the lookup is performed.
2156 RGWFileHandle
* rgw_fh
;
// Constructor: stores the handle and target path, clears the three result
// flags and sets default_max.
// NOTE(review): the value is 1000 while the trailing comment describes a
// logical maximum of two entries ({"foo", "foo/"}) -- confirm which is
// intended.
2162 RGWStatLeafRequest(CephContext
* _cct
, RGWUserInfo
*_user
,
2163 RGWFileHandle
* _rgw_fh
, const std::string
& _path
)
2164 : RGWLibRequest(_cct
, _user
), rgw_fh(_rgw_fh
), path(_path
),
2165 matched(false), is_dir(false), exact_matched(false) {
2166 default_max
= 1000; // logical max {"foo", "foo/"}
// Always reports true for this request type.
2170 bool only_bucket() override
{ return true; }
// op_init: initializes the RGWOp base with the store taken from the
// request state's RGWObjectCtx and registers this object as its own op.
2172 int op_init() override
{
2173 // assign store, s, and dialect_handler
2174 RGWObjectCtx
* rados_ctx
2175 = static_cast<RGWObjectCtx
*>(get_state()->obj_ctx
);
2176 // framework promises to call op_init after parent init
2178 RGWOp::init(rados_ctx
->store
, get_state(), this);
2179 op
= this; // assign self as op: REQUIRED
// header_init: fills the request state as a "GET" on "/<bucket>/" and sets
// the listing prefix to the handle's relative object name.  What happens
// when the prefix is non-empty is not visible in this listing.
2183 int header_init() override
{
2185 struct req_state
* s
= get_state();
2186 s
->info
.method
= "GET";
2189 /* XXX derp derp derp */
2190 std::string uri
= "/" + rgw_fh
->bucket_name() + "/";
2191 s
->relative_uri
= uri
;
2192 s
->info
.request_uri
= uri
; // XXX
2193 s
->info
.effective_uri
= uri
;
2194 s
->info
.request_params
= "";
2195 s
->info
.domain
= ""; /* XXX ? */
2200 prefix
= rgw_fh
->relative_object_name();
2201 if (prefix
.length() > 0)
// get_params: body is not visible in this listing.
2209 int get_params() override
{
// send_response: logs every entry of objs and of common_prefixes together
// with the target path.  exact_matched is set to true somewhere in the
// objs loop; the conditions guarding that assignment, and any handling of
// the common-prefix entries, are on lines not visible here.
2214 void send_response() override
{
2215 struct req_state
* s
= get_state();
2217 for (const auto& iter
: objs
) {
2218 auto& name
= iter
.key
.name
;
2219 lsubdout(cct
, rgw
, 15) << "RGWStatLeafRequest "
2221 << "list uri=" << s
->relative_uri
<< " "
2222 << " prefix=" << prefix
<< " "
2223 << " obj path=" << name
<< ""
2224 << " target = " << path
<< ""
2226 /* XXX is there a missing match-dir case (trailing '/')? */
2229 exact_matched
= true;
2233 for (auto& iter
: common_prefixes
) {
2234 auto& name
= iter
.first
;
2235 lsubdout(cct
, rgw
, 15) << "RGWStatLeafRequest "
2237 << "list uri=" << s
->relative_uri
<< " "
2238 << " prefix=" << prefix
<< " "
2239 << " pref path=" << name
<< " (not chomped)"
2240 << " target = " << path
<< ""
// send_versioned_response: body is not visible in this listing.
2248 virtual void send_versioned_response() {
2251 }; /* RGWStatLeafRequest */
// RGWWriteRequest: an RGWLibContinuedReq that is also an RGWPutObj op.
// Data is supplied through put_data() and execution is split into
// exec_start() / exec_continue() / exec_finish(), which are only declared
// here.
//
// NOTE(review): this listing is lossy -- the embedded original line numbers
// skip, so braces, return statements, some member declarations (e.g. data,
// real_ofs, multipart, eio) and other statements are not visible.  The
// comments describe only the visible lines.
2257 class RGWWriteRequest
: public RGWLibContinuedReq
,
2258 public RGWPutObj
/* RGWOp */
// NOTE(review): bucket_name and obj_name are references; the caller's
// strings must outlive this request.
2261 const std::string
& bucket_name
;
2262 const std::string
& obj_name
;
2263 RGWFileHandle
* rgw_fh
;
2264 RGWPutObjProcessor
* processor
;
2265 RGWPutObjDataProcessor
* filter
;
2266 boost::optional
<RGWPutObj_Compress
> compressor
;
2267 CompressorRef plugin
;
// Running total updated in get_data().
2272 size_t bytes_written
;
// Constructor: binds the names and handle, nulls processor and filter,
// zeroes real_ofs and bytes_written, clears multipart and eio, then calls
// header_init() followed by init_from_header(get_state()).  How the two
// return codes are checked is on lines not visible here.
2276 RGWWriteRequest(CephContext
* _cct
, RGWUserInfo
*_user
, RGWFileHandle
* _fh
,
2277 const std::string
& _bname
, const std::string
& _oname
)
2278 : RGWLibContinuedReq(_cct
, _user
), bucket_name(_bname
), obj_name(_oname
),
2279 rgw_fh(_fh
), processor(nullptr), filter(nullptr), real_ofs(0),
2280 bytes_written(0), multipart(false), eio(false) {
2282 int ret
= header_init();
2284 ret
= init_from_header(get_state());
// Always reports true for this request type.
2289 bool only_bucket() override
{ return true; }
// op_init: initializes the RGWOp base with the store taken from the
// request state's RGWObjectCtx and registers this object as its own op.
2291 int op_init() override
{
2292 // assign store, s, and dialect_handler
2293 RGWObjectCtx
* rados_ctx
2294 = static_cast<RGWObjectCtx
*>(get_state()->obj_ctx
);
2295 // framework promises to call op_init after parent init
2297 RGWOp::init(rados_ctx
->store
, get_state(), this);
2298 op
= this; // assign self as op: REQUIRED
// header_init: fills the request state as a "PUT" whose relative, request
// and effective URIs are make_uri(bucket_name, obj_name).
2302 int header_init() override
{
2304 struct req_state
* s
= get_state();
2305 s
->info
.method
= "PUT";
2308 /* XXX derp derp derp */
2309 std::string uri
= make_uri(bucket_name
, obj_name
);
2310 s
->relative_uri
= uri
;
2311 s
->info
.request_uri
= uri
; // XXX
2312 s
->info
.effective_uri
= uri
;
2313 s
->info
.request_params
= "";
2314 s
->info
.domain
= ""; /* XXX ? */
// select_processor: allocates an RGWPutObjProcessor_Atomic with the
// configured rgw_obj_stripe_size as part size and applies olh_epoch and
// version_id to it.  The local variable named processor shadows the member
// of the same name.
// NOTE(review): the object is created with new and the visible lines show
// neither the return nor a matching delete -- presumably ownership passes
// to the caller; confirm.
2322 RGWPutObjProcessor
*select_processor(RGWObjectCtx
& obj_ctx
,
2323 bool *is_multipart
) override
{
2324 struct req_state
* s
= get_state();
2325 uint64_t part_size
= s
->cct
->_conf
->rgw_obj_stripe_size
;
2326 RGWPutObjProcessor_Atomic
*processor
=
2327 new RGWPutObjProcessor_Atomic(obj_ctx
, s
->bucket_info
, s
->bucket
,
2328 s
->object
.name
, part_size
, s
->req_id
,
2329 s
->bucket_info
.versioning_enabled());
2330 processor
->set_olh_epoch(olh_epoch
);
2331 processor
->set_version_id(version_id
);
// get_params: builds a canned S3 ACL policy from the request state's owner,
// bucket_owner and canned_acl.  How ret and s3policy are used afterwards is
// on lines not visible here.
2335 int get_params() override
{
2336 struct req_state
* s
= get_state();
2337 RGWAccessControlPolicy_S3
s3policy(s
->cct
);
2338 /* we don't have (any) headers, so just create canned ACLs */
2339 int ret
= s3policy
.create_canned(s
->owner
, s
->bucket_owner
, s
->canned_acl
);
// get_data: adds data.length() to bytes_written.  The statement that fills
// _bl and the return value are on lines not visible here.
2344 int get_data(buffer::list
& _bl
) override
{
2345 /* XXX for now, use sharing semantics */
2346 uint32_t len
= data
.length();
2348 bytes_written
+= len
;
// put_data: accepts the next chunk at offset off.  Advances real_ofs by
// data.length() and stores off in ofs for exec_continue().  What happens
// when off differs from real_ofs, and where data is assigned from _bl, is
// on lines not visible here.
2352 void put_data(off_t off
, buffer::list
& _bl
) {
2353 if (off
!= real_ofs
) {
2357 real_ofs
+= data
.length();
2358 ofs
= off
; /* consumed in exec_continue() */
// Continued-request phases; defined outside this listing.
2361 int exec_start() override
;
2362 int exec_continue() override
;
2363 int exec_finish() override
;
// send_response: intentionally empty.
2365 void send_response() override
{}
// verify_params: body is not visible in this listing.
2367 int verify_params() override
{
2370 }; /* RGWWriteRequest */
// RGWCopyObjRequest: an RGWLibRequest that is also an RGWCopyObj op; it
// copies the child src_name of src_parent to the child dst_name of
// dst_parent.
//
// NOTE(review): this listing is lossy -- the embedded original line numbers
// skip, so braces, return statements, a comment terminator inside
// header_init() and other statements are not visible.  The comments
// describe only the visible lines.
2375 class RGWCopyObjRequest
: public RGWLibRequest
,
2376 public RGWCopyObj
/* RGWOp */
// NOTE(review): src_name and dst_name are references; the caller's strings
// must outlive this request.
2379 RGWFileHandle
* src_parent
;
2380 RGWFileHandle
* dst_parent
;
2381 const std::string
& src_name
;
2382 const std::string
& dst_name
;
// Constructor: stores the parent handles and name references and selects
// RGWRados::ATTRSMOD_MERGE as the attribute modification mode.
2384 RGWCopyObjRequest(CephContext
* _cct
, RGWUserInfo
*_user
,
2385 RGWFileHandle
* _src_parent
, RGWFileHandle
* _dst_parent
,
2386 const std::string
& _src_name
, const std::string
& _dst_name
)
2387 : RGWLibRequest(_cct
, _user
), src_parent(_src_parent
),
2388 dst_parent(_dst_parent
), src_name(_src_name
), dst_name(_dst_name
) {
2389 /* all requests have this */
2392 /* allow this request to replace selected attrs */
2393 attrs_mod
= RGWRados::ATTRSMOD_MERGE
;
// Always reports true for this request type.
2396 bool only_bucket() override
{ return true; }
// op_init: initializes the RGWOp base with the store taken from the
// request state's RGWObjectCtx and registers this object as its own op.
2398 int op_init() override
{
2399 // assign store, s, and dialect_handler
2400 RGWObjectCtx
* rados_ctx
2401 = static_cast<RGWObjectCtx
*>(get_state()->obj_ctx
);
2402 // framework promises to call op_init after parent init
2404 RGWOp::init(rados_ctx
->store
, get_state(), this);
2405 op
= this; // assign self as op: REQUIRED
// header_init: sets the method to "PUT", derives source and destination
// bucket/object names from the parent handles (format_child_name with
// false as second argument), validates the destination name with
// valid_s3_object_name(), and attaches the encoded destination fh_key as
// the RGW_ATTR_UNIX_KEY1 attribute.  The URI assignments at the end sit
// behind "#if 0".  How rc is used is on lines not visible here.
2410 int header_init() override
{
2412 struct req_state
* s
= get_state();
2413 s
->info
.method
= "PUT"; // XXX check
2416 src_bucket_name
= src_parent
->bucket_name();
2417 // need s->src_bucket_name?
2418 src_object
.name
= src_parent
->format_child_name(src_name
, false);
2419 // need s->src_object?
2421 dest_bucket_name
= dst_parent
->bucket_name();
2422 // need s->bucket.name?
2423 dest_object
= dst_parent
->format_child_name(dst_name
, false);
2424 // need s->object_name?
2426 int rc
= valid_s3_object_name(dest_object
);
2430 /* XXX and fixup key attr (could optimize w/string ref and
2432 buffer::list ux_key
;
2433 fh_key fhk
= dst_parent
->make_fhk(dst_name
);
2434 rgw::encode(fhk
, ux_key
);
2435 emplace_attr(RGW_ATTR_UNIX_KEY1
, std::move(ux_key
));
2437 #if 0 /* XXX needed? */
2438 s
->relative_uri
= uri
;
2439 s
->info
.request_uri
= uri
; // XXX
2440 s
->info
.effective_uri
= uri
;
2441 s
->info
.request_params
= "";
2442 s
->info
.domain
= ""; /* XXX ? */
// get_params: builds a canned S3 ACL policy from the request state's owner,
// bucket_owner and canned_acl and assigns it to dest_policy.  The return
// statement is on a line not visible here.
2451 int get_params() override
{
2452 struct req_state
* s
= get_state();
2453 RGWAccessControlPolicy_S3
s3policy(s
->cct
);
2454 /* we don't have (any) headers, so just create canned ACLs */
2455 int ret
= s3policy
.create_canned(s
->owner
, s
->bucket_owner
, s
->canned_acl
);
2456 dest_policy
= s3policy
;
// Response hooks: both intentionally empty.
2460 void send_response() override
{}
2461 void send_partial_response(off_t ofs
) override
{}
2463 }; /* RGWCopyObjRequest */
// RGWSetAttrsRequest: an RGWLibRequest that is also an RGWSetAttrs op,
// issued as a "PUT" on make_uri(bucket_name, obj_name).
//
// NOTE(review): this listing is lossy -- the embedded original line numbers
// skip, so braces, return statements and possibly other statements are not
// visible.  The comments describe only the visible lines.
2465 class RGWSetAttrsRequest
: public RGWLibRequest
,
2466 public RGWSetAttrs
/* RGWOp */
// NOTE(review): both are references; the caller's strings must outlive
// this request.
2469 const std::string
& bucket_name
;
2470 const std::string
& obj_name
;
// Constructor: binds the two name references.
2472 RGWSetAttrsRequest(CephContext
* _cct
, RGWUserInfo
*_user
,
2473 const std::string
& _bname
, const std::string
& _oname
)
2474 : RGWLibRequest(_cct
, _user
), bucket_name(_bname
), obj_name(_oname
) {
// Always reports false for this request type.
2478 bool only_bucket() override
{ return false; }
// op_init: initializes the RGWOp base with the store taken from the
// request state's RGWObjectCtx and registers this object as its own op.
2480 int op_init() override
{
2481 // assign store, s, and dialect_handler
2482 RGWObjectCtx
* rados_ctx
2483 = static_cast<RGWObjectCtx
*>(get_state()->obj_ctx
);
2484 // framework promises to call op_init after parent init
2486 RGWOp::init(rados_ctx
->store
, get_state(), this);
2487 op
= this; // assign self as op: REQUIRED
// header_init: fills the request state as a "PUT" whose relative, request
// and effective URIs are make_uri(bucket_name, obj_name).
2491 int header_init() override
{
2493 struct req_state
* s
= get_state();
2494 s
->info
.method
= "PUT";
2497 /* XXX derp derp derp */
2498 std::string uri
= make_uri(bucket_name
, obj_name
);
2499 s
->relative_uri
= uri
;
2500 s
->info
.request_uri
= uri
; // XXX
2501 s
->info
.effective_uri
= uri
;
2502 s
->info
.request_params
= "";
2503 s
->info
.domain
= ""; /* XXX ? */
// get_params: body is not visible in this listing.
2511 int get_params() override
{
// send_response: intentionally empty.
2515 void send_response() override
{}
2517 }; /* RGWSetAttrsRequest */
2519 } /* namespace rgw */
2521 #endif /* RGW_FILE_H */