1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 #ifndef CEPH_RGWRADOS_H
5 #define CEPH_RGWRADOS_H
9 #include "include/rados/librados.hpp"
10 #include "include/Context.h"
11 #include "common/admin_socket.h"
12 #include "common/RefCountedObj.h"
13 #include "common/RWLock.h"
14 #include "common/ceph_time.h"
15 #include "common/lru_map.h"
16 #include "common/ceph_json.h"
17 #include "rgw_common.h"
18 #include "cls/rgw/cls_rgw_types.h"
19 #include "cls/version/cls_version_types.h"
20 #include "cls/log/cls_log_types.h"
21 #include "cls/timeindex/cls_timeindex_types.h"
22 #include "cls/otp/cls_otp_types.h"
24 #include "rgw_metadata.h"
25 #include "rgw_meta_sync_status.h"
26 #include "rgw_period_puller.h"
27 #include "rgw_sync_module.h"
28 #include "rgw_sync_log_trim.h"
29 #include "rgw_service.h"
31 #include "services/svc_rados.h"
32 #include "services/svc_zone.h"
38 class RGWMetaNotifier
;
39 class RGWDataNotifier
;
41 class RGWObjectExpirer
;
42 class RGWMetaSyncProcessorThread
;
43 class RGWDataSyncProcessorThread
;
44 class RGWSyncLogTrimThread
;
45 class RGWSyncTraceManager
;
51 class RGWSysObjectCtx
;
53 /* flags for put_obj_meta() */
54 #define PUT_OBJ_CREATE 0x01
55 #define PUT_OBJ_EXCL 0x02
56 #define PUT_OBJ_CREATE_EXCL (PUT_OBJ_CREATE | PUT_OBJ_EXCL)
58 #define RGW_OBJ_NS_MULTIPART "multipart"
59 #define RGW_OBJ_NS_SHADOW "shadow"
61 #define RGW_BUCKET_INSTANCE_MD_PREFIX ".bucket.meta."
63 #define RGW_NO_SHARD -1
#define RGW_SHARDS_PRIME_0 7877
#define RGW_SHARDS_PRIME_1 65521

extern const std::string MP_META_SUFFIX;

/**
 * Reduce a hash value to a shard number.
 *
 * Only called by rgw_shard_id and rgw_bucket_shard_index.
 *
 * The hash is first taken modulo a prime and then modulo max_shards.
 * RGW_SHARDS_PRIME_0 is used while max_shards does not exceed it;
 * RGW_SHARDS_PRIME_1 is used for larger shard counts.
 *
 * @param hval        hash value to map
 * @param max_shards  number of shards; for a positive value the result
 *                    lies in [0, max_shards)
 * @return the selected shard, or 0 when max_shards is 0
 */
static inline int rgw_shards_mod(unsigned hval, int max_shards)
{
  // A modulus of zero is undefined behaviour in C++; fall back to shard 0
  // instead of evaluating "% 0". Negative values are not treated specially
  // and follow the same arithmetic as before.
  if (max_shards == 0) {
    return 0;
  }
  if (max_shards <= RGW_SHARDS_PRIME_0) {
    return hval % RGW_SHARDS_PRIME_0 % max_shards;
  }
  return hval % RGW_SHARDS_PRIME_1 % max_shards;
}
79 // used for logging and tagging
80 static inline int rgw_shard_id(const string
& key
, int max_shards
)
82 return rgw_shards_mod(ceph_str_hash_linux(key
.c_str(), key
.size()),
86 // used for bucket indices
87 static inline uint32_t rgw_bucket_shard_index(const std::string
& key
,
89 uint32_t sid
= ceph_str_hash_linux(key
.c_str(), key
.size());
90 uint32_t sid2
= sid
^ ((sid
& 0xFF) << 24);
91 return rgw_shards_mod(sid2
, num_shards
);
94 static inline int rgw_shards_max()
96 return RGW_SHARDS_PRIME_1
;
99 static inline void prepend_bucket_marker(const rgw_bucket
& bucket
, const string
& orig_oid
, string
& oid
)
101 if (bucket
.marker
.empty() || orig_oid
.empty()) {
106 oid
.append(orig_oid
);
110 static inline void get_obj_bucket_and_oid_loc(const rgw_obj
& obj
, string
& oid
, string
& locator
)
112 const rgw_bucket
& bucket
= obj
.bucket
;
113 prepend_bucket_marker(bucket
, obj
.get_oid(), oid
);
114 const string
& loc
= obj
.key
.get_loc();
116 prepend_bucket_marker(bucket
, loc
, locator
);
122 int rgw_policy_from_attrset(CephContext
*cct
, map
<string
, bufferlist
>& attrset
, RGWAccessControlPolicy
*policy
);
124 static inline bool rgw_raw_obj_to_obj(const rgw_bucket
& bucket
, const rgw_raw_obj
& raw_obj
, rgw_obj
*obj
)
126 ssize_t pos
= raw_obj
.oid
.find('_');
131 if (!rgw_obj_key::parse_raw_oid(raw_obj
.oid
.substr(pos
+ 1), &obj
->key
)) {
134 obj
->bucket
= bucket
;
140 struct rgw_bucket_placement
{
141 rgw_placement_rule placement_rule
;
144 void dump(Formatter
*f
) const;
147 class rgw_obj_select
{
148 rgw_placement_rule placement_rule
;
154 rgw_obj_select() : is_raw(false) {}
155 explicit rgw_obj_select(const rgw_obj
& _obj
) : obj(_obj
), is_raw(false) {}
156 explicit rgw_obj_select(const rgw_raw_obj
& _raw_obj
) : raw_obj(_raw_obj
), is_raw(true) {}
157 rgw_obj_select(const rgw_obj_select
& rhs
) {
158 placement_rule
= rhs
.placement_rule
;
161 raw_obj
= rhs
.raw_obj
;
167 rgw_raw_obj
get_raw_obj(const RGWZoneGroup
& zonegroup
, const RGWZoneParams
& zone_params
) const;
168 rgw_raw_obj
get_raw_obj(RGWRados
*store
) const;
170 rgw_obj_select
& operator=(const rgw_obj
& rhs
) {
176 rgw_obj_select
& operator=(const rgw_raw_obj
& rhs
) {
182 void set_placement_rule(const rgw_placement_rule
& rule
) {
183 placement_rule
= rule
;
185 void dump(Formatter
*f
) const;
188 struct compression_block
{
193 void encode(bufferlist
& bl
) const {
194 ENCODE_START(1, 1, bl
);
201 void decode(bufferlist::const_iterator
& bl
) {
208 void dump(Formatter
*f
) const;
210 WRITE_CLASS_ENCODER(compression_block
)
212 struct RGWCompressionInfo
{
213 string compression_type
;
215 vector
<compression_block
> blocks
;
217 RGWCompressionInfo() : compression_type("none"), orig_size(0) {}
218 RGWCompressionInfo(const RGWCompressionInfo
& cs_info
) : compression_type(cs_info
.compression_type
),
219 orig_size(cs_info
.orig_size
),
220 blocks(cs_info
.blocks
) {}
222 void encode(bufferlist
& bl
) const {
223 ENCODE_START(1, 1, bl
);
224 encode(compression_type
, bl
);
225 encode(orig_size
, bl
);
230 void decode(bufferlist::const_iterator
& bl
) {
232 decode(compression_type
, bl
);
233 decode(orig_size
, bl
);
237 void dump(Formatter
*f
) const;
239 WRITE_CLASS_ENCODER(RGWCompressionInfo
)
241 int rgw_compression_info_from_attrset(map
<string
, bufferlist
>& attrs
, bool& need_decompress
, RGWCompressionInfo
& cs_info
);
247 RGWOLHInfo() : removed(false) {}
249 void encode(bufferlist
& bl
) const {
250 ENCODE_START(1, 1, bl
);
256 void decode(bufferlist::const_iterator
& bl
) {
262 static void generate_test_instances(list
<RGWOLHInfo
*>& o
);
263 void dump(Formatter
*f
) const;
265 WRITE_CLASS_ENCODER(RGWOLHInfo
)
267 struct RGWOLHPendingInfo
{
268 ceph::real_time time
;
270 RGWOLHPendingInfo() {}
272 void encode(bufferlist
& bl
) const {
273 ENCODE_START(1, 1, bl
);
278 void decode(bufferlist::const_iterator
& bl
) {
284 void dump(Formatter
*f
) const;
286 WRITE_CLASS_ENCODER(RGWOLHPendingInfo
)
288 struct RGWUsageBatch
{
289 map
<ceph::real_time
, rgw_usage_log_entry
> m
;
291 void insert(ceph::real_time
& t
, rgw_usage_log_entry
& entry
, bool *account
) {
292 bool exists
= m
.find(t
) != m
.end();
294 m
[t
].aggregate(entry
);
298 struct RGWUsageIter
{
302 RGWUsageIter() : index(0) {}
307 virtual int handle_data(bufferlist
& bl
, off_t bl_ofs
, off_t bl_len
) = 0;
309 virtual ~RGWGetDataCB() {}
312 struct RGWCloneRangeInfo
{
319 struct RGWObjManifestPart
{
320 rgw_obj loc
; /* the object where the data is located */
321 uint64_t loc_ofs
; /* the offset at that object where the data is located */
322 uint64_t size
; /* the part size */
324 RGWObjManifestPart() : loc_ofs(0), size(0) {}
326 void encode(bufferlist
& bl
) const {
327 ENCODE_START(2, 2, bl
);
334 void decode(bufferlist::const_iterator
& bl
) {
335 DECODE_START_LEGACY_COMPAT_LEN_32(2, 2, 2, bl
);
342 void dump(Formatter
*f
) const;
343 static void generate_test_instances(list
<RGWObjManifestPart
*>& o
);
345 WRITE_CLASS_ENCODER(RGWObjManifestPart
)
348 The manifest defines a set of rules for structuring the object parts.
349 There are a few terms to note:
350 - head: the head part of the object, which is the part that contains
351 the first chunk of data. An object might not have a head (as in the
352 case of multipart-part objects).
353 - stripe: data portion of a single rgw object that resides on a single
355 - part: a collection of stripes that make a contiguous part of an
356 object. A regular object will only have one part (although might have
357 many stripes), a multipart object might have many parts. Each part
358 has a fixed stripe size, although the last stripe of a part might
359 be smaller than that. Consecutive parts may be merged if their stripe
363 struct RGWObjManifestRule
{
364 uint32_t start_part_num
;
366 uint64_t part_size
; /* each part size, 0 if there's no part size, meaning it's unlimited */
367 uint64_t stripe_max_size
; /* underlying obj max size */
368 string override_prefix
;
370 RGWObjManifestRule() : start_part_num(0), start_ofs(0), part_size(0), stripe_max_size(0) {}
371 RGWObjManifestRule(uint32_t _start_part_num
, uint64_t _start_ofs
, uint64_t _part_size
, uint64_t _stripe_max_size
) :
372 start_part_num(_start_part_num
), start_ofs(_start_ofs
), part_size(_part_size
), stripe_max_size(_stripe_max_size
) {}
374 void encode(bufferlist
& bl
) const {
375 ENCODE_START(2, 1, bl
);
376 encode(start_part_num
, bl
);
377 encode(start_ofs
, bl
);
378 encode(part_size
, bl
);
379 encode(stripe_max_size
, bl
);
380 encode(override_prefix
, bl
);
384 void decode(bufferlist::const_iterator
& bl
) {
386 decode(start_part_num
, bl
);
387 decode(start_ofs
, bl
);
388 decode(part_size
, bl
);
389 decode(stripe_max_size
, bl
);
391 decode(override_prefix
, bl
);
394 void dump(Formatter
*f
) const;
396 WRITE_CLASS_ENCODER(RGWObjManifestRule
)
398 class RGWObjManifest
{
400 bool explicit_objs
; /* old manifest? */
401 map
<uint64_t, RGWObjManifestPart
> objs
;
407 rgw_placement_rule head_placement_rule
;
409 uint64_t max_head_size
;
411 rgw_bucket_placement tail_placement
; /* might be different than the original bucket,
412 as object might have been copied across pools */
413 map
<uint64_t, RGWObjManifestRule
> rules
;
415 string tail_instance
; /* tail object's instance */
417 void convert_to_explicit(const RGWZoneGroup
& zonegroup
, const RGWZoneParams
& zone_params
);
418 int append_explicit(RGWObjManifest
& m
, const RGWZoneGroup
& zonegroup
, const RGWZoneParams
& zone_params
);
419 void append_rules(RGWObjManifest
& m
, map
<uint64_t, RGWObjManifestRule
>::iterator
& iter
, string
*override_prefix
);
421 void update_iterators() {
423 end_iter
.seek(obj_size
);
427 RGWObjManifest() : explicit_objs(false), obj_size(0), head_size(0), max_head_size(0),
428 begin_iter(this), end_iter(this) {}
429 RGWObjManifest(const RGWObjManifest
& rhs
) {
432 RGWObjManifest
& operator=(const RGWObjManifest
& rhs
) {
433 explicit_objs
= rhs
.explicit_objs
;
435 obj_size
= rhs
.obj_size
;
437 head_size
= rhs
.head_size
;
438 max_head_size
= rhs
.max_head_size
;
440 tail_placement
= rhs
.tail_placement
;
442 tail_instance
= rhs
.tail_instance
;
444 begin_iter
.set_manifest(this);
445 end_iter
.set_manifest(this);
447 begin_iter
.seek(rhs
.begin_iter
.get_ofs());
448 end_iter
.seek(rhs
.end_iter
.get_ofs());
453 map
<uint64_t, RGWObjManifestPart
>& get_explicit_objs() {
458 void set_explicit(uint64_t _size
, map
<uint64_t, RGWObjManifestPart
>& _objs
) {
459 explicit_objs
= true;
464 void get_implicit_location(uint64_t cur_part_id
, uint64_t cur_stripe
, uint64_t ofs
, string
*override_prefix
, rgw_obj_select
*location
);
466 void set_trivial_rule(uint64_t tail_ofs
, uint64_t stripe_max_size
) {
467 RGWObjManifestRule
rule(0, tail_ofs
, 0, stripe_max_size
);
469 max_head_size
= tail_ofs
;
472 void set_multipart_part_rule(uint64_t stripe_max_size
, uint64_t part_num
) {
473 RGWObjManifestRule
rule(0, 0, 0, stripe_max_size
);
474 rule
.start_part_num
= part_num
;
479 void encode(bufferlist
& bl
) const {
480 ENCODE_START(7, 6, bl
);
481 encode(obj_size
, bl
);
483 encode(explicit_objs
, bl
);
485 encode(head_size
, bl
);
486 encode(max_head_size
, bl
);
489 bool encode_tail_bucket
= !(tail_placement
.bucket
== obj
.bucket
);
490 encode(encode_tail_bucket
, bl
);
491 if (encode_tail_bucket
) {
492 encode(tail_placement
.bucket
, bl
);
494 bool encode_tail_instance
= (tail_instance
!= obj
.key
.instance
);
495 encode(encode_tail_instance
, bl
);
496 if (encode_tail_instance
) {
497 encode(tail_instance
, bl
);
499 encode(head_placement_rule
, bl
);
500 encode(tail_placement
.placement_rule
, bl
);
504 void decode(bufferlist::const_iterator
& bl
) {
505 DECODE_START_LEGACY_COMPAT_LEN_32(7, 2, 2, bl
);
506 decode(obj_size
, bl
);
509 decode(explicit_objs
, bl
);
511 decode(head_size
, bl
);
512 decode(max_head_size
, bl
);
516 explicit_objs
= true;
518 map
<uint64_t, RGWObjManifestPart
>::iterator iter
= objs
.begin();
519 obj
= iter
->second
.loc
;
520 head_size
= iter
->second
.size
;
521 max_head_size
= head_size
;
525 if (explicit_objs
&& head_size
> 0 && !objs
.empty()) {
526 /* patch up manifest due to issue 16435:
527 * the first object in the explicit objs list might not be the one we need to access, use the
528 * head object instead if set. This would happen if we had an old object that was created
529 * when the explicit objs manifest was around, and it got copied.
531 rgw_obj
& obj_0
= objs
[0].loc
;
532 if (!obj_0
.get_oid().empty() && obj_0
.key
.ns
.empty()) {
534 objs
[0].size
= head_size
;
540 decode(tail_placement
.bucket
, bl
);
543 decode(need_to_decode
, bl
);
544 if (need_to_decode
) {
545 decode(tail_placement
.bucket
, bl
);
547 tail_placement
.bucket
= obj
.bucket
;
554 decode(tail_instance
, bl
);
557 decode(need_to_decode
, bl
);
558 if (need_to_decode
) {
559 decode(tail_instance
, bl
);
561 tail_instance
= obj
.key
.instance
;
564 } else { // old object created before 'tail_instance' field added to manifest
565 tail_instance
= obj
.key
.instance
;
569 decode(head_placement_rule
, bl
);
570 decode(tail_placement
.placement_rule
, bl
);
577 void dump(Formatter
*f
) const;
578 static void generate_test_instances(list
<RGWObjManifest
*>& o
);
580 int append(RGWObjManifest
& m
, const RGWZoneGroup
& zonegroup
,
581 const RGWZoneParams
& zone_params
);
582 int append(RGWObjManifest
& m
, RGWSI_Zone
*zone_svc
);
584 bool get_rule(uint64_t ofs
, RGWObjManifestRule
*rule
);
589 return rules
.empty();
592 bool has_explicit_objs() {
593 return explicit_objs
;
598 if (objs
.size() == 1) {
599 map
<uint64_t, RGWObjManifestPart
>::iterator iter
= objs
.begin();
600 rgw_obj
& o
= iter
->second
.loc
;
603 return (objs
.size() >= 2);
605 return (obj_size
> head_size
);
608 void set_head(const rgw_placement_rule
& placement_rule
, const rgw_obj
& _o
, uint64_t _s
) {
609 head_placement_rule
= placement_rule
;
613 if (explicit_objs
&& head_size
> 0) {
615 objs
[0].size
= head_size
;
619 const rgw_obj
& get_obj() {
623 void set_tail_placement(const rgw_placement_rule
& placement_rule
, const rgw_bucket
& _b
) {
624 tail_placement
.placement_rule
= placement_rule
;
625 tail_placement
.bucket
= _b
;
628 const rgw_bucket_placement
& get_tail_placement() {
629 return tail_placement
;
632 const rgw_placement_rule
& get_head_placement_rule() {
633 return head_placement_rule
;
636 void set_prefix(const string
& _p
) {
640 const string
& get_prefix() {
644 void set_tail_instance(const string
& _ti
) {
648 const string
& get_tail_instance() {
649 return tail_instance
;
652 void set_head_size(uint64_t _s
) {
656 void set_obj_size(uint64_t s
) {
662 uint64_t get_obj_size() {
666 uint64_t get_head_size() {
670 uint64_t get_max_head_size() {
671 return max_head_size
;
675 RGWObjManifest
*manifest
;
676 uint64_t part_ofs
; /* where current part starts */
677 uint64_t stripe_ofs
; /* where current stripe starts */
678 uint64_t ofs
; /* current position within the object */
679 uint64_t stripe_size
; /* current part size */
683 string cur_override_prefix
;
685 rgw_obj_select location
;
687 map
<uint64_t, RGWObjManifestRule
>::iterator rule_iter
;
688 map
<uint64_t, RGWObjManifestRule
>::iterator next_rule_iter
;
690 map
<uint64_t, RGWObjManifestPart
>::iterator explicit_iter
;
701 void update_explicit_pos();
706 void set_manifest(RGWObjManifest
*m
) {
711 obj_iterator() : manifest(NULL
) {
714 explicit obj_iterator(RGWObjManifest
*_m
) : manifest(_m
) {
716 if (!manifest
->empty()) {
720 obj_iterator(RGWObjManifest
*_m
, uint64_t _ofs
) : manifest(_m
) {
722 if (!manifest
->empty()) {
726 void seek(uint64_t ofs
);
729 bool operator==(const obj_iterator
& rhs
) {
730 return (ofs
== rhs
.ofs
);
732 bool operator!=(const obj_iterator
& rhs
) {
733 return (ofs
!= rhs
.ofs
);
735 const rgw_obj_select
& get_location() {
739 /* start of current stripe */
740 uint64_t get_stripe_ofs() {
741 if (manifest
->explicit_objs
) {
742 return explicit_iter
->first
;
747 /* current ofs relative to start of rgw object */
748 uint64_t get_ofs() const {
753 int get_cur_stripe() const {
757 /* current stripe size */
758 uint64_t get_stripe_size() {
759 if (manifest
->explicit_objs
) {
760 return explicit_iter
->second
.size
;
765 /* offset where data starts within current stripe */
766 uint64_t location_ofs() {
767 if (manifest
->explicit_objs
) {
768 return explicit_iter
->second
.loc_ofs
;
770 return 0; /* all stripes start at zero offset */
773 void update_location();
775 friend class RGWObjManifest
;
776 void dump(Formatter
*f
) const;
779 const obj_iterator
& obj_begin();
780 const obj_iterator
& obj_end();
781 obj_iterator
obj_find(uint64_t ofs
);
783 obj_iterator begin_iter
;
784 obj_iterator end_iter
;
787 * simple object generator. Using a simple single rule manifest.
790 RGWObjManifest
*manifest
;
792 uint64_t cur_part_ofs
;
795 uint64_t cur_stripe_size
;
800 rgw_obj_select cur_obj
;
802 RGWObjManifestRule rule
;
805 generator() : manifest(NULL
), last_ofs(0), cur_part_ofs(0), cur_part_id(0),
806 cur_stripe(0), cur_stripe_size(0) {}
807 int create_begin(CephContext
*cct
, RGWObjManifest
*manifest
,
808 const rgw_placement_rule
& head_placement_rule
,
809 const rgw_placement_rule
*tail_placement_rule
,
810 const rgw_bucket
& bucket
,
813 int create_next(uint64_t ofs
);
815 rgw_raw_obj
get_cur_obj(RGWZoneGroup
& zonegroup
, RGWZoneParams
& zone_params
) { return cur_obj
.get_raw_obj(zonegroup
, zone_params
); }
816 rgw_raw_obj
get_cur_obj(RGWRados
*store
) const { return cur_obj
.get_raw_obj(store
); }
818 /* total max size of current stripe (including head obj) */
819 uint64_t cur_stripe_max_size() const {
820 return cur_stripe_size
;
824 WRITE_CLASS_ENCODER(RGWObjManifest
)
826 struct RGWUploadPartInfo
{
829 uint64_t accounted_size
{0};
831 ceph::real_time modified
;
832 RGWObjManifest manifest
;
833 RGWCompressionInfo cs_info
;
835 RGWUploadPartInfo() : num(0), size(0) {}
837 void encode(bufferlist
& bl
) const {
838 ENCODE_START(4, 2, bl
);
842 encode(modified
, bl
);
843 encode(manifest
, bl
);
845 encode(accounted_size
, bl
);
848 void decode(bufferlist::const_iterator
& bl
) {
849 DECODE_START_LEGACY_COMPAT_LEN(4, 2, 2, bl
);
853 decode(modified
, bl
);
855 decode(manifest
, bl
);
858 decode(accounted_size
, bl
);
860 accounted_size
= size
;
864 void dump(Formatter
*f
) const;
865 static void generate_test_instances(list
<RGWUploadPartInfo
*>& o
);
867 WRITE_CLASS_ENCODER(RGWUploadPartInfo
)
874 uint64_t size
; //< size of raw object
875 uint64_t accounted_size
{0}; //< size before compression, encryption
876 ceph::real_time mtime
;
882 RGWObjManifest manifest
;
892 uint32_t zone_short_id
;
894 /* important! don't forget to update copy constructor */
896 RGWObjVersionTracker objv_tracker
;
898 map
<string
, bufferlist
> attrset
;
899 RGWObjState() : is_atomic(false), has_attrs(0), exists(false),
900 size(0), epoch(0), fake_tag(false), has_manifest(false),
901 has_data(false), prefetch_data(false), keep_tail(false), is_olh(false),
902 pg_ver(0), zone_short_id(0) {}
903 RGWObjState(const RGWObjState
& rhs
) : obj (rhs
.obj
) {
904 is_atomic
= rhs
.is_atomic
;
905 has_attrs
= rhs
.has_attrs
;
908 accounted_size
= rhs
.accounted_size
;
911 if (rhs
.obj_tag
.length()) {
912 obj_tag
= rhs
.obj_tag
;
914 if (rhs
.tail_tag
.length()) {
915 tail_tag
= rhs
.tail_tag
;
917 write_tag
= rhs
.write_tag
;
918 fake_tag
= rhs
.fake_tag
;
919 if (rhs
.has_manifest
) {
920 manifest
= rhs
.manifest
;
922 has_manifest
= rhs
.has_manifest
;
923 shadow_obj
= rhs
.shadow_obj
;
924 has_data
= rhs
.has_data
;
925 if (rhs
.data
.length()) {
928 prefetch_data
= rhs
.prefetch_data
;
929 keep_tail
= rhs
.keep_tail
;
931 objv_tracker
= rhs
.objv_tracker
;
935 bool get_attr(string name
, bufferlist
& dest
) {
936 map
<string
, bufferlist
>::iterator iter
= attrset
.find(name
);
937 if (iter
!= attrset
.end()) {
945 struct RGWRawObjState
{
947 bool has_attrs
{false};
950 ceph::real_time mtime
;
953 bool has_data
{false};
955 bool prefetch_data
{false};
958 /* important! don't forget to update copy constructor */
960 RGWObjVersionTracker objv_tracker
;
962 map
<string
, bufferlist
> attrset
;
964 RGWRawObjState(const RGWRawObjState
& rhs
) : obj (rhs
.obj
) {
965 has_attrs
= rhs
.has_attrs
;
970 if (rhs
.obj_tag
.length()) {
971 obj_tag
= rhs
.obj_tag
;
973 has_data
= rhs
.has_data
;
974 if (rhs
.data
.length()) {
977 prefetch_data
= rhs
.prefetch_data
;
979 objv_tracker
= rhs
.objv_tracker
;
983 struct RGWPoolIterCtx
{
984 librados::IoCtx io_ctx
;
985 librados::NObjectIterator iter
;
988 struct RGWListRawObjsCtx
{
990 RGWPoolIterCtx iter_ctx
;
992 RGWListRawObjsCtx() : initialized(false) {}
995 struct objexp_hint_entry
{
1000 ceph::real_time exp_time
;
1002 void encode(bufferlist
& bl
) const {
1003 ENCODE_START(2, 1, bl
);
1004 encode(bucket_name
, bl
);
1005 encode(bucket_id
, bl
);
1006 encode(obj_key
, bl
);
1007 encode(exp_time
, bl
);
1012 void decode(bufferlist::const_iterator
& bl
) {
1013 // XXX Do we want DECODE_START_LEGACY_COMPAT_LEN(2, 1, 1, bl); ?
1014 DECODE_START(2, bl
);
1015 decode(bucket_name
, bl
);
1016 decode(bucket_id
, bl
);
1017 decode(obj_key
, bl
);
1018 decode(exp_time
, bl
);
1019 if (struct_v
>= 2) {
1027 WRITE_CLASS_ENCODER(objexp_hint_entry
)
1029 class RGWDataChangesLog
;
1030 class RGWMetaSyncStatusManager
;
1031 class RGWDataSyncStatusManager
;
1032 class RGWCoroutinesManagerRegistry
;
1034 class RGWGetBucketStats_CB
: public RefCountedObject
{
1037 map
<RGWObjCategory
, RGWStorageStats
> *stats
;
1039 explicit RGWGetBucketStats_CB(const rgw_bucket
& _bucket
) : bucket(_bucket
), stats(NULL
) {}
1040 ~RGWGetBucketStats_CB() override
{}
1041 virtual void handle_response(int r
) = 0;
1042 virtual void set_response(map
<RGWObjCategory
, RGWStorageStats
> *_stats
) {
1047 class RGWGetUserStats_CB
: public RefCountedObject
{
1050 RGWStorageStats stats
;
1052 explicit RGWGetUserStats_CB(const rgw_user
& _user
) : user(_user
) {}
1053 ~RGWGetUserStats_CB() override
{}
1054 virtual void handle_response(int r
) = 0;
1055 virtual void set_response(RGWStorageStats
& _stats
) {
1060 class RGWGetDirHeader_CB
;
1061 class RGWGetUserHeader_CB
;
1063 class RGWObjectCtx
{
1065 RWLock lock
{"RGWObjectCtx"};
1068 std::map
<rgw_obj
, RGWObjState
> objs_state
;
1070 explicit RGWObjectCtx(RGWRados
*_store
) : store(_store
) {}
1071 explicit RGWObjectCtx(RGWRados
*_store
, void *_s
) : store(_store
), s(_s
) {}
1073 void *get_private() {
1077 RGWRados
*get_store() {
1081 RGWObjState
*get_state(const rgw_obj
& obj
) {
1082 RGWObjState
*result
;
1083 typename
std::map
<rgw_obj
, RGWObjState
>::iterator iter
;
1085 assert (!obj
.empty());
1086 iter
= objs_state
.find(obj
);
1087 if (iter
!= objs_state
.end()) {
1088 result
= &iter
->second
;
1093 result
= &objs_state
[obj
];
1099 void set_atomic(rgw_obj
& obj
) {
1100 RWLock::WLocker
wl(lock
);
1101 assert (!obj
.empty());
1102 objs_state
[obj
].is_atomic
= true;
1104 void set_prefetch_data(const rgw_obj
& obj
) {
1105 RWLock::WLocker
wl(lock
);
1106 assert (!obj
.empty());
1107 objs_state
[obj
].prefetch_data
= true;
1110 void invalidate(const rgw_obj
& obj
) {
1111 RWLock::WLocker
wl(lock
);
1112 auto iter
= objs_state
.find(obj
);
1113 if (iter
== objs_state
.end()) {
1116 bool is_atomic
= iter
->second
.is_atomic
;
1117 bool prefetch_data
= iter
->second
.prefetch_data
;
1119 objs_state
.erase(iter
);
1121 if (is_atomic
|| prefetch_data
) {
1122 auto& state
= objs_state
[obj
];
1123 state
.is_atomic
= is_atomic
;
1124 state
.prefetch_data
= prefetch_data
;
1129 class RGWAsyncRadosProcessor
;
1132 class RGWChainedCacheImpl
;
1134 struct bucket_info_entry
{
1137 map
<string
, bufferlist
> attrs
;
1140 struct tombstone_entry
{
1141 ceph::real_time mtime
;
1142 uint32_t zone_short_id
;
1145 tombstone_entry() = default;
1146 explicit tombstone_entry(const RGWObjState
& state
)
1147 : mtime(state
.mtime
), zone_short_id(state
.zone_short_id
),
1148 pg_ver(state
.pg_ver
) {}
1151 class RGWIndexCompletionManager
;
1153 class RGWRados
: public AdminSocketHook
1156 friend class RGWMetaNotifier
;
1157 friend class RGWDataNotifier
;
1159 friend class RGWObjectExpirer
;
1160 friend class RGWMetaSyncProcessorThread
;
1161 friend class RGWDataSyncProcessorThread
;
1162 friend class RGWReshard
;
1163 friend class RGWBucketReshard
;
1164 friend class RGWBucketReshardLock
;
1165 friend class BucketIndexLockGuard
;
1166 friend class RGWCompleteMultipart
;
1168 static constexpr const char* admin_commands
[4][3] = {
1170 "cache list name=filter,type=CephString,req=false",
1171 "cache list [filter_str]: list object cache, possibly matching substrings" },
1173 "cache inspect name=target,type=CephString,req=true",
1174 "cache inspect target: print cache element" },
1176 "cache erase name=target,type=CephString,req=true",
1177 "cache erase target: erase element from cache" },
1180 "cache zap: erase all elements from cache" }
1183 /** Open the pool used as root for this gateway */
1184 int open_root_pool_ctx();
1185 int open_gc_pool_ctx();
1186 int open_lc_pool_ctx();
1187 int open_objexp_pool_ctx();
1188 int open_reshard_pool_ctx();
1190 int open_pool_ctx(const rgw_pool
& pool
, librados::IoCtx
& io_ctx
,
1192 int open_bucket_index_ctx(const RGWBucketInfo
& bucket_info
, librados::IoCtx
& index_ctx
);
1193 int open_bucket_index(const RGWBucketInfo
& bucket_info
, librados::IoCtx
& index_ctx
, string
& bucket_oid
);
1194 int open_bucket_index_base(const RGWBucketInfo
& bucket_info
, librados::IoCtx
& index_ctx
,
1195 string
& bucket_oid_base
);
1196 int open_bucket_index_shard(const RGWBucketInfo
& bucket_info
, librados::IoCtx
& index_ctx
,
1197 const string
& obj_key
, string
*bucket_obj
, int *shard_id
);
1198 int open_bucket_index_shard(const RGWBucketInfo
& bucket_info
, librados::IoCtx
& index_ctx
,
1199 int shard_id
, string
*bucket_obj
);
1200 int open_bucket_index(const RGWBucketInfo
& bucket_info
, librados::IoCtx
& index_ctx
,
1201 map
<int, string
>& bucket_objs
, int shard_id
= -1, map
<int, string
> *bucket_instance_ids
= NULL
);
1202 template<typename T
>
1203 int open_bucket_index(const RGWBucketInfo
& bucket_info
, librados::IoCtx
& index_ctx
,
1204 map
<int, string
>& oids
, map
<int, T
>& bucket_objs
,
1205 int shard_id
= -1, map
<int, string
> *bucket_instance_ids
= NULL
);
1206 void build_bucket_index_marker(const string
& shard_id_str
, const string
& shard_marker
,
1209 void get_bucket_instance_ids(const RGWBucketInfo
& bucket_info
, int shard_id
, map
<int, string
> *result
);
1211 std::atomic
<int64_t> max_req_id
= { 0 };
1217 RGWObjectExpirer
*obj_expirer
;
1221 bool run_sync_thread
;
1222 bool run_reshard_thread
;
1224 RGWAsyncRadosProcessor
* async_rados
;
1226 RGWMetaNotifier
*meta_notifier
;
1227 RGWDataNotifier
*data_notifier
;
1228 RGWMetaSyncProcessorThread
*meta_sync_processor_thread
;
1229 RGWSyncTraceManager
*sync_tracer
= nullptr;
1230 map
<string
, RGWDataSyncProcessorThread
*> data_sync_processor_threads
;
1232 boost::optional
<rgw::BucketTrimManager
> bucket_trim
;
1233 RGWSyncLogTrimThread
*sync_log_trimmer
{nullptr};
1235 Mutex meta_sync_thread_lock
;
1236 Mutex data_sync_thread_lock
;
1238 librados::IoCtx root_pool_ctx
; // .rgw
1240 double inject_notify_timeout_probability
= 0;
1241 unsigned max_notify_retries
= 0;
1243 friend class RGWWatcher
;
1245 Mutex bucket_id_lock
;
1247 // This field represents the number of bucket index object shards
1248 uint32_t bucket_index_max_shards
;
1250 int get_obj_head_ioctx(const RGWBucketInfo
& bucket_info
, const rgw_obj
& obj
, librados::IoCtx
*ioctx
);
1251 int get_obj_head_ref(const RGWBucketInfo
& bucket_info
, const rgw_obj
& obj
, rgw_rados_ref
*ref
);
1252 int get_system_obj_ref(const rgw_raw_obj
& obj
, rgw_rados_ref
*ref
);
1253 uint64_t max_bucket_id
;
1255 int get_olh_target_state(RGWObjectCtx
& rctx
, const RGWBucketInfo
& bucket_info
, const rgw_obj
& obj
,
1256 RGWObjState
*olh_state
, RGWObjState
**target_state
);
1257 int get_obj_state_impl(RGWObjectCtx
*rctx
, const RGWBucketInfo
& bucket_info
, const rgw_obj
& obj
, RGWObjState
**state
,
1258 bool follow_olh
, bool assume_noent
= false);
1259 int append_atomic_test(RGWObjectCtx
*rctx
, const RGWBucketInfo
& bucket_info
, const rgw_obj
& obj
,
1260 librados::ObjectOperation
& op
, RGWObjState
**state
);
1261 int append_atomic_test(const RGWObjState
* astate
, librados::ObjectOperation
& op
);
1263 int update_placement_map();
1264 int store_bucket_info(RGWBucketInfo
& info
, map
<string
, bufferlist
> *pattrs
, RGWObjVersionTracker
*objv_tracker
, bool exclusive
);
1266 void remove_rgw_head_obj(librados::ObjectWriteOperation
& op
);
1267 void cls_obj_check_prefix_exist(librados::ObjectOperation
& op
, const string
& prefix
, bool fail_if_exist
);
1268 void cls_obj_check_mtime(librados::ObjectOperation
& op
, const real_time
& mtime
, bool high_precision_time
, RGWCheckMTimeType type
);
1272 librados::Rados rados
;
1274 using RGWChainedCacheImpl_bucket_info_entry
= RGWChainedCacheImpl
<bucket_info_entry
>;
1275 RGWChainedCacheImpl_bucket_info_entry
*binfo_cache
;
1277 using tombstone_cache_t
= lru_map
<rgw_obj
, tombstone_entry
>;
1278 tombstone_cache_t
*obj_tombstone_cache
;
1280 librados::IoCtx gc_pool_ctx
; // .rgw.gc
1281 librados::IoCtx lc_pool_ctx
; // .rgw.lc
1282 librados::IoCtx objexp_pool_ctx
;
1283 librados::IoCtx reshard_pool_ctx
;
1285 bool pools_initialized
;
1287 RGWQuotaHandler
*quota_handler
;
1289 RGWCoroutinesManagerRegistry
*cr_registry
;
1291 RGWSyncModuleInstanceRef sync_module
;
1292 bool writeable_zone
{false};
1294 RGWIndexCompletionManager
*index_completion_manager
{nullptr};
1296 bool use_cache
{false};
1298 RGWRados(): lock("rados_timer_lock"), timer(NULL
),
1299 gc(NULL
), lc(NULL
), obj_expirer(NULL
), use_gc_thread(false), use_lc_thread(false), quota_threads(false),
1300 run_sync_thread(false), run_reshard_thread(false), async_rados(nullptr), meta_notifier(NULL
),
1301 data_notifier(NULL
), meta_sync_processor_thread(NULL
),
1302 meta_sync_thread_lock("meta_sync_thread_lock"), data_sync_thread_lock("data_sync_thread_lock"),
1303 bucket_id_lock("rados_bucket_id"),
1304 bucket_index_max_shards(0),
1305 max_bucket_id(0), cct(NULL
),
1306 binfo_cache(NULL
), obj_tombstone_cache(nullptr),
1307 pools_initialized(false),
1308 quota_handler(NULL
),
1310 meta_mgr(NULL
), data_log(NULL
), reshard(NULL
) {}
1312 RGWRados
& set_use_cache(bool status
) {
1321 RGWRados
& set_run_gc_thread(bool _use_gc_thread
) {
1322 use_gc_thread
= _use_gc_thread
;
1326 RGWRados
& set_run_lc_thread(bool _use_lc_thread
) {
1327 use_lc_thread
= _use_lc_thread
;
1331 RGWRados
& set_run_quota_threads(bool _run_quota_threads
) {
1332 quota_threads
= _run_quota_threads
;
1336 RGWRados
& set_run_sync_thread(bool _run_sync_thread
) {
1337 run_sync_thread
= _run_sync_thread
;
1341 RGWRados
& set_run_reshard_thread(bool _run_reshard_thread
) {
1342 run_reshard_thread
= _run_reshard_thread
;
1346 uint64_t get_new_req_id() {
1347 return ++max_req_id
;
1350 librados::IoCtx
* get_lc_pool_ctx() {
1351 return &lc_pool_ctx
;
1353 void set_context(CephContext
*_cct
) {
1360 * AmazonS3 errors contain a HostId string, but it is an opaque base64 blob; we
1361 * try to be more transparent. This has a wrapper so we can update it when zonegroup/zone are changed.
1365 // pulls missing periods for period_history
1366 std::unique_ptr
<RGWPeriodPuller
> period_puller
;
1367 // maintains a connected history of periods
1368 std::unique_ptr
<RGWPeriodHistory
> period_history
;
1370 RGWAsyncRadosProcessor
* get_async_rados() const { return async_rados
; };
1372 RGWMetadataManager
*meta_mgr
;
1374 RGWDataChangesLog
*data_log
;
1376 RGWReshard
*reshard
;
1377 std::shared_ptr
<RGWReshardWait
> reshard_wait
;
1379 virtual ~RGWRados() = default;
1381 tombstone_cache_t
*get_tombstone_cache() {
1382 return obj_tombstone_cache
;
1384 const RGWSyncModuleInstanceRef
& get_sync_module() {
1387 RGWSyncTraceManager
*get_sync_tracer() {
1391 int get_required_alignment(const rgw_pool
& pool
, uint64_t *alignment
);
/**
 * Declaration only; the body is defined outside this header chunk.
 * Takes two uint64_t inputs (size, alignment) and a uint64_t out-pointer.
 * NOTE(review): judging by the names, writes an alignment-adjusted size
 * through `max_size` -- confirm the rounding direction in the definition.
 */
void get_max_aligned_size(uint64_t size,
                          uint64_t alignment,
                          uint64_t *max_size);
1393 int get_max_chunk_size(const rgw_pool
& pool
, uint64_t *max_chunk_size
, uint64_t *palignment
= nullptr);
1394 int get_max_chunk_size(const rgw_placement_rule
& placement_rule
, const rgw_obj
& obj
, uint64_t *max_chunk_size
, uint64_t *palignment
= nullptr);
1396 uint32_t get_max_bucket_shards() {
1397 return rgw_shards_max();
1401 int get_raw_obj_ref(const rgw_raw_obj
& obj
, rgw_rados_ref
*ref
);
1403 int list_raw_objects_init(const rgw_pool
& pool
, const string
& marker
, RGWListRawObjsCtx
*ctx
);
1404 int list_raw_objects_next(const string
& prefix_filter
, int max
,
1405 RGWListRawObjsCtx
& ctx
, list
<string
>& oids
,
1406 bool *is_truncated
);
1407 int list_raw_objects(const rgw_pool
& pool
, const string
& prefix_filter
, int max
,
1408 RGWListRawObjsCtx
& ctx
, list
<string
>& oids
,
1409 bool *is_truncated
);
1410 string
list_raw_objs_get_cursor(RGWListRawObjsCtx
& ctx
);
1412 CephContext
*ctx() { return cct
; }
1413 /** do all necessary setup of the storage device */
1414 int initialize(CephContext
*_cct
) {
1416 return initialize();
1418 /** Initialize the RADOS instance and prepare to do other ops */
1419 int init_svc(bool raw
);
1421 int init_complete();
1425 int register_to_service_map(const string
& daemon_type
, const map
<string
, string
>& meta
);
1426 int update_service_map(std::map
<std::string
, std::string
>&& status
);
1429 int log_list_init(const string
& prefix
, RGWAccessHandle
*handle
);
1430 int log_list_next(RGWAccessHandle handle
, string
*name
);
1433 int log_remove(const string
& name
);
1436 int log_show_init(const string
& name
, RGWAccessHandle
*handle
);
1437 int log_show_next(RGWAccessHandle handle
, rgw_log_entry
*entry
);
1439 // log bandwidth info
1440 int log_usage(map
<rgw_user_bucket
, RGWUsageBatch
>& usage_info
);
1441 int read_usage(const rgw_user
& user
, const string
& bucket_name
, uint64_t start_epoch
, uint64_t end_epoch
,
1442 uint32_t max_entries
, bool *is_truncated
, RGWUsageIter
& read_iter
, map
<rgw_user_bucket
,
1443 rgw_usage_log_entry
>& usage
);
1444 int trim_usage(const rgw_user
& user
, const string
& bucket_name
, uint64_t start_epoch
, uint64_t end_epoch
);
1447 int create_pool(const rgw_pool
& pool
);
1449 int init_bucket_index(RGWBucketInfo
& bucket_info
, int num_shards
);
1450 int clean_bucket_index(RGWBucketInfo
& bucket_info
, int num_shards
);
1451 void create_bucket_id(string
*bucket_id
);
1453 bool get_obj_data_pool(const rgw_placement_rule
& placement_rule
, const rgw_obj
& obj
, rgw_pool
*pool
);
1454 bool obj_to_raw(const rgw_placement_rule
& placement_rule
, const rgw_obj
& obj
, rgw_raw_obj
*raw_obj
);
1456 int create_bucket(const RGWUserInfo
& owner
, rgw_bucket
& bucket
,
1457 const string
& zonegroup_id
,
1458 const rgw_placement_rule
& placement_rule
,
1459 const string
& swift_ver_location
,
1460 const RGWQuotaInfo
* pquota_info
,
1461 map
<std::string
,bufferlist
>& attrs
,
1462 RGWBucketInfo
& bucket_info
,
1464 obj_version
*pep_objv
,
1465 ceph::real_time creation_time
,
1466 rgw_bucket
*master_bucket
,
1467 uint32_t *master_num_shards
,
1468 bool exclusive
= true);
1470 RGWCoroutinesManagerRegistry
*get_cr_registry() { return cr_registry
; }
1472 struct BucketShard
{
1476 librados::IoCtx index_ctx
;
1479 explicit BucketShard(RGWRados
*_store
) : store(_store
), shard_id(-1) {}
1480 int init(const rgw_bucket
& _bucket
, const rgw_obj
& obj
, RGWBucketInfo
* out
);
1481 int init(const rgw_bucket
& _bucket
, int sid
, RGWBucketInfo
* out
);
1482 int init(const RGWBucketInfo
& bucket_info
, const rgw_obj
& obj
);
1483 int init(const RGWBucketInfo
& bucket_info
, int sid
);
1488 RGWBucketInfo bucket_info
;
1496 bool versioning_disabled
;
1498 bool bs_initialized
;
1501 int get_state(RGWObjState
**pstate
, bool follow_olh
, bool assume_noent
= false);
1502 void invalidate_state();
1504 int prepare_atomic_modification(librados::ObjectWriteOperation
& op
, bool reset_obj
, const string
*ptag
,
1505 const char *ifmatch
, const char *ifnomatch
, bool removal_op
, bool modify_tail
);
1506 int complete_atomic_modification();
1509 Object(RGWRados
*_store
, const RGWBucketInfo
& _bucket_info
, RGWObjectCtx
& _ctx
, const rgw_obj
& _obj
) : store(_store
), bucket_info(_bucket_info
),
1510 ctx(_ctx
), obj(_obj
), bs(store
),
1511 state(NULL
), versioning_disabled(false),
1512 bs_initialized(false) {}
1514 RGWRados
*get_store() { return store
; }
1515 rgw_obj
& get_obj() { return obj
; }
1516 RGWObjectCtx
& get_ctx() { return ctx
; }
1517 RGWBucketInfo
& get_bucket_info() { return bucket_info
; }
1518 int get_manifest(RGWObjManifest
**pmanifest
);
1520 int get_bucket_shard(BucketShard
**pbs
) {
1521 if (!bs_initialized
) {
1523 bs
.init(bucket_info
.bucket
, obj
, nullptr /* no RGWBucketInfo */);
1527 bs_initialized
= true;
1533 void set_versioning_disabled(bool status
) {
1534 versioning_disabled
= status
;
1537 bool versioning_enabled() {
1538 return (!versioning_disabled
&& bucket_info
.versioning_enabled());
1542 RGWRados::Object
*source
;
1544 struct GetObjState
{
1545 map
<rgw_pool
, librados::IoCtx
> io_ctxs
;
1547 librados::IoCtx
*cur_ioctx
{nullptr};
1549 rgw_raw_obj head_obj
;
1552 struct ConditionParams
{
1553 const ceph::real_time
*mod_ptr
;
1554 const ceph::real_time
*unmod_ptr
;
1555 bool high_precision_time
;
1556 uint32_t mod_zone_id
;
1557 uint64_t mod_pg_ver
;
1558 const char *if_match
;
1559 const char *if_nomatch
;
1562 mod_ptr(NULL
), unmod_ptr(NULL
), high_precision_time(false), mod_zone_id(0), mod_pg_ver(0),
1563 if_match(NULL
), if_nomatch(NULL
) {}
1567 ceph::real_time
*lastmod
;
1569 map
<string
, bufferlist
> *attrs
;
1570 rgw_obj
*target_obj
;
1572 Params() : lastmod(nullptr), obj_size(nullptr), attrs(nullptr),
1573 target_obj(nullptr) {}
1576 explicit Read(RGWRados::Object
*_source
) : source(_source
) {}
1579 static int range_to_ofs(uint64_t obj_size
, int64_t &ofs
, int64_t &end
);
1580 int read(int64_t ofs
, int64_t end
, bufferlist
& bl
);
1581 int iterate(int64_t ofs
, int64_t end
, RGWGetDataCB
*cb
);
1582 int get_attr(const char *name
, bufferlist
& dest
);
1586 RGWRados::Object
*target
;
1589 ceph::real_time
*mtime
;
1590 map
<std::string
, bufferlist
>* rmattrs
;
1591 const bufferlist
*data
;
1592 RGWObjManifest
*manifest
;
1594 list
<rgw_obj_index_key
> *remove_objs
;
1595 ceph::real_time set_mtime
;
1597 RGWObjCategory category
;
1599 const char *if_match
;
1600 const char *if_nomatch
;
1601 std::optional
<uint64_t> olh_epoch
;
1602 ceph::real_time delete_at
;
1604 const string
*user_data
;
1605 rgw_zone_set
*zones_trace
;
1607 bool completeMultipart
;
1610 MetaParams() : mtime(NULL
), rmattrs(NULL
), data(NULL
), manifest(NULL
), ptag(NULL
),
1611 remove_objs(NULL
), category(RGWObjCategory::Main
), flags(0),
1612 if_match(NULL
), if_nomatch(NULL
), canceled(false), user_data(nullptr), zones_trace(nullptr),
1613 modify_tail(false), completeMultipart(false), appendable(false) {}
1616 explicit Write(RGWRados::Object
*_target
) : target(_target
) {}
1618 int _do_write_meta(uint64_t size
, uint64_t accounted_size
,
1619 map
<std::string
, bufferlist
>& attrs
,
1620 bool modify_tail
, bool assume_noent
,
1622 int write_meta(uint64_t size
, uint64_t accounted_size
,
1623 map
<std::string
, bufferlist
>& attrs
);
1624 int write_data(const char *data
, uint64_t ofs
, uint64_t len
, bool exclusive
);
1625 const req_state
* get_req_state() {
1626 return (req_state
*)target
->get_ctx().get_private();
1631 RGWRados::Object
*target
;
1633 struct DeleteParams
{
1634 rgw_user bucket_owner
;
1635 int versioning_status
;
1636 ACLOwner obj_owner
; /* needed for creation of deletion marker */
1638 string marker_version_id
;
1639 uint32_t bilog_flags
;
1640 list
<rgw_obj_index_key
> *remove_objs
;
1641 ceph::real_time expiration_time
;
1642 ceph::real_time unmod_since
;
1643 ceph::real_time mtime
; /* for setting delete marker mtime */
1644 bool high_precision_time
;
1645 rgw_zone_set
*zones_trace
;
1647 DeleteParams() : versioning_status(0), olh_epoch(0), bilog_flags(0), remove_objs(NULL
), high_precision_time(false), zones_trace(nullptr) {}
1650 struct DeleteResult
{
1654 DeleteResult() : delete_marker(false) {}
1657 explicit Delete(RGWRados::Object
*_target
) : target(_target
) {}
1663 RGWRados::Object
*source
;
1667 RGWObjManifest manifest
;
1670 struct timespec mtime
;
1671 map
<string
, bufferlist
> attrs
;
1673 Result() : has_manifest(false), size(0) {}
1677 librados::IoCtx io_ctx
;
1678 librados::AioCompletion
*completion
;
1681 State() : completion(NULL
), ret(0) {}
1685 explicit Stat(RGWRados::Object
*_source
) : source(_source
) {}
1697 RGWBucketInfo bucket_info
;
1702 Bucket(RGWRados
*_store
, const RGWBucketInfo
& _bucket_info
) : store(_store
), bucket_info(_bucket_info
), bucket(bucket_info
.bucket
),
1703 shard_id(RGW_NO_SHARD
) {}
1704 RGWRados
*get_store() { return store
; }
1705 rgw_bucket
& get_bucket() { return bucket
; }
1706 RGWBucketInfo
& get_bucket_info() { return bucket_info
; }
1708 int update_bucket_id(const string
& new_bucket_id
);
1710 int get_shard_id() { return shard_id
; }
1711 void set_shard_id(int id
) {
1716 RGWRados::Bucket
*target
;
1719 uint16_t bilog_flags
{0};
1721 bool bs_initialized
{false};
1723 bool prepared
{false};
1724 rgw_zone_set
*zones_trace
{nullptr};
1728 bs
.init(target
->get_bucket(), obj
, nullptr /* no RGWBucketInfo */);
1732 bs_initialized
= true;
1736 void invalidate_bs() {
1737 bs_initialized
= false;
1740 int guard_reshard(BucketShard
**pbs
, std::function
<int(BucketShard
*)> call
);
1743 UpdateIndex(RGWRados::Bucket
*_target
, const rgw_obj
& _obj
) : target(_target
), obj(_obj
),
1744 bs(target
->get_store()) {
1745 blind
= (target
->get_bucket_info().index_type
== RGWBIType_Indexless
);
1748 int get_bucket_shard(BucketShard
**pbs
) {
1749 if (!bs_initialized
) {
1759 void set_bilog_flags(uint16_t flags
) {
1760 bilog_flags
= flags
;
1763 void set_zones_trace(rgw_zone_set
*_zones_trace
) {
1764 zones_trace
= _zones_trace
;
1767 int prepare(RGWModifyOp
, const string
*write_tag
);
1768 int complete(int64_t poolid
, uint64_t epoch
, uint64_t size
,
1769 uint64_t accounted_size
, ceph::real_time
& ut
,
1770 const string
& etag
, const string
& content_type
,
1771 const string
& storage_class
,
1772 bufferlist
*acl_bl
, RGWObjCategory category
,
1773 list
<rgw_obj_index_key
> *remove_objs
, const string
*user_data
= nullptr, bool appendable
= false);
1774 int complete_del(int64_t poolid
, uint64_t epoch
,
1775 ceph::real_time
& removed_mtime
, /* mtime of removed object */
1776 list
<rgw_obj_index_key
> *remove_objs
);
1779 const string
*get_optag() { return &optag
; }
1781 bool is_prepared() { return prepared
; }
1782 }; // class UpdateIndex
1786 // absolute maximum number of objects that
1787 // list_objects_(un)ordered can return
1788 static constexpr int64_t bucket_list_objects_absolute_max
= 25000;
1790 RGWRados::Bucket
*target
;
1791 rgw_obj_key next_marker
;
1793 int list_objects_ordered(int64_t max
,
1794 vector
<rgw_bucket_dir_entry
> *result
,
1795 map
<string
, bool> *common_prefixes
,
1796 bool *is_truncated
);
1797 int list_objects_unordered(int64_t max
,
1798 vector
<rgw_bucket_dir_entry
> *result
,
1799 map
<string
, bool> *common_prefixes
,
1800 bool *is_truncated
);
1808 rgw_obj_key end_marker
;
1811 RGWAccessListFilter
*filter
;
1813 bool allow_unordered
;
1818 list_versions(false),
1819 allow_unordered(false)
1823 explicit List(RGWRados::Bucket
*_target
) : target(_target
) {}
1825 int list_objects(int64_t max
,
1826 vector
<rgw_bucket_dir_entry
> *result
,
1827 map
<string
, bool> *common_prefixes
,
1828 bool *is_truncated
) {
1829 if (params
.allow_unordered
) {
1830 return list_objects_unordered(max
, result
, common_prefixes
,
1833 return list_objects_ordered(max
, result
, common_prefixes
,
1837 rgw_obj_key
& get_next_marker() {
1843 int on_last_entry_in_listing(RGWBucketInfo
& bucket_info
,
1844 const std::string
& obj_prefix
,
1845 const std::string
& obj_delim
,
1846 std::function
<int(const rgw_bucket_dir_entry
&)> handler
);
1848 bool swift_versioning_enabled(const RGWBucketInfo
& bucket_info
) const {
1849 return bucket_info
.has_swift_versioning() &&
1850 bucket_info
.swift_ver_location
.size();
1853 int swift_versioning_copy(RGWObjectCtx
& obj_ctx
, /* in/out */
1854 const rgw_user
& user
, /* in */
1855 RGWBucketInfo
& bucket_info
, /* in */
1856 rgw_obj
& obj
); /* in */
1857 int swift_versioning_restore(RGWSysObjectCtx
& sysobj_ctx
,
1858 RGWObjectCtx
& obj_ctx
, /* in/out */
1859 const rgw_user
& user
, /* in */
1860 RGWBucketInfo
& bucket_info
, /* in */
1861 rgw_obj
& obj
, /* in */
1862 bool& restored
); /* out */
1863 int copy_obj_to_remote_dest(RGWObjState
*astate
,
1864 map
<string
, bufferlist
>& src_attrs
,
1865 RGWRados::Object::Read
& read_op
,
1866 const rgw_user
& user_id
,
1868 ceph::real_time
*mtime
);
1872 ATTRSMOD_REPLACE
= 1,
1876 int rewrite_obj(RGWBucketInfo
& dest_bucket_info
, const rgw_obj
& obj
);
1878 int stat_remote_obj(RGWObjectCtx
& obj_ctx
,
1879 const rgw_user
& user_id
,
1881 const string
& source_zone
,
1883 RGWBucketInfo
& src_bucket_info
,
1884 real_time
*src_mtime
,
1886 const real_time
*mod_ptr
,
1887 const real_time
*unmod_ptr
,
1888 bool high_precision_time
,
1889 const char *if_match
,
1890 const char *if_nomatch
,
1891 map
<string
, bufferlist
> *pattrs
,
1892 map
<string
, string
> *pheaders
,
1897 int fetch_remote_obj(RGWObjectCtx
& obj_ctx
,
1898 const rgw_user
& user_id
,
1900 const string
& source_zone
,
1901 const rgw_obj
& dest_obj
,
1902 const rgw_obj
& src_obj
,
1903 RGWBucketInfo
& dest_bucket_info
,
1904 RGWBucketInfo
& src_bucket_info
,
1905 std::optional
<rgw_placement_rule
> dest_placement
,
1906 ceph::real_time
*src_mtime
,
1907 ceph::real_time
*mtime
,
1908 const ceph::real_time
*mod_ptr
,
1909 const ceph::real_time
*unmod_ptr
,
1910 bool high_precision_time
,
1911 const char *if_match
,
1912 const char *if_nomatch
,
1915 map
<string
, bufferlist
>& attrs
,
1916 RGWObjCategory category
,
1917 std::optional
<uint64_t> olh_epoch
,
1918 ceph::real_time delete_at
,
1921 void (*progress_cb
)(off_t
, void *),
1922 void *progress_data
,
1923 rgw_zone_set
*zones_trace
= nullptr,
1924 std::optional
<uint64_t>* bytes_transferred
= 0);
1927 * dest_obj: the object to copy into
1928 * src_obj: the object to copy from
1929 * attrs: usage depends on attrs_mod parameter
1930 * attrs_mod: the modification mode of the attrs, may have the following values:
1931 * ATTRSMOD_NONE - the attributes of the source object will be
1932 * copied without modifications, attrs parameter is ignored;
1933 * ATTRSMOD_REPLACE - new object will have the attributes provided by attrs
1934 * parameter, source object attributes are not copied;
1935 * ATTRSMOD_MERGE - any conflicting meta keys on the source object's attributes
1936 * are overwritten by values contained in attrs parameter.
1937 * Returns: 0 on success, -ERR# otherwise.
1939 int copy_obj(RGWObjectCtx
& obj_ctx
,
1940 const rgw_user
& user_id
,
1942 const string
& source_zone
,
1945 RGWBucketInfo
& dest_bucket_info
,
1946 RGWBucketInfo
& src_bucket_info
,
1947 const rgw_placement_rule
& dest_placement
,
1948 ceph::real_time
*src_mtime
,
1949 ceph::real_time
*mtime
,
1950 const ceph::real_time
*mod_ptr
,
1951 const ceph::real_time
*unmod_ptr
,
1952 bool high_precision_time
,
1953 const char *if_match
,
1954 const char *if_nomatch
,
1957 map
<std::string
, bufferlist
>& attrs
,
1958 RGWObjCategory category
,
1960 ceph::real_time delete_at
,
1964 void (*progress_cb
)(off_t
, void *),
1965 void *progress_data
);
1967 int copy_obj_data(RGWObjectCtx
& obj_ctx
,
1968 RGWBucketInfo
& dest_bucket_info
,
1969 const rgw_placement_rule
& dest_placement
,
1970 RGWRados::Object::Read
& read_op
, off_t end
,
1971 const rgw_obj
& dest_obj
,
1972 ceph::real_time
*mtime
,
1973 ceph::real_time set_mtime
,
1974 map
<string
, bufferlist
>& attrs
,
1976 ceph::real_time delete_at
,
1979 int transition_obj(RGWObjectCtx
& obj_ctx
,
1980 RGWBucketInfo
& bucket_info
,
1982 const rgw_placement_rule
& placement_rule
,
1983 const real_time
& mtime
,
1984 uint64_t olh_epoch
);
1986 int check_bucket_empty(RGWBucketInfo
& bucket_info
);
1990 * bucket_info: info of the bucket to delete
1991 * Returns 0 on success, -ERR# otherwise.
1993 int delete_bucket(RGWBucketInfo
& bucket_info
, RGWObjVersionTracker
& objv_tracker
, bool check_empty
= true);
1995 void wakeup_meta_sync_shards(set
<int>& shard_ids
);
1996 void wakeup_data_sync_shards(const string
& source_zone
, map
<int, set
<string
> >& shard_ids
);
1998 RGWMetaSyncStatusManager
* get_meta_sync_manager();
1999 RGWDataSyncStatusManager
* get_data_sync_manager(const std::string
& source_zone
);
2001 int set_bucket_owner(rgw_bucket
& bucket
, ACLOwner
& owner
);
2002 int set_buckets_enabled(std::vector
<rgw_bucket
>& buckets
, bool enabled
);
2003 int bucket_suspended(rgw_bucket
& bucket
, bool *suspended
);
2005 /** Delete an object.*/
2006 int delete_obj(RGWObjectCtx
& obj_ctx
,
2007 const RGWBucketInfo
& bucket_owner
,
2008 const rgw_obj
& src_obj
,
2009 int versioning_status
,
2010 uint16_t bilog_flags
= 0,
2011 const ceph::real_time
& expiration_time
= ceph::real_time(),
2012 rgw_zone_set
*zones_trace
= nullptr);
2014 int delete_raw_obj(const rgw_raw_obj
& obj
);
2016 /** Remove an object from the bucket index */
2017 int delete_obj_index(const rgw_obj
& obj
, ceph::real_time mtime
);
2020 * Set an attr on an object.
2021 * bucket_info: info of the bucket holding the object
2022 * obj: the object to set the attr on
2023 * name: the attr to set
2024 * bl: the contents of the attr
2025 * Returns: 0 on success, -ERR# otherwise.
2027 int set_attr(void *ctx
, const RGWBucketInfo
& bucket_info
, rgw_obj
& obj
, const char *name
, bufferlist
& bl
);
2029 int set_attrs(void *ctx
, const RGWBucketInfo
& bucket_info
, rgw_obj
& obj
,
2030 map
<string
, bufferlist
>& attrs
,
2031 map
<string
, bufferlist
>* rmattrs
);
2033 int get_obj_state(RGWObjectCtx
*rctx
, const RGWBucketInfo
& bucket_info
, const rgw_obj
& obj
, RGWObjState
**state
,
2034 bool follow_olh
, bool assume_noent
= false);
2035 int get_obj_state(RGWObjectCtx
*rctx
, const RGWBucketInfo
& bucket_info
, const rgw_obj
& obj
, RGWObjState
**state
) {
2036 return get_obj_state(rctx
, bucket_info
, obj
, state
, true);
2039 using iterate_obj_cb
= int (*)(const rgw_raw_obj
&, off_t
, off_t
,
2040 off_t
, bool, RGWObjState
*, void*);
2042 int iterate_obj(RGWObjectCtx
& ctx
, const RGWBucketInfo
& bucket_info
,
2043 const rgw_obj
& obj
, off_t ofs
, off_t end
,
2044 uint64_t max_chunk_size
, iterate_obj_cb cb
, void *arg
);
2046 int flush_read_list(struct get_obj_data
*d
);
2048 int get_obj_iterate_cb(const rgw_raw_obj
& read_obj
, off_t obj_ofs
,
2049 off_t read_ofs
, off_t len
, bool is_head_obj
,
2050 RGWObjState
*astate
, void *arg
);
2052 void get_obj_aio_completion_cb(librados::completion_t cb
, void *arg
);
2055 * a simple object read without keeping state
2058 int raw_obj_stat(rgw_raw_obj
& obj
, uint64_t *psize
, ceph::real_time
*pmtime
, uint64_t *epoch
,
2059 map
<string
, bufferlist
> *attrs
, bufferlist
*first_chunk
,
2060 RGWObjVersionTracker
*objv_tracker
);
2062 int obj_operate(const RGWBucketInfo
& bucket_info
, const rgw_obj
& obj
, librados::ObjectWriteOperation
*op
);
2063 int obj_operate(const RGWBucketInfo
& bucket_info
, const rgw_obj
& obj
, librados::ObjectReadOperation
*op
);
2065 int guard_reshard(BucketShard
*bs
,
2066 const rgw_obj
& obj_instance
,
2067 const RGWBucketInfo
& bucket_info
,
2068 std::function
<int(BucketShard
*)> call
);
2069 int block_while_resharding(RGWRados::BucketShard
*bs
,
2070 string
*new_bucket_id
,
2071 const RGWBucketInfo
& bucket_info
,
2074 void bucket_index_guard_olh_op(RGWObjState
& olh_state
, librados::ObjectOperation
& op
);
2075 int olh_init_modification(const RGWBucketInfo
& bucket_info
, RGWObjState
& state
, const rgw_obj
& olh_obj
, string
*op_tag
);
2076 int olh_init_modification_impl(const RGWBucketInfo
& bucket_info
, RGWObjState
& state
, const rgw_obj
& olh_obj
, string
*op_tag
);
2077 int bucket_index_link_olh(const RGWBucketInfo
& bucket_info
, RGWObjState
& olh_state
,
2078 const rgw_obj
& obj_instance
, bool delete_marker
,
2079 const string
& op_tag
, struct rgw_bucket_dir_entry_meta
*meta
,
2081 ceph::real_time unmod_since
, bool high_precision_time
,
2082 rgw_zone_set
*zones_trace
= nullptr,
2083 bool log_data_change
= false);
2084 int bucket_index_unlink_instance(const RGWBucketInfo
& bucket_info
, const rgw_obj
& obj_instance
, const string
& op_tag
, const string
& olh_tag
, uint64_t olh_epoch
, rgw_zone_set
*zones_trace
= nullptr);
2085 int bucket_index_read_olh_log(const RGWBucketInfo
& bucket_info
, RGWObjState
& state
, const rgw_obj
& obj_instance
, uint64_t ver_marker
,
2086 map
<uint64_t, vector
<rgw_bucket_olh_log_entry
> > *log
, bool *is_truncated
);
2087 int bucket_index_trim_olh_log(const RGWBucketInfo
& bucket_info
, RGWObjState
& obj_state
, const rgw_obj
& obj_instance
, uint64_t ver
);
2088 int bucket_index_clear_olh(const RGWBucketInfo
& bucket_info
, RGWObjState
& state
, const rgw_obj
& obj_instance
);
2089 int apply_olh_log(RGWObjectCtx
& ctx
, RGWObjState
& obj_state
, const RGWBucketInfo
& bucket_info
, const rgw_obj
& obj
,
2090 bufferlist
& obj_tag
, map
<uint64_t, vector
<rgw_bucket_olh_log_entry
> >& log
,
2091 uint64_t *plast_ver
, rgw_zone_set
*zones_trace
= nullptr);
2092 int update_olh(RGWObjectCtx
& obj_ctx
, RGWObjState
*state
, const RGWBucketInfo
& bucket_info
, const rgw_obj
& obj
, rgw_zone_set
*zones_trace
= nullptr);
2093 int set_olh(RGWObjectCtx
& obj_ctx
, RGWBucketInfo
& bucket_info
, const rgw_obj
& target_obj
, bool delete_marker
, rgw_bucket_dir_entry_meta
*meta
,
2094 uint64_t olh_epoch
, ceph::real_time unmod_since
, bool high_precision_time
,
2095 rgw_zone_set
*zones_trace
= nullptr, bool log_data_change
= false);
2096 int repair_olh(RGWObjState
* state
, const RGWBucketInfo
& bucket_info
,
2097 const rgw_obj
& obj
);
2098 int unlink_obj_instance(RGWObjectCtx
& obj_ctx
, RGWBucketInfo
& bucket_info
, const rgw_obj
& target_obj
,
2099 uint64_t olh_epoch
, rgw_zone_set
*zones_trace
= nullptr);
2101 void check_pending_olh_entries(map
<string
, bufferlist
>& pending_entries
, map
<string
, bufferlist
> *rm_pending_entries
);
2102 int remove_olh_pending_entries(const RGWBucketInfo
& bucket_info
, RGWObjState
& state
, const rgw_obj
& olh_obj
, map
<string
, bufferlist
>& pending_attrs
);
2103 int follow_olh(const RGWBucketInfo
& bucket_info
, RGWObjectCtx
& ctx
, RGWObjState
*state
, const rgw_obj
& olh_obj
, rgw_obj
*target
);
2104 int get_olh(const RGWBucketInfo
& bucket_info
, const rgw_obj
& obj
, RGWOLHInfo
*olh
);
2106 void gen_rand_obj_instance_name(rgw_obj_key
*target_key
);
2107 void gen_rand_obj_instance_name(rgw_obj
*target
);
2109 int update_containers_stats(map
<string
, RGWBucketEnt
>& m
);
2110 int append_async(rgw_raw_obj
& obj
, size_t size
, bufferlist
& bl
);
2113 void set_atomic(void *ctx
, rgw_obj
& obj
) {
2114 RGWObjectCtx
*rctx
= static_cast<RGWObjectCtx
*>(ctx
);
2115 rctx
->set_atomic(obj
);
2117 void set_prefetch_data(void *ctx
, const rgw_obj
& obj
) {
2118 RGWObjectCtx
*rctx
= static_cast<RGWObjectCtx
*>(ctx
);
2119 rctx
->set_prefetch_data(obj
);
2121 int decode_policy(bufferlist
& bl
, ACLOwner
*owner
);
2122 int get_bucket_stats(RGWBucketInfo
& bucket_info
, int shard_id
, string
*bucket_ver
, string
*master_ver
,
2123 map
<RGWObjCategory
, RGWStorageStats
>& stats
, string
*max_marker
, bool* syncstopped
= NULL
);
2124 int get_bucket_stats_async(RGWBucketInfo
& bucket_info
, int shard_id
, RGWGetBucketStats_CB
*cb
);
2125 int get_user_stats(const rgw_user
& user
, RGWStorageStats
& stats
);
2126 int get_user_stats_async(const rgw_user
& user
, RGWGetUserStats_CB
*cb
);
2127 void get_bucket_instance_obj(const rgw_bucket
& bucket
, rgw_raw_obj
& obj
);
2128 void get_bucket_meta_oid(const rgw_bucket
& bucket
, string
& oid
);
2130 int put_bucket_entrypoint_info(const string
& tenant_name
, const string
& bucket_name
, RGWBucketEntryPoint
& entry_point
,
2131 bool exclusive
, RGWObjVersionTracker
& objv_tracker
, ceph::real_time mtime
,
2132 map
<string
, bufferlist
> *pattrs
);
2133 int put_bucket_instance_info(RGWBucketInfo
& info
, bool exclusive
, ceph::real_time mtime
, map
<string
, bufferlist
> *pattrs
);
2134 int get_bucket_entrypoint_info(RGWSysObjectCtx
& obj_ctx
, const string
& tenant_name
, const string
& bucket_name
,
2135 RGWBucketEntryPoint
& entry_point
, RGWObjVersionTracker
*objv_tracker
,
2136 ceph::real_time
*pmtime
, map
<string
, bufferlist
> *pattrs
, rgw_cache_entry_info
*cache_info
= NULL
,
2137 boost::optional
<obj_version
> refresh_version
= boost::none
);
2138 int get_bucket_instance_info(RGWSysObjectCtx
& obj_ctx
, const string
& meta_key
, RGWBucketInfo
& info
, ceph::real_time
*pmtime
, map
<string
, bufferlist
> *pattrs
);
2139 int get_bucket_instance_info(RGWSysObjectCtx
& obj_ctx
, const rgw_bucket
& bucket
, RGWBucketInfo
& info
, ceph::real_time
*pmtime
, map
<string
, bufferlist
> *pattrs
);
2140 int get_bucket_instance_from_oid(RGWSysObjectCtx
& obj_ctx
, const string
& oid
, RGWBucketInfo
& info
, ceph::real_time
*pmtime
, map
<string
, bufferlist
> *pattrs
,
2141 rgw_cache_entry_info
*cache_info
= NULL
,
2142 boost::optional
<obj_version
> refresh_version
= boost::none
);
2144 int convert_old_bucket_info(RGWSysObjectCtx
& obj_ctx
, const string
& tenant_name
, const string
& bucket_name
);
2145 static void make_bucket_entry_name(const string
& tenant_name
, const string
& bucket_name
, string
& bucket_entry
);
2149 int _get_bucket_info(RGWSysObjectCtx
& obj_ctx
, const string
& tenant
,
2150 const string
& bucket_name
, RGWBucketInfo
& info
,
2152 map
<string
, bufferlist
> *pattrs
,
2153 boost::optional
<obj_version
> refresh_version
);
2156 bool call(std::string_view command
, const cmdmap_t
& cmdmap
,
2157 std::string_view format
,
2158 bufferlist
& out
) override final
;
2161 // `call_list` must iterate over all cache entries and call
2162 // `cache_list_dump_helper` with the supplied Formatter on any that
2163 // include `filter` as a substring.
2165 void call_list(const std::optional
<std::string
>& filter
,
2167 // `call_inspect` must look up the requested target and, if found,
2168 // dump it to the supplied Formatter and return true. If not found,
2169 // it must return false.
2171 bool call_inspect(const std::string
& target
, Formatter
* format
);
2173 // `call_erase` must erase the requested target and return true. If
2174 // the requested target does not exist, it should return false.
2175 bool call_erase(const std::string
& target
);
2177 // `call_zap` must erase the cache.
2181 int get_bucket_info(RGWSysObjectCtx
& obj_ctx
,
2182 const string
& tenant_name
, const string
& bucket_name
,
2183 RGWBucketInfo
& info
,
2184 ceph::real_time
*pmtime
, map
<string
, bufferlist
> *pattrs
= NULL
);
2186 // Returns 0 on successful refresh. Returns error code if there was
2187 // an error or the version stored on the OSD is the same as that
2188 // presented in the BucketInfo structure.
2190 int try_refresh_bucket_info(RGWBucketInfo
& info
,
2191 ceph::real_time
*pmtime
,
2192 map
<string
, bufferlist
> *pattrs
= nullptr);
2194 int put_linked_bucket_info(RGWBucketInfo
& info
, bool exclusive
, ceph::real_time mtime
, obj_version
*pep_objv
,
2195 map
<string
, bufferlist
> *pattrs
, bool create_entry_point
);
2197 int cls_obj_prepare_op(BucketShard
& bs
, RGWModifyOp op
, string
& tag
, rgw_obj
& obj
, uint16_t bilog_flags
, rgw_zone_set
*zones_trace
= nullptr);
2198 int cls_obj_complete_op(BucketShard
& bs
, const rgw_obj
& obj
, RGWModifyOp op
, string
& tag
, int64_t pool
, uint64_t epoch
,
2199 rgw_bucket_dir_entry
& ent
, RGWObjCategory category
, list
<rgw_obj_index_key
> *remove_objs
, uint16_t bilog_flags
, rgw_zone_set
*zones_trace
= nullptr);
2200 int cls_obj_complete_add(BucketShard
& bs
, const rgw_obj
& obj
, string
& tag
, int64_t pool
, uint64_t epoch
, rgw_bucket_dir_entry
& ent
,
2201 RGWObjCategory category
, list
<rgw_obj_index_key
> *remove_objs
, uint16_t bilog_flags
, rgw_zone_set
*zones_trace
= nullptr);
2202 int cls_obj_complete_del(BucketShard
& bs
, string
& tag
, int64_t pool
, uint64_t epoch
, rgw_obj
& obj
,
2203 ceph::real_time
& removed_mtime
, list
<rgw_obj_index_key
> *remove_objs
, uint16_t bilog_flags
, rgw_zone_set
*zones_trace
= nullptr);
2204 int cls_obj_complete_cancel(BucketShard
& bs
, string
& tag
, rgw_obj
& obj
, uint16_t bilog_flags
, rgw_zone_set
*zones_trace
= nullptr);
2205 int cls_obj_set_bucket_tag_timeout(RGWBucketInfo
& bucket_info
, uint64_t timeout
);
2206 int cls_bucket_list_ordered(RGWBucketInfo
& bucket_info
, int shard_id
,
2207 const rgw_obj_index_key
& start
,
2208 const string
& prefix
,
2209 uint32_t num_entries
, bool list_versions
,
2210 map
<string
, rgw_bucket_dir_entry
>& m
,
2212 rgw_obj_index_key
*last_entry
,
2213 bool (*force_check_filter
)(const string
& name
) = nullptr);
2214 int cls_bucket_list_unordered(RGWBucketInfo
& bucket_info
, int shard_id
,
2215 const rgw_obj_index_key
& start
,
2216 const string
& prefix
,
2217 uint32_t num_entries
, bool list_versions
,
2218 vector
<rgw_bucket_dir_entry
>& ent_list
,
2219 bool *is_truncated
, rgw_obj_index_key
*last_entry
,
2220 bool (*force_check_filter
)(const string
& name
) = nullptr);
2221 int cls_bucket_head(const RGWBucketInfo
& bucket_info
, int shard_id
, vector
<rgw_bucket_dir_header
>& headers
, map
<int, string
> *bucket_instance_ids
= NULL
);
2222 int cls_bucket_head_async(const RGWBucketInfo
& bucket_info
, int shard_id
, RGWGetDirHeader_CB
*ctx
, int *num_aio
);
2223 int list_bi_log_entries(RGWBucketInfo
& bucket_info
, int shard_id
, string
& marker
, uint32_t max
, std::list
<rgw_bi_log_entry
>& result
, bool *truncated
);
2224 int trim_bi_log_entries(RGWBucketInfo
& bucket_info
, int shard_id
, string
& marker
, string
& end_marker
);
2225 int resync_bi_log_entries(RGWBucketInfo
& bucket_info
, int shard_id
);
2226 int stop_bi_log_entries(RGWBucketInfo
& bucket_info
, int shard_id
);
2227 int get_bi_log_status(RGWBucketInfo
& bucket_info
, int shard_id
, map
<int, string
>& max_marker
);
2229 int bi_get_instance(const RGWBucketInfo
& bucket_info
, const rgw_obj
& obj
, rgw_bucket_dir_entry
*dirent
);
2230 int bi_get_olh(const RGWBucketInfo
& bucket_info
, const rgw_obj
& obj
, rgw_bucket_olh_entry
*olh
);
2231 int bi_get(const RGWBucketInfo
& bucket_info
, const rgw_obj
& obj
, BIIndexType index_type
, rgw_cls_bi_entry
*entry
);
2232 void bi_put(librados::ObjectWriteOperation
& op
, BucketShard
& bs
, rgw_cls_bi_entry
& entry
);
2233 int bi_put(BucketShard
& bs
, rgw_cls_bi_entry
& entry
);
2234 int bi_put(rgw_bucket
& bucket
, rgw_obj
& obj
, rgw_cls_bi_entry
& entry
);
2235 int bi_list(rgw_bucket
& bucket
, int shard_id
, const string
& filter_obj
, const string
& marker
, uint32_t max
, list
<rgw_cls_bi_entry
> *entries
, bool *is_truncated
);
2236 int bi_list(BucketShard
& bs
, const string
& filter_obj
, const string
& marker
, uint32_t max
, list
<rgw_cls_bi_entry
> *entries
, bool *is_truncated
);
2237 int bi_list(rgw_bucket
& bucket
, const string
& obj_name
, const string
& marker
, uint32_t max
,
2238 list
<rgw_cls_bi_entry
> *entries
, bool *is_truncated
);
2239 int bi_remove(BucketShard
& bs
);
2241 int cls_obj_usage_log_add(const string
& oid
, rgw_usage_log_info
& info
);
2242 int cls_obj_usage_log_read(const string
& oid
, const string
& user
, const string
& bucket
, uint64_t start_epoch
,
2243 uint64_t end_epoch
, uint32_t max_entries
, string
& read_iter
, map
<rgw_user_bucket
,
2244 rgw_usage_log_entry
>& usage
, bool *is_truncated
);
2245 int cls_obj_usage_log_trim(const string
& oid
, const string
& user
, const string
& bucket
, uint64_t start_epoch
,
2246 uint64_t end_epoch
);
2247 int cls_obj_usage_log_clear(string
& oid
);
/*
 * Shard-name helpers followed by the time-log interface.
 * NOTE(review): return values are presumably 0 / -errno -- confirm.
 */
2249 int key_to_shard_id(const string
& key
, int max_shards
);
/*
 * shard_name overloads: each writes its result into the `name` reference.
 * The first also reports a shard id through the shard_id pointer; the third
 * takes the shard id directly instead of deriving it from a key.
 */
2250 void shard_name(const string
& prefix
, unsigned max_shards
, const string
& key
, string
& name
, int *shard_id
);
2251 void shard_name(const string
& prefix
, unsigned max_shards
, const string
& section
, const string
& key
, string
& name
);
2252 void shard_name(const string
& prefix
, unsigned shard_id
, string
& name
);
2253 int get_target_shard_id(const RGWBucketInfo
& bucket_info
, const string
& obj_key
, int *shard_id
);
2254 void time_log_prepare_entry(cls_log_entry
& entry
, const ceph::real_time
& ut
, const string
& section
, const string
& key
, bufferlist
& bl
);
2255 int time_log_add_init(librados::IoCtx
& io_ctx
);
/*
 * Batch form: takes a list of prepared entries and an AioCompletion pointer;
 * monotonic_inc defaults to true.
 */
2256 int time_log_add(const string
& oid
, list
<cls_log_entry
>& entries
,
2257 librados::AioCompletion
*completion
, bool monotonic_inc
= true);
/* Single-entry form: takes the entry fields (ut, section, key, bl) directly. */
2258 int time_log_add(const string
& oid
, const ceph::real_time
& ut
, const string
& section
, const string
& key
, bufferlist
& bl
);
/* entries is filled by reference; out_marker and truncated are output pointers. */
2259 int time_log_list(const string
& oid
, const ceph::real_time
& start_time
, const ceph::real_time
& end_time
,
2260 int max_entries
, list
<cls_log_entry
>& entries
,
2261 const string
& marker
, string
*out_marker
, bool *truncated
);
2262 int time_log_info(const string
& oid
, cls_log_header
*header
);
2263 int time_log_info_async(librados::IoCtx
& io_ctx
, const string
& oid
, cls_log_header
*header
, librados::AioCompletion
*completion
);
/*
 * completion defaults to nullptr.
 * NOTE(review): presumably a null completion selects a synchronous trim --
 * confirm in the implementation.
 */
2264 int time_log_trim(const string
& oid
, const ceph::real_time
& start_time
, const ceph::real_time
& end_time
,
2265 const string
& from_marker
, const string
& to_marker
,
2266 librados::AioCompletion
*completion
= nullptr);
/*
 * Object-expiration ("objexp") hint interface. Output parameters are marked
 * with trailing "out" comments on the declarations themselves.
 */
2268 string
objexp_hint_get_shardname(int shard_num
);
2269 int objexp_key_shard(const rgw_obj_index_key
& key
);
2270 void objexp_get_shard(int shard_num
,
2271 string
& shard
); /* out */
/* Identifies the object by tenant, bucket name, bucket id and index key, with its delete_at time. */
2272 int objexp_hint_add(const ceph::real_time
& delete_at
,
2273 const string
& tenant_name
,
2274 const string
& bucket_name
,
2275 const string
& bucket_id
,
2276 const rgw_obj_index_key
& obj_key
);
2277 int objexp_hint_list(const string
& oid
,
2278 const ceph::real_time
& start_time
,
2279 const ceph::real_time
& end_time
,
2280 const int max_entries
,
2281 const string
& marker
,
2282 list
<cls_timeindex_entry
>& entries
, /* out */
2283 string
*out_marker
, /* out */
2284 bool *truncated
); /* out */
2285 int objexp_hint_parse(cls_timeindex_entry
&ti_entry
,
2286 objexp_hint_entry
& hint_entry
); /* out */
/* from_marker and to_marker both default to an empty string. */
2287 int objexp_hint_trim(const string
& oid
,
2288 const ceph::real_time
& start_time
,
2289 const ceph::real_time
& end_time
,
2290 const string
& from_marker
= std::string(),
2291 const string
& to_marker
= std::string());
/*
 * Lock / unlock pair. Both address the target by pool + oid and take zone_id
 * and owner_id as non-const string references; lock_exclusive additionally
 * takes a ceph::timespan duration, also by non-const reference.
 * NOTE(review): nothing here shows whether these references are written to --
 * confirm before making them const.
 */
2293 int lock_exclusive(const rgw_pool
& pool
, const string
& oid
, ceph::timespan
& duration
, string
& zone_id
, string
& owner_id
);
2294 int unlock(const rgw_pool
& pool
, const string
& oid
, string
& zone_id
, string
& owner_id
);
/*
 * Garbage-collection helpers. gc_operate is overloaded for write and read
 * operations (the read form also takes an output bufferlist pointer);
 * gc_aio_operate takes an optional AioCompletion** that defaults to nullptr.
 */
/* chain is an output pointer filled from head_obj and manifest (NOTE(review): presumably the manifest's tail objects -- confirm). */
2296 void update_gc_chain(rgw_obj
& head_obj
, RGWObjManifest
& manifest
, cls_rgw_obj_chain
*chain
);
2297 int send_chain_to_gc(cls_rgw_obj_chain
& chain
, const string
& tag
, bool sync
);
2298 int gc_operate(string
& oid
, librados::ObjectWriteOperation
*op
);
2299 int gc_aio_operate(string
& oid
, librados::ObjectWriteOperation
*op
, librados::AioCompletion
**pc
= nullptr);
2300 int gc_operate(string
& oid
, librados::ObjectReadOperation
*op
, bufferlist
*pbl
);
/*
 * GC listing / processing entry points. Note that process_expire_objects
 * returns bool while its neighbours return int.
 */
/* index and marker are in/out-style parameters (pointer / non-const reference); result and truncated are outputs. */
2302 int list_gc_objs(int *index
, string
& marker
, uint32_t max
, bool expired_only
, std::list
<cls_rgw_gc_obj_info
>& result
, bool *truncated
);
2303 int process_gc(bool expired_only
);
2304 bool process_expire_objects();
/* ctx is an untyped (void *) context; NOTE(review): its expected dynamic type is not visible here -- confirm at the call sites. */
2305 int defer_gc(void *ctx
, const RGWBucketInfo
& bucket_info
, const rgw_obj
& obj
);
/*
 * Lists lifecycle progress starting from marker, up to max_entries, into
 * progress_map (map<string, int>, passed by pointer).
 * NOTE(review): the meaning of the int values is not visible here -- confirm.
 */
2308 int list_lc_progress(const string
& marker
, uint32_t max_entries
, map
<string
, int> *progress_map
);
/*
 * Bucket index maintenance and object locator repair declarations.
 */
/* Both stats maps are output pointers keyed by RGWObjCategory. */
2310 int bucket_check_index(RGWBucketInfo
& bucket_info
,
2311 map
<RGWObjCategory
, RGWStorageStats
> *existing_stats
,
2312 map
<RGWObjCategory
, RGWStorageStats
> *calculated_stats
);
2313 int bucket_rebuild_index(RGWBucketInfo
& bucket_info
);
2314 int bucket_set_reshard(const RGWBucketInfo
& bucket_info
, const cls_rgw_bucket_instance_entry
& entry
);
2315 int remove_objs_from_index(RGWBucketInfo
& bucket_info
, list
<rgw_obj_index_key
>& oid_list
);
/* Source and destination are each given as an IoCtx plus oid and locator strings. */
2316 int move_rados_obj(librados::IoCtx
& src_ioctx
,
2317 const string
& src_oid
, const string
& src_locator
,
2318 librados::IoCtx
& dst_ioctx
,
2319 const string
& dst_oid
, const string
& dst_locator
);
2320 int fix_head_obj_locator(const RGWBucketInfo
& bucket_info
, bool copy_obj
, bool remove_bad
, rgw_obj_key
& key
);
/* need_fix is an output pointer; NOTE(review): presumably `fix` selects repair vs. check-only -- confirm. */
2321 int fix_tail_obj_locator(const RGWBucketInfo
& bucket_info
, rgw_obj_key
& key
, bool fix
, bool *need_fix
);
/*
 * Per-user ("cls_user") header, stats and bucket-list declarations. Some
 * address the user by id string / rgw_user, others by the raw object
 * (rgw_raw_obj) directly.
 */
2323 int cls_user_get_header(const string
& user_id
, cls_user_header
*header
);
2324 int cls_user_reset_stats(const string
& user_id
);
/* Async variant: takes a RGWGetUserHeader_CB pointer instead of an output header. */
2325 int cls_user_get_header_async(const string
& user_id
, RGWGetUserHeader_CB
*ctx
);
2326 int cls_user_sync_bucket_stats(rgw_raw_obj
& user_obj
, const RGWBucketInfo
& bucket_info
);
2327 int cls_user_list_buckets(rgw_raw_obj
& obj
,
2328 const string
& in_marker
,
2329 const string
& end_marker
,
2331 list
<cls_user_bucket_entry
>& entries
,
2334 int cls_user_add_bucket(rgw_raw_obj
& obj
, const cls_user_bucket_entry
& entry
);
/* NOTE(review): `add` presumably selects add vs. plain update semantics -- confirm. */
2335 int cls_user_update_buckets(rgw_raw_obj
& obj
, list
<cls_user_bucket_entry
>& entries
, bool add
);
2336 int cls_user_complete_stats_sync(rgw_raw_obj
& obj
);
2337 int complete_sync_user_stats(const rgw_user
& user_id
);
2338 int cls_user_remove_bucket(rgw_raw_obj
& obj
, const cls_user_bucket
& bucket
);
2339 int cls_user_get_bucket_stats(const rgw_bucket
& bucket
, cls_user_bucket_entry
& entry
);
/*
 * Quota and reshard checks.
 */
/* check_size_only defaults to false; both quota infos are non-const references. */
2341 int check_quota(const rgw_user
& bucket_owner
, rgw_bucket
& bucket
,
2342 RGWQuotaInfo
& user_quota
, RGWQuotaInfo
& bucket_quota
, uint64_t obj_size
, bool check_size_only
= false);
2344 int check_bucket_shards(const RGWBucketInfo
& bucket_info
, const rgw_bucket
& bucket
,
2345 RGWQuotaInfo
& bucket_quota
);
2347 int add_bucket_to_reshard(const RGWBucketInfo
& bucket_info
, uint32_t new_num_shards
);
/* Accessors: a 64-bit instance id and a pointer to a librados::Rados handle. */
/* NOTE(review): ownership of the returned Rados pointer is not visible here -- presumably non-owning; confirm. */
2349 uint64_t instance_id();
2351 librados::Rados
* get_rados_handle();
/*
 * Asynchronous delete declarations. Both take a caller-owned list of
 * AioCompletion pointers by reference.
 * NOTE(review): presumably each call appends the completion(s) it issues to
 * `handles` for the caller to wait on -- confirm.
 */
2353 int delete_raw_obj_aio(const rgw_raw_obj
& obj
, list
<librados::AioCompletion
*>& handles
);
2354 int delete_obj_aio(const rgw_obj
& obj
, RGWBucketInfo
& info
, RGWObjState
*astate
,
2355 list
<librados::AioCompletion
*>& handles
, bool keep_index_consistent
);
2357 /* mfa/totp stuff */
/*
 * All of the following address the MFA state through an rgw_user. The
 * mutating calls (create_mfa, remove_mfa) also take an RGWObjVersionTracker
 * pointer and an mtime; the read calls return data through a result pointer.
 */
2359 void prepare_mfa_write(librados::ObjectWriteOperation
*op
,
2360 RGWObjVersionTracker
*objv_tracker
,
2361 const ceph::real_time
& mtime
);
2363 string
get_mfa_oid(const rgw_user
& user
);
2364 int get_mfa_ref(const rgw_user
& user
, rgw_rados_ref
*ref
);
2365 int check_mfa(const rgw_user
& user
, const string
& otp_id
, const string
& pin
);
2366 int create_mfa(const rgw_user
& user
, const rados::cls::otp::otp_info_t
& config
,
2367 RGWObjVersionTracker
*objv_tracker
, const ceph::real_time
& mtime
);
2368 int remove_mfa(const rgw_user
& user
, const string
& id
,
2369 RGWObjVersionTracker
*objv_tracker
, const ceph::real_time
& mtime
);
2370 int get_mfa(const rgw_user
& user
, const string
& id
, rados::cls::otp::otp_info_t
*result
);
2371 int list_mfa(const rgw_user
& user
, list
<rados::cls::otp::otp_info_t
> *result
);
2372 int otp_get_current_time(const rgw_user
& user
, ceph::real_time
*result
);
2374 /* mfa interfaces used by metadata engine */
/*
 * These overloads address the MFA object by oid string rather than by
 * rgw_user. set_mfa takes the full list of entries plus a reset_obj flag;
 * this list_mfa overload also reports the version tracker and mtime through
 * pointers.
 */
2375 int set_mfa(const string
& oid
, const list
<rados::cls::otp::otp_info_t
>& entries
, bool reset_obj
,
2376 RGWObjVersionTracker
*objv_tracker
, const ceph::real_time
& mtime
);
2377 int list_mfa(const string
& oid
, list
<rados::cls::otp::otp_info_t
> *result
,
2378 RGWObjVersionTracker
*objv_tracker
, ceph::real_time
*pmtime
);
2381 * Helper method: generates a list of bucket index objects from the given
2382 * bucket base oid and number of shards.
2384 * bucket_oid_base [in] - base name of the bucket index object;
2385 * num_shards [in] - number of bucket index object shards.
2386 * bucket_objs [out] - filled by this method, a list of bucket index objects.
// shard_id defaults to -1.
// NOTE(review): presumably -1 means "all shards" and a non-negative value
// restricts bucket_objs to that one shard -- confirm in the implementation.
2388 void get_bucket_index_objects(const string
& bucket_oid_base
, uint32_t num_shards
,
2389 map
<int, string
>& bucket_objs
, int shard_id
= -1);
2392 * Get the bucket index object with the given base bucket index object and object key,
2393 * and the number of bucket index shards.
2395 * bucket_oid_base [in] - bucket object base name.
2396 * obj_key [in] - object key.
2397 * num_shards [in] - number of bucket index shards.
2398 * hash_type [in] - type of hash to find the shard ID.
2399 * bucket_obj [out] - the bucket index object for the given object.
2401 * Return 0 on success, a failure code otherwise.
// Key-based overload: besides bucket_obj, it also reports a shard through the
// `shard` pointer. NOTE(review): presumably the shard id chosen for obj_key
// -- confirm.
2403 int get_bucket_index_object(const string
& bucket_oid_base
, const string
& obj_key
,
2404 uint32_t num_shards
, RGWBucketInfo::BIShardsHashType hash_type
, string
*bucket_obj
, int *shard
);
// Shard-id overload: takes the shard id directly instead of an object key and
// hash type, returns void, and writes the result to bucket_obj.
2406 void get_bucket_index_object(const string
& bucket_oid_base
, uint32_t num_shards
,
2407 int shard_id
, string
*bucket_obj
);
2410 * Check the actual on-disk state of the object specified
2411 * by list_state, and fill in the time and size of object.
2412 * Then append any changes to suggested_updates for
2413 * the rgw class' dir_suggest_changes function.
2415 * Note that this can maul list_state; don't use it afterwards. Also
2416 * it expects object to already be filled in from list_state; it only
2417 * sets the size and mtime.
2419 * Returns 0 on success, -ENOENT if the object doesn't exist on disk,
2420 * and -errno on other failures. (-ENOENT is not a failure, and it
2421 * will encode that info as a suggested update.)
// io_ctx is taken by value (a copy of the caller's IoCtx); list_state, object
// and suggested_updates are non-const references that this call may modify.
2423 int check_disk_state(librados::IoCtx io_ctx
,
2424 const RGWBucketInfo
& bucket_info
,
2425 rgw_bucket_dir_entry
& list_state
,
2426 rgw_bucket_dir_entry
& object
,
2427 bufferlist
& suggested_updates
);
2430 * Init pool iteration
2431 * pool: pool to use for the ctx initialization
2432 * ctx: context object to use for the iteration
2433 * Returns: 0 on success, -ERR# otherwise.
// Overload without a cursor: takes only the pool and the iteration context.
2435 int pool_iterate_begin(const rgw_pool
& pool
, RGWPoolIterCtx
& ctx
);
2438 * Init pool iteration, starting from a given cursor position
2440 * cursor: position to start iteration
2441 * ctx: context object to use for the iteration
2442 * Returns: 0 on success, -ERR# otherwise.
// Overload with a cursor string giving the position to start from.
// NOTE(review): presumably a value previously returned by
// pool_iterate_get_cursor() -- confirm.
2444 int pool_iterate_begin(const rgw_pool
& pool
, const string
& cursor
, RGWPoolIterCtx
& ctx
);
2447 * Get pool iteration position
2448 * ctx: context object to use for the iteration
2449 * Returns: string representation of position
// Returns the cursor by value as a string; ctx is a non-const reference.
2451 string
pool_iterate_get_cursor(RGWPoolIterCtx
& ctx
);
2454 * Iterate over the pool and return object names, applying an optional filter
2455 * ctx: iteration context, initialized with pool_iterate_begin()
2456 * num: max number of objects to return
2457 * objs: a vector that the results will be appended to
2458 * is_truncated: if not NULL, will hold true iff iteration is not yet complete (more objects remain)
2459 * filter: if not NULL, will be used to filter returned objects
2460 * Returns: 0 on success, -ERR# otherwise.
// Results are returned as rgw_bucket_dir_entry elements in objs; is_truncated
// and filter are both pointers.
2462 int pool_iterate(RGWPoolIterCtx
& ctx
, uint32_t num
, vector
<rgw_bucket_dir_entry
>& objs
,
2463 bool *is_truncated
, RGWAccessListFilter
*filter
);
// Returns a 64-bit bucket id. NOTE(review): presumably each call yields a new,
// increasing value -- confirm in the implementation.
2465 uint64_t next_bucket_id();
// Static entry points for obtaining RGWRados instances and for closing them
// again via close_storage().
2468 class RGWStoreManager
{
2470 RGWStoreManager() {}
// Forwards every argument, in order, to init_storage_provider(); use_cache
// (default true) is passed in the use_metadata_cache position.
2471 static RGWRados
*get_storage(CephContext
*cct
, bool use_gc_thread
, bool use_lc_thread
, bool quota_threads
,
2472 bool run_sync_thread
, bool run_reshard_thread
, bool use_cache
= true) {
2473 RGWRados
*store
= init_storage_provider(cct
, use_gc_thread
, use_lc_thread
, quota_threads
, run_sync_thread
,
2474 run_reshard_thread
, use_cache
);
// Raw variant: only needs the CephContext and calls init_raw_storage_provider().
2477 static RGWRados
*get_raw_storage(CephContext
*cct
) {
2478 RGWRados
*store
= init_raw_storage_provider(cct
);
// Declared here, defined elsewhere.
2481 static RGWRados
*init_storage_provider(CephContext
*cct
, bool use_gc_thread
, bool use_lc_thread
, bool quota_threads
, bool run_sync_thread
, bool run_reshard_thread
, bool use_metadata_cache
);
2482 static RGWRados
*init_raw_storage_provider(CephContext
*cct
);
2483 static void close_storage(RGWRados
*store
);
// Two-argument constructor: delegates to the three-argument init(), passing
// _upload_id as the part-unique string as well.
2494 RGWMPObj(const string
& _oid
, const string
& _upload_id
) {
2495 init(_oid
, _upload_id
, _upload_id
);
// Two-argument init: same delegation as the constructor -- calls the
// three-argument init() with _upload_id used for both of the last two arguments.
2497 void init(const string
& _oid
, const string
& _upload_id
) {
2498 init(_oid
, _upload_id
, _upload_id
);
// Three-argument init. Stores _upload_id, builds meta as
// prefix + upload_id + MP_META_SUFFIX, and only then appends part_unique_str
// to prefix -- so meta never includes the part-unique string.
2500 void init(const string
& _oid
, const string
& _upload_id
, const string
& part_unique_str
) {
2506 upload_id
= _upload_id
;
2508 meta
= prefix
+ upload_id
+ MP_META_SUFFIX
;
2509 prefix
.append(part_unique_str
);
// Returns the stored meta string by const reference (no copy).
2511 const string
& get_meta() const { return meta
; }
// Integer overload: formats the part number as ".<num>" into buf, limited to
// 16 bytes by snprintf.
// NOTE(review): presumably the result is prefix followed by buf -- confirm.
2512 string
get_part(int num
) const {
2514 snprintf(buf
, 16, ".%d", num
);
// String overload of get_part: takes the part identifier as a string instead
// of an int and returns a string by value.
2519 string
get_part(const string
& part
) const {
// Const accessor returning a const string reference.
// NOTE(review): presumably the upload_id member -- confirm.
2525 const string
& get_upload_id() const {
// Const accessor returning a const string reference.
// NOTE(review): presumably the oid member -- confirm.
2528 const string
& get_key() const {
// Parses a meta name of the form <key>.<upload_id>.<suffix>: locates the last
// '.', then the '.' before it, takes everything before the middle dot as oid
// and the text between the two dots as upload_id, then re-initializes via
// init(oid, upload_id, upload_id).
// NOTE(review): rfind() returns an unsigned size_type that is stored in an
// int here, so "not found" shows up as a negative value; also, when end_pos
// is 0 the search position end_pos - 1 is -1 -- confirm both cases are
// rejected before the substr() calls.
2531 bool from_meta(string
& meta
) {
2532 int end_pos
= meta
.rfind('.'); // search for ".meta"
2535 int mid_pos
= meta
.rfind('.', end_pos
- 1); // <key>.<upload_id>
2538 oid
= meta
.substr(0, mid_pos
);
2539 upload_id
= meta
.substr(mid_pos
+ 1, end_pos
- mid_pos
- 1);
2540 init(oid
, upload_id
, upload_id
);
2549 }; // class RGWMPObj
2552 class RGWRadosThread
{
2553 class Worker
: public Thread
{
2555 RGWRadosThread
*processor
;
2560 Mutex::Locker
l(lock
);
2564 void wait_interval(const utime_t
& wait_time
) {
2565 Mutex::Locker
l(lock
);
2566 cond
.WaitInterval(lock
, wait_time
);
2570 Worker(CephContext
*_cct
, RGWRadosThread
*_p
) : cct(_cct
), processor(_p
), lock("RGWRadosThread::Worker") {}
2571 void *entry() override
;
2573 Mutex::Locker
l(lock
);
2584 std::atomic
<bool> down_flag
= { false };
2588 virtual uint64_t interval_msec() = 0;
2589 virtual void stop_process() {}
2591 RGWRadosThread(RGWRados
*_store
, const string
& thread_name
= "radosgw")
2592 : worker(NULL
), cct(_store
->ctx()), store(_store
), thread_name(thread_name
) {}
2593 virtual ~RGWRadosThread() {
2597 virtual int init() { return 0; }
2598 virtual int process() = 0;
2600 bool going_down() { return down_flag
; }