]> git.proxmox.com Git - ceph.git/blame - ceph/src/rgw/rgw_rados.h
bump version to 12.2.12-pve1
[ceph.git] / ceph / src / rgw / rgw_rados.h
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3
4#ifndef CEPH_RGWRADOS_H
5#define CEPH_RGWRADOS_H
6
7#include <functional>
8
9#include "include/rados/librados.hpp"
10#include "include/Context.h"
3a9019d9 11#include "common/admin_socket.h"
7c673cae
FG
12#include "common/RefCountedObj.h"
13#include "common/RWLock.h"
14#include "common/ceph_time.h"
15#include "common/lru_map.h"
16#include "rgw_common.h"
17#include "cls/rgw/cls_rgw_types.h"
18#include "cls/version/cls_version_types.h"
19#include "cls/log/cls_log_types.h"
20#include "cls/statelog/cls_statelog_types.h"
21#include "cls/timeindex/cls_timeindex_types.h"
22#include "rgw_log.h"
23#include "rgw_metadata.h"
24#include "rgw_meta_sync_status.h"
25#include "rgw_period_puller.h"
26#include "rgw_sync_module.h"
b32b8144 27#include "rgw_sync_log_trim.h"
7c673cae
FG
28
/* Forward declarations of types this header refers to without needing
 * their full definitions. */
class RGWWatcher;
class SafeTimer;
class ACLOwner;
class RGWGC;
class RGWMetaNotifier;
class RGWDataNotifier;
class RGWLC;
class RGWObjectExpirer;
class RGWMetaSyncProcessorThread;
class RGWDataSyncProcessorThread;
class RGWSyncLogTrimThread;
class RGWRESTConn;
struct RGWZoneGroup;
struct RGWZoneParams;
class RGWReshard;
class RGWReshardWait;

/* flags for put_obj_meta() */
#define PUT_OBJ_CREATE      0x01
#define PUT_OBJ_EXCL        0x02
#define PUT_OBJ_CREATE_EXCL (PUT_OBJ_CREATE | PUT_OBJ_EXCL)

/* object key namespaces */
#define RGW_OBJ_NS_MULTIPART "multipart"
#define RGW_OBJ_NS_SHADOW    "shadow"

#define RGW_BUCKET_INSTANCE_MD_PREFIX ".bucket.meta."

#define RGW_NO_SHARD -1
57
31f18b77
FG
/* Primes used to fold a hash value before it is reduced to a shard number. */
#define RGW_SHARDS_PRIME_0 7877
#define RGW_SHARDS_PRIME_1 65521

/*
 * Reduce a hash value to a shard number.
 *
 * The hash is first taken modulo a prime (RGW_SHARDS_PRIME_0 when
 * max_shards does not exceed it, RGW_SHARDS_PRIME_1 otherwise) and then
 * modulo max_shards. max_shards must be non-zero; the arithmetic is done
 * in unsigned, so callers are expected to pass a positive count.
 *
 * only called by rgw_shard_id and rgw_bucket_shard_index
 */
static inline int rgw_shards_mod(unsigned hval, int max_shards)
{
  const unsigned prime = (max_shards <= RGW_SHARDS_PRIME_0)
    ? RGW_SHARDS_PRIME_0
    : RGW_SHARDS_PRIME_1;
  return hval % prime % max_shards;
}
69
1adf2230
AA
70// used for logging and tagging
71static inline int rgw_shard_id(const string& key, int max_shards)
31f18b77 72{
1adf2230
AA
73 return rgw_shards_mod(ceph_str_hash_linux(key.c_str(), key.size()),
74 max_shards);
75}
76
77// used for bucket indices
78static inline uint32_t rgw_bucket_shard_index(const std::string& key,
79 int num_shards) {
80 uint32_t sid = ceph_str_hash_linux(key.c_str(), key.size());
81 uint32_t sid2 = sid ^ ((sid & 0xFF) << 24);
82 return rgw_shards_mod(sid2, num_shards);
31f18b77
FG
83}
84
85static inline int rgw_shards_max()
86{
87 return RGW_SHARDS_PRIME_1;
88}
7c673cae
FG
89
90static inline void prepend_bucket_marker(const rgw_bucket& bucket, const string& orig_oid, string& oid)
91{
92 if (bucket.marker.empty() || orig_oid.empty()) {
93 oid = orig_oid;
94 } else {
95 oid = bucket.marker;
96 oid.append("_");
97 oid.append(orig_oid);
98 }
99}
100
101static inline void get_obj_bucket_and_oid_loc(const rgw_obj& obj, string& oid, string& locator)
102{
103 const rgw_bucket& bucket = obj.bucket;
104 prepend_bucket_marker(bucket, obj.get_oid(), oid);
105 const string& loc = obj.key.get_loc();
106 if (!loc.empty()) {
107 prepend_bucket_marker(bucket, loc, locator);
108 } else {
109 locator.clear();
110 }
111}
112
113int rgw_init_ioctx(librados::Rados *rados, const rgw_pool& pool, librados::IoCtx& ioctx, bool create = false);
114
115int rgw_policy_from_attrset(CephContext *cct, map<string, bufferlist>& attrset, RGWAccessControlPolicy *policy);
116
117static inline bool rgw_raw_obj_to_obj(const rgw_bucket& bucket, const rgw_raw_obj& raw_obj, rgw_obj *obj)
118{
119 ssize_t pos = raw_obj.oid.find('_');
120 if (pos < 0) {
121 return false;
122 }
123
124 if (!rgw_obj_key::parse_raw_oid(raw_obj.oid.substr(pos + 1), &obj->key)) {
125 return false;
126 }
127 obj->bucket = bucket;
128
129 return true;
130}
131
132struct rgw_bucket_placement {
133 string placement_rule;
134 rgw_bucket bucket;
135
136 void dump(Formatter *f) const;
137};
138
139class rgw_obj_select {
140 string placement_rule;
141 rgw_obj obj;
142 rgw_raw_obj raw_obj;
143 bool is_raw;
144
145public:
146 rgw_obj_select() : is_raw(false) {}
147 rgw_obj_select(const rgw_obj& _obj) : obj(_obj), is_raw(false) {}
148 rgw_obj_select(const rgw_raw_obj& _raw_obj) : raw_obj(_raw_obj), is_raw(true) {}
149 rgw_obj_select(const rgw_obj_select& rhs) {
c07f9fc5 150 placement_rule = rhs.placement_rule;
7c673cae
FG
151 is_raw = rhs.is_raw;
152 if (is_raw) {
153 raw_obj = rhs.raw_obj;
154 } else {
155 obj = rhs.obj;
156 }
157 }
158
159 rgw_raw_obj get_raw_obj(const RGWZoneGroup& zonegroup, const RGWZoneParams& zone_params) const;
160 rgw_raw_obj get_raw_obj(RGWRados *store) const;
161
162 rgw_obj_select& operator=(const rgw_obj& rhs) {
163 obj = rhs;
164 is_raw = false;
165 return *this;
166 }
167
168 rgw_obj_select& operator=(const rgw_raw_obj& rhs) {
169 raw_obj = rhs;
170 is_raw = true;
171 return *this;
172 }
173
174 void set_placement_rule(const string& rule) {
175 placement_rule = rule;
176 }
177};
178
179struct compression_block {
180 uint64_t old_ofs;
181 uint64_t new_ofs;
182 uint64_t len;
183
184 void encode(bufferlist& bl) const {
185 ENCODE_START(1, 1, bl);
186 ::encode(old_ofs, bl);
187 ::encode(new_ofs, bl);
188 ::encode(len, bl);
189 ENCODE_FINISH(bl);
190 }
191
192 void decode(bufferlist::iterator& bl) {
193 DECODE_START(1, bl);
194 ::decode(old_ofs, bl);
195 ::decode(new_ofs, bl);
196 ::decode(len, bl);
197 DECODE_FINISH(bl);
198 }
199};
200WRITE_CLASS_ENCODER(compression_block)
201
202struct RGWCompressionInfo {
203 string compression_type;
204 uint64_t orig_size;
205 vector<compression_block> blocks;
206
207 RGWCompressionInfo() : compression_type("none"), orig_size(0) {}
208 RGWCompressionInfo(const RGWCompressionInfo& cs_info) : compression_type(cs_info.compression_type),
209 orig_size(cs_info.orig_size),
210 blocks(cs_info.blocks) {}
211
212 void encode(bufferlist& bl) const {
213 ENCODE_START(1, 1, bl);
214 ::encode(compression_type, bl);
215 ::encode(orig_size, bl);
216 ::encode(blocks, bl);
217 ENCODE_FINISH(bl);
218 }
219
220 void decode(bufferlist::iterator& bl) {
221 DECODE_START(1, bl);
222 ::decode(compression_type, bl);
223 ::decode(orig_size, bl);
224 ::decode(blocks, bl);
225 DECODE_FINISH(bl);
226 }
227};
228WRITE_CLASS_ENCODER(RGWCompressionInfo)
229
230int rgw_compression_info_from_attrset(map<string, bufferlist>& attrs, bool& need_decompress, RGWCompressionInfo& cs_info);
231
232struct RGWOLHInfo {
233 rgw_obj target;
234 bool removed;
235
236 RGWOLHInfo() : removed(false) {}
237
238 void encode(bufferlist& bl) const {
239 ENCODE_START(1, 1, bl);
240 ::encode(target, bl);
241 ::encode(removed, bl);
242 ENCODE_FINISH(bl);
243 }
244
245 void decode(bufferlist::iterator& bl) {
246 DECODE_START(1, bl);
247 ::decode(target, bl);
248 ::decode(removed, bl);
249 DECODE_FINISH(bl);
250 }
251 static void generate_test_instances(list<RGWOLHInfo*>& o);
252 void dump(Formatter *f) const;
253};
254WRITE_CLASS_ENCODER(RGWOLHInfo)
255
256struct RGWOLHPendingInfo {
257 ceph::real_time time;
258
259 RGWOLHPendingInfo() {}
260
261 void encode(bufferlist& bl) const {
262 ENCODE_START(1, 1, bl);
263 ::encode(time, bl);
264 ENCODE_FINISH(bl);
265 }
266
267 void decode(bufferlist::iterator& bl) {
268 DECODE_START(1, bl);
269 ::decode(time, bl);
270 DECODE_FINISH(bl);
271 }
272
273 void dump(Formatter *f) const;
274};
275WRITE_CLASS_ENCODER(RGWOLHPendingInfo)
276
277struct RGWUsageBatch {
278 map<ceph::real_time, rgw_usage_log_entry> m;
279
280 void insert(ceph::real_time& t, rgw_usage_log_entry& entry, bool *account) {
281 bool exists = m.find(t) != m.end();
282 *account = !exists;
283 m[t].aggregate(entry);
284 }
285};
286
/* Cursor state for iterating usage records: a marker string and an index (0 initially). */
struct RGWUsageIter {
  string read_iter;
  uint32_t index;

  RGWUsageIter() : index(0) {}
};
293
294class RGWGetDataCB {
295protected:
296 uint64_t extra_data_len;
297public:
298 virtual int handle_data(bufferlist& bl, off_t bl_ofs, off_t bl_len) = 0;
299 RGWGetDataCB() : extra_data_len(0) {}
300 virtual ~RGWGetDataCB() {}
301 virtual void set_extra_data_len(uint64_t len) {
302 extra_data_len = len;
303 }
304 /**
305 * Flushes any cached data. Used by RGWGetObjFilter.
306 * Return logic same as handle_data.
307 */
308 virtual int flush() {
309 return 0;
310 }
311 /**
312 * Allows to extend fetch range of RGW object. Used by RGWGetObjFilter.
313 */
314 virtual int fixup_range(off_t& bl_ofs, off_t& bl_end) {
315 return 0;
316 }
317};
318
319class RGWAccessListFilter {
320public:
321 virtual ~RGWAccessListFilter() {}
322 virtual bool filter(string& name, string& key) = 0;
323};
324
325struct RGWCloneRangeInfo {
326 rgw_obj src;
327 off_t src_ofs;
328 off_t dst_ofs;
329 uint64_t len;
330};
331
332struct RGWObjManifestPart {
333 rgw_obj loc; /* the object where the data is located */
334 uint64_t loc_ofs; /* the offset at that object where the data is located */
335 uint64_t size; /* the part size */
336
337 RGWObjManifestPart() : loc_ofs(0), size(0) {}
338
339 void encode(bufferlist& bl) const {
340 ENCODE_START(2, 2, bl);
341 ::encode(loc, bl);
342 ::encode(loc_ofs, bl);
343 ::encode(size, bl);
344 ENCODE_FINISH(bl);
345 }
346
347 void decode(bufferlist::iterator& bl) {
348 DECODE_START_LEGACY_COMPAT_LEN_32(2, 2, 2, bl);
349 ::decode(loc, bl);
350 ::decode(loc_ofs, bl);
351 ::decode(size, bl);
352 DECODE_FINISH(bl);
353 }
354
355 void dump(Formatter *f) const;
356 static void generate_test_instances(list<RGWObjManifestPart*>& o);
357};
358WRITE_CLASS_ENCODER(RGWObjManifestPart)
359
/*
  The manifest defines a set of rules for structuring the object parts.
  There are a few terms to note:
    - head: the head part of the object, which is the part that contains
      the first chunk of data. An object might not have a head (as in the
      case of multipart-part objects).
    - stripe: data portion of a single rgw object that resides on a single
      rados object.
    - part: a collection of stripes that make a contiguous part of an
      object. A regular object will only have one part (although it might
      have many stripes), a multipart object might have many parts. Each
      part has a fixed stripe size, although the last stripe of a part
      might be smaller than that. Consecutive parts may be merged if their
      stripe value is the same.
*/
375
376struct RGWObjManifestRule {
377 uint32_t start_part_num;
378 uint64_t start_ofs;
379 uint64_t part_size; /* each part size, 0 if there's no part size, meaning it's unlimited */
380 uint64_t stripe_max_size; /* underlying obj max size */
381 string override_prefix;
382
383 RGWObjManifestRule() : start_part_num(0), start_ofs(0), part_size(0), stripe_max_size(0) {}
384 RGWObjManifestRule(uint32_t _start_part_num, uint64_t _start_ofs, uint64_t _part_size, uint64_t _stripe_max_size) :
385 start_part_num(_start_part_num), start_ofs(_start_ofs), part_size(_part_size), stripe_max_size(_stripe_max_size) {}
386
387 void encode(bufferlist& bl) const {
388 ENCODE_START(2, 1, bl);
389 ::encode(start_part_num, bl);
390 ::encode(start_ofs, bl);
391 ::encode(part_size, bl);
392 ::encode(stripe_max_size, bl);
393 ::encode(override_prefix, bl);
394 ENCODE_FINISH(bl);
395 }
396
397 void decode(bufferlist::iterator& bl) {
398 DECODE_START(2, bl);
399 ::decode(start_part_num, bl);
400 ::decode(start_ofs, bl);
401 ::decode(part_size, bl);
402 ::decode(stripe_max_size, bl);
403 if (struct_v >= 2)
404 ::decode(override_prefix, bl);
405 DECODE_FINISH(bl);
406 }
407 void dump(Formatter *f) const;
408};
409WRITE_CLASS_ENCODER(RGWObjManifestRule)
410
411class RGWObjManifest {
412protected:
413 bool explicit_objs; /* old manifest? */
414 map<uint64_t, RGWObjManifestPart> objs;
415
416 uint64_t obj_size;
417
418 rgw_obj obj;
419 uint64_t head_size;
420 string head_placement_rule;
421
422 uint64_t max_head_size;
423 string prefix;
424 rgw_bucket_placement tail_placement; /* might be different than the original bucket,
425 as object might have been copied across pools */
426 map<uint64_t, RGWObjManifestRule> rules;
427
428 string tail_instance; /* tail object's instance */
429
430 void convert_to_explicit(const RGWZoneGroup& zonegroup, const RGWZoneParams& zone_params);
431 int append_explicit(RGWObjManifest& m, const RGWZoneGroup& zonegroup, const RGWZoneParams& zone_params);
432 void append_rules(RGWObjManifest& m, map<uint64_t, RGWObjManifestRule>::iterator& iter, string *override_prefix);
433
434 void update_iterators() {
435 begin_iter.seek(0);
436 end_iter.seek(obj_size);
437 }
438public:
439
440 RGWObjManifest() : explicit_objs(false), obj_size(0), head_size(0), max_head_size(0),
441 begin_iter(this), end_iter(this) {}
442 RGWObjManifest(const RGWObjManifest& rhs) {
443 *this = rhs;
444 }
445 RGWObjManifest& operator=(const RGWObjManifest& rhs) {
446 explicit_objs = rhs.explicit_objs;
447 objs = rhs.objs;
448 obj_size = rhs.obj_size;
449 obj = rhs.obj;
450 head_size = rhs.head_size;
451 max_head_size = rhs.max_head_size;
452 prefix = rhs.prefix;
453 tail_placement = rhs.tail_placement;
454 rules = rhs.rules;
455 tail_instance = rhs.tail_instance;
456
457 begin_iter.set_manifest(this);
458 end_iter.set_manifest(this);
459
460 begin_iter.seek(rhs.begin_iter.get_ofs());
461 end_iter.seek(rhs.end_iter.get_ofs());
462
463 return *this;
464 }
465
466 map<uint64_t, RGWObjManifestPart>& get_explicit_objs() {
467 return objs;
468 }
469
470
471 void set_explicit(uint64_t _size, map<uint64_t, RGWObjManifestPart>& _objs) {
472 explicit_objs = true;
473 obj_size = _size;
474 objs.swap(_objs);
475 }
476
477 void get_implicit_location(uint64_t cur_part_id, uint64_t cur_stripe, uint64_t ofs, string *override_prefix, rgw_obj_select *location);
478
479 void set_trivial_rule(uint64_t tail_ofs, uint64_t stripe_max_size) {
480 RGWObjManifestRule rule(0, tail_ofs, 0, stripe_max_size);
481 rules[0] = rule;
482 max_head_size = tail_ofs;
483 }
484
485 void set_multipart_part_rule(uint64_t stripe_max_size, uint64_t part_num) {
486 RGWObjManifestRule rule(0, 0, 0, stripe_max_size);
487 rule.start_part_num = part_num;
488 rules[0] = rule;
489 max_head_size = 0;
490 }
491
492 void encode(bufferlist& bl) const {
493 ENCODE_START(7, 6, bl);
494 ::encode(obj_size, bl);
495 ::encode(objs, bl);
496 ::encode(explicit_objs, bl);
497 ::encode(obj, bl);
498 ::encode(head_size, bl);
499 ::encode(max_head_size, bl);
500 ::encode(prefix, bl);
501 ::encode(rules, bl);
502 bool encode_tail_bucket = !(tail_placement.bucket == obj.bucket);
503 ::encode(encode_tail_bucket, bl);
504 if (encode_tail_bucket) {
505 ::encode(tail_placement.bucket, bl);
506 }
507 bool encode_tail_instance = (tail_instance != obj.key.instance);
508 ::encode(encode_tail_instance, bl);
509 if (encode_tail_instance) {
510 ::encode(tail_instance, bl);
511 }
512 ::encode(head_placement_rule, bl);
513 ::encode(tail_placement.placement_rule, bl);
514 ENCODE_FINISH(bl);
515 }
516
517 void decode(bufferlist::iterator& bl) {
518 DECODE_START_LEGACY_COMPAT_LEN_32(7, 2, 2, bl);
519 ::decode(obj_size, bl);
520 ::decode(objs, bl);
521 if (struct_v >= 3) {
522 ::decode(explicit_objs, bl);
523 ::decode(obj, bl);
524 ::decode(head_size, bl);
525 ::decode(max_head_size, bl);
526 ::decode(prefix, bl);
527 ::decode(rules, bl);
528 } else {
529 explicit_objs = true;
530 if (!objs.empty()) {
531 map<uint64_t, RGWObjManifestPart>::iterator iter = objs.begin();
532 obj = iter->second.loc;
533 head_size = iter->second.size;
534 max_head_size = head_size;
535 }
536 }
537
538 if (explicit_objs && head_size > 0 && !objs.empty()) {
539 /* patch up manifest due to issue 16435:
540 * the first object in the explicit objs list might not be the one we need to access, use the
541 * head object instead if set. This would happen if we had an old object that was created
542 * when the explicit objs manifest was around, and it got copied.
543 */
544 rgw_obj& obj_0 = objs[0].loc;
545 if (!obj_0.get_oid().empty() && obj_0.key.ns.empty()) {
546 objs[0].loc = obj;
547 objs[0].size = head_size;
548 }
549 }
550
551 if (struct_v >= 4) {
552 if (struct_v < 6) {
553 ::decode(tail_placement.bucket, bl);
554 } else {
555 bool need_to_decode;
556 ::decode(need_to_decode, bl);
557 if (need_to_decode) {
558 ::decode(tail_placement.bucket, bl);
559 } else {
560 tail_placement.bucket = obj.bucket;
561 }
562 }
563 }
564
565 if (struct_v >= 5) {
566 if (struct_v < 6) {
567 ::decode(tail_instance, bl);
568 } else {
569 bool need_to_decode;
570 ::decode(need_to_decode, bl);
571 if (need_to_decode) {
572 ::decode(tail_instance, bl);
573 } else {
574 tail_instance = obj.key.instance;
575 }
576 }
577 } else { // old object created before 'tail_instance' field added to manifest
578 tail_instance = obj.key.instance;
579 }
580
581 if (struct_v >= 7) {
582 ::decode(head_placement_rule, bl);
583 ::decode(tail_placement.placement_rule, bl);
584 }
585
586 update_iterators();
587 DECODE_FINISH(bl);
588 }
589
590 void dump(Formatter *f) const;
591 static void generate_test_instances(list<RGWObjManifest*>& o);
592
593 int append(RGWObjManifest& m, RGWZoneGroup& zonegroup, RGWZoneParams& zone_params);
594 int append(RGWObjManifest& m, RGWRados *store);
595
596 bool get_rule(uint64_t ofs, RGWObjManifestRule *rule);
597
598 bool empty() {
599 if (explicit_objs)
600 return objs.empty();
601 return rules.empty();
602 }
603
604 bool has_explicit_objs() {
605 return explicit_objs;
606 }
607
608 bool has_tail() {
609 if (explicit_objs) {
610 if (objs.size() == 1) {
611 map<uint64_t, RGWObjManifestPart>::iterator iter = objs.begin();
612 rgw_obj& o = iter->second.loc;
613 return !(obj == o);
614 }
615 return (objs.size() >= 2);
616 }
617 return (obj_size > head_size);
618 }
619
620 void set_head(const string& placement_rule, const rgw_obj& _o, uint64_t _s) {
621 head_placement_rule = placement_rule;
622 obj = _o;
623 head_size = _s;
624
625 if (explicit_objs && head_size > 0) {
626 objs[0].loc = obj;
627 objs[0].size = head_size;
628 }
629 }
630
631 const rgw_obj& get_obj() {
632 return obj;
633 }
634
635 void set_tail_placement(const string& placement_rule, const rgw_bucket& _b) {
636 tail_placement.placement_rule = placement_rule;
637 tail_placement.bucket = _b;
638 }
639
640 const rgw_bucket_placement& get_tail_placement() {
641 return tail_placement;
642 }
643
644 const string& get_head_placement_rule() {
645 return head_placement_rule;
646 }
647
648 void set_prefix(const string& _p) {
649 prefix = _p;
650 }
651
652 const string& get_prefix() {
653 return prefix;
654 }
655
656 void set_tail_instance(const string& _ti) {
657 tail_instance = _ti;
658 }
659
660 const string& get_tail_instance() {
661 return tail_instance;
662 }
663
664 void set_head_size(uint64_t _s) {
665 head_size = _s;
666 }
667
668 void set_obj_size(uint64_t s) {
669 obj_size = s;
670
671 update_iterators();
672 }
673
674 uint64_t get_obj_size() {
675 return obj_size;
676 }
677
678 uint64_t get_head_size() {
679 return head_size;
680 }
681
682 void set_max_head_size(uint64_t s) {
683 max_head_size = s;
684 }
685
686 uint64_t get_max_head_size() {
687 return max_head_size;
688 }
689
690 class obj_iterator {
691 RGWObjManifest *manifest;
692 uint64_t part_ofs; /* where current part starts */
693 uint64_t stripe_ofs; /* where current stripe starts */
694 uint64_t ofs; /* current position within the object */
695 uint64_t stripe_size; /* current part size */
696
697 int cur_part_id;
698 int cur_stripe;
699 string cur_override_prefix;
700
701 rgw_obj_select location;
702
703 map<uint64_t, RGWObjManifestRule>::iterator rule_iter;
704 map<uint64_t, RGWObjManifestRule>::iterator next_rule_iter;
705
706 map<uint64_t, RGWObjManifestPart>::iterator explicit_iter;
707
708 void init() {
709 part_ofs = 0;
710 stripe_ofs = 0;
711 ofs = 0;
712 stripe_size = 0;
713 cur_part_id = 0;
714 cur_stripe = 0;
715 }
716
717 void update_explicit_pos();
718
719
720 protected:
721
722 void set_manifest(RGWObjManifest *m) {
723 manifest = m;
724 }
725
726 public:
727 obj_iterator() : manifest(NULL) {
728 init();
729 }
730 explicit obj_iterator(RGWObjManifest *_m) : manifest(_m) {
731 init();
732 if (!manifest->empty()) {
733 seek(0);
734 }
735 }
736 obj_iterator(RGWObjManifest *_m, uint64_t _ofs) : manifest(_m) {
737 init();
738 if (!manifest->empty()) {
739 seek(_ofs);
740 }
741 }
742 void seek(uint64_t ofs);
743
744 void operator++();
745 bool operator==(const obj_iterator& rhs) {
746 return (ofs == rhs.ofs);
747 }
748 bool operator!=(const obj_iterator& rhs) {
749 return (ofs != rhs.ofs);
750 }
751 const rgw_obj_select& get_location() {
752 return location;
753 }
754
755 /* start of current stripe */
756 uint64_t get_stripe_ofs() {
757 if (manifest->explicit_objs) {
758 return explicit_iter->first;
759 }
760 return stripe_ofs;
761 }
762
763 /* current ofs relative to start of rgw object */
764 uint64_t get_ofs() const {
765 return ofs;
766 }
767
768 /* stripe number */
769 int get_cur_stripe() const {
770 return cur_stripe;
771 }
772
773 /* current stripe size */
774 uint64_t get_stripe_size() {
775 if (manifest->explicit_objs) {
776 return explicit_iter->second.size;
777 }
778 return stripe_size;
779 }
780
781 /* offset where data starts within current stripe */
782 uint64_t location_ofs() {
783 if (manifest->explicit_objs) {
784 return explicit_iter->second.loc_ofs;
785 }
786 return 0; /* all stripes start at zero offset */
787 }
788
789 void update_location();
790
791 friend class RGWObjManifest;
792 };
793
794 const obj_iterator& obj_begin();
795 const obj_iterator& obj_end();
796 obj_iterator obj_find(uint64_t ofs);
797
798 obj_iterator begin_iter;
799 obj_iterator end_iter;
800
801 /*
802 * simple object generator. Using a simple single rule manifest.
803 */
804 class generator {
805 RGWObjManifest *manifest;
806 uint64_t last_ofs;
807 uint64_t cur_part_ofs;
808 int cur_part_id;
809 int cur_stripe;
810 uint64_t cur_stripe_size;
811 string cur_oid;
812
813 string oid_prefix;
814
815 rgw_obj_select cur_obj;
7c673cae
FG
816
817 RGWObjManifestRule rule;
818
819 public:
820 generator() : manifest(NULL), last_ofs(0), cur_part_ofs(0), cur_part_id(0),
821 cur_stripe(0), cur_stripe_size(0) {}
822 int create_begin(CephContext *cct, RGWObjManifest *manifest, const string& placement_rule, rgw_bucket& bucket, rgw_obj& obj);
823
824 int create_next(uint64_t ofs);
825
826 rgw_raw_obj get_cur_obj(RGWZoneGroup& zonegroup, RGWZoneParams& zone_params) { return cur_obj.get_raw_obj(zonegroup, zone_params); }
827 rgw_raw_obj get_cur_obj(RGWRados *store) { return cur_obj.get_raw_obj(store); }
828
829 /* total max size of current stripe (including head obj) */
830 uint64_t cur_stripe_max_size() {
831 return cur_stripe_size;
832 }
833 };
834};
835WRITE_CLASS_ENCODER(RGWObjManifest)
836
837struct RGWUploadPartInfo {
838 uint32_t num;
839 uint64_t size;
840 uint64_t accounted_size{0};
841 string etag;
842 ceph::real_time modified;
843 RGWObjManifest manifest;
844 RGWCompressionInfo cs_info;
845
846 RGWUploadPartInfo() : num(0), size(0) {}
847
848 void encode(bufferlist& bl) const {
849 ENCODE_START(4, 2, bl);
850 ::encode(num, bl);
851 ::encode(size, bl);
852 ::encode(etag, bl);
853 ::encode(modified, bl);
854 ::encode(manifest, bl);
855 ::encode(cs_info, bl);
856 ::encode(accounted_size, bl);
857 ENCODE_FINISH(bl);
858 }
859 void decode(bufferlist::iterator& bl) {
860 DECODE_START_LEGACY_COMPAT_LEN(4, 2, 2, bl);
861 ::decode(num, bl);
862 ::decode(size, bl);
863 ::decode(etag, bl);
864 ::decode(modified, bl);
865 if (struct_v >= 3)
866 ::decode(manifest, bl);
867 if (struct_v >= 4) {
868 ::decode(cs_info, bl);
869 ::decode(accounted_size, bl);
870 } else {
871 accounted_size = size;
872 }
873 DECODE_FINISH(bl);
874 }
875 void dump(Formatter *f) const;
876 static void generate_test_instances(list<RGWUploadPartInfo*>& o);
877};
878WRITE_CLASS_ENCODER(RGWUploadPartInfo)
879
880struct RGWObjState {
881 rgw_obj obj;
882 bool is_atomic;
883 bool has_attrs;
884 bool exists;
885 uint64_t size; //< size of raw object
886 uint64_t accounted_size{0}; //< size before compression, encryption
887 ceph::real_time mtime;
888 uint64_t epoch;
889 bufferlist obj_tag;
181888fb 890 bufferlist tail_tag;
7c673cae
FG
891 string write_tag;
892 bool fake_tag;
893 RGWObjManifest manifest;
894 bool has_manifest;
895 string shadow_obj;
896 bool has_data;
897 bufferlist data;
898 bool prefetch_data;
899 bool keep_tail;
900 bool is_olh;
901 bufferlist olh_tag;
902 uint64_t pg_ver;
903 uint32_t zone_short_id;
904
905 /* important! don't forget to update copy constructor */
906
907 RGWObjVersionTracker objv_tracker;
908
909 map<string, bufferlist> attrset;
910 RGWObjState() : is_atomic(false), has_attrs(0), exists(false),
911 size(0), epoch(0), fake_tag(false), has_manifest(false),
912 has_data(false), prefetch_data(false), keep_tail(false), is_olh(false),
913 pg_ver(0), zone_short_id(0) {}
914 RGWObjState(const RGWObjState& rhs) : obj (rhs.obj) {
915 is_atomic = rhs.is_atomic;
916 has_attrs = rhs.has_attrs;
917 exists = rhs.exists;
918 size = rhs.size;
919 accounted_size = rhs.accounted_size;
920 mtime = rhs.mtime;
921 epoch = rhs.epoch;
922 if (rhs.obj_tag.length()) {
923 obj_tag = rhs.obj_tag;
924 }
181888fb
FG
925 if (rhs.tail_tag.length()) {
926 tail_tag = rhs.tail_tag;
927 }
7c673cae
FG
928 write_tag = rhs.write_tag;
929 fake_tag = rhs.fake_tag;
930 if (rhs.has_manifest) {
931 manifest = rhs.manifest;
932 }
933 has_manifest = rhs.has_manifest;
934 shadow_obj = rhs.shadow_obj;
935 has_data = rhs.has_data;
936 if (rhs.data.length()) {
937 data = rhs.data;
938 }
939 prefetch_data = rhs.prefetch_data;
940 keep_tail = rhs.keep_tail;
941 is_olh = rhs.is_olh;
942 objv_tracker = rhs.objv_tracker;
943 pg_ver = rhs.pg_ver;
944 }
945
946 bool get_attr(string name, bufferlist& dest) {
947 map<string, bufferlist>::iterator iter = attrset.find(name);
948 if (iter != attrset.end()) {
949 dest = iter->second;
950 return true;
951 }
952 return false;
953 }
954};
955
956struct RGWRawObjState {
957 rgw_raw_obj obj;
958 bool has_attrs{false};
959 bool exists{false};
960 uint64_t size{0};
961 ceph::real_time mtime;
962 uint64_t epoch;
963 bufferlist obj_tag;
964 bool has_data{false};
965 bufferlist data;
966 bool prefetch_data{false};
967 uint64_t pg_ver{0};
968
969 /* important! don't forget to update copy constructor */
970
971 RGWObjVersionTracker objv_tracker;
972
973 map<string, bufferlist> attrset;
974 RGWRawObjState() {}
975 RGWRawObjState(const RGWRawObjState& rhs) : obj (rhs.obj) {
976 has_attrs = rhs.has_attrs;
977 exists = rhs.exists;
978 size = rhs.size;
979 mtime = rhs.mtime;
980 epoch = rhs.epoch;
981 if (rhs.obj_tag.length()) {
982 obj_tag = rhs.obj_tag;
983 }
984 has_data = rhs.has_data;
985 if (rhs.data.length()) {
986 data = rhs.data;
987 }
988 prefetch_data = rhs.prefetch_data;
989 pg_ver = rhs.pg_ver;
990 objv_tracker = rhs.objv_tracker;
991 }
992};
993
994struct RGWPoolIterCtx {
995 librados::IoCtx io_ctx;
996 librados::NObjectIterator iter;
997};
998
999struct RGWListRawObjsCtx {
1000 bool initialized;
1001 RGWPoolIterCtx iter_ctx;
1002
1003 RGWListRawObjsCtx() : initialized(false) {}
1004};
1005
1006struct RGWDefaultSystemMetaObjInfo {
1007 string default_id;
1008
1009 void encode(bufferlist& bl) const {
1010 ENCODE_START(1, 1, bl);
1011 ::encode(default_id, bl);
1012 ENCODE_FINISH(bl);
1013 }
1014
1015 void decode(bufferlist::iterator& bl) {
1016 DECODE_START(1, bl);
1017 ::decode(default_id, bl);
1018 DECODE_FINISH(bl);
1019 }
1020
1021 void dump(Formatter *f) const;
1022 void decode_json(JSONObj *obj);
1023};
1024WRITE_CLASS_ENCODER(RGWDefaultSystemMetaObjInfo)
1025
1026struct RGWNameToId {
1027 string obj_id;
1028
1029 void encode(bufferlist& bl) const {
1030 ENCODE_START(1, 1, bl);
1031 ::encode(obj_id, bl);
1032 ENCODE_FINISH(bl);
1033 }
1034
1035 void decode(bufferlist::iterator& bl) {
1036 DECODE_START(1, bl);
1037 ::decode(obj_id, bl);
1038 DECODE_FINISH(bl);
1039 }
1040
1041 void dump(Formatter *f) const;
1042 void decode_json(JSONObj *obj);
1043};
1044WRITE_CLASS_ENCODER(RGWNameToId)
1045
1046class RGWSystemMetaObj {
1047protected:
1048 string id;
1049 string name;
1050
1051 CephContext *cct;
1052 RGWRados *store;
1053
1054 int store_name(bool exclusive);
1055 int store_info(bool exclusive);
1056 int read_info(const string& obj_id, bool old_format = false);
1057 int read_id(const string& obj_name, string& obj_id);
1058 int read_default(RGWDefaultSystemMetaObjInfo& default_info,
1059 const string& oid);
1060 /* read and use default id */
1061 int use_default(bool old_format = false);
1062
1063public:
1064 RGWSystemMetaObj() : cct(NULL), store(NULL) {}
1065 RGWSystemMetaObj(const string& _name): name(_name), cct(NULL), store(NULL) {}
1066 RGWSystemMetaObj(const string& _id, const string& _name) : id(_id), name(_name), cct(NULL), store(NULL) {}
1067 RGWSystemMetaObj(CephContext *_cct, RGWRados *_store): cct(_cct), store(_store){}
1068 RGWSystemMetaObj(const string& _name, CephContext *_cct, RGWRados *_store): name(_name), cct(_cct), store(_store){}
1069 const string& get_name() const { return name; }
1070 const string& get_id() const { return id; }
1071
1072 void set_name(const string& _name) { name = _name;}
1073 void set_id(const string& _id) { id = _id;}
1074 void clear_id() { id.clear(); }
1075
1076 virtual ~RGWSystemMetaObj() {}
1077
1078 virtual void encode(bufferlist& bl) const {
1079 ENCODE_START(1, 1, bl);
1080 ::encode(id, bl);
1081 ::encode(name, bl);
1082 ENCODE_FINISH(bl);
1083 }
1084
1085 virtual void decode(bufferlist::iterator& bl) {
1086 DECODE_START(1, bl);
1087 ::decode(id, bl);
1088 ::decode(name, bl);
1089 DECODE_FINISH(bl);
1090 }
1091
1092 void reinit_instance(CephContext *_cct, RGWRados *_store) {
1093 cct = _cct;
1094 store = _store;
1095 }
1096 int init(CephContext *_cct, RGWRados *_store, bool setup_obj = true, bool old_format = false);
1097 virtual int read_default_id(string& default_id, bool old_format = false);
1098 virtual int set_as_default(bool exclusive = false);
1099 int delete_default();
1100 virtual int create(bool exclusive = true);
1101 int delete_obj(bool old_format = false);
1102 int rename(const string& new_name);
1103 int update() { return store_info(false);}
1104 int update_name() { return store_name(false);}
1105 int read();
1106 int write(bool exclusive);
1107
1108 virtual rgw_pool get_pool(CephContext *cct) = 0;
1109 virtual const string get_default_oid(bool old_format = false) = 0;
1110 virtual const string& get_names_oid_prefix() = 0;
1111 virtual const string& get_info_oid_prefix(bool old_format = false) = 0;
1112 virtual const string& get_predefined_name(CephContext *cct) = 0;
1113
1114 void dump(Formatter *f) const;
1115 void decode_json(JSONObj *obj);
1116};
1117WRITE_CLASS_ENCODER(RGWSystemMetaObj)
1118
1119struct RGWZonePlacementInfo {
1120 rgw_pool index_pool;
1121 rgw_pool data_pool;
1122 rgw_pool data_extra_pool; /* if not set we should use data_pool */
1123 RGWBucketIndexType index_type;
1124 std::string compression_type;
1125
1126 RGWZonePlacementInfo() : index_type(RGWBIType_Normal) {}
1127
1128 void encode(bufferlist& bl) const {
1129 ENCODE_START(6, 1, bl);
1130 ::encode(index_pool.to_str(), bl);
1131 ::encode(data_pool.to_str(), bl);
1132 ::encode(data_extra_pool.to_str(), bl);
1133 ::encode((uint32_t)index_type, bl);
1134 ::encode(compression_type, bl);
1135 ENCODE_FINISH(bl);
1136 }
1137
1138 void decode(bufferlist::iterator& bl) {
1139 DECODE_START(6, bl);
1140 string index_pool_str;
1141 string data_pool_str;
1142 ::decode(index_pool_str, bl);
1143 index_pool = rgw_pool(index_pool_str);
1144 ::decode(data_pool_str, bl);
1145 data_pool = rgw_pool(data_pool_str);
1146 if (struct_v >= 4) {
1147 string data_extra_pool_str;
1148 ::decode(data_extra_pool_str, bl);
1149 data_extra_pool = rgw_pool(data_extra_pool_str);
1150 }
1151 if (struct_v >= 5) {
1152 uint32_t it;
1153 ::decode(it, bl);
1154 index_type = (RGWBucketIndexType)it;
1155 }
1156 if (struct_v >= 6) {
1157 ::decode(compression_type, bl);
1158 }
1159 DECODE_FINISH(bl);
1160 }
31f18b77 1161 const rgw_pool& get_data_extra_pool() const {
7c673cae
FG
1162 if (data_extra_pool.empty()) {
1163 return data_pool;
1164 }
1165 return data_extra_pool;
1166 }
1167 void dump(Formatter *f) const;
1168 void decode_json(JSONObj *obj);
1169};
1170WRITE_CLASS_ENCODER(RGWZonePlacementInfo)
1171
1172struct RGWZoneParams : RGWSystemMetaObj {
1173 rgw_pool domain_root;
1174 rgw_pool metadata_heap;
1175 rgw_pool control_pool;
1176 rgw_pool gc_pool;
1177 rgw_pool lc_pool;
1178 rgw_pool log_pool;
1179 rgw_pool intent_log_pool;
1180 rgw_pool usage_log_pool;
1181
1182 rgw_pool user_keys_pool;
1183 rgw_pool user_email_pool;
1184 rgw_pool user_swift_pool;
1185 rgw_pool user_uid_pool;
1186 rgw_pool roles_pool;
31f18b77 1187 rgw_pool reshard_pool;
7c673cae
FG
1188
1189 RGWAccessKey system_key;
1190
1191 map<string, RGWZonePlacementInfo> placement_pools;
1192
1193 string realm_id;
1194
31f18b77 1195 map<string, string, ltstr_nocase> tier_config;
7c673cae
FG
1196
1197 RGWZoneParams() : RGWSystemMetaObj() {}
1198 RGWZoneParams(const string& name) : RGWSystemMetaObj(name){}
1199 RGWZoneParams(const string& id, const string& name) : RGWSystemMetaObj(id, name) {}
1200 RGWZoneParams(const string& id, const string& name, const string& _realm_id)
1201 : RGWSystemMetaObj(id, name), realm_id(_realm_id) {}
1202
1203 rgw_pool get_pool(CephContext *cct);
1204 const string get_default_oid(bool old_format = false) override;
1205 const string& get_names_oid_prefix() override;
1206 const string& get_info_oid_prefix(bool old_format = false) override;
1207 const string& get_predefined_name(CephContext *cct) override;
1208
1209 int init(CephContext *_cct, RGWRados *_store, bool setup_obj = true,
1210 bool old_format = false);
1211 using RGWSystemMetaObj::init;
1212 int read_default_id(string& default_id, bool old_format = false) override;
1213 int set_as_default(bool exclusive = false) override;
1214 int create_default(bool old_format = false);
1215 int create(bool exclusive = true) override;
1216 int fix_pool_names();
1217
1218 const string& get_compression_type(const string& placement_rule) const;
1219
1220 void encode(bufferlist& bl) const override {
31f18b77 1221 ENCODE_START(10, 1, bl);
7c673cae
FG
1222 ::encode(domain_root, bl);
1223 ::encode(control_pool, bl);
1224 ::encode(gc_pool, bl);
1225 ::encode(log_pool, bl);
1226 ::encode(intent_log_pool, bl);
1227 ::encode(usage_log_pool, bl);
1228 ::encode(user_keys_pool, bl);
1229 ::encode(user_email_pool, bl);
1230 ::encode(user_swift_pool, bl);
1231 ::encode(user_uid_pool, bl);
1232 RGWSystemMetaObj::encode(bl);
1233 ::encode(system_key, bl);
1234 ::encode(placement_pools, bl);
1235 ::encode(metadata_heap, bl);
1236 ::encode(realm_id, bl);
1237 ::encode(lc_pool, bl);
1238 ::encode(tier_config, bl);
1239 ::encode(roles_pool, bl);
31f18b77 1240 ::encode(reshard_pool, bl);
7c673cae
FG
1241 ENCODE_FINISH(bl);
1242 }
1243
1244 void decode(bufferlist::iterator& bl) override {
31f18b77 1245 DECODE_START(10, bl);
7c673cae
FG
1246 ::decode(domain_root, bl);
1247 ::decode(control_pool, bl);
1248 ::decode(gc_pool, bl);
1249 ::decode(log_pool, bl);
1250 ::decode(intent_log_pool, bl);
1251 ::decode(usage_log_pool, bl);
1252 ::decode(user_keys_pool, bl);
1253 ::decode(user_email_pool, bl);
1254 ::decode(user_swift_pool, bl);
1255 ::decode(user_uid_pool, bl);
1256 if (struct_v >= 6) {
1257 RGWSystemMetaObj::decode(bl);
1258 } else if (struct_v >= 2) {
1259 ::decode(name, bl);
1260 id = name;
1261 }
1262 if (struct_v >= 3)
1263 ::decode(system_key, bl);
1264 if (struct_v >= 4)
1265 ::decode(placement_pools, bl);
1266 if (struct_v >= 5)
1267 ::decode(metadata_heap, bl);
1268 if (struct_v >= 6) {
1269 ::decode(realm_id, bl);
1270 }
1271 if (struct_v >= 7) {
1272 ::decode(lc_pool, bl);
1273 } else {
d2e6a577 1274 lc_pool = log_pool.name + ":lc";
7c673cae
FG
1275 }
1276 if (struct_v >= 8) {
1277 ::decode(tier_config, bl);
1278 }
1279 if (struct_v >= 9) {
1280 ::decode(roles_pool, bl);
1281 } else {
d2e6a577 1282 roles_pool = name + ".rgw.meta:roles";
7c673cae 1283 }
31f18b77
FG
1284 if (struct_v >= 10) {
1285 ::decode(reshard_pool, bl);
1286 } else {
c07f9fc5 1287 reshard_pool = log_pool.name + ":reshard";
31f18b77 1288 }
7c673cae
FG
1289 DECODE_FINISH(bl);
1290 }
1291 void dump(Formatter *f) const;
1292 void decode_json(JSONObj *obj);
1293 static void generate_test_instances(list<RGWZoneParams*>& o);
1294
7c673cae
FG
1295 bool get_placement(const string& placement_id, RGWZonePlacementInfo *placement) const {
1296 auto iter = placement_pools.find(placement_id);
1297 if (iter == placement_pools.end()) {
1298 return false;
1299 }
1300 *placement = iter->second;
1301 return true;
1302 }
1303
1304 /*
1305 * return data pool of the head object
1306 */
1307 bool get_head_data_pool(const string& placement_id, const rgw_obj& obj, rgw_pool *pool) const {
1308 const rgw_data_placement_target& explicit_placement = obj.bucket.explicit_placement;
1309 if (!explicit_placement.data_pool.empty()) {
1310 if (!obj.in_extra_data) {
1311 *pool = explicit_placement.data_pool;
1312 } else {
1313 *pool = explicit_placement.get_data_extra_pool();
1314 }
1315 return true;
1316 }
1317 if (placement_id.empty()) {
1318 return false;
1319 }
1320 auto iter = placement_pools.find(placement_id);
1321 if (iter == placement_pools.end()) {
1322 return false;
1323 }
1324 if (!obj.in_extra_data) {
1325 *pool = iter->second.data_pool;
1326 } else {
31f18b77 1327 *pool = iter->second.get_data_extra_pool();
7c673cae
FG
1328 }
1329 return true;
1330 }
1331};
1332WRITE_CLASS_ENCODER(RGWZoneParams)
1333
1334struct RGWZone {
1335 string id;
1336 string name;
1337 list<string> endpoints;
1338 bool log_meta;
1339 bool log_data;
1340 bool read_only;
1341 string tier_type;
1342
1343/**
1344 * Represents the number of shards for the bucket index object, a value of zero
1345 * indicates there is no sharding. By default (no sharding, the name of the object
1346 * is '.dir.{marker}', with sharding, the name is '.dir.{marker}.{sharding_id}',
1347 * sharding_id is zero-based value. It is not recommended to set a too large value
1348 * (e.g. thousand) as it increases the cost for bucket listing.
1349 */
1350 uint32_t bucket_index_max_shards;
1351
1352 bool sync_from_all;
1353 set<string> sync_from; /* list of zones to sync from */
1354
1355 RGWZone() : log_meta(false), log_data(false), read_only(false), bucket_index_max_shards(0),
1356 sync_from_all(true) {}
1357
1358 void encode(bufferlist& bl) const {
1359 ENCODE_START(6, 1, bl);
1360 ::encode(name, bl);
1361 ::encode(endpoints, bl);
1362 ::encode(log_meta, bl);
1363 ::encode(log_data, bl);
1364 ::encode(bucket_index_max_shards, bl);
1365 ::encode(id, bl);
1366 ::encode(read_only, bl);
1367 ::encode(tier_type, bl);
1368 ::encode(sync_from_all, bl);
1369 ::encode(sync_from, bl);
1370 ENCODE_FINISH(bl);
1371 }
1372
1373 void decode(bufferlist::iterator& bl) {
1374 DECODE_START(6, bl);
1375 ::decode(name, bl);
1376 if (struct_v < 4) {
1377 id = name;
1378 }
1379 ::decode(endpoints, bl);
1380 if (struct_v >= 2) {
1381 ::decode(log_meta, bl);
1382 ::decode(log_data, bl);
1383 }
1384 if (struct_v >= 3) {
1385 ::decode(bucket_index_max_shards, bl);
1386 }
1387 if (struct_v >= 4) {
1388 ::decode(id, bl);
1389 ::decode(read_only, bl);
1390 }
1391 if (struct_v >= 5) {
1392 ::decode(tier_type, bl);
1393 }
1394 if (struct_v >= 6) {
1395 ::decode(sync_from_all, bl);
1396 ::decode(sync_from, bl);
1397 }
1398 DECODE_FINISH(bl);
1399 }
1400 void dump(Formatter *f) const;
1401 void decode_json(JSONObj *obj);
1402 static void generate_test_instances(list<RGWZone*>& o);
1403
1404 bool is_read_only() { return read_only; }
1405
28e407b8 1406 bool syncs_from(const string& zone_id) const {
7c673cae
FG
1407 return (sync_from_all || sync_from.find(zone_id) != sync_from.end());
1408 }
1409};
1410WRITE_CLASS_ENCODER(RGWZone)
1411
1412struct RGWDefaultZoneGroupInfo {
1413 string default_zonegroup;
1414
1415 void encode(bufferlist& bl) const {
1416 ENCODE_START(1, 1, bl);
1417 ::encode(default_zonegroup, bl);
1418 ENCODE_FINISH(bl);
1419 }
1420
1421 void decode(bufferlist::iterator& bl) {
1422 DECODE_START(1, bl);
1423 ::decode(default_zonegroup, bl);
1424 DECODE_FINISH(bl);
1425 }
1426 void dump(Formatter *f) const;
1427 void decode_json(JSONObj *obj);
1428 //todo: implement ceph-dencoder
1429};
1430WRITE_CLASS_ENCODER(RGWDefaultZoneGroupInfo)
1431
1432struct RGWZoneGroupPlacementTarget {
1433 string name;
1434 set<string> tags;
1435
c07f9fc5 1436 bool user_permitted(list<string>& user_tags) const {
7c673cae
FG
1437 if (tags.empty()) {
1438 return true;
1439 }
1440 for (auto& rule : user_tags) {
1441 if (tags.find(rule) != tags.end()) {
1442 return true;
1443 }
1444 }
1445 return false;
1446 }
1447
1448 void encode(bufferlist& bl) const {
1449 ENCODE_START(1, 1, bl);
1450 ::encode(name, bl);
1451 ::encode(tags, bl);
1452 ENCODE_FINISH(bl);
1453 }
1454
1455 void decode(bufferlist::iterator& bl) {
1456 DECODE_START(1, bl);
1457 ::decode(name, bl);
1458 ::decode(tags, bl);
1459 DECODE_FINISH(bl);
1460 }
1461 void dump(Formatter *f) const;
1462 void decode_json(JSONObj *obj);
1463};
1464WRITE_CLASS_ENCODER(RGWZoneGroupPlacementTarget)
1465
1466
1467struct RGWZoneGroup : public RGWSystemMetaObj {
1468 string api_name;
1469 list<string> endpoints;
1470 bool is_master;
1471
1472 string master_zone;
1473 map<string, RGWZone> zones;
1474
1475 map<string, RGWZoneGroupPlacementTarget> placement_targets;
1476 string default_placement;
1477
1478 list<string> hostnames;
1479 list<string> hostnames_s3website;
1480 // TODO: Maybe convert hostnames to a map<string,list<string>> for
1481 // endpoint_type->hostnames
1482/*
148320:05 < _robbat21irssi> maybe I do someting like: if (hostname_map.empty()) { populate all map keys from hostnames; };
148420:05 < _robbat21irssi> but that's a later compatability migration planning bit
148520:06 < yehudasa> more like if (!hostnames.empty()) {
148620:06 < yehudasa> for (list<string>::iterator iter = hostnames.begin(); iter != hostnames.end(); ++iter) {
148720:06 < yehudasa> hostname_map["s3"].append(iter->second);
148820:07 < yehudasa> hostname_map["s3website"].append(iter->second);
148920:07 < yehudasa> s/append/push_back/g
149020:08 < _robbat21irssi> inner loop over APIs
149120:08 < yehudasa> yeah, probably
149220:08 < _robbat21irssi> s3, s3website, swift, swith_auth, swift_website
1493*/
1494 map<string, list<string> > api_hostname_map;
1495 map<string, list<string> > api_endpoints_map;
1496
1497 string realm_id;
1498
1499 RGWZoneGroup(): is_master(false){}
1500 RGWZoneGroup(const std::string &id, const std::string &name):RGWSystemMetaObj(id, name) {}
1501 RGWZoneGroup(const std::string &_name):RGWSystemMetaObj(_name) {}
1502 RGWZoneGroup(const std::string &_name, bool _is_master, CephContext *cct, RGWRados* store,
1503 const string& _realm_id, const list<string>& _endpoints)
1504 : RGWSystemMetaObj(_name, cct , store), endpoints(_endpoints), is_master(_is_master),
1505 realm_id(_realm_id) {}
1506
1507 bool is_master_zonegroup() const { return is_master;}
1508 void update_master(bool _is_master) {
1509 is_master = _is_master;
1510 post_process_params();
1511 }
1512 void post_process_params();
1513
1514 void encode(bufferlist& bl) const override {
1515 ENCODE_START(4, 1, bl);
1516 ::encode(name, bl);
1517 ::encode(api_name, bl);
1518 ::encode(is_master, bl);
1519 ::encode(endpoints, bl);
1520 ::encode(master_zone, bl);
1521 ::encode(zones, bl);
1522 ::encode(placement_targets, bl);
1523 ::encode(default_placement, bl);
1524 ::encode(hostnames, bl);
1525 ::encode(hostnames_s3website, bl);
1526 RGWSystemMetaObj::encode(bl);
1527 ::encode(realm_id, bl);
1528 ENCODE_FINISH(bl);
1529 }
1530
1531 void decode(bufferlist::iterator& bl) override {
1532 DECODE_START(4, bl);
1533 ::decode(name, bl);
1534 ::decode(api_name, bl);
1535 ::decode(is_master, bl);
1536 ::decode(endpoints, bl);
1537 ::decode(master_zone, bl);
1538 ::decode(zones, bl);
1539 ::decode(placement_targets, bl);
1540 ::decode(default_placement, bl);
1541 if (struct_v >= 2) {
1542 ::decode(hostnames, bl);
1543 }
1544 if (struct_v >= 3) {
1545 ::decode(hostnames_s3website, bl);
1546 }
1547 if (struct_v >= 4) {
1548 RGWSystemMetaObj::decode(bl);
1549 ::decode(realm_id, bl);
1550 } else {
1551 id = name;
1552 }
1553 DECODE_FINISH(bl);
1554 }
1555
1556 int read_default_id(string& default_id, bool old_format = false) override;
1557 int set_as_default(bool exclusive = false) override;
1558 int create_default(bool old_format = false);
1559 int equals(const string& other_zonegroup) const;
1560 int add_zone(const RGWZoneParams& zone_params, bool *is_master, bool *read_only,
1561 const list<string>& endpoints, const string *ptier_type,
1562 bool *psync_from_all, list<string>& sync_from, list<string>& sync_from_rm);
1563 int remove_zone(const std::string& zone_id);
1564 int rename_zone(const RGWZoneParams& zone_params);
1565 rgw_pool get_pool(CephContext *cct);
1566 const string get_default_oid(bool old_region_format = false) override;
1567 const string& get_info_oid_prefix(bool old_region_format = false) override;
1568 const string& get_names_oid_prefix() override;
1569 const string& get_predefined_name(CephContext *cct) override;
1570
1571 void dump(Formatter *f) const;
1572 void decode_json(JSONObj *obj);
1573 static void generate_test_instances(list<RGWZoneGroup*>& o);
1574};
1575WRITE_CLASS_ENCODER(RGWZoneGroup)
1576
1577struct RGWPeriodMap
1578{
1579 string id;
1580 map<string, RGWZoneGroup> zonegroups;
1581 map<string, RGWZoneGroup> zonegroups_by_api;
1582 map<string, uint32_t> short_zone_ids;
1583
1584 string master_zonegroup;
1585
1586 void encode(bufferlist& bl) const;
1587 void decode(bufferlist::iterator& bl);
1588
1589 int update(const RGWZoneGroup& zonegroup, CephContext *cct);
1590
1591 void dump(Formatter *f) const;
1592 void decode_json(JSONObj *obj);
1593
1594 void reset() {
1595 zonegroups.clear();
1596 zonegroups_by_api.clear();
1597 master_zonegroup.clear();
1598 }
1599
1600 uint32_t get_zone_short_id(const string& zone_id) const;
1601};
1602WRITE_CLASS_ENCODER(RGWPeriodMap)
1603
1604struct RGWPeriodConfig
1605{
1606 RGWQuotaInfo bucket_quota;
1607 RGWQuotaInfo user_quota;
1608
1609 void encode(bufferlist& bl) const {
1610 ENCODE_START(1, 1, bl);
1611 ::encode(bucket_quota, bl);
1612 ::encode(user_quota, bl);
1613 ENCODE_FINISH(bl);
1614 }
1615
1616 void decode(bufferlist::iterator& bl) {
1617 DECODE_START(1, bl);
1618 ::decode(bucket_quota, bl);
1619 ::decode(user_quota, bl);
1620 DECODE_FINISH(bl);
1621 }
1622
1623 void dump(Formatter *f) const;
1624 void decode_json(JSONObj *obj);
1625
1626 // the period config must be stored in a local object outside of the period,
1627 // so that it can be used in a default configuration where no realm/period
1628 // exists
1629 int read(RGWRados *store, const std::string& realm_id);
1630 int write(RGWRados *store, const std::string& realm_id);
1631
1632 static std::string get_oid(const std::string& realm_id);
1633 static rgw_pool get_pool(CephContext *cct);
1634};
1635WRITE_CLASS_ENCODER(RGWPeriodConfig)
1636
1637/* for backward comaptability */
1638struct RGWRegionMap {
1639
1640 map<string, RGWZoneGroup> regions;
1641
1642 string master_region;
1643
1644 RGWQuotaInfo bucket_quota;
1645 RGWQuotaInfo user_quota;
1646
1647 void encode(bufferlist& bl) const;
1648 void decode(bufferlist::iterator& bl);
1649
1650 void dump(Formatter *f) const;
1651 void decode_json(JSONObj *obj);
1652};
1653WRITE_CLASS_ENCODER(RGWRegionMap)
1654
1655struct RGWZoneGroupMap {
1656
1657 map<string, RGWZoneGroup> zonegroups;
1658 map<string, RGWZoneGroup> zonegroups_by_api;
1659
1660 string master_zonegroup;
1661
1662 RGWQuotaInfo bucket_quota;
1663 RGWQuotaInfo user_quota;
1664
1665 /* constract the map */
1666 int read(CephContext *cct, RGWRados *store);
1667
1668 void encode(bufferlist& bl) const;
1669 void decode(bufferlist::iterator& bl);
1670
1671 void dump(Formatter *f) const;
1672 void decode_json(JSONObj *obj);
1673};
1674WRITE_CLASS_ENCODER(RGWZoneGroupMap)
1675
1676class RGWRealm;
1677
1678struct objexp_hint_entry {
1679 string tenant;
1680 string bucket_name;
1681 string bucket_id;
1682 rgw_obj_key obj_key;
1683 ceph::real_time exp_time;
1684
1685 void encode(bufferlist& bl) const {
1686 ENCODE_START(2, 1, bl);
1687 ::encode(bucket_name, bl);
1688 ::encode(bucket_id, bl);
1689 ::encode(obj_key, bl);
1690 ::encode(exp_time, bl);
1691 ::encode(tenant, bl);
1692 ENCODE_FINISH(bl);
1693 }
1694
1695 void decode(bufferlist::iterator& bl) {
1696 // XXX Do we want DECODE_START_LEGACY_COMPAT_LEN(2, 1, 1, bl); ?
1697 DECODE_START(2, bl);
1698 ::decode(bucket_name, bl);
1699 ::decode(bucket_id, bl);
1700 ::decode(obj_key, bl);
1701 ::decode(exp_time, bl);
1702 if (struct_v >= 2) {
1703 ::decode(tenant, bl);
1704 } else {
1705 tenant.clear();
1706 }
1707 DECODE_FINISH(bl);
1708 }
1709};
1710WRITE_CLASS_ENCODER(objexp_hint_entry)
1711
1712class RGWPeriod;
1713
1714class RGWRealm : public RGWSystemMetaObj
1715{
1716 string current_period;
1717 epoch_t epoch{0}; //< realm epoch, incremented for each new period
1718
1719 int create_control(bool exclusive);
1720 int delete_control();
1721public:
1722 RGWRealm() {}
1723 RGWRealm(const string& _id, const string& _name = "") : RGWSystemMetaObj(_id, _name) {}
1724 RGWRealm(CephContext *_cct, RGWRados *_store): RGWSystemMetaObj(_cct, _store) {}
1725 RGWRealm(const string& _name, CephContext *_cct, RGWRados *_store): RGWSystemMetaObj(_name, _cct, _store){}
1726
1727 void encode(bufferlist& bl) const override {
1728 ENCODE_START(1, 1, bl);
1729 RGWSystemMetaObj::encode(bl);
1730 ::encode(current_period, bl);
1731 ::encode(epoch, bl);
1732 ENCODE_FINISH(bl);
1733 }
1734
1735 void decode(bufferlist::iterator& bl) override {
1736 DECODE_START(1, bl);
1737 RGWSystemMetaObj::decode(bl);
1738 ::decode(current_period, bl);
1739 ::decode(epoch, bl);
1740 DECODE_FINISH(bl);
1741 }
1742
1743 int create(bool exclusive = true) override;
1744 int delete_obj();
1745 rgw_pool get_pool(CephContext *cct);
1746 const string get_default_oid(bool old_format = false) override;
1747 const string& get_names_oid_prefix() override;
1748 const string& get_info_oid_prefix(bool old_format = false) override;
1749 const string& get_predefined_name(CephContext *cct) override;
1750
1751 using RGWSystemMetaObj::read_id; // expose as public for radosgw-admin
1752
1753 void dump(Formatter *f) const;
1754 void decode_json(JSONObj *obj);
1755
1756 const string& get_current_period() const {
1757 return current_period;
1758 }
1759 int set_current_period(RGWPeriod& period);
1760 void clear_current_period_and_epoch() {
1761 current_period.clear();
1762 epoch = 0;
1763 }
1764 epoch_t get_epoch() const { return epoch; }
1765
1766 string get_control_oid();
1767 /// send a notify on the realm control object
1768 int notify_zone(bufferlist& bl);
1769 /// notify the zone of a new period
1770 int notify_new_period(const RGWPeriod& period);
1771};
1772WRITE_CLASS_ENCODER(RGWRealm)
1773
1774struct RGWPeriodLatestEpochInfo {
1775 epoch_t epoch;
1776
1777 void encode(bufferlist& bl) const {
1778 ENCODE_START(1, 1, bl);
1779 ::encode(epoch, bl);
1780 ENCODE_FINISH(bl);
1781 }
1782
1783 void decode(bufferlist::iterator& bl) {
1784 DECODE_START(1, bl);
1785 ::decode(epoch, bl);
1786 DECODE_FINISH(bl);
1787 }
1788
1789 void dump(Formatter *f) const;
1790 void decode_json(JSONObj *obj);
1791};
1792WRITE_CLASS_ENCODER(RGWPeriodLatestEpochInfo)
1793
1794class RGWPeriod
1795{
1796 string id;
1797 epoch_t epoch;
1798 string predecessor_uuid;
1799 std::vector<std::string> sync_status;
1800 RGWPeriodMap period_map;
1801 RGWPeriodConfig period_config;
1802 string master_zonegroup;
1803 string master_zone;
1804
1805 string realm_id;
1806 string realm_name;
1807 epoch_t realm_epoch{1}; //< realm epoch when period was made current
1808
1809 CephContext *cct;
1810 RGWRados *store;
1811
1812 int read_info();
224ce89b
WB
1813 int read_latest_epoch(RGWPeriodLatestEpochInfo& epoch_info,
1814 RGWObjVersionTracker *objv = nullptr);
7c673cae
FG
1815 int use_latest_epoch();
1816 int use_current_period();
1817
1818 const string get_period_oid();
1819 const string get_period_oid_prefix();
1820
1821 // gather the metadata sync status for each shard; only for use on master zone
1822 int update_sync_status(const RGWPeriod &current_period,
1823 std::ostream& error_stream, bool force_if_stale);
1824
1825public:
1826 RGWPeriod() : epoch(0), cct(NULL), store(NULL) {}
1827
1828 RGWPeriod(const string& period_id, epoch_t _epoch = 0)
1829 : id(period_id), epoch(_epoch),
1830 cct(NULL), store(NULL) {}
1831
1832 const string& get_id() const { return id; }
1833 epoch_t get_epoch() const { return epoch; }
1834 epoch_t get_realm_epoch() const { return realm_epoch; }
1835 const string& get_predecessor() const { return predecessor_uuid; }
1836 const string& get_master_zone() const { return master_zone; }
1837 const string& get_master_zonegroup() const { return master_zonegroup; }
1838 const string& get_realm() const { return realm_id; }
1839 const RGWPeriodMap& get_map() const { return period_map; }
1840 RGWPeriodConfig& get_config() { return period_config; }
1841 const RGWPeriodConfig& get_config() const { return period_config; }
1842 const std::vector<std::string>& get_sync_status() const { return sync_status; }
1843 rgw_pool get_pool(CephContext *cct);
1844 const string& get_latest_epoch_oid();
1845 const string& get_info_oid_prefix();
1846
1847 void set_user_quota(RGWQuotaInfo& user_quota) {
1848 period_config.user_quota = user_quota;
1849 }
1850
1851 void set_bucket_quota(RGWQuotaInfo& bucket_quota) {
1852 period_config.bucket_quota = bucket_quota;
1853 }
1854
1855 void set_id(const string& id) {
1856 this->id = id;
1857 period_map.id = id;
1858 }
1859 void set_epoch(epoch_t epoch) { this->epoch = epoch; }
1860 void set_realm_epoch(epoch_t epoch) { realm_epoch = epoch; }
1861
1862 void set_predecessor(const string& predecessor)
1863 {
1864 predecessor_uuid = predecessor;
1865 }
1866
1867 void set_realm_id(const string& _realm_id) {
1868 realm_id = _realm_id;
1869 }
1870
1871 int reflect();
1872
1873 int get_zonegroup(RGWZoneGroup& zonegroup,
1874 const string& zonegroup_id);
1875
3efd9988 1876 bool is_single_zonegroup() const
224ce89b
WB
1877 {
1878 return (period_map.zonegroups.size() == 1);
1879 }
1880
1881 /*
1882 returns true if there are several zone groups with a least one zone
1883 */
1884 bool is_multi_zonegroups_with_zones()
1885 {
1886 int count = 0;
1887 for (const auto& zg: period_map.zonegroups) {
1888 if (zg.second.zones.size() > 0) {
1889 if (count++ > 0) {
1890 return true;
1891 }
1892 }
1893 }
1894 return false;
1895 }
7c673cae
FG
1896
1897 int get_latest_epoch(epoch_t& epoch);
224ce89b
WB
1898 int set_latest_epoch(epoch_t epoch, bool exclusive = false,
1899 RGWObjVersionTracker *objv = nullptr);
1900 // update latest_epoch if the given epoch is higher, else return -EEXIST
1901 int update_latest_epoch(epoch_t epoch);
7c673cae
FG
1902
1903 int init(CephContext *_cct, RGWRados *_store, const string &period_realm_id, const string &period_realm_name = "",
1904 bool setup_obj = true);
1905 int init(CephContext *_cct, RGWRados *_store, bool setup_obj = true);
7c673cae
FG
1906
1907 int create(bool exclusive = true);
1908 int delete_obj();
1909 int store_info(bool exclusive);
1910 int add_zonegroup(const RGWZoneGroup& zonegroup);
1911
1912 void fork();
1913 int update();
1914
1915 // commit a staging period; only for use on master zone
1916 int commit(RGWRealm& realm, const RGWPeriod &current_period,
1917 std::ostream& error_stream, bool force_if_stale = false);
1918
1919 void encode(bufferlist& bl) const {
1920 ENCODE_START(1, 1, bl);
1921 ::encode(id, bl);
1922 ::encode(epoch, bl);
1923 ::encode(realm_epoch, bl);
1924 ::encode(predecessor_uuid, bl);
1925 ::encode(sync_status, bl);
1926 ::encode(period_map, bl);
1927 ::encode(master_zone, bl);
1928 ::encode(master_zonegroup, bl);
1929 ::encode(period_config, bl);
1930 ::encode(realm_id, bl);
1931 ::encode(realm_name, bl);
1932 ENCODE_FINISH(bl);
1933 }
1934
1935 void decode(bufferlist::iterator& bl) {
1936 DECODE_START(1, bl);
1937 ::decode(id, bl);
1938 ::decode(epoch, bl);
1939 ::decode(realm_epoch, bl);
1940 ::decode(predecessor_uuid, bl);
1941 ::decode(sync_status, bl);
1942 ::decode(period_map, bl);
1943 ::decode(master_zone, bl);
1944 ::decode(master_zonegroup, bl);
1945 ::decode(period_config, bl);
1946 ::decode(realm_id, bl);
1947 ::decode(realm_name, bl);
1948 DECODE_FINISH(bl);
1949 }
1950 void dump(Formatter *f) const;
1951 void decode_json(JSONObj *obj);
1952
1953 static string get_staging_id(const string& realm_id) {
1954 return realm_id + ":staging";
1955 }
1956};
1957WRITE_CLASS_ENCODER(RGWPeriod)
1958
1959class RGWDataChangesLog;
1960class RGWMetaSyncStatusManager;
1961class RGWDataSyncStatusManager;
1962class RGWReplicaLogger;
1963class RGWCoroutinesManagerRegistry;
1964
1965class RGWStateLog {
1966 RGWRados *store;
1967 int num_shards;
1968 string module_name;
1969
1970 void oid_str(int shard, string& oid);
1971 int get_shard_num(const string& object);
1972 string get_oid(const string& object);
1973 int open_ioctx(librados::IoCtx& ioctx);
1974
1975 struct list_state {
1976 int cur_shard;
1977 int max_shard;
1978 string marker;
1979 string client_id;
1980 string op_id;
1981 string object;
1982
1983 list_state() : cur_shard(0), max_shard(0) {}
1984 };
1985
1986protected:
1987 virtual bool dump_entry_internal(const cls_statelog_entry& entry, Formatter *f) {
1988 return false;
1989 }
1990
1991public:
1992 RGWStateLog(RGWRados *_store, int _num_shards, const string& _module_name) :
1993 store(_store), num_shards(_num_shards), module_name(_module_name) {}
1994 virtual ~RGWStateLog() {}
1995
1996 int store_entry(const string& client_id, const string& op_id, const string& object,
1997 uint32_t state, bufferlist *bl, uint32_t *check_state);
1998
1999 int remove_entry(const string& client_id, const string& op_id, const string& object);
2000
2001 void init_list_entries(const string& client_id, const string& op_id, const string& object,
2002 void **handle);
2003
2004 int list_entries(void *handle, int max_entries, list<cls_statelog_entry>& entries, bool *done);
2005
2006 void finish_list_entries(void *handle);
2007
2008 virtual void dump_entry(const cls_statelog_entry& entry, Formatter *f);
2009};
2010
2011/*
2012 * state transitions:
2013 *
2014 * unknown -> in-progress -> complete
2015 * -> error
2016 *
2017 * user can try setting the 'abort' state, and it can only succeed if state is
2018 * in-progress.
2019 *
2020 * state renewal cannot switch state (stays in the same state)
2021 *
2022 * rgw can switch from in-progress to complete
2023 * rgw can switch from in-progress to error
2024 *
2025 * rgw can switch from abort to cancelled
2026 *
2027 */
2028
2029class RGWOpState : public RGWStateLog {
2030protected:
2031 bool dump_entry_internal(const cls_statelog_entry& entry, Formatter *f) override;
2032public:
2033
2034 enum OpState {
2035 OPSTATE_UNKNOWN = 0,
2036 OPSTATE_IN_PROGRESS = 1,
2037 OPSTATE_COMPLETE = 2,
2038 OPSTATE_ERROR = 3,
2039 OPSTATE_ABORT = 4,
2040 OPSTATE_CANCELLED = 5,
2041 };
2042
2043 explicit RGWOpState(RGWRados *_store);
2044
2045 int state_from_str(const string& s, OpState *state);
2046 int set_state(const string& client_id, const string& op_id, const string& object, OpState state);
2047 int renew_state(const string& client_id, const string& op_id, const string& object, OpState state);
2048};
2049
2050class RGWOpStateSingleOp
2051{
2052 RGWOpState os;
2053 string client_id;
2054 string op_id;
2055 string object;
2056
2057 CephContext *cct;
2058
2059 RGWOpState::OpState cur_state;
2060 ceph::real_time last_update;
2061
2062public:
2063 RGWOpStateSingleOp(RGWRados *store, const string& cid, const string& oid, const string& obj);
2064
2065 int set_state(RGWOpState::OpState state);
2066 int renew_state();
2067};
2068
2069class RGWGetBucketStats_CB : public RefCountedObject {
2070protected:
2071 rgw_bucket bucket;
2072 map<RGWObjCategory, RGWStorageStats> *stats;
2073public:
224ce89b 2074 explicit RGWGetBucketStats_CB(const rgw_bucket& _bucket) : bucket(_bucket), stats(NULL) {}
7c673cae
FG
2075 ~RGWGetBucketStats_CB() override {}
2076 virtual void handle_response(int r) = 0;
2077 virtual void set_response(map<RGWObjCategory, RGWStorageStats> *_stats) {
2078 stats = _stats;
2079 }
2080};
2081
2082class RGWGetUserStats_CB : public RefCountedObject {
2083protected:
2084 rgw_user user;
2085 RGWStorageStats stats;
2086public:
2087 explicit RGWGetUserStats_CB(const rgw_user& _user) : user(_user) {}
2088 ~RGWGetUserStats_CB() override {}
2089 virtual void handle_response(int r) = 0;
2090 virtual void set_response(RGWStorageStats& _stats) {
2091 stats = _stats;
2092 }
2093};
2094
2095class RGWGetDirHeader_CB;
2096class RGWGetUserHeader_CB;
2097
2098struct rgw_rados_ref {
2099 rgw_pool pool;
2100 string oid;
2101 string key;
2102 librados::IoCtx ioctx;
2103};
2104
2105class RGWChainedCache {
2106public:
2107 virtual ~RGWChainedCache() {}
2108 virtual void chain_cb(const string& key, void *data) = 0;
2109 virtual void invalidate(const string& key) = 0;
2110 virtual void invalidate_all() = 0;
2111
2112 struct Entry {
2113 RGWChainedCache *cache;
2114 const string& key;
2115 void *data;
2116
2117 Entry(RGWChainedCache *_c, const string& _k, void *_d) : cache(_c), key(_k), data(_d) {}
2118 };
2119};
2120
2121template <class T, class S>
2122class RGWObjectCtxImpl {
2123 RGWRados *store;
2124 std::map<T, S> objs_state;
2125 RWLock lock;
2126
2127public:
2128 RGWObjectCtxImpl(RGWRados *_store) : store(_store), lock("RGWObjectCtxImpl") {}
2129
2130 S *get_state(const T& obj) {
2131 S *result;
2132 typename std::map<T, S>::iterator iter;
2133 lock.get_read();
2134 assert (!obj.empty());
2135 iter = objs_state.find(obj);
2136 if (iter != objs_state.end()) {
2137 result = &iter->second;
2138 lock.unlock();
2139 } else {
2140 lock.unlock();
2141 lock.get_write();
2142 result = &objs_state[obj];
2143 lock.unlock();
2144 }
2145 return result;
2146 }
2147
2148 void set_atomic(T& obj) {
2149 RWLock::WLocker wl(lock);
2150 assert (!obj.empty());
2151 objs_state[obj].is_atomic = true;
2152 }
2153 void set_prefetch_data(T& obj) {
2154 RWLock::WLocker wl(lock);
2155 assert (!obj.empty());
2156 objs_state[obj].prefetch_data = true;
2157 }
2158 void invalidate(T& obj) {
2159 RWLock::WLocker wl(lock);
2160 auto iter = objs_state.find(obj);
2161 if (iter == objs_state.end()) {
2162 return;
2163 }
2164 bool is_atomic = iter->second.is_atomic;
2165 bool prefetch_data = iter->second.prefetch_data;
2166
2167 objs_state.erase(iter);
2168
2169 if (is_atomic || prefetch_data) {
2170 auto& s = objs_state[obj];
2171 s.is_atomic = is_atomic;
2172 s.prefetch_data = prefetch_data;
2173 }
2174 }
2175};
2176
2177template<>
2178void RGWObjectCtxImpl<rgw_obj, RGWObjState>::invalidate(rgw_obj& obj);
2179
2180template<>
2181void RGWObjectCtxImpl<rgw_raw_obj, RGWRawObjState>::invalidate(rgw_raw_obj& obj);
2182
2183struct RGWObjectCtx {
2184 RGWRados *store;
2185 void *user_ctx;
2186
2187 RGWObjectCtxImpl<rgw_obj, RGWObjState> obj;
2188 RGWObjectCtxImpl<rgw_raw_obj, RGWRawObjState> raw;
2189
2190 explicit RGWObjectCtx(RGWRados *_store) : store(_store), user_ctx(NULL), obj(store), raw(store) { }
2191 RGWObjectCtx(RGWRados *_store, void *_user_ctx) : store(_store), user_ctx(_user_ctx), obj(store), raw(store) { }
2192};
2193
2194class Finisher;
2195class RGWAsyncRadosProcessor;
2196
2197template <class T>
2198class RGWChainedCacheImpl;
2199
2200struct bucket_info_entry {
2201 RGWBucketInfo info;
2202 real_time mtime;
2203 map<string, bufferlist> attrs;
2204};
2205
2206struct tombstone_entry {
2207 ceph::real_time mtime;
2208 uint32_t zone_short_id;
2209 uint64_t pg_ver;
2210
2211 tombstone_entry() = default;
2212 tombstone_entry(const RGWObjState& state)
2213 : mtime(state.mtime), zone_short_id(state.zone_short_id),
2214 pg_ver(state.pg_ver) {}
2215};
2216
31f18b77
FG
2217class RGWIndexCompletionManager;
2218
3a9019d9 2219class RGWRados : public AdminSocketHook
7c673cae
FG
2220{
2221 friend class RGWGC;
2222 friend class RGWMetaNotifier;
2223 friend class RGWDataNotifier;
2224 friend class RGWLC;
2225 friend class RGWObjectExpirer;
2226 friend class RGWMetaSyncProcessorThread;
2227 friend class RGWDataSyncProcessorThread;
2228 friend class RGWStateLog;
2229 friend class RGWReplicaLogger;
31f18b77
FG
2230 friend class RGWReshard;
2231 friend class RGWBucketReshard;
f64942e4 2232 friend class RGWBucketReshardLock;
31f18b77 2233 friend class BucketIndexLockGuard;
d2e6a577 2234 friend class RGWCompleteMultipart;
7c673cae 2235
3a9019d9
FG
2236 static const char* admin_commands[4][3];
2237
7c673cae
FG
2238 /** Open the pool used as root for this gateway */
2239 int open_root_pool_ctx();
2240 int open_gc_pool_ctx();
2241 int open_lc_pool_ctx();
2242 int open_objexp_pool_ctx();
31f18b77 2243 int open_reshard_pool_ctx();
7c673cae
FG
2244
2245 int open_pool_ctx(const rgw_pool& pool, librados::IoCtx& io_ctx);
2246 int open_bucket_index_ctx(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx);
2247 int open_bucket_index(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx, string& bucket_oid);
2248 int open_bucket_index_base(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
2249 string& bucket_oid_base);
2250 int open_bucket_index_shard(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
2251 const string& obj_key, string *bucket_obj, int *shard_id);
2252 int open_bucket_index_shard(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
2253 int shard_id, string *bucket_obj);
2254 int open_bucket_index(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
2255 map<int, string>& bucket_objs, int shard_id = -1, map<int, string> *bucket_instance_ids = NULL);
2256 template<typename T>
2257 int open_bucket_index(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
2258 map<int, string>& oids, map<int, T>& bucket_objs,
2259 int shard_id = -1, map<int, string> *bucket_instance_ids = NULL);
2260 void build_bucket_index_marker(const string& shard_id_str, const string& shard_marker,
2261 string *marker);
2262
2263 void get_bucket_instance_ids(const RGWBucketInfo& bucket_info, int shard_id, map<int, string> *result);
2264
2265 std::atomic<int64_t> max_req_id = { 0 };
2266 Mutex lock;
2267 Mutex watchers_lock;
2268 SafeTimer *timer;
2269
2270 RGWGC *gc;
2271 RGWLC *lc;
2272 RGWObjectExpirer *obj_expirer;
2273 bool use_gc_thread;
2274 bool use_lc_thread;
2275 bool quota_threads;
2276 bool run_sync_thread;
31f18b77 2277 bool run_reshard_thread;
7c673cae
FG
2278
2279 RGWAsyncRadosProcessor* async_rados;
2280
2281 RGWMetaNotifier *meta_notifier;
2282 RGWDataNotifier *data_notifier;
2283 RGWMetaSyncProcessorThread *meta_sync_processor_thread;
2284 map<string, RGWDataSyncProcessorThread *> data_sync_processor_threads;
2285
b32b8144 2286 boost::optional<rgw::BucketTrimManager> bucket_trim;
7c673cae
FG
2287 RGWSyncLogTrimThread *sync_log_trimmer{nullptr};
2288
2289 Mutex meta_sync_thread_lock;
2290 Mutex data_sync_thread_lock;
2291
2292 int num_watchers;
2293 RGWWatcher **watchers;
2294 std::set<int> watchers_set;
2295 librados::IoCtx root_pool_ctx; // .rgw
2296 librados::IoCtx control_pool_ctx; // .rgw.control
2297 bool watch_initialized;
2298
2299 friend class RGWWatcher;
2300
2301 Mutex bucket_id_lock;
2302
2303 // This field represents the number of bucket index object shards
2304 uint32_t bucket_index_max_shards;
2305
2306 int get_obj_head_ioctx(const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::IoCtx *ioctx);
2307 int get_obj_head_ref(const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_rados_ref *ref);
224ce89b 2308 int get_system_obj_ref(const rgw_raw_obj& obj, rgw_rados_ref *ref);
7c673cae
FG
2309 uint64_t max_bucket_id;
2310
2311 int get_olh_target_state(RGWObjectCtx& rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj,
2312 RGWObjState *olh_state, RGWObjState **target_state);
2313 int get_system_obj_state_impl(RGWObjectCtx *rctx, rgw_raw_obj& obj, RGWRawObjState **state, RGWObjVersionTracker *objv_tracker);
2314 int get_obj_state_impl(RGWObjectCtx *rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWObjState **state,
2315 bool follow_olh, bool assume_noent = false);
2316 int append_atomic_test(RGWObjectCtx *rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj,
2317 librados::ObjectOperation& op, RGWObjState **state);
2318
2319 int update_placement_map();
2320 int store_bucket_info(RGWBucketInfo& info, map<string, bufferlist> *pattrs, RGWObjVersionTracker *objv_tracker, bool exclusive);
2321
2322 void remove_rgw_head_obj(librados::ObjectWriteOperation& op);
2323 void cls_obj_check_prefix_exist(librados::ObjectOperation& op, const string& prefix, bool fail_if_exist);
2324 void cls_obj_check_mtime(librados::ObjectOperation& op, const real_time& mtime, bool high_precision_time, RGWCheckMTimeType type);
2325protected:
2326 CephContext *cct;
2327
2328 std::vector<librados::Rados> rados;
2329 uint32_t next_rados_handle;
2330 RWLock handle_lock;
2331 std::map<pthread_t, int> rados_map;
2332
2333 using RGWChainedCacheImpl_bucket_info_entry = RGWChainedCacheImpl<bucket_info_entry>;
2334 RGWChainedCacheImpl_bucket_info_entry *binfo_cache;
2335
2336 using tombstone_cache_t = lru_map<rgw_obj, tombstone_entry>;
2337 tombstone_cache_t *obj_tombstone_cache;
2338
2339 librados::IoCtx gc_pool_ctx; // .rgw.gc
2340 librados::IoCtx lc_pool_ctx; // .rgw.lc
2341 librados::IoCtx objexp_pool_ctx;
31f18b77 2342 librados::IoCtx reshard_pool_ctx;
7c673cae
FG
2343
2344 bool pools_initialized;
2345
7c673cae
FG
2346 string trans_id_suffix;
2347
2348 RGWQuotaHandler *quota_handler;
2349
2350 Finisher *finisher;
31f18b77 2351
7c673cae
FG
2352 RGWCoroutinesManagerRegistry *cr_registry;
2353
2354 RGWSyncModulesManager *sync_modules_manager{nullptr};
2355 RGWSyncModuleInstanceRef sync_module;
2356 bool writeable_zone{false};
2357
2358 RGWZoneGroup zonegroup;
2359 RGWZone zone_public_config; /* external zone params, e.g., entrypoints, log flags, etc. */
2360 RGWZoneParams zone_params; /* internal zone params, e.g., rados pools */
2361 uint32_t zone_short_id;
2362
2363 RGWPeriod current_period;
31f18b77
FG
2364
2365 RGWIndexCompletionManager *index_completion_manager{nullptr};
7c673cae
FG
2366public:
2367 RGWRados() : lock("rados_timer_lock"), watchers_lock("watchers_lock"), timer(NULL),
2368 gc(NULL), lc(NULL), obj_expirer(NULL), use_gc_thread(false), use_lc_thread(false), quota_threads(false),
31f18b77 2369 run_sync_thread(false), run_reshard_thread(false), async_rados(nullptr), meta_notifier(NULL),
7c673cae
FG
2370 data_notifier(NULL), meta_sync_processor_thread(NULL),
2371 meta_sync_thread_lock("meta_sync_thread_lock"), data_sync_thread_lock("data_sync_thread_lock"),
2372 num_watchers(0), watchers(NULL),
2373 watch_initialized(false),
2374 bucket_id_lock("rados_bucket_id"),
2375 bucket_index_max_shards(0),
2376 max_bucket_id(0), cct(NULL),
2377 next_rados_handle(0),
2378 handle_lock("rados_handle_lock"),
2379 binfo_cache(NULL), obj_tombstone_cache(nullptr),
2380 pools_initialized(false),
2381 quota_handler(NULL),
2382 finisher(NULL),
2383 cr_registry(NULL),
2384 zone_short_id(0),
2385 rest_master_conn(NULL),
31f18b77 2386 meta_mgr(NULL), data_log(NULL), reshard(NULL) {}
7c673cae
FG
2387
2388 uint64_t get_new_req_id() {
2389 return ++max_req_id;
2390 }
2391
2392 librados::IoCtx* get_lc_pool_ctx() {
2393 return &lc_pool_ctx;
2394 }
2395 void set_context(CephContext *_cct) {
2396 cct = _cct;
2397 }
2398
2399 /**
2400 * AmazonS3 errors contain a HostId string, but is an opaque base64 blob; we
2401 * try to be more transparent. This has a wrapper so we can update it when zonegroup/zone are changed.
2402 */
2403 void init_host_id() {
2404 /* uint64_t needs 16, two '-' separators and a trailing null */
2405 const string& zone_name = get_zone().name;
2406 const string& zonegroup_name = zonegroup.get_name();
2407 char charbuf[16 + zone_name.size() + zonegroup_name.size() + 2 + 1];
2408 snprintf(charbuf, sizeof(charbuf), "%llx-%s-%s", (unsigned long long)instance_id(), zone_name.c_str(), zonegroup_name.c_str());
2409 string s(charbuf);
2410 host_id = s;
2411 }
2412
2413 string host_id;
2414
2415 RGWRealm realm;
2416
2417 RGWRESTConn *rest_master_conn;
2418 map<string, RGWRESTConn *> zone_conn_map;
2419 map<string, RGWRESTConn *> zone_data_sync_from_map;
2420 map<string, RGWRESTConn *> zone_data_notify_to_map;
2421 map<string, RGWRESTConn *> zonegroup_conn_map;
2422
2423 map<string, string> zone_id_by_name;
2424 map<string, RGWZone> zone_by_id;
2425
2426 RGWRESTConn *get_zone_conn_by_id(const string& id) {
2427 auto citer = zone_conn_map.find(id);
2428 if (citer == zone_conn_map.end()) {
2429 return NULL;
2430 }
2431
2432 return citer->second;
2433 }
2434
2435 RGWRESTConn *get_zone_conn_by_name(const string& name) {
2436 auto i = zone_id_by_name.find(name);
2437 if (i == zone_id_by_name.end()) {
2438 return NULL;
2439 }
2440
2441 return get_zone_conn_by_id(i->second);
2442 }
2443
2444 bool find_zone_id_by_name(const string& name, string *id) {
2445 auto i = zone_id_by_name.find(name);
2446 if (i == zone_id_by_name.end()) {
2447 return false;
2448 }
2449 *id = i->second;
2450 return true;
2451 }
2452
2453 int get_zonegroup(const string& id, RGWZoneGroup& zonegroup) {
2454 int ret = 0;
2455 if (id == get_zonegroup().get_id()) {
2456 zonegroup = get_zonegroup();
2457 } else if (!current_period.get_id().empty()) {
2458 ret = current_period.get_zonegroup(zonegroup, id);
2459 }
2460 return ret;
2461 }
2462
2463 RGWRealm& get_realm() {
2464 return realm;
2465 }
2466
2467 RGWZoneParams& get_zone_params() { return zone_params; }
2468 RGWZoneGroup& get_zonegroup() {
2469 return zonegroup;
2470 }
2471 RGWZone& get_zone() {
2472 return zone_public_config;
2473 }
2474
2475 bool zone_is_writeable() {
2476 return writeable_zone && !get_zone().is_read_only();
2477 }
2478
2479 uint32_t get_zone_short_id() const {
2480 return zone_short_id;
2481 }
2482
2483 bool zone_syncs_from(RGWZone& target_zone, RGWZone& source_zone);
2484
2485 const RGWQuotaInfo& get_bucket_quota() {
2486 return current_period.get_config().bucket_quota;
2487 }
2488
2489 const RGWQuotaInfo& get_user_quota() {
2490 return current_period.get_config().user_quota;
2491 }
2492
2493 const string& get_current_period_id() {
2494 return current_period.get_id();
2495 }
31f18b77
FG
2496
2497 bool has_zonegroup_api(const std::string& api) const {
2498 if (!current_period.get_id().empty()) {
2499 const auto& zonegroups_by_api = current_period.get_map().zonegroups_by_api;
2500 if (zonegroups_by_api.find(api) != zonegroups_by_api.end())
2501 return true;
2502 }
2503 return false;
2504 }
2505
7c673cae
FG
2506 // pulls missing periods for period_history
2507 std::unique_ptr<RGWPeriodPuller> period_puller;
2508 // maintains a connected history of periods
2509 std::unique_ptr<RGWPeriodHistory> period_history;
2510
2511 RGWAsyncRadosProcessor* get_async_rados() const { return async_rados; };
2512
2513 RGWMetadataManager *meta_mgr;
2514
2515 RGWDataChangesLog *data_log;
2516
31f18b77
FG
2517 RGWReshard *reshard;
2518 std::shared_ptr<RGWReshardWait> reshard_wait;
2519
7c673cae
FG
2520 virtual ~RGWRados() = default;
2521
2522 tombstone_cache_t *get_tombstone_cache() {
2523 return obj_tombstone_cache;
2524 }
2525
2526 RGWSyncModulesManager *get_sync_modules_manager() {
2527 return sync_modules_manager;
2528 }
2529 const RGWSyncModuleInstanceRef& get_sync_module() {
2530 return sync_module;
2531 }
2532
2533 int get_required_alignment(const rgw_pool& pool, uint64_t *alignment);
2534 int get_max_chunk_size(const rgw_pool& pool, uint64_t *max_chunk_size);
2535 int get_max_chunk_size(const string& placement_rule, const rgw_obj& obj, uint64_t *max_chunk_size);
2536
2537 uint32_t get_max_bucket_shards() {
31f18b77 2538 return rgw_shards_max();
7c673cae
FG
2539 }
2540
181888fb 2541
224ce89b 2542 int get_raw_obj_ref(const rgw_raw_obj& obj, rgw_rados_ref *ref);
7c673cae 2543
181888fb
FG
2544 int list_raw_objects_init(const rgw_pool& pool, const string& marker, RGWListRawObjsCtx *ctx);
2545 int list_raw_objects_next(const string& prefix_filter, int max,
2546 RGWListRawObjsCtx& ctx, list<string>& oids,
2547 bool *is_truncated);
7c673cae
FG
2548 int list_raw_objects(const rgw_pool& pool, const string& prefix_filter, int max,
2549 RGWListRawObjsCtx& ctx, list<string>& oids,
2550 bool *is_truncated);
181888fb 2551 string list_raw_objs_get_cursor(RGWListRawObjsCtx& ctx);
7c673cae
FG
2552
2553 int list_raw_prefixed_objs(const rgw_pool& pool, const string& prefix, list<string>& result);
2554 int list_zonegroups(list<string>& zonegroups);
2555 int list_regions(list<string>& regions);
2556 int list_zones(list<string>& zones);
2557 int list_realms(list<string>& realms);
2558 int list_periods(list<string>& periods);
2559 int list_periods(const string& current_period, list<string>& periods);
2560 void tick();
2561
2562 CephContext *ctx() { return cct; }
2563 /** do all necessary setup of the storage device */
31f18b77 2564 int initialize(CephContext *_cct, bool _use_gc_thread, bool _use_lc_thread, bool _quota_threads, bool _run_sync_thread, bool _run_reshard_thread) {
7c673cae
FG
2565 set_context(_cct);
2566 use_gc_thread = _use_gc_thread;
2567 use_lc_thread = _use_lc_thread;
2568 quota_threads = _quota_threads;
2569 run_sync_thread = _run_sync_thread;
31f18b77 2570 run_reshard_thread = _run_reshard_thread;
7c673cae
FG
2571 return initialize();
2572 }
2573 /** Initialize the RADOS instance and prepare to do other ops */
2574 virtual int init_rados();
2575 int init_zg_from_period(bool *initialized);
2576 int init_zg_from_local(bool *creating_defaults);
2577 int init_complete();
2578 int replace_region_with_zonegroup();
2579 int convert_regionmap();
2580 int initialize();
2581 void finalize();
2582
224ce89b
WB
2583 int register_to_service_map(const string& daemon_type, const map<string, string>& meta);
2584
7c673cae
FG
2585 void schedule_context(Context *c);
2586
2587 /** set up a bucket listing. handle is filled in. */
2588 int list_buckets_init(RGWAccessHandle *handle);
2589 /**
2590 * get the next bucket in the listing. obj is filled in,
2591 * handle is updated.
2592 */
2593 int list_buckets_next(rgw_bucket_dir_entry& obj, RGWAccessHandle *handle);
2594
2595 /// list logs
2596 int log_list_init(const string& prefix, RGWAccessHandle *handle);
2597 int log_list_next(RGWAccessHandle handle, string *name);
2598
2599 /// remove log
2600 int log_remove(const string& name);
2601
2602 /// show log
2603 int log_show_init(const string& name, RGWAccessHandle *handle);
2604 int log_show_next(RGWAccessHandle handle, rgw_log_entry *entry);
2605
2606 // log bandwidth info
2607 int log_usage(map<rgw_user_bucket, RGWUsageBatch>& usage_info);
2608 int read_usage(const rgw_user& user, uint64_t start_epoch, uint64_t end_epoch, uint32_t max_entries,
2609 bool *is_truncated, RGWUsageIter& read_iter, map<rgw_user_bucket, rgw_usage_log_entry>& usage);
2610 int trim_usage(rgw_user& user, uint64_t start_epoch, uint64_t end_epoch);
2611
2612 int create_pool(const rgw_pool& pool);
2613
7c673cae 2614 int init_bucket_index(RGWBucketInfo& bucket_info, int num_shards);
f64942e4 2615 int clean_bucket_index(RGWBucketInfo& bucket_info, int num_shards);
7c673cae
FG
2616 int select_bucket_placement(RGWUserInfo& user_info, const string& zonegroup_id, const string& rule,
2617 string *pselected_rule_name, RGWZonePlacementInfo *rule_info);
2618 int select_legacy_bucket_placement(RGWZonePlacementInfo *rule_info);
2619 int select_new_bucket_location(RGWUserInfo& user_info, const string& zonegroup_id, const string& rule,
2620 string *pselected_rule_name, RGWZonePlacementInfo *rule_info);
2621 int select_bucket_location_by_rule(const string& location_rule, RGWZonePlacementInfo *rule_info);
2622 void create_bucket_id(string *bucket_id);
2623
2624 bool get_obj_data_pool(const string& placement_rule, const rgw_obj& obj, rgw_pool *pool);
2625 bool obj_to_raw(const string& placement_rule, const rgw_obj& obj, rgw_raw_obj *raw_obj);
2626
2627 int create_bucket(RGWUserInfo& owner, rgw_bucket& bucket,
2628 const string& zonegroup_id,
2629 const string& placement_rule,
2630 const string& swift_ver_location,
2631 const RGWQuotaInfo * pquota_info,
2632 map<std::string,bufferlist>& attrs,
2633 RGWBucketInfo& bucket_info,
2634 obj_version *pobjv,
2635 obj_version *pep_objv,
2636 ceph::real_time creation_time,
2637 rgw_bucket *master_bucket,
2638 uint32_t *master_num_shards,
2639 bool exclusive = true);
2640 int add_bucket_placement(const rgw_pool& new_pool);
2641 int remove_bucket_placement(const rgw_pool& new_pool);
2642 int list_placement_set(set<rgw_pool>& names);
2643 int create_pools(vector<rgw_pool>& pools, vector<int>& retcodes);
2644
2645 RGWCoroutinesManagerRegistry *get_cr_registry() { return cr_registry; }
2646
2647 class SystemObject {
2648 RGWRados *store;
2649 RGWObjectCtx& ctx;
2650 rgw_raw_obj obj;
2651
2652 RGWObjState *state;
2653
2654 protected:
2655 int get_state(RGWRawObjState **pstate, RGWObjVersionTracker *objv_tracker);
2656
2657 public:
2658 SystemObject(RGWRados *_store, RGWObjectCtx& _ctx, rgw_raw_obj& _obj) : store(_store), ctx(_ctx), obj(_obj), state(NULL) {}
2659
2660 void invalidate_state();
2661
2662 RGWRados *get_store() { return store; }
2663 rgw_raw_obj& get_obj() { return obj; }
2664 RGWObjectCtx& get_ctx() { return ctx; }
2665
2666 struct Read {
2667 RGWRados::SystemObject *source;
2668
2669 struct GetObjState {
2670 rgw_rados_ref ref;
2671 bool has_ref{false};
2672 uint64_t last_ver{0};
2673
2674 GetObjState() {}
2675
2676 int get_ref(RGWRados *store, rgw_raw_obj& obj, rgw_rados_ref **pref);
2677 } state;
2678
2679 struct StatParams {
2680 ceph::real_time *lastmod;
2681 uint64_t *obj_size;
2682 map<string, bufferlist> *attrs;
7c673cae 2683
31f18b77 2684 StatParams() : lastmod(NULL), obj_size(NULL), attrs(NULL) {}
7c673cae
FG
2685 } stat_params;
2686
2687 struct ReadParams {
224ce89b 2688 rgw_cache_entry_info *cache_info{nullptr};
7c673cae
FG
2689 map<string, bufferlist> *attrs;
2690
2691 ReadParams() : attrs(NULL) {}
2692 } read_params;
2693
2694 explicit Read(RGWRados::SystemObject *_source) : source(_source) {}
2695
2696 int stat(RGWObjVersionTracker *objv_tracker);
b32b8144
FG
2697 int read(int64_t ofs, int64_t end, bufferlist& bl, RGWObjVersionTracker *objv_tracker,
2698 boost::optional<obj_version> refresh_version = boost::none);
7c673cae
FG
2699 int get_attr(const char *name, bufferlist& dest);
2700 };
2701 };
2702
2703 struct BucketShard {
2704 RGWRados *store;
2705 rgw_bucket bucket;
2706 int shard_id;
2707 librados::IoCtx index_ctx;
2708 string bucket_obj;
2709
2710 explicit BucketShard(RGWRados *_store) : store(_store), shard_id(-1) {}
f64942e4
AA
2711 int init(const rgw_bucket& _bucket, const rgw_obj& obj, RGWBucketInfo* out);
2712 int init(const rgw_bucket& _bucket, int sid, RGWBucketInfo* out);
a8e16298 2713 int init(const RGWBucketInfo& bucket_info, const rgw_obj& obj);
b32b8144 2714 int init(const RGWBucketInfo& bucket_info, int sid);
7c673cae
FG
2715 };
2716
2717 class Object {
2718 RGWRados *store;
2719 RGWBucketInfo bucket_info;
2720 RGWObjectCtx& ctx;
2721 rgw_obj obj;
2722
2723 BucketShard bs;
2724
2725 RGWObjState *state;
2726
2727 bool versioning_disabled;
2728
2729 bool bs_initialized;
2730
2731 protected:
2732 int get_state(RGWObjState **pstate, bool follow_olh, bool assume_noent = false);
2733 void invalidate_state();
2734
2735 int prepare_atomic_modification(librados::ObjectWriteOperation& op, bool reset_obj, const string *ptag,
181888fb 2736 const char *ifmatch, const char *ifnomatch, bool removal_op, bool modify_tail);
7c673cae
FG
2737 int complete_atomic_modification();
2738
2739 public:
2740 Object(RGWRados *_store, const RGWBucketInfo& _bucket_info, RGWObjectCtx& _ctx, const rgw_obj& _obj) : store(_store), bucket_info(_bucket_info),
2741 ctx(_ctx), obj(_obj), bs(store),
2742 state(NULL), versioning_disabled(false),
2743 bs_initialized(false) {}
2744
2745 RGWRados *get_store() { return store; }
2746 rgw_obj& get_obj() { return obj; }
2747 RGWObjectCtx& get_ctx() { return ctx; }
2748 RGWBucketInfo& get_bucket_info() { return bucket_info; }
2749 int get_manifest(RGWObjManifest **pmanifest);
2750
2751 int get_bucket_shard(BucketShard **pbs) {
2752 if (!bs_initialized) {
f64942e4
AA
2753 int r =
2754 bs.init(bucket_info.bucket, obj, nullptr /* no RGWBucketInfo */);
7c673cae
FG
2755 if (r < 0) {
2756 return r;
2757 }
2758 bs_initialized = true;
2759 }
2760 *pbs = &bs;
2761 return 0;
2762 }
2763
2764 void set_versioning_disabled(bool status) {
2765 versioning_disabled = status;
2766 }
2767
2768 bool versioning_enabled() {
2769 return (!versioning_disabled && bucket_info.versioning_enabled());
2770 }
2771
2772 struct Read {
2773 RGWRados::Object *source;
2774
2775 struct GetObjState {
2776 librados::IoCtx io_ctx;
2777 rgw_obj obj;
2778 rgw_raw_obj head_obj;
2779 } state;
2780
2781 struct ConditionParams {
2782 const ceph::real_time *mod_ptr;
2783 const ceph::real_time *unmod_ptr;
2784 bool high_precision_time;
2785 uint32_t mod_zone_id;
2786 uint64_t mod_pg_ver;
2787 const char *if_match;
2788 const char *if_nomatch;
2789
2790 ConditionParams() :
2791 mod_ptr(NULL), unmod_ptr(NULL), high_precision_time(false), mod_zone_id(0), mod_pg_ver(0),
2792 if_match(NULL), if_nomatch(NULL) {}
2793 } conds;
2794
2795 struct Params {
2796 ceph::real_time *lastmod;
2797 uint64_t *obj_size;
2798 map<string, bufferlist> *attrs;
7c673cae 2799
31f18b77 2800 Params() : lastmod(NULL), obj_size(NULL), attrs(NULL) {}
7c673cae
FG
2801 } params;
2802
2803 explicit Read(RGWRados::Object *_source) : source(_source) {}
2804
2805 int prepare();
2806 static int range_to_ofs(uint64_t obj_size, int64_t &ofs, int64_t &end);
2807 int read(int64_t ofs, int64_t end, bufferlist& bl);
2808 int iterate(int64_t ofs, int64_t end, RGWGetDataCB *cb);
2809 int get_attr(const char *name, bufferlist& dest);
2810 };
2811
2812 struct Write {
2813 RGWRados::Object *target;
2814
2815 struct MetaParams {
2816 ceph::real_time *mtime;
2817 map<std::string, bufferlist>* rmattrs;
2818 const bufferlist *data;
2819 RGWObjManifest *manifest;
2820 const string *ptag;
2821 list<rgw_obj_index_key> *remove_objs;
2822 ceph::real_time set_mtime;
2823 rgw_user owner;
2824 RGWObjCategory category;
2825 int flags;
2826 const char *if_match;
2827 const char *if_nomatch;
91327a77 2828 boost::optional<uint64_t> olh_epoch;
7c673cae
FG
2829 ceph::real_time delete_at;
2830 bool canceled;
2831 const string *user_data;
31f18b77 2832 rgw_zone_set *zones_trace;
181888fb 2833 bool modify_tail;
3efd9988 2834 bool completeMultipart;
7c673cae
FG
2835
2836 MetaParams() : mtime(NULL), rmattrs(NULL), data(NULL), manifest(NULL), ptag(NULL),
2837 remove_objs(NULL), category(RGW_OBJ_CATEGORY_MAIN), flags(0),
91327a77 2838 if_match(NULL), if_nomatch(NULL), canceled(false), user_data(nullptr), zones_trace(nullptr),
3efd9988 2839 modify_tail(false), completeMultipart(false) {}
7c673cae
FG
2840 } meta;
2841
2842 explicit Write(RGWRados::Object *_target) : target(_target) {}
2843
2844 int _do_write_meta(uint64_t size, uint64_t accounted_size,
2845 map<std::string, bufferlist>& attrs,
181888fb 2846 bool modify_tail, bool assume_noent,
7c673cae
FG
2847 void *index_op);
2848 int write_meta(uint64_t size, uint64_t accounted_size,
2849 map<std::string, bufferlist>& attrs);
2850 int write_data(const char *data, uint64_t ofs, uint64_t len, bool exclusive);
2851 };
2852
2853 struct Delete {
2854 RGWRados::Object *target;
2855
2856 struct DeleteParams {
2857 rgw_user bucket_owner;
2858 int versioning_status;
2859 ACLOwner obj_owner; /* needed for creation of deletion marker */
2860 uint64_t olh_epoch;
2861 string marker_version_id;
2862 uint32_t bilog_flags;
2863 list<rgw_obj_index_key> *remove_objs;
2864 ceph::real_time expiration_time;
2865 ceph::real_time unmod_since;
2866 ceph::real_time mtime; /* for setting delete marker mtime */
2867 bool high_precision_time;
31f18b77 2868 rgw_zone_set *zones_trace;
7c673cae 2869
31f18b77 2870 DeleteParams() : versioning_status(0), olh_epoch(0), bilog_flags(0), remove_objs(NULL), high_precision_time(false), zones_trace(nullptr) {}
7c673cae
FG
2871 } params;
2872
2873 struct DeleteResult {
2874 bool delete_marker;
2875 string version_id;
2876
2877 DeleteResult() : delete_marker(false) {}
2878 } result;
2879
2880 explicit Delete(RGWRados::Object *_target) : target(_target) {}
2881
2882 int delete_obj();
2883 };
2884
2885 struct Stat {
2886 RGWRados::Object *source;
2887
2888 struct Result {
2889 rgw_obj obj;
2890 RGWObjManifest manifest;
2891 bool has_manifest;
2892 uint64_t size;
2893 struct timespec mtime;
2894 map<string, bufferlist> attrs;
2895
2896 Result() : has_manifest(false), size(0) {}
2897 } result;
2898
2899 struct State {
2900 librados::IoCtx io_ctx;
2901 librados::AioCompletion *completion;
2902 int ret;
2903
2904 State() : completion(NULL), ret(0) {}
2905 } state;
2906
2907
2908 explicit Stat(RGWRados::Object *_source) : source(_source) {}
2909
2910 int stat_async();
2911 int wait();
2912 int stat();
2913 private:
2914 int finish();
2915 };
2916 };
2917
2918 class Bucket {
2919 RGWRados *store;
2920 RGWBucketInfo bucket_info;
2921 rgw_bucket& bucket;
2922 int shard_id;
2923
2924 public:
2925 Bucket(RGWRados *_store, const RGWBucketInfo& _bucket_info) : store(_store), bucket_info(_bucket_info), bucket(bucket_info.bucket),
2926 shard_id(RGW_NO_SHARD) {}
2927 RGWRados *get_store() { return store; }
2928 rgw_bucket& get_bucket() { return bucket; }
2929 RGWBucketInfo& get_bucket_info() { return bucket_info; }
2930
31f18b77
FG
2931 int update_bucket_id(const string& new_bucket_id);
2932
7c673cae
FG
2933 int get_shard_id() { return shard_id; }
2934 void set_shard_id(int id) {
2935 shard_id = id;
2936 }
2937
2938 class UpdateIndex {
2939 RGWRados::Bucket *target;
2940 string optag;
2941 rgw_obj obj;
2942 uint16_t bilog_flags{0};
2943 BucketShard bs;
2944 bool bs_initialized{false};
2945 bool blind;
2946 bool prepared{false};
31f18b77
FG
2947 rgw_zone_set *zones_trace{nullptr};
2948
2949 int init_bs() {
f64942e4
AA
2950 int r =
2951 bs.init(target->get_bucket(), obj, nullptr /* no RGWBucketInfo */);
31f18b77
FG
2952 if (r < 0) {
2953 return r;
2954 }
2955 bs_initialized = true;
2956 return 0;
2957 }
2958
2959 void invalidate_bs() {
2960 bs_initialized = false;
2961 }
2962
2963 int guard_reshard(BucketShard **pbs, std::function<int(BucketShard *)> call);
7c673cae
FG
2964 public:
2965
2966 UpdateIndex(RGWRados::Bucket *_target, const rgw_obj& _obj) : target(_target), obj(_obj),
2967 bs(target->get_store()) {
2968 blind = (target->get_bucket_info().index_type == RGWBIType_Indexless);
2969 }
2970
2971 int get_bucket_shard(BucketShard **pbs) {
2972 if (!bs_initialized) {
31f18b77 2973 int r = init_bs();
7c673cae
FG
2974 if (r < 0) {
2975 return r;
2976 }
7c673cae
FG
2977 }
2978 *pbs = &bs;
2979 return 0;
2980 }
2981
2982 void set_bilog_flags(uint16_t flags) {
2983 bilog_flags = flags;
2984 }
31f18b77
FG
2985
2986 void set_zones_trace(rgw_zone_set *_zones_trace) {
2987 zones_trace = _zones_trace;
2988 }
7c673cae
FG
2989
2990 int prepare(RGWModifyOp, const string *write_tag);
2991 int complete(int64_t poolid, uint64_t epoch, uint64_t size,
2992 uint64_t accounted_size, ceph::real_time& ut,
2993 const string& etag, const string& content_type,
2994 bufferlist *acl_bl, RGWObjCategory category,
2995 list<rgw_obj_index_key> *remove_objs, const string *user_data = nullptr);
2996 int complete_del(int64_t poolid, uint64_t epoch,
2997 ceph::real_time& removed_mtime, /* mtime of removed object */
2998 list<rgw_obj_index_key> *remove_objs);
2999 int cancel();
3000
3001 const string *get_optag() { return &optag; }
3002
3003 bool is_prepared() { return prepared; }
1adf2230
AA
3004 }; // class UpdateIndex
3005
3006 class List {
3007 protected:
7c673cae 3008
7c673cae
FG
3009 RGWRados::Bucket *target;
3010 rgw_obj_key next_marker;
3011
1adf2230
AA
3012 int list_objects_ordered(int64_t max,
3013 vector<rgw_bucket_dir_entry> *result,
3014 map<string, bool> *common_prefixes,
3015 bool *is_truncated);
3016 int list_objects_unordered(int64_t max,
3017 vector<rgw_bucket_dir_entry> *result,
3018 map<string, bool> *common_prefixes,
3019 bool *is_truncated);
3020
3021 public:
3022
7c673cae
FG
3023 struct Params {
3024 string prefix;
3025 string delim;
3026 rgw_obj_key marker;
3027 rgw_obj_key end_marker;
3028 string ns;
3029 bool enforce_ns;
3030 RGWAccessListFilter *filter;
3031 bool list_versions;
1adf2230
AA
3032 bool allow_unordered;
3033
3034 Params() :
3035 enforce_ns(true),
3036 filter(NULL),
3037 list_versions(false),
3038 allow_unordered(false)
3039 {}
7c673cae
FG
3040 } params;
3041
7c673cae
FG
3042 explicit List(RGWRados::Bucket *_target) : target(_target) {}
3043
1adf2230
AA
3044 int list_objects(int64_t max,
3045 vector<rgw_bucket_dir_entry> *result,
3046 map<string, bool> *common_prefixes,
3047 bool *is_truncated) {
3048 if (params.allow_unordered) {
3049 return list_objects_unordered(max, result, common_prefixes,
3050 is_truncated);
3051 } else {
3052 return list_objects_ordered(max, result, common_prefixes,
3053 is_truncated);
3054 }
3055 }
7c673cae
FG
3056 rgw_obj_key& get_next_marker() {
3057 return next_marker;
3058 }
1adf2230
AA
3059 }; // class List
3060 }; // class Bucket
7c673cae
FG
3061
3062 /** Write/overwrite an object to the bucket storage. */
3063 virtual int put_system_obj_impl(rgw_raw_obj& obj, uint64_t size, ceph::real_time *mtime,
3064 map<std::string, bufferlist>& attrs, int flags,
3065 bufferlist& data,
3066 RGWObjVersionTracker *objv_tracker,
3067 ceph::real_time set_mtime /* 0 for don't set */);
3068
3069 virtual int put_system_obj_data(void *ctx, rgw_raw_obj& obj, bufferlist& bl,
3070 off_t ofs, bool exclusive,
3071 RGWObjVersionTracker *objv_tracker = nullptr);
3072 int aio_put_obj_data(void *ctx, rgw_raw_obj& obj, bufferlist& bl,
3073 off_t ofs, bool exclusive, void **handle);
3074
3075 int put_system_obj(void *ctx, rgw_raw_obj& obj, const char *data, size_t len, bool exclusive,
3076 ceph::real_time *mtime, map<std::string, bufferlist>& attrs, RGWObjVersionTracker *objv_tracker,
3077 ceph::real_time set_mtime) {
3078 bufferlist bl;
3079 bl.append(data, len);
3080 int flags = PUT_OBJ_CREATE;
3081 if (exclusive)
3082 flags |= PUT_OBJ_EXCL;
3083
3084 return put_system_obj_impl(obj, len, mtime, attrs, flags, bl, objv_tracker, set_mtime);
3085 }
3086 int aio_wait(void *handle);
3087 bool aio_completed(void *handle);
3088
3089 int on_last_entry_in_listing(RGWBucketInfo& bucket_info,
3090 const std::string& obj_prefix,
3091 const std::string& obj_delim,
3092 std::function<int(const rgw_bucket_dir_entry&)> handler);
3093
3094 bool swift_versioning_enabled(const RGWBucketInfo& bucket_info) const {
3095 return bucket_info.has_swift_versioning() &&
3096 bucket_info.swift_ver_location.size();
3097 }
3098
3099 int swift_versioning_copy(RGWObjectCtx& obj_ctx, /* in/out */
3100 const rgw_user& user, /* in */
3101 RGWBucketInfo& bucket_info, /* in */
3102 rgw_obj& obj); /* in */
3103 int swift_versioning_restore(RGWObjectCtx& obj_ctx, /* in/out */
3104 const rgw_user& user, /* in */
3105 RGWBucketInfo& bucket_info, /* in */
3106 rgw_obj& obj, /* in */
3107 bool& restored); /* out */
3108 int copy_obj_to_remote_dest(RGWObjState *astate,
3109 map<string, bufferlist>& src_attrs,
3110 RGWRados::Object::Read& read_op,
3111 const rgw_user& user_id,
3112 rgw_obj& dest_obj,
3113 ceph::real_time *mtime);
3114
/** How the caller-supplied attrs are combined with the source object's. */
enum AttrsMod {
  ATTRSMOD_NONE    = 0,  // source attributes are used, attrs is ignored
  ATTRSMOD_REPLACE = 1,  // attrs replace the source attributes
  ATTRSMOD_MERGE   = 2,  // attrs are merged with the source attributes
};
3120
3121 int rewrite_obj(RGWBucketInfo& dest_bucket_info, rgw_obj& obj);
3122
3123 int stat_remote_obj(RGWObjectCtx& obj_ctx,
3124 const rgw_user& user_id,
3125 const string& client_id,
3126 req_info *info,
3127 const string& source_zone,
3128 rgw_obj& src_obj,
3129 RGWBucketInfo& src_bucket_info,
3130 real_time *src_mtime,
3131 uint64_t *psize,
3132 const real_time *mod_ptr,
3133 const real_time *unmod_ptr,
3134 bool high_precision_time,
3135 const char *if_match,
3136 const char *if_nomatch,
3137 map<string, bufferlist> *pattrs,
3138 string *version_id,
3139 string *ptag,
3140 string *petag);
3141
3142 int fetch_remote_obj(RGWObjectCtx& obj_ctx,
3143 const rgw_user& user_id,
3144 const string& client_id,
3145 const string& op_id,
3146 bool record_op_state,
3147 req_info *info,
3148 const string& source_zone,
3149 rgw_obj& dest_obj,
3150 rgw_obj& src_obj,
3151 RGWBucketInfo& dest_bucket_info,
3152 RGWBucketInfo& src_bucket_info,
3153 ceph::real_time *src_mtime,
3154 ceph::real_time *mtime,
3155 const ceph::real_time *mod_ptr,
3156 const ceph::real_time *unmod_ptr,
3157 bool high_precision_time,
3158 const char *if_match,
3159 const char *if_nomatch,
3160 AttrsMod attrs_mod,
3161 bool copy_if_newer,
3162 map<string, bufferlist>& attrs,
3163 RGWObjCategory category,
91327a77 3164 boost::optional<uint64_t> olh_epoch,
7c673cae
FG
3165 ceph::real_time delete_at,
3166 string *version_id,
3167 string *ptag,
3168 ceph::buffer::list *petag,
7c673cae 3169 void (*progress_cb)(off_t, void *),
31f18b77
FG
3170 void *progress_data,
3171 rgw_zone_set *zones_trace= nullptr);
7c673cae
FG
3172 /**
3173 * Copy an object.
3174 * dest_obj: the object to copy into
3175 * src_obj: the object to copy from
3176 * attrs: usage depends on attrs_mod parameter
3177 * attrs_mod: the modification mode of the attrs, may have the following values:
3178 * ATTRSMOD_NONE - the attributes of the source object will be
3179 * copied without modifications, attrs parameter is ignored;
3180 * ATTRSMOD_REPLACE - new object will have the attributes provided by attrs
3181 * parameter, source object attributes are not copied;
3182 * ATTRSMOD_MERGE - any conflicting meta keys on the source object's attributes
3183 * are overwritten by values contained in attrs parameter.
7c673cae
FG
3184 * Returns: 0 on success, -ERR# otherwise.
3185 */
3186 int copy_obj(RGWObjectCtx& obj_ctx,
3187 const rgw_user& user_id,
3188 const string& client_id,
3189 const string& op_id,
3190 req_info *info,
3191 const string& source_zone,
3192 rgw_obj& dest_obj,
3193 rgw_obj& src_obj,
3194 RGWBucketInfo& dest_bucket_info,
3195 RGWBucketInfo& src_bucket_info,
3196 ceph::real_time *src_mtime,
3197 ceph::real_time *mtime,
3198 const ceph::real_time *mod_ptr,
3199 const ceph::real_time *unmod_ptr,
3200 bool high_precision_time,
3201 const char *if_match,
3202 const char *if_nomatch,
3203 AttrsMod attrs_mod,
3204 bool copy_if_newer,
3205 map<std::string, bufferlist>& attrs,
3206 RGWObjCategory category,
3207 uint64_t olh_epoch,
3208 ceph::real_time delete_at,
3209 string *version_id,
3210 string *ptag,
3211 ceph::buffer::list *petag,
7c673cae
FG
3212 void (*progress_cb)(off_t, void *),
3213 void *progress_data);
3214
3215 int copy_obj_data(RGWObjectCtx& obj_ctx,
3216 RGWBucketInfo& dest_bucket_info,
3217 RGWRados::Object::Read& read_op, off_t end,
3218 rgw_obj& dest_obj,
3219 rgw_obj& src_obj,
3220 uint64_t max_chunk_size,
3221 ceph::real_time *mtime,
3222 ceph::real_time set_mtime,
3223 map<string, bufferlist>& attrs,
3224 RGWObjCategory category,
3225 uint64_t olh_epoch,
3226 ceph::real_time delete_at,
3227 string *version_id,
3228 string *ptag,
31f18b77 3229 ceph::buffer::list *petag);
7c673cae
FG
3230
3231 int check_bucket_empty(RGWBucketInfo& bucket_info);
3232
3233 /**
3234 * Delete a bucket.
3235 * bucket_info: info of the bucket to delete
3236 * Returns 0 on success, -ERR# otherwise.
3237 */
3238 int delete_bucket(RGWBucketInfo& bucket_info, RGWObjVersionTracker& objv_tracker, bool check_empty = true);
3239
3240 bool is_meta_master();
3241
3242 /**
3243 * Check to see if the bucket metadata is synced
3244 */
3245 bool is_syncing_bucket_meta(const rgw_bucket& bucket);
3246 void wakeup_meta_sync_shards(set<int>& shard_ids);
3247 void wakeup_data_sync_shards(const string& source_zone, map<int, set<string> >& shard_ids);
3248
3249 RGWMetaSyncStatusManager* get_meta_sync_manager();
3250 RGWDataSyncStatusManager* get_data_sync_manager(const std::string& source_zone);
3251
3252 int set_bucket_owner(rgw_bucket& bucket, ACLOwner& owner);
3253 int set_buckets_enabled(std::vector<rgw_bucket>& buckets, bool enabled);
3254 int bucket_suspended(rgw_bucket& bucket, bool *suspended);
3255
3256 /** Delete an object.*/
3257 int delete_obj(RGWObjectCtx& obj_ctx,
3258 const RGWBucketInfo& bucket_owner,
3259 const rgw_obj& src_obj,
3260 int versioning_status,
3261 uint16_t bilog_flags = 0,
31f18b77
FG
3262 const ceph::real_time& expiration_time = ceph::real_time(),
3263 rgw_zone_set *zones_trace = nullptr);
7c673cae
FG
3264
3265 /** Delete a raw object.*/
3266 int delete_raw_obj(const rgw_raw_obj& obj);
3267
3268 /* Delete a system object */
3269 virtual int delete_system_obj(rgw_raw_obj& src_obj, RGWObjVersionTracker *objv_tracker = NULL);
3270
3271 /** Remove an object from the bucket index */
3272 int delete_obj_index(const rgw_obj& obj);
3273
3274 /**
31f18b77
FG
3275 * Get an attribute for a system object.
3276 * obj: the object to get attr
7c673cae
FG
3277 * name: name of the attr to retrieve
3278 * dest: bufferlist to store the result in
3279 * Returns: 0 on success, -ERR# otherwise.
3280 */
3281 virtual int system_obj_get_attr(rgw_raw_obj& obj, const char *name, bufferlist& dest);
3282
3283 int system_obj_set_attr(void *ctx, rgw_raw_obj& obj, const char *name, bufferlist& bl,
3284 RGWObjVersionTracker *objv_tracker);
3285 virtual int system_obj_set_attrs(void *ctx, rgw_raw_obj& obj,
3286 map<string, bufferlist>& attrs,
3287 map<string, bufferlist>* rmattrs,
3288 RGWObjVersionTracker *objv_tracker);
3289
3290 /**
3291 * Set an attr on an object.
3292 * bucket_info: info of the bucket holding the object
3293 * obj: the object to set the attr on
3294 * name: the attr to set
3295 * bl: the contents of the attr
3296 * Returns: 0 on success, -ERR# otherwise.
3297 */
3298 int set_attr(void *ctx, const RGWBucketInfo& bucket_info, rgw_obj& obj, const char *name, bufferlist& bl);
3299
3300 int set_attrs(void *ctx, const RGWBucketInfo& bucket_info, rgw_obj& obj,
3301 map<string, bufferlist>& attrs,
3302 map<string, bufferlist>* rmattrs);
3303
3304 int get_system_obj_state(RGWObjectCtx *rctx, rgw_raw_obj& obj, RGWRawObjState **state, RGWObjVersionTracker *objv_tracker);
3305 int get_obj_state(RGWObjectCtx *rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWObjState **state,
3306 bool follow_olh, bool assume_noent = false);
3307 int get_obj_state(RGWObjectCtx *rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWObjState **state) {
3308 return get_obj_state(rctx, bucket_info, obj, state, true);
3309 }
3310
3311 virtual int stat_system_obj(RGWObjectCtx& obj_ctx,
3312 RGWRados::SystemObject::Read::GetObjState& state,
3313 rgw_raw_obj& obj,
3314 map<string, bufferlist> *attrs,
3315 ceph::real_time *lastmod,
3316 uint64_t *obj_size,
3317 RGWObjVersionTracker *objv_tracker);
3318
3319 virtual int get_system_obj(RGWObjectCtx& obj_ctx, RGWRados::SystemObject::Read::GetObjState& read_state,
3320 RGWObjVersionTracker *objv_tracker, rgw_raw_obj& obj,
3321 bufferlist& bl, off_t ofs, off_t end,
3322 map<string, bufferlist> *attrs,
b32b8144
FG
3323 rgw_cache_entry_info *cache_info,
3324 boost::optional<obj_version> refresh_version =
3325 boost::none);
7c673cae
FG
3326
// Default implementation: does nothing with `cache`. Declared virtual so
// that a derived class can supply its own behaviour.
3327 virtual void register_chained_cache(RGWChainedCache *cache) {}
// Default implementation: ignores both arguments and always returns false.
// Declared virtual so that a derived class can supply its own behaviour.
3328 virtual bool chain_cache_entry(list<rgw_cache_entry_info *>& cache_info_entries, RGWChainedCache::Entry *chained_entry) { return false; }
3329
3330 int iterate_obj(RGWObjectCtx& ctx,
3331 const RGWBucketInfo& bucket_info, const rgw_obj& obj,
3332 off_t ofs, off_t end,
3333 uint64_t max_chunk_size,
3334 int (*iterate_obj_cb)(const RGWBucketInfo& bucket_info, const rgw_obj& obj, const rgw_raw_obj&, off_t, off_t, off_t, bool, RGWObjState *, void *),
3335 void *arg);
3336
3337 int flush_read_list(struct get_obj_data *d);
3338
3339 int get_obj_iterate_cb(RGWObjectCtx *ctx, RGWObjState *astate,
3340 const RGWBucketInfo& bucket_info, const rgw_obj& obj,
3341 const rgw_raw_obj& read_obj,
3342 off_t obj_ofs, off_t read_ofs, off_t len,
3343 bool is_head_obj, void *arg);
3344
3345 void get_obj_aio_completion_cb(librados::completion_t cb, void *arg);
3346
3347 /**
3348 * a simple object read without keeping state
3349 */
3350
3351 virtual int raw_obj_stat(rgw_raw_obj& obj, uint64_t *psize, ceph::real_time *pmtime, uint64_t *epoch,
3352 map<string, bufferlist> *attrs, bufferlist *first_chunk,
3353 RGWObjVersionTracker *objv_tracker);
3354
3355 int obj_operate(const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::ObjectWriteOperation *op);
3356 int obj_operate(const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::ObjectReadOperation *op);
3357
f64942e4
AA
3358 int guard_reshard(BucketShard *bs,
3359 const rgw_obj& obj_instance,
3360 const RGWBucketInfo& bucket_info,
3361 std::function<int(BucketShard *)> call);
3362 int block_while_resharding(RGWRados::BucketShard *bs,
3363 string *new_bucket_id,
3364 const RGWBucketInfo& bucket_info);
31f18b77 3365
7c673cae
FG
3366 void bucket_index_guard_olh_op(RGWObjState& olh_state, librados::ObjectOperation& op);
3367 int olh_init_modification(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& olh_obj, string *op_tag);
3368 int olh_init_modification_impl(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& olh_obj, string *op_tag);
3369 int bucket_index_link_olh(const RGWBucketInfo& bucket_info, RGWObjState& olh_state,
3370 const rgw_obj& obj_instance, bool delete_marker,
3371 const string& op_tag, struct rgw_bucket_dir_entry_meta *meta,
3372 uint64_t olh_epoch,
91327a77
AA
3373 ceph::real_time unmod_since, bool high_precision_time,
3374 rgw_zone_set *zones_trace = nullptr,
3375 bool log_data_change = false);
31f18b77 3376 int bucket_index_unlink_instance(const RGWBucketInfo& bucket_info, const rgw_obj& obj_instance, const string& op_tag, const string& olh_tag, uint64_t olh_epoch, rgw_zone_set *zones_trace = nullptr);
7c673cae
FG
3377 int bucket_index_read_olh_log(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& obj_instance, uint64_t ver_marker,
3378 map<uint64_t, vector<rgw_bucket_olh_log_entry> > *log, bool *is_truncated);
3379 int bucket_index_trim_olh_log(const RGWBucketInfo& bucket_info, RGWObjState& obj_state, const rgw_obj& obj_instance, uint64_t ver);
3380 int bucket_index_clear_olh(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& obj_instance);
3381 int apply_olh_log(RGWObjectCtx& ctx, RGWObjState& obj_state, const RGWBucketInfo& bucket_info, const rgw_obj& obj,
3382 bufferlist& obj_tag, map<uint64_t, vector<rgw_bucket_olh_log_entry> >& log,
31f18b77
FG
3383 uint64_t *plast_ver, rgw_zone_set *zones_trace = nullptr);
3384 int update_olh(RGWObjectCtx& obj_ctx, RGWObjState *state, const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_zone_set *zones_trace = nullptr);
7c673cae 3385 int set_olh(RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info, const rgw_obj& target_obj, bool delete_marker, rgw_bucket_dir_entry_meta *meta,
91327a77
AA
3386 uint64_t olh_epoch, ceph::real_time unmod_since, bool high_precision_time,
3387 rgw_zone_set *zones_trace = nullptr, bool log_data_change = false);
a8e16298
TL
3388 int repair_olh(RGWObjState* state, const RGWBucketInfo& bucket_info,
3389 const rgw_obj& obj);
7c673cae 3390 int unlink_obj_instance(RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info, const rgw_obj& target_obj,
31f18b77 3391 uint64_t olh_epoch, rgw_zone_set *zones_trace = nullptr);
7c673cae
FG
3392
3393 void check_pending_olh_entries(map<string, bufferlist>& pending_entries, map<string, bufferlist> *rm_pending_entries);
3394 int remove_olh_pending_entries(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& olh_obj, map<string, bufferlist>& pending_attrs);
3395 int follow_olh(const RGWBucketInfo& bucket_info, RGWObjectCtx& ctx, RGWObjState *state, const rgw_obj& olh_obj, rgw_obj *target);
3396 int get_olh(const RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWOLHInfo *olh);
3397
3398 void gen_rand_obj_instance_name(rgw_obj *target);
3399
3400 int omap_get_vals(rgw_raw_obj& obj, bufferlist& header, const std::string& marker, uint64_t count, std::map<string, bufferlist>& m);
3401 int omap_get_all(rgw_raw_obj& obj, bufferlist& header, std::map<string, bufferlist>& m);
3402 int omap_set(rgw_raw_obj& obj, const std::string& key, bufferlist& bl);
3403 int omap_set(rgw_raw_obj& obj, map<std::string, bufferlist>& m);
3404 int omap_del(rgw_raw_obj& obj, const std::string& key);
3405 int update_containers_stats(map<string, RGWBucketEnt>& m);
3406 int append_async(rgw_raw_obj& obj, size_t size, bufferlist& bl);
3407
3408 int watch(const string& oid, uint64_t *watch_handle, librados::WatchCtx2 *ctx);
3409 int unwatch(uint64_t watch_handle);
3410 void add_watcher(int i);
3411 void remove_watcher(int i);
// Default implementation always returns false; virtual so a derived class
// can override it.
3412 virtual bool need_watch_notify() { return false; }
3413 int init_watch();
3414 void finalize_watch();
3415 int distribute(const string& key, bufferlist& bl);
// Default implementation: ignores all of its arguments and returns 0.
// Declared virtual so that a derived class can override it.
3416 virtual int watch_cb(uint64_t notify_id,
3417 uint64_t cookie,
3418 uint64_t notifier_id,
3419 bufferlist& bl) { return 0; }
3420 void pick_control_oid(const string& key, string& notify_oid);
3421
// Default implementation: does nothing with `state`; virtual so a derived
// class can override it.
3422 virtual void set_cache_enabled(bool state) {}
3423
3424 void set_atomic(void *ctx, rgw_obj& obj) {
3425 RGWObjectCtx *rctx = static_cast<RGWObjectCtx *>(ctx);
3426 rctx->obj.set_atomic(obj);
3427 }
3428 void set_prefetch_data(void *ctx, rgw_obj& obj) {
3429 RGWObjectCtx *rctx = static_cast<RGWObjectCtx *>(ctx);
3430 rctx->obj.set_prefetch_data(obj);
3431 }
3432 void set_prefetch_data(void *ctx, rgw_raw_obj& obj) {
3433 RGWObjectCtx *rctx = static_cast<RGWObjectCtx *>(ctx);
3434 rctx->raw.set_prefetch_data(obj);
3435 }
3436
3437 int decode_policy(bufferlist& bl, ACLOwner *owner);
3438 int get_bucket_stats(RGWBucketInfo& bucket_info, int shard_id, string *bucket_ver, string *master_ver,
c07f9fc5 3439 map<RGWObjCategory, RGWStorageStats>& stats, string *max_marker, bool* syncstopped = NULL);
7c673cae
FG
3440 int get_bucket_stats_async(RGWBucketInfo& bucket_info, int shard_id, RGWGetBucketStats_CB *cb);
3441 int get_user_stats(const rgw_user& user, RGWStorageStats& stats);
3442 int get_user_stats_async(const rgw_user& user, RGWGetUserStats_CB *cb);
3443 void get_bucket_instance_obj(const rgw_bucket& bucket, rgw_raw_obj& obj);
3444 void get_bucket_meta_oid(const rgw_bucket& bucket, string& oid);
3445
3446 int put_bucket_entrypoint_info(const string& tenant_name, const string& bucket_name, RGWBucketEntryPoint& entry_point,
3447 bool exclusive, RGWObjVersionTracker& objv_tracker, ceph::real_time mtime,
3448 map<string, bufferlist> *pattrs);
3449 int put_bucket_instance_info(RGWBucketInfo& info, bool exclusive, ceph::real_time mtime, map<string, bufferlist> *pattrs);
3450 int get_bucket_entrypoint_info(RGWObjectCtx& obj_ctx, const string& tenant_name, const string& bucket_name,
3451 RGWBucketEntryPoint& entry_point, RGWObjVersionTracker *objv_tracker,
b32b8144
FG
3452 ceph::real_time *pmtime, map<string, bufferlist> *pattrs, rgw_cache_entry_info *cache_info = NULL,
3453 boost::optional<obj_version> refresh_version = boost::none);
7c673cae
FG
3454 int get_bucket_instance_info(RGWObjectCtx& obj_ctx, const string& meta_key, RGWBucketInfo& info, ceph::real_time *pmtime, map<string, bufferlist> *pattrs);
3455 int get_bucket_instance_info(RGWObjectCtx& obj_ctx, const rgw_bucket& bucket, RGWBucketInfo& info, ceph::real_time *pmtime, map<string, bufferlist> *pattrs);
31f18b77 3456 int get_bucket_instance_from_oid(RGWObjectCtx& obj_ctx, const string& oid, RGWBucketInfo& info, ceph::real_time *pmtime, map<string, bufferlist> *pattrs,
b32b8144
FG
3457 rgw_cache_entry_info *cache_info = NULL,
3458 boost::optional<obj_version> refresh_version = boost::none);
7c673cae
FG
3459
3460 int convert_old_bucket_info(RGWObjectCtx& obj_ctx, const string& tenant_name, const string& bucket_name);
3461 static void make_bucket_entry_name(const string& tenant_name, const string& bucket_name, string& bucket_entry);
b32b8144
FG
3462
3463
3464private:
3465 int _get_bucket_info(RGWObjectCtx& obj_ctx, const string& tenant,
3466 const string& bucket_name, RGWBucketInfo& info,
3467 real_time *pmtime,
3468 map<string, bufferlist> *pattrs,
3469 boost::optional<obj_version> refresh_version);
3470public:
3471
3a9019d9
FG
3472 bool call(std::string command, cmdmap_t& cmdmap, std::string format,
3473 bufferlist& out) override final;
3474
3475 // Should really be protected, but some older GCCs don't handle
3476 // access control properly with lambdas defined in member functions
3477 // of child classes.
3478
3479 void cache_list_dump_helper(Formatter* f,
3480 const std::string& name,
3481 const ceph::real_time mtime,
3482 const std::uint64_t size) {
3483 f->dump_string("name", name);
3484 f->dump_string("mtime", ceph::to_iso_8601(mtime));
3485 f->dump_unsigned("size", size);
3486 }
3487
3488protected:
3489
3490 // `call_list` must iterate over all cache entries and call
3491 // `cache_list_dump_helper` with the supplied Formatter on any that
3492 // include `filter` as a substring.
3493 //
3494 virtual void call_list(const boost::optional<std::string>& filter,
3495 Formatter* format);
3496 // `call_inspect` must look up the requested target and, if found,
3497 // dump it to the supplied Formatter and return true. If not found,
3498 // it must return false.
3499 //
3500 virtual bool call_inspect(const std::string& target, Formatter* format);
3501
3502 // `call_erase` must erase the requested target and return true. If
3503 // the requested target does not exist, it should return false.
3504 virtual bool call_erase(const std::string& target);
3505
3506 // `call_zap` must erase the cache.
3507 virtual void call_zap();
3508public:
b32b8144 3509
7c673cae 3510 int get_bucket_info(RGWObjectCtx& obj_ctx,
b32b8144
FG
3511 const string& tenant_name, const string& bucket_name,
3512 RGWBucketInfo& info,
3513 ceph::real_time *pmtime, map<string, bufferlist> *pattrs = NULL);
3514
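3515 // Per the original note: "true" on successful refresh; "false" if
3516 // there was an error or the version stored on the OSD is the same as
3517 // that presented in the BucketInfo structure. NOTE(review): the declared
3518 // return type is int, not bool -- confirm the actual return convention.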
3519 int try_refresh_bucket_info(RGWBucketInfo& info,
3520 ceph::real_time *pmtime,
3521 map<string, bufferlist> *pattrs = nullptr);
3522
7c673cae 3523 int put_linked_bucket_info(RGWBucketInfo& info, bool exclusive, ceph::real_time mtime, obj_version *pep_objv,
b32b8144 3524 map<string, bufferlist> *pattrs, bool create_entry_point);
7c673cae 3525
31f18b77
FG
3526 int cls_obj_prepare_op(BucketShard& bs, RGWModifyOp op, string& tag, rgw_obj& obj, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
3527 int cls_obj_complete_op(BucketShard& bs, const rgw_obj& obj, RGWModifyOp op, string& tag, int64_t pool, uint64_t epoch,
3528 rgw_bucket_dir_entry& ent, RGWObjCategory category, list<rgw_obj_index_key> *remove_objs, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
3529 int cls_obj_complete_add(BucketShard& bs, const rgw_obj& obj, string& tag, int64_t pool, uint64_t epoch, rgw_bucket_dir_entry& ent,
3530 RGWObjCategory category, list<rgw_obj_index_key> *remove_objs, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
7c673cae 3531 int cls_obj_complete_del(BucketShard& bs, string& tag, int64_t pool, uint64_t epoch, rgw_obj& obj,
31f18b77
FG
3532 ceph::real_time& removed_mtime, list<rgw_obj_index_key> *remove_objs, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
3533 int cls_obj_complete_cancel(BucketShard& bs, string& tag, rgw_obj& obj, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
7c673cae 3534 int cls_obj_set_bucket_tag_timeout(RGWBucketInfo& bucket_info, uint64_t timeout);
1adf2230
AA
3535 int cls_bucket_list_ordered(RGWBucketInfo& bucket_info, int shard_id,
3536 rgw_obj_index_key& start, const string& prefix,
3537 uint32_t num_entries, bool list_versions,
3538 map<string, rgw_bucket_dir_entry>& m,
3539 bool *is_truncated,
3540 rgw_obj_index_key *last_entry,
3541 bool (*force_check_filter)(const string& name) = nullptr);
3542 int cls_bucket_list_unordered(RGWBucketInfo& bucket_info, int shard_id,
3543 rgw_obj_index_key& start, const string& prefix,
3544 uint32_t num_entries, bool list_versions,
3545 vector<rgw_bucket_dir_entry>& ent_list,
3546 bool *is_truncated, rgw_obj_index_key *last_entry,
3547 bool (*force_check_filter)(const string& name) = nullptr);
a8e16298 3548 int cls_bucket_head(const RGWBucketInfo& bucket_info, int shard_id, vector<rgw_bucket_dir_header>& headers, map<int, string> *bucket_instance_ids = NULL);
7c673cae
FG
3549 int cls_bucket_head_async(const RGWBucketInfo& bucket_info, int shard_id, RGWGetDirHeader_CB *ctx, int *num_aio);
3550 int list_bi_log_entries(RGWBucketInfo& bucket_info, int shard_id, string& marker, uint32_t max, std::list<rgw_bi_log_entry>& result, bool *truncated);
3551 int trim_bi_log_entries(RGWBucketInfo& bucket_info, int shard_id, string& marker, string& end_marker);
c07f9fc5
FG
3552 int resync_bi_log_entries(RGWBucketInfo& bucket_info, int shard_id);
3553 int stop_bi_log_entries(RGWBucketInfo& bucket_info, int shard_id);
7c673cae
FG
3554 int get_bi_log_status(RGWBucketInfo& bucket_info, int shard_id, map<int, string>& max_marker);
3555
a8e16298
TL
3556 int bi_get_instance(const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_bucket_dir_entry *dirent);
3557 int bi_get_olh(const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_bucket_olh_entry *olh);
3558 int bi_get(const RGWBucketInfo& bucket_info, const rgw_obj& obj, BIIndexType index_type, rgw_cls_bi_entry *entry);
7c673cae
FG
3559 void bi_put(librados::ObjectWriteOperation& op, BucketShard& bs, rgw_cls_bi_entry& entry);
3560 int bi_put(BucketShard& bs, rgw_cls_bi_entry& entry);
3561 int bi_put(rgw_bucket& bucket, rgw_obj& obj, rgw_cls_bi_entry& entry);
3562 int bi_list(rgw_bucket& bucket, int shard_id, const string& filter_obj, const string& marker, uint32_t max, list<rgw_cls_bi_entry> *entries, bool *is_truncated);
3563 int bi_list(BucketShard& bs, const string& filter_obj, const string& marker, uint32_t max, list<rgw_cls_bi_entry> *entries, bool *is_truncated);
3564 int bi_list(rgw_bucket& bucket, const string& obj_name, const string& marker, uint32_t max,
3565 list<rgw_cls_bi_entry> *entries, bool *is_truncated);
3566 int bi_remove(BucketShard& bs);
3567
3568 int cls_obj_usage_log_add(const string& oid, rgw_usage_log_info& info);
3569 int cls_obj_usage_log_read(string& oid, string& user, uint64_t start_epoch, uint64_t end_epoch, uint32_t max_entries,
3570 string& read_iter, map<rgw_user_bucket, rgw_usage_log_entry>& usage, bool *is_truncated);
3571 int cls_obj_usage_log_trim(string& oid, string& user, uint64_t start_epoch, uint64_t end_epoch);
3572
3573 int key_to_shard_id(const string& key, int max_shards);
3574 void shard_name(const string& prefix, unsigned max_shards, const string& key, string& name, int *shard_id);
3575 void shard_name(const string& prefix, unsigned max_shards, const string& section, const string& key, string& name);
3576 void shard_name(const string& prefix, unsigned shard_id, string& name);
3577 int get_target_shard_id(const RGWBucketInfo& bucket_info, const string& obj_key, int *shard_id);
3578 void time_log_prepare_entry(cls_log_entry& entry, const ceph::real_time& ut, const string& section, const string& key, bufferlist& bl);
3579 int time_log_add_init(librados::IoCtx& io_ctx);
3580 int time_log_add(const string& oid, list<cls_log_entry>& entries,
3581 librados::AioCompletion *completion, bool monotonic_inc = true);
3582 int time_log_add(const string& oid, const ceph::real_time& ut, const string& section, const string& key, bufferlist& bl);
3583 int time_log_list(const string& oid, const ceph::real_time& start_time, const ceph::real_time& end_time,
3584 int max_entries, list<cls_log_entry>& entries,
3585 const string& marker, string *out_marker, bool *truncated);
3586 int time_log_info(const string& oid, cls_log_header *header);
3587 int time_log_info_async(librados::IoCtx& io_ctx, const string& oid, cls_log_header *header, librados::AioCompletion *completion);
3588 int time_log_trim(const string& oid, const ceph::real_time& start_time, const ceph::real_time& end_time,
3589 const string& from_marker, const string& to_marker,
3590 librados::AioCompletion *completion = nullptr);
3591
3592 string objexp_hint_get_shardname(int shard_num);
3593 int objexp_key_shard(const rgw_obj_index_key& key);
3594 void objexp_get_shard(int shard_num,
3595 string& shard); /* out */
3596 int objexp_hint_add(const ceph::real_time& delete_at,
3597 const string& tenant_name,
3598 const string& bucket_name,
3599 const string& bucket_id,
3600 const rgw_obj_index_key& obj_key);
3601 int objexp_hint_list(const string& oid,
3602 const ceph::real_time& start_time,
3603 const ceph::real_time& end_time,
3604 const int max_entries,
3605 const string& marker,
3606 list<cls_timeindex_entry>& entries, /* out */
3607 string *out_marker, /* out */
3608 bool *truncated); /* out */
3609 int objexp_hint_parse(cls_timeindex_entry &ti_entry,
3610 objexp_hint_entry& hint_entry); /* out */
3611 int objexp_hint_trim(const string& oid,
3612 const ceph::real_time& start_time,
3613 const ceph::real_time& end_time,
3614 const string& from_marker = std::string(),
3615 const string& to_marker = std::string());
3616
3617 int lock_exclusive(rgw_pool& pool, const string& oid, ceph::timespan& duration, string& zone_id, string& owner_id);
3618 int unlock(rgw_pool& pool, const string& oid, string& zone_id, string& owner_id);
3619
3620 void update_gc_chain(rgw_obj& head_obj, RGWObjManifest& manifest, cls_rgw_obj_chain *chain);
3621 int send_chain_to_gc(cls_rgw_obj_chain& chain, const string& tag, bool sync);
3622 int gc_operate(string& oid, librados::ObjectWriteOperation *op);
3623 int gc_aio_operate(string& oid, librados::ObjectWriteOperation *op);
3624 int gc_operate(string& oid, librados::ObjectReadOperation *op, bufferlist *pbl);
3625
3626 int list_gc_objs(int *index, string& marker, uint32_t max, bool expired_only, std::list<cls_rgw_gc_obj_info>& result, bool *truncated);
3627 int process_gc();
1adf2230 3628 bool process_expire_objects();
7c673cae
FG
3629 int defer_gc(void *ctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj);
3630
3631 int process_lc();
3632 int list_lc_progress(const string& marker, uint32_t max_entries, map<string, int> *progress_map);
3633
3634 int bucket_check_index(RGWBucketInfo& bucket_info,
3635 map<RGWObjCategory, RGWStorageStats> *existing_stats,
3636 map<RGWObjCategory, RGWStorageStats> *calculated_stats);
3637 int bucket_rebuild_index(RGWBucketInfo& bucket_info);
f64942e4 3638 int bucket_set_reshard(const RGWBucketInfo& bucket_info, const cls_rgw_bucket_instance_entry& entry);
7c673cae
FG
3639 int remove_objs_from_index(RGWBucketInfo& bucket_info, list<rgw_obj_index_key>& oid_list);
3640 int move_rados_obj(librados::IoCtx& src_ioctx,
3641 const string& src_oid, const string& src_locator,
3642 librados::IoCtx& dst_ioctx,
3643 const string& dst_oid, const string& dst_locator);
3644 int fix_head_obj_locator(const RGWBucketInfo& bucket_info, bool copy_obj, bool remove_bad, rgw_obj_key& key);
3645 int fix_tail_obj_locator(const RGWBucketInfo& bucket_info, rgw_obj_key& key, bool fix, bool *need_fix);
3646
3647 int cls_user_get_header(const string& user_id, cls_user_header *header);
94b18763 3648 int cls_user_reset_stats(const string& user_id);
7c673cae
FG
3649 int cls_user_get_header_async(const string& user_id, RGWGetUserHeader_CB *ctx);
3650 int cls_user_sync_bucket_stats(rgw_raw_obj& user_obj, const RGWBucketInfo& bucket_info);
3651 int cls_user_list_buckets(rgw_raw_obj& obj,
3652 const string& in_marker,
3653 const string& end_marker,
3654 int max_entries,
3655 list<cls_user_bucket_entry>& entries,
3656 string *out_marker,
3657 bool *truncated);
3658 int cls_user_add_bucket(rgw_raw_obj& obj, const cls_user_bucket_entry& entry);
3659 int cls_user_update_buckets(rgw_raw_obj& obj, list<cls_user_bucket_entry>& entries, bool add);
3660 int cls_user_complete_stats_sync(rgw_raw_obj& obj);
3661 int complete_sync_user_stats(const rgw_user& user_id);
3662 int cls_user_add_bucket(rgw_raw_obj& obj, list<cls_user_bucket_entry>& entries);
3663 int cls_user_remove_bucket(rgw_raw_obj& obj, const cls_user_bucket& bucket);
c07f9fc5 3664 int cls_user_get_bucket_stats(const rgw_bucket& bucket, cls_user_bucket_entry& entry);
7c673cae
FG
3665
3666 int check_quota(const rgw_user& bucket_owner, rgw_bucket& bucket,
3667 RGWQuotaInfo& user_quota, RGWQuotaInfo& bucket_quota, uint64_t obj_size);
3668
224ce89b 3669 int check_bucket_shards(const RGWBucketInfo& bucket_info, const rgw_bucket& bucket,
31f18b77
FG
3670 RGWQuotaInfo& bucket_quota);
3671
3672 int add_bucket_to_reshard(const RGWBucketInfo& bucket_info, uint32_t new_num_shards);
3673
7c673cae 3674 uint64_t instance_id();
224ce89b
WB
3675 const string& zone_name() {
3676 return get_zone_params().get_name();
3677 }
7c673cae
FG
3678 const string& zone_id() {
3679 return get_zone_params().get_id();
3680 }
3681 string unique_id(uint64_t unique_num) {
3682 char buf[32];
3683 snprintf(buf, sizeof(buf), ".%llu.%llu", (unsigned long long)instance_id(), (unsigned long long)unique_num);
3684 string s = get_zone_params().get_id() + buf;
3685 return s;
3686 }
3687
3688 void init_unique_trans_id_deps() {
3689 char buf[16 + 2 + 1]; /* uint64_t needs 16, 2 hyphens add further 2 */
3690
3691 snprintf(buf, sizeof(buf), "-%llx-", (unsigned long long)instance_id());
3692 url_encode(string(buf) + get_zone_params().get_name(), trans_id_suffix);
3693 }
3694
3695 /* In order to preserve compatibility with Swift API, transaction ID
3696 * should contain at least 32 characters satisfying following spec:
3697 * - first 21 chars must be in range [0-9a-f]. Swift uses this
3698 * space for storing fragment of UUID obtained through a call to
3699 * uuid4() function of Python's uuid module;
3700 * - char no. 22 must be a hyphen;
3701 * - at least 10 next characters constitute hex-formatted timestamp
3702 * padded with zeroes if necessary. All bytes must be in [0-9a-f]
3703 * range;
3704 * - last, optional part of transaction ID is any url-encoded string
3705 * without restriction on length. */
3706 string unique_trans_id(const uint64_t unique_num) {
3707 char buf[41]; /* 2 + 21 + 1 + 16 (timestamp can consume up to 16) + 1 */
3708 time_t timestamp = time(NULL);
3709
3710 snprintf(buf, sizeof(buf), "tx%021llx-%010llx",
3711 (unsigned long long)unique_num,
3712 (unsigned long long)timestamp);
3713
3714 return string(buf) + trans_id_suffix;
3715 }
3716
3717 void get_log_pool(rgw_pool& pool) {
3718 pool = get_zone_params().log_pool;
3719 }
3720
3721 bool need_to_log_data() {
3722 return get_zone().log_data;
3723 }
3724
3725 bool need_to_log_metadata() {
224ce89b
WB
3726 return is_meta_master() &&
3727 (get_zonegroup().zones.size() > 1 || current_period.is_multi_zonegroups_with_zones());
7c673cae
FG
3728 }
3729
3efd9988
FG
3730 bool can_reshard() const {
3731 return current_period.get_id().empty() ||
3732 (zonegroup.zones.size() == 1 && current_period.is_single_zonegroup());
3733 }
3734
7c673cae
FG
3735 librados::Rados* get_rados_handle();
3736
3737 int delete_raw_obj_aio(const rgw_raw_obj& obj, list<librados::AioCompletion *>& handles);
3738 int delete_obj_aio(const rgw_obj& obj, RGWBucketInfo& info, RGWObjState *astate,
3739 list<librados::AioCompletion *>& handles, bool keep_index_consistent);
3740 private:
3741 /**
3742 * This is a helper method, it generates a list of bucket index objects with the given
3743 * bucket base oid and number of shards.
3744 *
3745 * bucket_oid_base [in] - base name of the bucket index object;
3746 * num_shards [in] - number of bucket index object shards.
3747 * bucket_objs [out] - filled by this method, a list of bucket index objects.
3748 */
3749 void get_bucket_index_objects(const string& bucket_oid_base, uint32_t num_shards,
3750 map<int, string>& bucket_objs, int shard_id = -1);
3751
3752 /**
3753 * Get the bucket index object with the given base bucket index object and object key,
3754 * and the number of bucket index shards.
3755 *
3756 * bucket_oid_base [in] - bucket object base name.
3757 * obj_key [in] - object key.
3758 * num_shards [in] - number of bucket index shards.
3759 * hash_type [in] - type of hash to find the shard ID.
3760 * bucket_obj [out] - the bucket index object for the given object.
3761 *
3762 * Return 0 on success, a failure code otherwise.
3763 */
3764 int get_bucket_index_object(const string& bucket_oid_base, const string& obj_key,
3765 uint32_t num_shards, RGWBucketInfo::BIShardsHashType hash_type, string *bucket_obj, int *shard);
3766
3767 void get_bucket_index_object(const string& bucket_oid_base, uint32_t num_shards,
3768 int shard_id, string *bucket_obj);
3769
3770 /**
3771 * Check the actual on-disk state of the object specified
3772 * by list_state, and fill in the time and size of object.
3773 * Then append any changes to suggested_updates for
3774 * the rgw class' dir_suggest_changes function.
3775 *
3776 * Note that this can maul list_state; don't use it afterwards. Also
3777 * it expects object to already be filled in from list_state; it only
3778 * sets the size and mtime.
3779 *
3780 * Returns 0 on success, -ENOENT if the object doesn't exist on disk,
3781 * and -errno on other failures. (-ENOENT is not a failure, and it
3782 * will encode that info as a suggested update.)
3783 */
3784 int check_disk_state(librados::IoCtx io_ctx,
3785 const RGWBucketInfo& bucket_info,
3786 rgw_bucket_dir_entry& list_state,
3787 rgw_bucket_dir_entry& object,
3788 bufferlist& suggested_updates);
3789
3790 /**
3791 * Init pool iteration
31f18b77 3792 * pool: pool to use for the ctx initialization
7c673cae
FG
3793 * ctx: context object to use for the iteration
3794 * Returns: 0 on success, -ERR# otherwise.
3795 */
3796 int pool_iterate_begin(const rgw_pool& pool, RGWPoolIterCtx& ctx);
31f18b77 3797
181888fb
FG
3798 /**
3799 * Init pool iteration
3800 * pool: pool to use
3801 * cursor: position to start iteration
3802 * ctx: context object to use for the iteration
3803 * Returns: 0 on success, -ERR# otherwise.
3804 */
3805 int pool_iterate_begin(const rgw_pool& pool, const string& cursor, RGWPoolIterCtx& ctx);
3806
3807 /**
3808 * Get pool iteration position
3809 * ctx: context object to use for the iteration
3810 * Returns: string representation of position
3811 */
3812 string pool_iterate_get_cursor(RGWPoolIterCtx& ctx);
3813
7c673cae
FG
3814 /**
3815 * Iterate over pool return object names, use optional filter
3816 * ctx: iteration context, initialized with pool_iterate_begin()
3817 * num: max number of objects to return
3818 * objs: a vector that the results will append into
3819 * is_truncated: if not NULL, will hold true iff more objects remain (iteration is not complete)
3820 * filter: if not NULL, will be used to filter returned objects
3821 * Returns: 0 on success, -ERR# otherwise.
3822 */
3823 int pool_iterate(RGWPoolIterCtx& ctx, uint32_t num, vector<rgw_bucket_dir_entry>& objs,
3824 bool *is_truncated, RGWAccessListFilter *filter);
3825
3826 uint64_t next_bucket_id();
3827};
3828
3829class RGWStoreManager {
3830public:
3831 RGWStoreManager() {}
28e407b8
AA
3832 static RGWRados *get_storage(CephContext *cct, bool use_gc_thread, bool use_lc_thread, bool quota_threads,
3833 bool run_sync_thread, bool run_reshard_thread, bool use_cache = true) {
31f18b77 3834 RGWRados *store = init_storage_provider(cct, use_gc_thread, use_lc_thread, quota_threads, run_sync_thread,
28e407b8 3835 run_reshard_thread, use_cache);
7c673cae
FG
3836 return store;
3837 }
3838 static RGWRados *get_raw_storage(CephContext *cct) {
3839 RGWRados *store = init_raw_storage_provider(cct);
3840 return store;
3841 }
28e407b8 3842 static RGWRados *init_storage_provider(CephContext *cct, bool use_gc_thread, bool use_lc_thread, bool quota_threads, bool run_sync_thread, bool run_reshard_thread, bool use_metadata_cache);
7c673cae
FG
3843 static RGWRados *init_raw_storage_provider(CephContext *cct);
3844 static void close_storage(RGWRados *store);
3845
3846};
3847
3848template <class T>
3849class RGWChainedCacheImpl : public RGWChainedCache {
b32b8144 3850 ceph::timespan expiry;
7c673cae
FG
3851 RWLock lock;
3852
b32b8144 3853 map<string, std::pair<T, ceph::coarse_mono_time>> entries;
7c673cae
FG
3854
3855public:
3856 RGWChainedCacheImpl() : lock("RGWChainedCacheImpl::lock") {}
3857
3858 void init(RGWRados *store) {
3859 store->register_chained_cache(this);
b32b8144
FG
3860 expiry = std::chrono::seconds(store->ctx()->_conf->get_val<uint64_t>(
3861 "rgw_cache_expiry_interval"));
7c673cae
FG
3862 }
3863
3864 bool find(const string& key, T *entry) {
3865 RWLock::RLocker rl(lock);
b32b8144 3866 auto iter = entries.find(key);
7c673cae
FG
3867 if (iter == entries.end()) {
3868 return false;
3869 }
b32b8144
FG
3870 if (expiry.count() &&
3871 (ceph::coarse_mono_clock::now() - iter->second.second) > expiry) {
3872 return false;
3873 }
7c673cae 3874
b32b8144 3875 *entry = iter->second.first;
7c673cae
FG
3876 return true;
3877 }
3878
3879 bool put(RGWRados *store, const string& key, T *entry, list<rgw_cache_entry_info *>& cache_info_entries) {
3880 Entry chain_entry(this, key, entry);
3881
3882 /* we need the store cache to call us under its lock to maintain lock ordering */
3883 return store->chain_cache_entry(cache_info_entries, &chain_entry);
3884 }
3885
3886 void chain_cb(const string& key, void *data) override {
3887 T *entry = static_cast<T *>(data);
3888 RWLock::WLocker wl(lock);
b32b8144
FG
3889 entries[key].first = *entry;
3890 if (expiry.count() > 0) {
3891 entries[key].second = ceph::coarse_mono_clock::now();
3892 }
7c673cae
FG
3893 }
3894
3895 void invalidate(const string& key) override {
3896 RWLock::WLocker wl(lock);
3897 entries.erase(key);
3898 }
3899
3900 void invalidate_all() override {
3901 RWLock::WLocker wl(lock);
3902 entries.clear();
3903 }
3904}; /* RGWChainedCacheImpl */
3905
3906/**
3907 * Base of PUT operation.
3908 * Allows creating chained data transformers like compressors and encryptors.
3909 */
3910class RGWPutObjDataProcessor
3911{
3912public:
3913 RGWPutObjDataProcessor(){}
3914 virtual ~RGWPutObjDataProcessor(){}
3915 virtual int handle_data(bufferlist& bl, off_t ofs, void **phandle, rgw_raw_obj *pobj, bool *again) = 0;
3916 virtual int throttle_data(void *handle, const rgw_raw_obj& obj, uint64_t size, bool need_to_wait) = 0;
3917}; /* RGWPutObjDataProcessor */
3918
3919
3920class RGWPutObjProcessor : public RGWPutObjDataProcessor
3921{
3922protected:
3923 RGWRados *store;
3924 RGWObjectCtx& obj_ctx;
3925 bool is_complete;
3926 RGWBucketInfo bucket_info;
3927 bool canceled;
3928
3929 virtual int do_complete(size_t accounted_size, const string& etag,
3930 ceph::real_time *mtime, ceph::real_time set_mtime,
3931 map<string, bufferlist>& attrs, ceph::real_time delete_at,
31f18b77
FG
3932 const char *if_match, const char *if_nomatch, const string *user_data,
3933 rgw_zone_set* zones_trace = nullptr) = 0;
7c673cae
FG
3934
3935public:
3936 RGWPutObjProcessor(RGWObjectCtx& _obj_ctx, RGWBucketInfo& _bi) : store(NULL),
3937 obj_ctx(_obj_ctx),
3938 is_complete(false),
3939 bucket_info(_bi),
3940 canceled(false) {}
3941 ~RGWPutObjProcessor() override {}
3942 virtual int prepare(RGWRados *_store, string *oid_rand) {
3943 store = _store;
3944 return 0;
3945 }
3946
3947 int complete(size_t accounted_size, const string& etag,
3948 ceph::real_time *mtime, ceph::real_time set_mtime,
3949 map<string, bufferlist>& attrs, ceph::real_time delete_at,
31f18b77
FG
3950 const char *if_match = NULL, const char *if_nomatch = NULL, const string *user_data = nullptr,
3951 rgw_zone_set *zones_trace = nullptr);
7c673cae
FG
3952
3953 CephContext *ctx();
3954
3955 bool is_canceled() { return canceled; }
3956}; /* RGWPutObjProcessor */
3957
3958struct put_obj_aio_info {
3959 void *handle;
3960 rgw_raw_obj obj;
3961 uint64_t size;
3962};
3963
3964#define RGW_PUT_OBJ_MIN_WINDOW_SIZE_DEFAULT (16 * 1024 * 1024)
3965
3966class RGWPutObjProcessor_Aio : public RGWPutObjProcessor
3967{
3968 list<struct put_obj_aio_info> pending;
3969 uint64_t window_size{RGW_PUT_OBJ_MIN_WINDOW_SIZE_DEFAULT};
3970 uint64_t pending_size{0};
3971
3972 struct put_obj_aio_info pop_pending();
3973 int wait_pending_front();
3974 bool pending_has_completed();
3975
3976 rgw_raw_obj last_written_obj;
3977
3978protected:
3979 uint64_t obj_len{0};
3980
3981 set<rgw_raw_obj> written_objs;
3982 rgw_obj head_obj;
3983
3984 void add_written_obj(const rgw_raw_obj& obj) {
3985 written_objs.insert(obj);
3986 }
3987
3988 int drain_pending();
3989 int handle_obj_data(rgw_raw_obj& obj, bufferlist& bl, off_t ofs, off_t abs_ofs, void **phandle, bool exclusive);
3990
3991public:
3992 int prepare(RGWRados *store, string *oid_rand) override;
3993 int throttle_data(void *handle, const rgw_raw_obj& obj, uint64_t size, bool need_to_wait) override;
3994
3995 RGWPutObjProcessor_Aio(RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info) : RGWPutObjProcessor(obj_ctx, bucket_info) {}
3996 ~RGWPutObjProcessor_Aio() override;
3997}; /* RGWPutObjProcessor_Aio */
3998
3999class RGWPutObjProcessor_Atomic : public RGWPutObjProcessor_Aio
4000{
4001 bufferlist first_chunk;
4002 uint64_t part_size;
4003 off_t cur_part_ofs;
4004 off_t next_part_ofs;
4005 int cur_part_id;
4006 off_t data_ofs;
4007
4008 bufferlist pending_data_bl;
4009 uint64_t max_chunk_size;
4010
4011 bool versioned_object;
91327a77 4012 boost::optional<uint64_t> olh_epoch;
7c673cae
FG
4013 string version_id;
4014
4015protected:
4016 rgw_bucket bucket;
4017 string obj_str;
4018
4019 string unique_tag;
4020
4021 rgw_raw_obj cur_obj;
4022 RGWObjManifest manifest;
4023 RGWObjManifest::generator manifest_gen;
4024
4025 int write_data(bufferlist& bl, off_t ofs, void **phandle, rgw_raw_obj *pobj, bool exclusive);
4026 int do_complete(size_t accounted_size, const string& etag,
4027 ceph::real_time *mtime, ceph::real_time set_mtime,
4028 map<string, bufferlist>& attrs, ceph::real_time delete_at,
31f18b77 4029 const char *if_match, const char *if_nomatch, const string *user_data, rgw_zone_set *zones_trace) override;
7c673cae
FG
4030
4031 int prepare_next_part(off_t ofs);
4032 int complete_parts();
4033 int complete_writing_data();
4034
4035 int prepare_init(RGWRados *store, string *oid_rand);
4036
4037public:
4038 ~RGWPutObjProcessor_Atomic() override {}
4039 RGWPutObjProcessor_Atomic(RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info,
4040 rgw_bucket& _b, const string& _o, uint64_t _p, const string& _t, bool versioned) :
4041 RGWPutObjProcessor_Aio(obj_ctx, bucket_info),
4042 part_size(_p),
4043 cur_part_ofs(0),
4044 next_part_ofs(_p),
4045 cur_part_id(0),
4046 data_ofs(0),
4047 max_chunk_size(0),
4048 versioned_object(versioned),
7c673cae
FG
4049 bucket(_b),
4050 obj_str(_o),
4051 unique_tag(_t) {}
4052 int prepare(RGWRados *store, string *oid_rand) override;
4053 virtual bool immutable_head() { return false; }
4054 int handle_data(bufferlist& bl, off_t ofs, void **phandle, rgw_raw_obj *pobj, bool *again) override;
4055
4056 void set_olh_epoch(uint64_t epoch) {
4057 olh_epoch = epoch;
4058 }
4059
4060 void set_version_id(const string& vid) {
4061 version_id = vid;
4062 }
4063}; /* RGWPutObjProcessor_Atomic */
4064
#define MP_META_SUFFIX ".meta"

/**
 * Name helper for a multipart upload. From an object key (oid) and an upload
 * id it derives:
 *   meta   = <oid>.<upload_id>.meta
 *   prefix = <oid>.<part_unique_str>
 * and part names of the form <prefix>.<part>.
 */
class RGWMPObj {
  string oid;
  string prefix;
  string meta;
  string upload_id;
public:
  RGWMPObj() {}
  RGWMPObj(const string& _oid, const string& _upload_id) {
    init(_oid, _upload_id, _upload_id);
  }
  /* Two-argument form: the upload id doubles as the part-unique string. */
  void init(const string& _oid, const string& _upload_id) {
    init(_oid, _upload_id, _upload_id);
  }
  /* An empty _oid resets the object to the cleared state. */
  void init(const string& _oid, const string& _upload_id, const string& part_unique_str) {
    if (_oid.empty()) {
      clear();
      return;
    }
    oid = _oid;
    upload_id = _upload_id;
    prefix = oid + ".";
    meta = prefix + upload_id + MP_META_SUFFIX;
    prefix.append(part_unique_str);
  }
  string& get_meta() { return meta; }
  /* Part name for a numeric part: <prefix>.<num> (decimal). */
  string get_part(int num) {
    return prefix + "." + std::to_string(num);
  }
  /* Part name for an already formatted part string: <prefix>.<part>. */
  string get_part(const string& part) {
    return prefix + "." + part;
  }
  string& get_upload_id() {
    return upload_id;
  }
  string& get_key() {
    return oid;
  }
  /**
   * Parses a name of the form <key>.<upload_id>.<suffix> (the key itself may
   * contain dots; only the last two dots are significant) and re-initializes
   * this object from it.
   *
   * Returns false, leaving the object untouched, when the name has fewer
   * than two dots or when the key part would be empty; true otherwise.
   */
  bool from_meta(const string& meta) {
    const string::size_type end_pos = meta.rfind('.'); // search for ".meta"
    /* end_pos == 0 leaves no room for "<key>.<upload_id>"; it also must be
     * rejected before computing end_pos - 1, which would wrap around. */
    if (end_pos == string::npos || end_pos == 0)
      return false;
    const string::size_type mid_pos = meta.rfind('.', end_pos - 1); // <key>.<upload_id>
    /* mid_pos == 0 means an empty key, which init() would treat as "clear". */
    if (mid_pos == string::npos || mid_pos == 0)
      return false;
    /* Extract into locals first so the result does not depend on whether
     * the argument refers to one of this object's own members. */
    const string key = meta.substr(0, mid_pos);
    const string id = meta.substr(mid_pos + 1, end_pos - mid_pos - 1);
    init(key, id, id);
    return true;
  }
  void clear() {
    oid = "";
    prefix = "";
    meta = "";
    upload_id = "";
  }
};
4130
4131class RGWPutObjProcessor_Multipart : public RGWPutObjProcessor_Atomic
4132{
4133 string part_num;
4134 RGWMPObj mp;
4135 req_state *s;
4136 string upload_id;
4137
4138protected:
4139 int prepare(RGWRados *store, string *oid_rand);
4140 int do_complete(size_t accounted_size, const string& etag,
4141 ceph::real_time *mtime, ceph::real_time set_mtime,
4142 map<string, bufferlist>& attrs, ceph::real_time delete_at,
31f18b77
FG
4143 const char *if_match, const char *if_nomatch, const string *user_data,
4144 rgw_zone_set *zones_trace) override;
7c673cae
FG
4145public:
4146 bool immutable_head() { return true; }
4147 RGWPutObjProcessor_Multipart(RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info, uint64_t _p, req_state *_s) :
4148 RGWPutObjProcessor_Atomic(obj_ctx, bucket_info, _s->bucket, _s->object.name, _p, _s->req_id, false), s(_s) {}
4149 void get_mp(RGWMPObj** _mp);
4150}; /* RGWPutObjProcessor_Multipart */
4151#endif