]> git.proxmox.com Git - ceph.git/blame - ceph/src/rgw/rgw_rados.h
update sources to 12.2.7
[ceph.git] / ceph / src / rgw / rgw_rados.h
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3
4#ifndef CEPH_RGWRADOS_H
5#define CEPH_RGWRADOS_H
6
7#include <functional>
8
9#include "include/rados/librados.hpp"
10#include "include/Context.h"
3a9019d9 11#include "common/admin_socket.h"
7c673cae
FG
12#include "common/RefCountedObj.h"
13#include "common/RWLock.h"
14#include "common/ceph_time.h"
15#include "common/lru_map.h"
16#include "rgw_common.h"
17#include "cls/rgw/cls_rgw_types.h"
18#include "cls/version/cls_version_types.h"
19#include "cls/log/cls_log_types.h"
20#include "cls/statelog/cls_statelog_types.h"
21#include "cls/timeindex/cls_timeindex_types.h"
22#include "rgw_log.h"
23#include "rgw_metadata.h"
24#include "rgw_meta_sync_status.h"
25#include "rgw_period_puller.h"
26#include "rgw_sync_module.h"
b32b8144 27#include "rgw_sync_log_trim.h"
7c673cae
FG
28
29class RGWWatcher;
30class SafeTimer;
31class ACLOwner;
32class RGWGC;
33class RGWMetaNotifier;
34class RGWDataNotifier;
35class RGWLC;
36class RGWObjectExpirer;
37class RGWMetaSyncProcessorThread;
38class RGWDataSyncProcessorThread;
39class RGWSyncLogTrimThread;
40class RGWRESTConn;
41struct RGWZoneGroup;
42struct RGWZoneParams;
31f18b77
FG
43class RGWReshard;
44class RGWReshardWait;
7c673cae
FG
45
46/* flags for put_obj_meta() */
47#define PUT_OBJ_CREATE 0x01
48#define PUT_OBJ_EXCL 0x02
49#define PUT_OBJ_CREATE_EXCL (PUT_OBJ_CREATE | PUT_OBJ_EXCL)
50
51#define RGW_OBJ_NS_MULTIPART "multipart"
52#define RGW_OBJ_NS_SHADOW "shadow"
53
54#define RGW_BUCKET_INSTANCE_MD_PREFIX ".bucket.meta."
55
56#define RGW_NO_SHARD -1
57
31f18b77
FG
/* Primes used to fold a hash value before it is reduced to a shard index. */
#define RGW_SHARDS_PRIME_0 7877
#define RGW_SHARDS_PRIME_1 65521

/*
 * Reduce a hash value to a shard index.
 *
 * The hash is first taken modulo one of the two primes above (the smaller
 * prime while max_shards does not exceed it, the larger one otherwise) and
 * then modulo max_shards. max_shards is used as a divisor, so it must be
 * non-zero.
 */
static inline int rgw_shards_mod(unsigned hval, int max_shards)
{
  const unsigned prime =
    (max_shards > RGW_SHARDS_PRIME_0) ? RGW_SHARDS_PRIME_1 : RGW_SHARDS_PRIME_0;
  return hval % prime % max_shards;
}
68
69static inline int rgw_shards_hash(const string& key, int max_shards)
70{
71 return rgw_shards_mod(ceph_str_hash_linux(key.c_str(), key.size()), max_shards);
72}
73
74static inline int rgw_shards_max()
75{
76 return RGW_SHARDS_PRIME_1;
77}
7c673cae
FG
78
79static inline void prepend_bucket_marker(const rgw_bucket& bucket, const string& orig_oid, string& oid)
80{
81 if (bucket.marker.empty() || orig_oid.empty()) {
82 oid = orig_oid;
83 } else {
84 oid = bucket.marker;
85 oid.append("_");
86 oid.append(orig_oid);
87 }
88}
89
90static inline void get_obj_bucket_and_oid_loc(const rgw_obj& obj, string& oid, string& locator)
91{
92 const rgw_bucket& bucket = obj.bucket;
93 prepend_bucket_marker(bucket, obj.get_oid(), oid);
94 const string& loc = obj.key.get_loc();
95 if (!loc.empty()) {
96 prepend_bucket_marker(bucket, loc, locator);
97 } else {
98 locator.clear();
99 }
100}
101
102int rgw_init_ioctx(librados::Rados *rados, const rgw_pool& pool, librados::IoCtx& ioctx, bool create = false);
103
104int rgw_policy_from_attrset(CephContext *cct, map<string, bufferlist>& attrset, RGWAccessControlPolicy *policy);
105
106static inline bool rgw_raw_obj_to_obj(const rgw_bucket& bucket, const rgw_raw_obj& raw_obj, rgw_obj *obj)
107{
108 ssize_t pos = raw_obj.oid.find('_');
109 if (pos < 0) {
110 return false;
111 }
112
113 if (!rgw_obj_key::parse_raw_oid(raw_obj.oid.substr(pos + 1), &obj->key)) {
114 return false;
115 }
116 obj->bucket = bucket;
117
118 return true;
119}
120
121struct rgw_bucket_placement {
122 string placement_rule;
123 rgw_bucket bucket;
124
125 void dump(Formatter *f) const;
126};
127
128class rgw_obj_select {
129 string placement_rule;
130 rgw_obj obj;
131 rgw_raw_obj raw_obj;
132 bool is_raw;
133
134public:
135 rgw_obj_select() : is_raw(false) {}
136 rgw_obj_select(const rgw_obj& _obj) : obj(_obj), is_raw(false) {}
137 rgw_obj_select(const rgw_raw_obj& _raw_obj) : raw_obj(_raw_obj), is_raw(true) {}
138 rgw_obj_select(const rgw_obj_select& rhs) {
c07f9fc5 139 placement_rule = rhs.placement_rule;
7c673cae
FG
140 is_raw = rhs.is_raw;
141 if (is_raw) {
142 raw_obj = rhs.raw_obj;
143 } else {
144 obj = rhs.obj;
145 }
146 }
147
148 rgw_raw_obj get_raw_obj(const RGWZoneGroup& zonegroup, const RGWZoneParams& zone_params) const;
149 rgw_raw_obj get_raw_obj(RGWRados *store) const;
150
151 rgw_obj_select& operator=(const rgw_obj& rhs) {
152 obj = rhs;
153 is_raw = false;
154 return *this;
155 }
156
157 rgw_obj_select& operator=(const rgw_raw_obj& rhs) {
158 raw_obj = rhs;
159 is_raw = true;
160 return *this;
161 }
162
163 void set_placement_rule(const string& rule) {
164 placement_rule = rule;
165 }
166};
167
168struct compression_block {
169 uint64_t old_ofs;
170 uint64_t new_ofs;
171 uint64_t len;
172
173 void encode(bufferlist& bl) const {
174 ENCODE_START(1, 1, bl);
175 ::encode(old_ofs, bl);
176 ::encode(new_ofs, bl);
177 ::encode(len, bl);
178 ENCODE_FINISH(bl);
179 }
180
181 void decode(bufferlist::iterator& bl) {
182 DECODE_START(1, bl);
183 ::decode(old_ofs, bl);
184 ::decode(new_ofs, bl);
185 ::decode(len, bl);
186 DECODE_FINISH(bl);
187 }
188};
189WRITE_CLASS_ENCODER(compression_block)
190
191struct RGWCompressionInfo {
192 string compression_type;
193 uint64_t orig_size;
194 vector<compression_block> blocks;
195
196 RGWCompressionInfo() : compression_type("none"), orig_size(0) {}
197 RGWCompressionInfo(const RGWCompressionInfo& cs_info) : compression_type(cs_info.compression_type),
198 orig_size(cs_info.orig_size),
199 blocks(cs_info.blocks) {}
200
201 void encode(bufferlist& bl) const {
202 ENCODE_START(1, 1, bl);
203 ::encode(compression_type, bl);
204 ::encode(orig_size, bl);
205 ::encode(blocks, bl);
206 ENCODE_FINISH(bl);
207 }
208
209 void decode(bufferlist::iterator& bl) {
210 DECODE_START(1, bl);
211 ::decode(compression_type, bl);
212 ::decode(orig_size, bl);
213 ::decode(blocks, bl);
214 DECODE_FINISH(bl);
215 }
216};
217WRITE_CLASS_ENCODER(RGWCompressionInfo)
218
219int rgw_compression_info_from_attrset(map<string, bufferlist>& attrs, bool& need_decompress, RGWCompressionInfo& cs_info);
220
221struct RGWOLHInfo {
222 rgw_obj target;
223 bool removed;
224
225 RGWOLHInfo() : removed(false) {}
226
227 void encode(bufferlist& bl) const {
228 ENCODE_START(1, 1, bl);
229 ::encode(target, bl);
230 ::encode(removed, bl);
231 ENCODE_FINISH(bl);
232 }
233
234 void decode(bufferlist::iterator& bl) {
235 DECODE_START(1, bl);
236 ::decode(target, bl);
237 ::decode(removed, bl);
238 DECODE_FINISH(bl);
239 }
240 static void generate_test_instances(list<RGWOLHInfo*>& o);
241 void dump(Formatter *f) const;
242};
243WRITE_CLASS_ENCODER(RGWOLHInfo)
244
245struct RGWOLHPendingInfo {
246 ceph::real_time time;
247
248 RGWOLHPendingInfo() {}
249
250 void encode(bufferlist& bl) const {
251 ENCODE_START(1, 1, bl);
252 ::encode(time, bl);
253 ENCODE_FINISH(bl);
254 }
255
256 void decode(bufferlist::iterator& bl) {
257 DECODE_START(1, bl);
258 ::decode(time, bl);
259 DECODE_FINISH(bl);
260 }
261
262 void dump(Formatter *f) const;
263};
264WRITE_CLASS_ENCODER(RGWOLHPendingInfo)
265
266struct RGWUsageBatch {
267 map<ceph::real_time, rgw_usage_log_entry> m;
268
269 void insert(ceph::real_time& t, rgw_usage_log_entry& entry, bool *account) {
270 bool exists = m.find(t) != m.end();
271 *account = !exists;
272 m[t].aggregate(entry);
273 }
274};
275
/* Iteration state: an opaque read marker string plus an index starting at 0. */
struct RGWUsageIter {
  std::string read_iter;
  uint32_t index{0};

  RGWUsageIter() {}
};
282
283class RGWGetDataCB {
284protected:
285 uint64_t extra_data_len;
286public:
287 virtual int handle_data(bufferlist& bl, off_t bl_ofs, off_t bl_len) = 0;
288 RGWGetDataCB() : extra_data_len(0) {}
289 virtual ~RGWGetDataCB() {}
290 virtual void set_extra_data_len(uint64_t len) {
291 extra_data_len = len;
292 }
293 /**
294 * Flushes any cached data. Used by RGWGetObjFilter.
295 * Return logic same as handle_data.
296 */
297 virtual int flush() {
298 return 0;
299 }
300 /**
301 * Allows to extend fetch range of RGW object. Used by RGWGetObjFilter.
302 */
303 virtual int fixup_range(off_t& bl_ofs, off_t& bl_end) {
304 return 0;
305 }
306};
307
/* Abstract predicate over a (name, key) pair; both are passed by mutable reference. */
class RGWAccessListFilter {
public:
  virtual bool filter(std::string& name, std::string& key) = 0;
  virtual ~RGWAccessListFilter() = default;
};
313
314struct RGWCloneRangeInfo {
315 rgw_obj src;
316 off_t src_ofs;
317 off_t dst_ofs;
318 uint64_t len;
319};
320
321struct RGWObjManifestPart {
322 rgw_obj loc; /* the object where the data is located */
323 uint64_t loc_ofs; /* the offset at that object where the data is located */
324 uint64_t size; /* the part size */
325
326 RGWObjManifestPart() : loc_ofs(0), size(0) {}
327
328 void encode(bufferlist& bl) const {
329 ENCODE_START(2, 2, bl);
330 ::encode(loc, bl);
331 ::encode(loc_ofs, bl);
332 ::encode(size, bl);
333 ENCODE_FINISH(bl);
334 }
335
336 void decode(bufferlist::iterator& bl) {
337 DECODE_START_LEGACY_COMPAT_LEN_32(2, 2, 2, bl);
338 ::decode(loc, bl);
339 ::decode(loc_ofs, bl);
340 ::decode(size, bl);
341 DECODE_FINISH(bl);
342 }
343
344 void dump(Formatter *f) const;
345 static void generate_test_instances(list<RGWObjManifestPart*>& o);
346};
347WRITE_CLASS_ENCODER(RGWObjManifestPart)
348
349/*
350 The manifest defines a set of rules for structuring the object parts.
351 There are a few terms to note:
352 - head: the head part of the object, which is the part that contains
353 the first chunk of data. An object might not have a head (as in the
354 case of multipart-part objects).
355 - stripe: data portion of a single rgw object that resides on a single
356 rados object.
357 - part: a collection of stripes that make a contiguous part of an
358 object. A regular object will only have one part (although might have
359 many stripes), a multipart object might have many parts. Each part
360 has a fixed stripe size, although the last stripe of a part might
361 be smaller than that. Consecutive parts may be merged if their stripe
362 value is the same.
363*/
364
365struct RGWObjManifestRule {
366 uint32_t start_part_num;
367 uint64_t start_ofs;
368 uint64_t part_size; /* each part size, 0 if there's no part size, meaning it's unlimited */
369 uint64_t stripe_max_size; /* underlying obj max size */
370 string override_prefix;
371
372 RGWObjManifestRule() : start_part_num(0), start_ofs(0), part_size(0), stripe_max_size(0) {}
373 RGWObjManifestRule(uint32_t _start_part_num, uint64_t _start_ofs, uint64_t _part_size, uint64_t _stripe_max_size) :
374 start_part_num(_start_part_num), start_ofs(_start_ofs), part_size(_part_size), stripe_max_size(_stripe_max_size) {}
375
376 void encode(bufferlist& bl) const {
377 ENCODE_START(2, 1, bl);
378 ::encode(start_part_num, bl);
379 ::encode(start_ofs, bl);
380 ::encode(part_size, bl);
381 ::encode(stripe_max_size, bl);
382 ::encode(override_prefix, bl);
383 ENCODE_FINISH(bl);
384 }
385
386 void decode(bufferlist::iterator& bl) {
387 DECODE_START(2, bl);
388 ::decode(start_part_num, bl);
389 ::decode(start_ofs, bl);
390 ::decode(part_size, bl);
391 ::decode(stripe_max_size, bl);
392 if (struct_v >= 2)
393 ::decode(override_prefix, bl);
394 DECODE_FINISH(bl);
395 }
396 void dump(Formatter *f) const;
397};
398WRITE_CLASS_ENCODER(RGWObjManifestRule)
399
400class RGWObjManifest {
401protected:
402 bool explicit_objs; /* old manifest? */
403 map<uint64_t, RGWObjManifestPart> objs;
404
405 uint64_t obj_size;
406
407 rgw_obj obj;
408 uint64_t head_size;
409 string head_placement_rule;
410
411 uint64_t max_head_size;
412 string prefix;
413 rgw_bucket_placement tail_placement; /* might be different than the original bucket,
414 as object might have been copied across pools */
415 map<uint64_t, RGWObjManifestRule> rules;
416
417 string tail_instance; /* tail object's instance */
418
419 void convert_to_explicit(const RGWZoneGroup& zonegroup, const RGWZoneParams& zone_params);
420 int append_explicit(RGWObjManifest& m, const RGWZoneGroup& zonegroup, const RGWZoneParams& zone_params);
421 void append_rules(RGWObjManifest& m, map<uint64_t, RGWObjManifestRule>::iterator& iter, string *override_prefix);
422
423 void update_iterators() {
424 begin_iter.seek(0);
425 end_iter.seek(obj_size);
426 }
427public:
428
429 RGWObjManifest() : explicit_objs(false), obj_size(0), head_size(0), max_head_size(0),
430 begin_iter(this), end_iter(this) {}
431 RGWObjManifest(const RGWObjManifest& rhs) {
432 *this = rhs;
433 }
434 RGWObjManifest& operator=(const RGWObjManifest& rhs) {
435 explicit_objs = rhs.explicit_objs;
436 objs = rhs.objs;
437 obj_size = rhs.obj_size;
438 obj = rhs.obj;
439 head_size = rhs.head_size;
440 max_head_size = rhs.max_head_size;
441 prefix = rhs.prefix;
442 tail_placement = rhs.tail_placement;
443 rules = rhs.rules;
444 tail_instance = rhs.tail_instance;
445
446 begin_iter.set_manifest(this);
447 end_iter.set_manifest(this);
448
449 begin_iter.seek(rhs.begin_iter.get_ofs());
450 end_iter.seek(rhs.end_iter.get_ofs());
451
452 return *this;
453 }
454
455 map<uint64_t, RGWObjManifestPart>& get_explicit_objs() {
456 return objs;
457 }
458
459
460 void set_explicit(uint64_t _size, map<uint64_t, RGWObjManifestPart>& _objs) {
461 explicit_objs = true;
462 obj_size = _size;
463 objs.swap(_objs);
464 }
465
466 void get_implicit_location(uint64_t cur_part_id, uint64_t cur_stripe, uint64_t ofs, string *override_prefix, rgw_obj_select *location);
467
468 void set_trivial_rule(uint64_t tail_ofs, uint64_t stripe_max_size) {
469 RGWObjManifestRule rule(0, tail_ofs, 0, stripe_max_size);
470 rules[0] = rule;
471 max_head_size = tail_ofs;
472 }
473
474 void set_multipart_part_rule(uint64_t stripe_max_size, uint64_t part_num) {
475 RGWObjManifestRule rule(0, 0, 0, stripe_max_size);
476 rule.start_part_num = part_num;
477 rules[0] = rule;
478 max_head_size = 0;
479 }
480
481 void encode(bufferlist& bl) const {
482 ENCODE_START(7, 6, bl);
483 ::encode(obj_size, bl);
484 ::encode(objs, bl);
485 ::encode(explicit_objs, bl);
486 ::encode(obj, bl);
487 ::encode(head_size, bl);
488 ::encode(max_head_size, bl);
489 ::encode(prefix, bl);
490 ::encode(rules, bl);
491 bool encode_tail_bucket = !(tail_placement.bucket == obj.bucket);
492 ::encode(encode_tail_bucket, bl);
493 if (encode_tail_bucket) {
494 ::encode(tail_placement.bucket, bl);
495 }
496 bool encode_tail_instance = (tail_instance != obj.key.instance);
497 ::encode(encode_tail_instance, bl);
498 if (encode_tail_instance) {
499 ::encode(tail_instance, bl);
500 }
501 ::encode(head_placement_rule, bl);
502 ::encode(tail_placement.placement_rule, bl);
503 ENCODE_FINISH(bl);
504 }
505
506 void decode(bufferlist::iterator& bl) {
507 DECODE_START_LEGACY_COMPAT_LEN_32(7, 2, 2, bl);
508 ::decode(obj_size, bl);
509 ::decode(objs, bl);
510 if (struct_v >= 3) {
511 ::decode(explicit_objs, bl);
512 ::decode(obj, bl);
513 ::decode(head_size, bl);
514 ::decode(max_head_size, bl);
515 ::decode(prefix, bl);
516 ::decode(rules, bl);
517 } else {
518 explicit_objs = true;
519 if (!objs.empty()) {
520 map<uint64_t, RGWObjManifestPart>::iterator iter = objs.begin();
521 obj = iter->second.loc;
522 head_size = iter->second.size;
523 max_head_size = head_size;
524 }
525 }
526
527 if (explicit_objs && head_size > 0 && !objs.empty()) {
528 /* patch up manifest due to issue 16435:
529 * the first object in the explicit objs list might not be the one we need to access, use the
530 * head object instead if set. This would happen if we had an old object that was created
531 * when the explicit objs manifest was around, and it got copied.
532 */
533 rgw_obj& obj_0 = objs[0].loc;
534 if (!obj_0.get_oid().empty() && obj_0.key.ns.empty()) {
535 objs[0].loc = obj;
536 objs[0].size = head_size;
537 }
538 }
539
540 if (struct_v >= 4) {
541 if (struct_v < 6) {
542 ::decode(tail_placement.bucket, bl);
543 } else {
544 bool need_to_decode;
545 ::decode(need_to_decode, bl);
546 if (need_to_decode) {
547 ::decode(tail_placement.bucket, bl);
548 } else {
549 tail_placement.bucket = obj.bucket;
550 }
551 }
552 }
553
554 if (struct_v >= 5) {
555 if (struct_v < 6) {
556 ::decode(tail_instance, bl);
557 } else {
558 bool need_to_decode;
559 ::decode(need_to_decode, bl);
560 if (need_to_decode) {
561 ::decode(tail_instance, bl);
562 } else {
563 tail_instance = obj.key.instance;
564 }
565 }
566 } else { // old object created before 'tail_instance' field added to manifest
567 tail_instance = obj.key.instance;
568 }
569
570 if (struct_v >= 7) {
571 ::decode(head_placement_rule, bl);
572 ::decode(tail_placement.placement_rule, bl);
573 }
574
575 update_iterators();
576 DECODE_FINISH(bl);
577 }
578
579 void dump(Formatter *f) const;
580 static void generate_test_instances(list<RGWObjManifest*>& o);
581
582 int append(RGWObjManifest& m, RGWZoneGroup& zonegroup, RGWZoneParams& zone_params);
583 int append(RGWObjManifest& m, RGWRados *store);
584
585 bool get_rule(uint64_t ofs, RGWObjManifestRule *rule);
586
587 bool empty() {
588 if (explicit_objs)
589 return objs.empty();
590 return rules.empty();
591 }
592
593 bool has_explicit_objs() {
594 return explicit_objs;
595 }
596
597 bool has_tail() {
598 if (explicit_objs) {
599 if (objs.size() == 1) {
600 map<uint64_t, RGWObjManifestPart>::iterator iter = objs.begin();
601 rgw_obj& o = iter->second.loc;
602 return !(obj == o);
603 }
604 return (objs.size() >= 2);
605 }
606 return (obj_size > head_size);
607 }
608
609 void set_head(const string& placement_rule, const rgw_obj& _o, uint64_t _s) {
610 head_placement_rule = placement_rule;
611 obj = _o;
612 head_size = _s;
613
614 if (explicit_objs && head_size > 0) {
615 objs[0].loc = obj;
616 objs[0].size = head_size;
617 }
618 }
619
620 const rgw_obj& get_obj() {
621 return obj;
622 }
623
624 void set_tail_placement(const string& placement_rule, const rgw_bucket& _b) {
625 tail_placement.placement_rule = placement_rule;
626 tail_placement.bucket = _b;
627 }
628
629 const rgw_bucket_placement& get_tail_placement() {
630 return tail_placement;
631 }
632
633 const string& get_head_placement_rule() {
634 return head_placement_rule;
635 }
636
637 void set_prefix(const string& _p) {
638 prefix = _p;
639 }
640
641 const string& get_prefix() {
642 return prefix;
643 }
644
645 void set_tail_instance(const string& _ti) {
646 tail_instance = _ti;
647 }
648
649 const string& get_tail_instance() {
650 return tail_instance;
651 }
652
653 void set_head_size(uint64_t _s) {
654 head_size = _s;
655 }
656
657 void set_obj_size(uint64_t s) {
658 obj_size = s;
659
660 update_iterators();
661 }
662
663 uint64_t get_obj_size() {
664 return obj_size;
665 }
666
667 uint64_t get_head_size() {
668 return head_size;
669 }
670
671 void set_max_head_size(uint64_t s) {
672 max_head_size = s;
673 }
674
675 uint64_t get_max_head_size() {
676 return max_head_size;
677 }
678
679 class obj_iterator {
680 RGWObjManifest *manifest;
681 uint64_t part_ofs; /* where current part starts */
682 uint64_t stripe_ofs; /* where current stripe starts */
683 uint64_t ofs; /* current position within the object */
684 uint64_t stripe_size; /* current part size */
685
686 int cur_part_id;
687 int cur_stripe;
688 string cur_override_prefix;
689
690 rgw_obj_select location;
691
692 map<uint64_t, RGWObjManifestRule>::iterator rule_iter;
693 map<uint64_t, RGWObjManifestRule>::iterator next_rule_iter;
694
695 map<uint64_t, RGWObjManifestPart>::iterator explicit_iter;
696
697 void init() {
698 part_ofs = 0;
699 stripe_ofs = 0;
700 ofs = 0;
701 stripe_size = 0;
702 cur_part_id = 0;
703 cur_stripe = 0;
704 }
705
706 void update_explicit_pos();
707
708
709 protected:
710
711 void set_manifest(RGWObjManifest *m) {
712 manifest = m;
713 }
714
715 public:
716 obj_iterator() : manifest(NULL) {
717 init();
718 }
719 explicit obj_iterator(RGWObjManifest *_m) : manifest(_m) {
720 init();
721 if (!manifest->empty()) {
722 seek(0);
723 }
724 }
725 obj_iterator(RGWObjManifest *_m, uint64_t _ofs) : manifest(_m) {
726 init();
727 if (!manifest->empty()) {
728 seek(_ofs);
729 }
730 }
731 void seek(uint64_t ofs);
732
733 void operator++();
734 bool operator==(const obj_iterator& rhs) {
735 return (ofs == rhs.ofs);
736 }
737 bool operator!=(const obj_iterator& rhs) {
738 return (ofs != rhs.ofs);
739 }
740 const rgw_obj_select& get_location() {
741 return location;
742 }
743
744 /* start of current stripe */
745 uint64_t get_stripe_ofs() {
746 if (manifest->explicit_objs) {
747 return explicit_iter->first;
748 }
749 return stripe_ofs;
750 }
751
752 /* current ofs relative to start of rgw object */
753 uint64_t get_ofs() const {
754 return ofs;
755 }
756
757 /* stripe number */
758 int get_cur_stripe() const {
759 return cur_stripe;
760 }
761
762 /* current stripe size */
763 uint64_t get_stripe_size() {
764 if (manifest->explicit_objs) {
765 return explicit_iter->second.size;
766 }
767 return stripe_size;
768 }
769
770 /* offset where data starts within current stripe */
771 uint64_t location_ofs() {
772 if (manifest->explicit_objs) {
773 return explicit_iter->second.loc_ofs;
774 }
775 return 0; /* all stripes start at zero offset */
776 }
777
778 void update_location();
779
780 friend class RGWObjManifest;
781 };
782
783 const obj_iterator& obj_begin();
784 const obj_iterator& obj_end();
785 obj_iterator obj_find(uint64_t ofs);
786
787 obj_iterator begin_iter;
788 obj_iterator end_iter;
789
790 /*
791 * simple object generator. Using a simple single rule manifest.
792 */
793 class generator {
794 RGWObjManifest *manifest;
795 uint64_t last_ofs;
796 uint64_t cur_part_ofs;
797 int cur_part_id;
798 int cur_stripe;
799 uint64_t cur_stripe_size;
800 string cur_oid;
801
802 string oid_prefix;
803
804 rgw_obj_select cur_obj;
7c673cae
FG
805
806 RGWObjManifestRule rule;
807
808 public:
809 generator() : manifest(NULL), last_ofs(0), cur_part_ofs(0), cur_part_id(0),
810 cur_stripe(0), cur_stripe_size(0) {}
811 int create_begin(CephContext *cct, RGWObjManifest *manifest, const string& placement_rule, rgw_bucket& bucket, rgw_obj& obj);
812
813 int create_next(uint64_t ofs);
814
815 rgw_raw_obj get_cur_obj(RGWZoneGroup& zonegroup, RGWZoneParams& zone_params) { return cur_obj.get_raw_obj(zonegroup, zone_params); }
816 rgw_raw_obj get_cur_obj(RGWRados *store) { return cur_obj.get_raw_obj(store); }
817
818 /* total max size of current stripe (including head obj) */
819 uint64_t cur_stripe_max_size() {
820 return cur_stripe_size;
821 }
822 };
823};
824WRITE_CLASS_ENCODER(RGWObjManifest)
825
826struct RGWUploadPartInfo {
827 uint32_t num;
828 uint64_t size;
829 uint64_t accounted_size{0};
830 string etag;
831 ceph::real_time modified;
832 RGWObjManifest manifest;
833 RGWCompressionInfo cs_info;
834
835 RGWUploadPartInfo() : num(0), size(0) {}
836
837 void encode(bufferlist& bl) const {
838 ENCODE_START(4, 2, bl);
839 ::encode(num, bl);
840 ::encode(size, bl);
841 ::encode(etag, bl);
842 ::encode(modified, bl);
843 ::encode(manifest, bl);
844 ::encode(cs_info, bl);
845 ::encode(accounted_size, bl);
846 ENCODE_FINISH(bl);
847 }
848 void decode(bufferlist::iterator& bl) {
849 DECODE_START_LEGACY_COMPAT_LEN(4, 2, 2, bl);
850 ::decode(num, bl);
851 ::decode(size, bl);
852 ::decode(etag, bl);
853 ::decode(modified, bl);
854 if (struct_v >= 3)
855 ::decode(manifest, bl);
856 if (struct_v >= 4) {
857 ::decode(cs_info, bl);
858 ::decode(accounted_size, bl);
859 } else {
860 accounted_size = size;
861 }
862 DECODE_FINISH(bl);
863 }
864 void dump(Formatter *f) const;
865 static void generate_test_instances(list<RGWUploadPartInfo*>& o);
866};
867WRITE_CLASS_ENCODER(RGWUploadPartInfo)
868
869struct RGWObjState {
870 rgw_obj obj;
871 bool is_atomic;
872 bool has_attrs;
873 bool exists;
874 uint64_t size; //< size of raw object
875 uint64_t accounted_size{0}; //< size before compression, encryption
876 ceph::real_time mtime;
877 uint64_t epoch;
878 bufferlist obj_tag;
181888fb 879 bufferlist tail_tag;
7c673cae
FG
880 string write_tag;
881 bool fake_tag;
882 RGWObjManifest manifest;
883 bool has_manifest;
884 string shadow_obj;
885 bool has_data;
886 bufferlist data;
887 bool prefetch_data;
888 bool keep_tail;
889 bool is_olh;
890 bufferlist olh_tag;
891 uint64_t pg_ver;
892 uint32_t zone_short_id;
893
894 /* important! don't forget to update copy constructor */
895
896 RGWObjVersionTracker objv_tracker;
897
898 map<string, bufferlist> attrset;
899 RGWObjState() : is_atomic(false), has_attrs(0), exists(false),
900 size(0), epoch(0), fake_tag(false), has_manifest(false),
901 has_data(false), prefetch_data(false), keep_tail(false), is_olh(false),
902 pg_ver(0), zone_short_id(0) {}
903 RGWObjState(const RGWObjState& rhs) : obj (rhs.obj) {
904 is_atomic = rhs.is_atomic;
905 has_attrs = rhs.has_attrs;
906 exists = rhs.exists;
907 size = rhs.size;
908 accounted_size = rhs.accounted_size;
909 mtime = rhs.mtime;
910 epoch = rhs.epoch;
911 if (rhs.obj_tag.length()) {
912 obj_tag = rhs.obj_tag;
913 }
181888fb
FG
914 if (rhs.tail_tag.length()) {
915 tail_tag = rhs.tail_tag;
916 }
7c673cae
FG
917 write_tag = rhs.write_tag;
918 fake_tag = rhs.fake_tag;
919 if (rhs.has_manifest) {
920 manifest = rhs.manifest;
921 }
922 has_manifest = rhs.has_manifest;
923 shadow_obj = rhs.shadow_obj;
924 has_data = rhs.has_data;
925 if (rhs.data.length()) {
926 data = rhs.data;
927 }
928 prefetch_data = rhs.prefetch_data;
929 keep_tail = rhs.keep_tail;
930 is_olh = rhs.is_olh;
931 objv_tracker = rhs.objv_tracker;
932 pg_ver = rhs.pg_ver;
933 }
934
935 bool get_attr(string name, bufferlist& dest) {
936 map<string, bufferlist>::iterator iter = attrset.find(name);
937 if (iter != attrset.end()) {
938 dest = iter->second;
939 return true;
940 }
941 return false;
942 }
943};
944
945struct RGWRawObjState {
946 rgw_raw_obj obj;
947 bool has_attrs{false};
948 bool exists{false};
949 uint64_t size{0};
950 ceph::real_time mtime;
951 uint64_t epoch;
952 bufferlist obj_tag;
953 bool has_data{false};
954 bufferlist data;
955 bool prefetch_data{false};
956 uint64_t pg_ver{0};
957
958 /* important! don't forget to update copy constructor */
959
960 RGWObjVersionTracker objv_tracker;
961
962 map<string, bufferlist> attrset;
963 RGWRawObjState() {}
964 RGWRawObjState(const RGWRawObjState& rhs) : obj (rhs.obj) {
965 has_attrs = rhs.has_attrs;
966 exists = rhs.exists;
967 size = rhs.size;
968 mtime = rhs.mtime;
969 epoch = rhs.epoch;
970 if (rhs.obj_tag.length()) {
971 obj_tag = rhs.obj_tag;
972 }
973 has_data = rhs.has_data;
974 if (rhs.data.length()) {
975 data = rhs.data;
976 }
977 prefetch_data = rhs.prefetch_data;
978 pg_ver = rhs.pg_ver;
979 objv_tracker = rhs.objv_tracker;
980 }
981};
982
983struct RGWPoolIterCtx {
984 librados::IoCtx io_ctx;
985 librados::NObjectIterator iter;
986};
987
988struct RGWListRawObjsCtx {
989 bool initialized;
990 RGWPoolIterCtx iter_ctx;
991
992 RGWListRawObjsCtx() : initialized(false) {}
993};
994
995struct RGWDefaultSystemMetaObjInfo {
996 string default_id;
997
998 void encode(bufferlist& bl) const {
999 ENCODE_START(1, 1, bl);
1000 ::encode(default_id, bl);
1001 ENCODE_FINISH(bl);
1002 }
1003
1004 void decode(bufferlist::iterator& bl) {
1005 DECODE_START(1, bl);
1006 ::decode(default_id, bl);
1007 DECODE_FINISH(bl);
1008 }
1009
1010 void dump(Formatter *f) const;
1011 void decode_json(JSONObj *obj);
1012};
1013WRITE_CLASS_ENCODER(RGWDefaultSystemMetaObjInfo)
1014
1015struct RGWNameToId {
1016 string obj_id;
1017
1018 void encode(bufferlist& bl) const {
1019 ENCODE_START(1, 1, bl);
1020 ::encode(obj_id, bl);
1021 ENCODE_FINISH(bl);
1022 }
1023
1024 void decode(bufferlist::iterator& bl) {
1025 DECODE_START(1, bl);
1026 ::decode(obj_id, bl);
1027 DECODE_FINISH(bl);
1028 }
1029
1030 void dump(Formatter *f) const;
1031 void decode_json(JSONObj *obj);
1032};
1033WRITE_CLASS_ENCODER(RGWNameToId)
1034
1035class RGWSystemMetaObj {
1036protected:
1037 string id;
1038 string name;
1039
1040 CephContext *cct;
1041 RGWRados *store;
1042
1043 int store_name(bool exclusive);
1044 int store_info(bool exclusive);
1045 int read_info(const string& obj_id, bool old_format = false);
1046 int read_id(const string& obj_name, string& obj_id);
1047 int read_default(RGWDefaultSystemMetaObjInfo& default_info,
1048 const string& oid);
1049 /* read and use default id */
1050 int use_default(bool old_format = false);
1051
1052public:
1053 RGWSystemMetaObj() : cct(NULL), store(NULL) {}
1054 RGWSystemMetaObj(const string& _name): name(_name), cct(NULL), store(NULL) {}
1055 RGWSystemMetaObj(const string& _id, const string& _name) : id(_id), name(_name), cct(NULL), store(NULL) {}
1056 RGWSystemMetaObj(CephContext *_cct, RGWRados *_store): cct(_cct), store(_store){}
1057 RGWSystemMetaObj(const string& _name, CephContext *_cct, RGWRados *_store): name(_name), cct(_cct), store(_store){}
1058 const string& get_name() const { return name; }
1059 const string& get_id() const { return id; }
1060
1061 void set_name(const string& _name) { name = _name;}
1062 void set_id(const string& _id) { id = _id;}
1063 void clear_id() { id.clear(); }
1064
1065 virtual ~RGWSystemMetaObj() {}
1066
1067 virtual void encode(bufferlist& bl) const {
1068 ENCODE_START(1, 1, bl);
1069 ::encode(id, bl);
1070 ::encode(name, bl);
1071 ENCODE_FINISH(bl);
1072 }
1073
1074 virtual void decode(bufferlist::iterator& bl) {
1075 DECODE_START(1, bl);
1076 ::decode(id, bl);
1077 ::decode(name, bl);
1078 DECODE_FINISH(bl);
1079 }
1080
1081 void reinit_instance(CephContext *_cct, RGWRados *_store) {
1082 cct = _cct;
1083 store = _store;
1084 }
1085 int init(CephContext *_cct, RGWRados *_store, bool setup_obj = true, bool old_format = false);
1086 virtual int read_default_id(string& default_id, bool old_format = false);
1087 virtual int set_as_default(bool exclusive = false);
1088 int delete_default();
1089 virtual int create(bool exclusive = true);
1090 int delete_obj(bool old_format = false);
1091 int rename(const string& new_name);
1092 int update() { return store_info(false);}
1093 int update_name() { return store_name(false);}
1094 int read();
1095 int write(bool exclusive);
1096
1097 virtual rgw_pool get_pool(CephContext *cct) = 0;
1098 virtual const string get_default_oid(bool old_format = false) = 0;
1099 virtual const string& get_names_oid_prefix() = 0;
1100 virtual const string& get_info_oid_prefix(bool old_format = false) = 0;
1101 virtual const string& get_predefined_name(CephContext *cct) = 0;
1102
1103 void dump(Formatter *f) const;
1104 void decode_json(JSONObj *obj);
1105};
1106WRITE_CLASS_ENCODER(RGWSystemMetaObj)
1107
1108struct RGWZonePlacementInfo {
1109 rgw_pool index_pool;
1110 rgw_pool data_pool;
1111 rgw_pool data_extra_pool; /* if not set we should use data_pool */
1112 RGWBucketIndexType index_type;
1113 std::string compression_type;
1114
1115 RGWZonePlacementInfo() : index_type(RGWBIType_Normal) {}
1116
1117 void encode(bufferlist& bl) const {
1118 ENCODE_START(6, 1, bl);
1119 ::encode(index_pool.to_str(), bl);
1120 ::encode(data_pool.to_str(), bl);
1121 ::encode(data_extra_pool.to_str(), bl);
1122 ::encode((uint32_t)index_type, bl);
1123 ::encode(compression_type, bl);
1124 ENCODE_FINISH(bl);
1125 }
1126
1127 void decode(bufferlist::iterator& bl) {
1128 DECODE_START(6, bl);
1129 string index_pool_str;
1130 string data_pool_str;
1131 ::decode(index_pool_str, bl);
1132 index_pool = rgw_pool(index_pool_str);
1133 ::decode(data_pool_str, bl);
1134 data_pool = rgw_pool(data_pool_str);
1135 if (struct_v >= 4) {
1136 string data_extra_pool_str;
1137 ::decode(data_extra_pool_str, bl);
1138 data_extra_pool = rgw_pool(data_extra_pool_str);
1139 }
1140 if (struct_v >= 5) {
1141 uint32_t it;
1142 ::decode(it, bl);
1143 index_type = (RGWBucketIndexType)it;
1144 }
1145 if (struct_v >= 6) {
1146 ::decode(compression_type, bl);
1147 }
1148 DECODE_FINISH(bl);
1149 }
31f18b77 1150 const rgw_pool& get_data_extra_pool() const {
7c673cae
FG
1151 if (data_extra_pool.empty()) {
1152 return data_pool;
1153 }
1154 return data_extra_pool;
1155 }
1156 void dump(Formatter *f) const;
1157 void decode_json(JSONObj *obj);
1158};
1159WRITE_CLASS_ENCODER(RGWZonePlacementInfo)
1160
// Per-zone parameters: the set of pools the zone uses, its system access key,
// the placement-id -> RGWZonePlacementInfo map, the owning realm id and the
// tier configuration. Derives from RGWSystemMetaObj and overrides its
// oid/name hooks.
1161struct RGWZoneParams : RGWSystemMetaObj {
1162 rgw_pool domain_root;
1163 rgw_pool metadata_heap;
1164 rgw_pool control_pool;
1165 rgw_pool gc_pool;
1166 rgw_pool lc_pool;
1167 rgw_pool log_pool;
1168 rgw_pool intent_log_pool;
1169 rgw_pool usage_log_pool;
1170
1171 rgw_pool user_keys_pool;
1172 rgw_pool user_email_pool;
1173 rgw_pool user_swift_pool;
1174 rgw_pool user_uid_pool;
1175 rgw_pool roles_pool;
31f18b77 1176 rgw_pool reshard_pool;
7c673cae
FG
1177
1178 RGWAccessKey system_key;
1179
  // keyed by placement id (see get_placement() / get_head_data_pool())
1180 map<string, RGWZonePlacementInfo> placement_pools;
1181
1182 string realm_id;
1183
  // ordered with ltstr_nocase -- presumably case-insensitive keys; confirm
  // against the comparator's definition
31f18b77 1184 map<string, string, ltstr_nocase> tier_config;
7c673cae
FG
1185
1186 RGWZoneParams() : RGWSystemMetaObj() {}
1187 RGWZoneParams(const string& name) : RGWSystemMetaObj(name){}
1188 RGWZoneParams(const string& id, const string& name) : RGWSystemMetaObj(id, name) {}
1189 RGWZoneParams(const string& id, const string& name, const string& _realm_id)
1190 : RGWSystemMetaObj(id, name), realm_id(_realm_id) {}
1191
1192 rgw_pool get_pool(CephContext *cct);
1193 const string get_default_oid(bool old_format = false) override;
1194 const string& get_names_oid_prefix() override;
1195 const string& get_info_oid_prefix(bool old_format = false) override;
1196 const string& get_predefined_name(CephContext *cct) override;
1197
1198 int init(CephContext *_cct, RGWRados *_store, bool setup_obj = true,
1199 bool old_format = false);
  // keep the base-class init overloads visible next to the one declared above
1200 using RGWSystemMetaObj::init;
1201 int read_default_id(string& default_id, bool old_format = false) override;
1202 int set_as_default(bool exclusive = false) override;
1203 int create_default(bool old_format = false);
1204 int create(bool exclusive = true) override;
1205 int fix_pool_names();
1206
1207 const string& get_compression_type(const string& placement_rule) const;
1208
  // Serialize as struct version 10 (compat 1). The statement order below is
  // the on-wire field order; note that the RGWSystemMetaObj base is encoded
  // in the middle (after user_uid_pool), not first. New fields must be
  // appended and matched by a struct_v guard in decode().
1209 void encode(bufferlist& bl) const override {
31f18b77 1210 ENCODE_START(10, 1, bl);
7c673cae
FG
1211 ::encode(domain_root, bl);
1212 ::encode(control_pool, bl);
1213 ::encode(gc_pool, bl);
1214 ::encode(log_pool, bl);
1215 ::encode(intent_log_pool, bl);
1216 ::encode(usage_log_pool, bl);
1217 ::encode(user_keys_pool, bl);
1218 ::encode(user_email_pool, bl);
1219 ::encode(user_swift_pool, bl);
1220 ::encode(user_uid_pool, bl);
1221 RGWSystemMetaObj::encode(bl);
1222 ::encode(system_key, bl);
1223 ::encode(placement_pools, bl);
1224 ::encode(metadata_heap, bl);
1225 ::encode(realm_id, bl);
1226 ::encode(lc_pool, bl);
1227 ::encode(tier_config, bl);
1228 ::encode(roles_pool, bl);
31f18b77 1229 ::encode(reshard_pool, bl);
7c673cae
FG
1230 ENCODE_FINISH(bl);
1231 }
1232
  // Deserialize any version up to 10.
  //  - v2..v5 carry only a name where the base object now sits; id is set to
  //    that name. v1 carries neither.
  //  - lc_pool (v7), roles_pool (v9) and reshard_pool (v10) get values
  //    derived from log_pool.name / name when the encoded data predates them.
1233 void decode(bufferlist::iterator& bl) override {
31f18b77 1234 DECODE_START(10, bl);
7c673cae
FG
1235 ::decode(domain_root, bl);
1236 ::decode(control_pool, bl);
1237 ::decode(gc_pool, bl);
1238 ::decode(log_pool, bl);
1239 ::decode(intent_log_pool, bl);
1240 ::decode(usage_log_pool, bl);
1241 ::decode(user_keys_pool, bl);
1242 ::decode(user_email_pool, bl);
1243 ::decode(user_swift_pool, bl);
1244 ::decode(user_uid_pool, bl);
1245 if (struct_v >= 6) {
1246 RGWSystemMetaObj::decode(bl);
1247 } else if (struct_v >= 2) {
1248 ::decode(name, bl);
1249 id = name;
1250 }
1251 if (struct_v >= 3)
1252 ::decode(system_key, bl);
1253 if (struct_v >= 4)
1254 ::decode(placement_pools, bl);
1255 if (struct_v >= 5)
1256 ::decode(metadata_heap, bl);
1257 if (struct_v >= 6) {
1258 ::decode(realm_id, bl);
1259 }
1260 if (struct_v >= 7) {
1261 ::decode(lc_pool, bl);
1262 } else {
d2e6a577 1263 lc_pool = log_pool.name + ":lc";
7c673cae
FG
1264 }
1265 if (struct_v >= 8) {
1266 ::decode(tier_config, bl);
1267 }
1268 if (struct_v >= 9) {
1269 ::decode(roles_pool, bl);
1270 } else {
d2e6a577 1271 roles_pool = name + ".rgw.meta:roles";
7c673cae 1272 }
31f18b77
FG
1273 if (struct_v >= 10) {
1274 ::decode(reshard_pool, bl);
1275 } else {
c07f9fc5 1276 reshard_pool = log_pool.name + ":reshard";
31f18b77 1277 }
7c673cae
FG
1278 DECODE_FINISH(bl);
1279 }
1280 void dump(Formatter *f) const;
1281 void decode_json(JSONObj *obj);
1282 static void generate_test_instances(list<RGWZoneParams*>& o);
1283
  // Copies the placement info registered under placement_id into *placement.
  // Returns false (leaving *placement untouched) when the id is unknown.
7c673cae
FG
1284 bool get_placement(const string& placement_id, RGWZonePlacementInfo *placement) const {
1285 auto iter = placement_pools.find(placement_id);
1286 if (iter == placement_pools.end()) {
1287 return false;
1288 }
1289 *placement = iter->second;
1290 return true;
1291 }
1292
1293 /*
1294 * return data pool of the head object
1295 */
  // Resolution order:
  //  1. a non-empty explicit_placement.data_pool on the object's bucket wins;
  //  2. otherwise placement_id is looked up in placement_pools.
  // In both cases obj.in_extra_data selects the extra-data pool instead of
  // the plain data pool. Returns false (leaving *pool untouched) when
  // placement_id is empty or unknown.
1296 bool get_head_data_pool(const string& placement_id, const rgw_obj& obj, rgw_pool *pool) const {
1297 const rgw_data_placement_target& explicit_placement = obj.bucket.explicit_placement;
1298 if (!explicit_placement.data_pool.empty()) {
1299 if (!obj.in_extra_data) {
1300 *pool = explicit_placement.data_pool;
1301 } else {
1302 *pool = explicit_placement.get_data_extra_pool();
1303 }
1304 return true;
1305 }
1306 if (placement_id.empty()) {
1307 return false;
1308 }
1309 auto iter = placement_pools.find(placement_id);
1310 if (iter == placement_pools.end()) {
1311 return false;
1312 }
1313 if (!obj.in_extra_data) {
1314 *pool = iter->second.data_pool;
1315 } else {
31f18b77 1316 *pool = iter->second.get_data_extra_pool();
7c673cae
FG
1317 }
1318 return true;
1319 }
1320};
1321WRITE_CLASS_ENCODER(RGWZoneParams)
1322
// Description of one zone as stored inside a zonegroup (RGWZoneGroup::zones):
// identity, endpoints, logging/read-only flags, tier type, bucket index
// sharding default and the set of zones it syncs from.
1323struct RGWZone {
1324 string id;
1325 string name;
1326 list<string> endpoints;
1327 bool log_meta;
1328 bool log_data;
1329 bool read_only;
1330 string tier_type;
1331
1332/**
1333 * Represents the number of shards for the bucket index object, a value of zero
1334 * indicates there is no sharding. By default (no sharding), the name of the object
1335 * is '.dir.{marker}', with sharding, the name is '.dir.{marker}.{sharding_id}',
1336 * sharding_id is zero-based value. It is not recommended to set a too large value
1337 * (e.g. thousand) as it increases the cost for bucket listing.
1338 */
1339 uint32_t bucket_index_max_shards;
1340
1341 bool sync_from_all;
1342 set<string> sync_from; /* list of zones to sync from */
1343
  // Defaults: no logging, writable, unsharded index, sync from every zone.
1344 RGWZone() : log_meta(false), log_data(false), read_only(false), bucket_index_max_shards(0),
1345 sync_from_all(true) {}
1346
  // Serialize as struct version 6 (compat 1). Statement order is the on-wire
  // field order; id is encoded after bucket_index_max_shards, not first.
1347 void encode(bufferlist& bl) const {
1348 ENCODE_START(6, 1, bl);
1349 ::encode(name, bl);
1350 ::encode(endpoints, bl);
1351 ::encode(log_meta, bl);
1352 ::encode(log_data, bl);
1353 ::encode(bucket_index_max_shards, bl);
1354 ::encode(id, bl);
1355 ::encode(read_only, bl);
1356 ::encode(tier_type, bl);
1357 ::encode(sync_from_all, bl);
1358 ::encode(sync_from, bl);
1359 ENCODE_FINISH(bl);
1360 }
1361
  // Deserialize any version up to 6. Versions before 4 carry no id, so the
  // name doubles as the id; later fields are read only when struct_v says
  // they are present and otherwise keep their current values.
1362 void decode(bufferlist::iterator& bl) {
1363 DECODE_START(6, bl);
1364 ::decode(name, bl);
1365 if (struct_v < 4) {
1366 id = name;
1367 }
1368 ::decode(endpoints, bl);
1369 if (struct_v >= 2) {
1370 ::decode(log_meta, bl);
1371 ::decode(log_data, bl);
1372 }
1373 if (struct_v >= 3) {
1374 ::decode(bucket_index_max_shards, bl);
1375 }
1376 if (struct_v >= 4) {
1377 ::decode(id, bl);
1378 ::decode(read_only, bl);
1379 }
1380 if (struct_v >= 5) {
1381 ::decode(tier_type, bl);
1382 }
1383 if (struct_v >= 6) {
1384 ::decode(sync_from_all, bl);
1385 ::decode(sync_from, bl);
1386 }
1387 DECODE_FINISH(bl);
1388 }
1389 void dump(Formatter *f) const;
1390 void decode_json(JSONObj *obj);
1391 static void generate_test_instances(list<RGWZone*>& o);
1392
  // const so it can be queried through a const RGWZone, like syncs_from()
1393 bool is_read_only() const { return read_only; }
1394
  // True when this zone syncs from zone_id: either it syncs from all zones or
  // zone_id is listed in sync_from.
28e407b8 1395 bool syncs_from(const string& zone_id) const {
7c673cae
FG
1396 return (sync_from_all || sync_from.find(zone_id) != sync_from.end());
1397 }
1398};
1399WRITE_CLASS_ENCODER(RGWZone)
1400
// Encodable wrapper around a single string, default_zonegroup.
1401struct RGWDefaultZoneGroupInfo {
1402 string default_zonegroup;
1403
  // Serialize as struct version 1 (compat 1).
1404 void encode(bufferlist& bl) const {
1405 ENCODE_START(1, 1, bl);
1406 ::encode(default_zonegroup, bl);
1407 ENCODE_FINISH(bl);
1408 }
1409
  // Inverse of encode().
1410 void decode(bufferlist::iterator& bl) {
1411 DECODE_START(1, bl);
1412 ::decode(default_zonegroup, bl);
1413 DECODE_FINISH(bl);
1414 }
1415 void dump(Formatter *f) const;
1416 void decode_json(JSONObj *obj);
1417 //todo: implement ceph-dencoder
1418};
1419WRITE_CLASS_ENCODER(RGWDefaultZoneGroupInfo)
1420
// A named placement target of a zonegroup, optionally restricted by tags.
1421struct RGWZoneGroupPlacementTarget {
1422 string name;
1423 set<string> tags;
1424
  // Returns true when the target carries no tags at all, or when at least one
  // entry of user_tags is present in tags. The list is only read, so it is
  // taken by const reference (callers holding a mutable list still bind).
c07f9fc5 1425 bool user_permitted(const list<string>& user_tags) const {
7c673cae
FG
1426 if (tags.empty()) {
1427 return true;
1428 }
1429 for (const auto& rule : user_tags) {
1430 if (tags.find(rule) != tags.end()) {
1431 return true;
1432 }
1433 }
1434 return false;
1435 }
1436
  // Serialize as struct version 1 (compat 1): name, then tags.
1437 void encode(bufferlist& bl) const {
1438 ENCODE_START(1, 1, bl);
1439 ::encode(name, bl);
1440 ::encode(tags, bl);
1441 ENCODE_FINISH(bl);
1442 }
1443
  // Inverse of encode().
1444 void decode(bufferlist::iterator& bl) {
1445 DECODE_START(1, bl);
1446 ::decode(name, bl);
1447 ::decode(tags, bl);
1448 DECODE_FINISH(bl);
1449 }
1450 void dump(Formatter *f) const;
1451 void decode_json(JSONObj *obj);
1452};
1453WRITE_CLASS_ENCODER(RGWZoneGroupPlacementTarget)
1454
1455
// A zonegroup: its API name, endpoints, master flag/zone, member zones,
// placement targets, hostnames and owning realm. Derives from
// RGWSystemMetaObj and overrides its oid/name hooks.
1456struct RGWZoneGroup : public RGWSystemMetaObj {
1457 string api_name;
1458 list<string> endpoints;
  // Default-initialized here because the (id, name) and (name) constructors
  // below do not set it, yet it is read by is_master_zonegroup() and encode().
1459 bool is_master = false;
1460
1461 string master_zone;
1462 map<string, RGWZone> zones;
1463
1464 map<string, RGWZoneGroupPlacementTarget> placement_targets;
1465 string default_placement;
1466
1467 list<string> hostnames;
1468 list<string> hostnames_s3website;
1469 // TODO: Maybe convert hostnames to a map<string,list<string>> for
1470 // endpoint_type->hostnames
1471/*
147220:05 < _robbat21irssi> maybe I do someting like: if (hostname_map.empty()) { populate all map keys from hostnames; };
147320:05 < _robbat21irssi> but that's a later compatability migration planning bit
147420:06 < yehudasa> more like if (!hostnames.empty()) {
147520:06 < yehudasa> for (list<string>::iterator iter = hostnames.begin(); iter != hostnames.end(); ++iter) {
147620:06 < yehudasa> hostname_map["s3"].append(iter->second);
147720:07 < yehudasa> hostname_map["s3website"].append(iter->second);
147820:07 < yehudasa> s/append/push_back/g
147920:08 < _robbat21irssi> inner loop over APIs
148020:08 < yehudasa> yeah, probably
148120:08 < _robbat21irssi> s3, s3website, swift, swith_auth, swift_website
1482*/
  // Neither map is part of encode()/decode() below.
1483 map<string, list<string> > api_hostname_map;
1484 map<string, list<string> > api_endpoints_map;
1485
1486 string realm_id;
1487
1488 RGWZoneGroup(): is_master(false){}
1489 RGWZoneGroup(const std::string &id, const std::string &name):RGWSystemMetaObj(id, name) {}
1490 RGWZoneGroup(const std::string &_name):RGWSystemMetaObj(_name) {}
1491 RGWZoneGroup(const std::string &_name, bool _is_master, CephContext *cct, RGWRados* store,
1492 const string& _realm_id, const list<string>& _endpoints)
1493 : RGWSystemMetaObj(_name, cct , store), endpoints(_endpoints), is_master(_is_master),
1494 realm_id(_realm_id) {}
1495
1496 bool is_master_zonegroup() const { return is_master;}
  // Changes the master flag and then re-runs post_process_params().
1497 void update_master(bool _is_master) {
1498 is_master = _is_master;
1499 post_process_params();
1500 }
1501 void post_process_params();
1502
  // Serialize as struct version 4 (compat 1). Statement order is the on-wire
  // field order: name is written first on its own, and the RGWSystemMetaObj
  // base follows later, just before realm_id.
1503 void encode(bufferlist& bl) const override {
1504 ENCODE_START(4, 1, bl);
1505 ::encode(name, bl);
1506 ::encode(api_name, bl);
1507 ::encode(is_master, bl);
1508 ::encode(endpoints, bl);
1509 ::encode(master_zone, bl);
1510 ::encode(zones, bl);
1511 ::encode(placement_targets, bl);
1512 ::encode(default_placement, bl);
1513 ::encode(hostnames, bl);
1514 ::encode(hostnames_s3website, bl);
1515 RGWSystemMetaObj::encode(bl);
1516 ::encode(realm_id, bl);
1517 ENCODE_FINISH(bl);
1518 }
1519
  // Deserialize any version up to 4. hostnames (v2) and hostnames_s3website
  // (v3) are read only when present; before v4 there is no encoded base
  // object or realm_id, so id falls back to name.
1520 void decode(bufferlist::iterator& bl) override {
1521 DECODE_START(4, bl);
1522 ::decode(name, bl);
1523 ::decode(api_name, bl);
1524 ::decode(is_master, bl);
1525 ::decode(endpoints, bl);
1526 ::decode(master_zone, bl);
1527 ::decode(zones, bl);
1528 ::decode(placement_targets, bl);
1529 ::decode(default_placement, bl);
1530 if (struct_v >= 2) {
1531 ::decode(hostnames, bl);
1532 }
1533 if (struct_v >= 3) {
1534 ::decode(hostnames_s3website, bl);
1535 }
1536 if (struct_v >= 4) {
1537 RGWSystemMetaObj::decode(bl);
1538 ::decode(realm_id, bl);
1539 } else {
1540 id = name;
1541 }
1542 DECODE_FINISH(bl);
1543 }
1544
1545 int read_default_id(string& default_id, bool old_format = false) override;
1546 int set_as_default(bool exclusive = false) override;
1547 int create_default(bool old_format = false);
1548 int equals(const string& other_zonegroup) const;
1549 int add_zone(const RGWZoneParams& zone_params, bool *is_master, bool *read_only,
1550 const list<string>& endpoints, const string *ptier_type,
1551 bool *psync_from_all, list<string>& sync_from, list<string>& sync_from_rm);
1552 int remove_zone(const std::string& zone_id);
1553 int rename_zone(const RGWZoneParams& zone_params);
1554 rgw_pool get_pool(CephContext *cct);
1555 const string get_default_oid(bool old_region_format = false) override;
1556 const string& get_info_oid_prefix(bool old_region_format = false) override;
1557 const string& get_names_oid_prefix() override;
1558 const string& get_predefined_name(CephContext *cct) override;
1559
1560 void dump(Formatter *f) const;
1561 void decode_json(JSONObj *obj);
1562 static void generate_test_instances(list<RGWZoneGroup*>& o);
1563};
1564WRITE_CLASS_ENCODER(RGWZoneGroup)
1565
// The zonegroup map carried by a period (RGWPeriod::period_map): zonegroups
// held in two maps, a zone-id -> short-id map, and the master zonegroup.
1566struct RGWPeriodMap
1567{
1568 string id;
1569 map<string, RGWZoneGroup> zonegroups;
1570 map<string, RGWZoneGroup> zonegroups_by_api;
1571 map<string, uint32_t> short_zone_ids;
1572
1573 string master_zonegroup;
1574
1575 void encode(bufferlist& bl) const;
1576 void decode(bufferlist::iterator& bl);
1577
1578 int update(const RGWZoneGroup& zonegroup, CephContext *cct);
1579
1580 void dump(Formatter *f) const;
1581 void decode_json(JSONObj *obj);
1582
  // Clears both zonegroup maps and the master zonegroup. id and
  // short_zone_ids are left untouched.
1583 void reset() {
1584 zonegroups.clear();
1585 zonegroups_by_api.clear();
1586 master_zonegroup.clear();
1587 }
1588
1589 uint32_t get_zone_short_id(const string& zone_id) const;
1590};
1591WRITE_CLASS_ENCODER(RGWPeriodMap)
1592
// Period-level configuration: the bucket and user quota settings.
1593struct RGWPeriodConfig
1594{
1595 RGWQuotaInfo bucket_quota;
1596 RGWQuotaInfo user_quota;
1597
  // Serialize as struct version 1 (compat 1): bucket_quota, then user_quota.
1598 void encode(bufferlist& bl) const {
1599 ENCODE_START(1, 1, bl);
1600 ::encode(bucket_quota, bl);
1601 ::encode(user_quota, bl);
1602 ENCODE_FINISH(bl);
1603 }
1604
  // Inverse of encode().
1605 void decode(bufferlist::iterator& bl) {
1606 DECODE_START(1, bl);
1607 ::decode(bucket_quota, bl);
1608 ::decode(user_quota, bl);
1609 DECODE_FINISH(bl);
1610 }
1611
1612 void dump(Formatter *f) const;
1613 void decode_json(JSONObj *obj);
1614
1615 // the period config must be stored in a local object outside of the period,
1616 // so that it can be used in a default configuration where no realm/period
1617 // exists
1618 int read(RGWRados *store, const std::string& realm_id);
1619 int write(RGWRados *store, const std::string& realm_id);
1620
1621 static std::string get_oid(const std::string& realm_id);
1622 static rgw_pool get_pool(CephContext *cct);
1623};
1624WRITE_CLASS_ENCODER(RGWPeriodConfig)
1625
1626/* for backward compatibility */
// Region-named counterpart of RGWZoneGroupMap: a map of RGWZoneGroup entries
// plus the master region name and the bucket/user quotas. Encoding is
// implemented out of line.
1627struct RGWRegionMap {
1628
1629 map<string, RGWZoneGroup> regions;
1630
1631 string master_region;
1632
1633 RGWQuotaInfo bucket_quota;
1634 RGWQuotaInfo user_quota;
1635
1636 void encode(bufferlist& bl) const;
1637 void decode(bufferlist::iterator& bl);
1638
1639 void dump(Formatter *f) const;
1640 void decode_json(JSONObj *obj);
1641};
1642WRITE_CLASS_ENCODER(RGWRegionMap)
1643
// Zonegroups held in two maps, together with the master zonegroup name and
// the bucket/user quotas. Encoding is implemented out of line.
1644struct RGWZoneGroupMap {
1645
1646 map<string, RGWZoneGroup> zonegroups;
1647 map<string, RGWZoneGroup> zonegroups_by_api;
1648
1649 string master_zonegroup;
1650
1651 RGWQuotaInfo bucket_quota;
1652 RGWQuotaInfo user_quota;
1653
1654 /* construct the map */
1655 int read(CephContext *cct, RGWRados *store);
1656
1657 void encode(bufferlist& bl) const;
1658 void decode(bufferlist::iterator& bl);
1659
1660 void dump(Formatter *f) const;
1661 void decode_json(JSONObj *obj);
1662};
1663WRITE_CLASS_ENCODER(RGWZoneGroupMap)
1664
1665class RGWRealm;
1666
// Encodable record naming an object (tenant, bucket name/id, object key)
// together with an expiration time.
1667struct objexp_hint_entry {
1668 string tenant;
1669 string bucket_name;
1670 string bucket_id;
1671 rgw_obj_key obj_key;
1672 ceph::real_time exp_time;
1673
  // Serialize as struct version 2 (compat 1). tenant is declared first but
  // encoded last, because it was added in version 2.
1674 void encode(bufferlist& bl) const {
1675 ENCODE_START(2, 1, bl);
1676 ::encode(bucket_name, bl);
1677 ::encode(bucket_id, bl);
1678 ::encode(obj_key, bl);
1679 ::encode(exp_time, bl);
1680 ::encode(tenant, bl);
1681 ENCODE_FINISH(bl);
1682 }
1683
  // Deserialize version 1 or 2; a version-1 entry has no tenant, so the field
  // is cleared explicitly rather than left at its previous value.
1684 void decode(bufferlist::iterator& bl) {
1685 // XXX Do we want DECODE_START_LEGACY_COMPAT_LEN(2, 1, 1, bl); ?
1686 DECODE_START(2, bl);
1687 ::decode(bucket_name, bl);
1688 ::decode(bucket_id, bl);
1689 ::decode(obj_key, bl);
1690 ::decode(exp_time, bl);
1691 if (struct_v >= 2) {
1692 ::decode(tenant, bl);
1693 } else {
1694 tenant.clear();
1695 }
1696 DECODE_FINISH(bl);
1697 }
1698};
1699WRITE_CLASS_ENCODER(objexp_hint_entry)
1700
1701class RGWPeriod;
1702
// A realm: a RGWSystemMetaObj that additionally tracks the id of its current
// period and a realm epoch.
1703class RGWRealm : public RGWSystemMetaObj
1704{
1705 string current_period;
1706 epoch_t epoch{0}; //< realm epoch, incremented for each new period
1707
1708 int create_control(bool exclusive);
1709 int delete_control();
1710public:
1711 RGWRealm() {}
1712 RGWRealm(const string& _id, const string& _name = "") : RGWSystemMetaObj(_id, _name) {}
1713 RGWRealm(CephContext *_cct, RGWRados *_store): RGWSystemMetaObj(_cct, _store) {}
1714 RGWRealm(const string& _name, CephContext *_cct, RGWRados *_store): RGWSystemMetaObj(_name, _cct, _store){}
1715
  // Serialize as struct version 1 (compat 1): base object, current_period,
  // epoch.
1716 void encode(bufferlist& bl) const override {
1717 ENCODE_START(1, 1, bl);
1718 RGWSystemMetaObj::encode(bl);
1719 ::encode(current_period, bl);
1720 ::encode(epoch, bl);
1721 ENCODE_FINISH(bl);
1722 }
1723
  // Inverse of encode().
1724 void decode(bufferlist::iterator& bl) override {
1725 DECODE_START(1, bl);
1726 RGWSystemMetaObj::decode(bl);
1727 ::decode(current_period, bl);
1728 ::decode(epoch, bl);
1729 DECODE_FINISH(bl);
1730 }
1731
1732 int create(bool exclusive = true) override;
1733 int delete_obj();
1734 rgw_pool get_pool(CephContext *cct);
1735 const string get_default_oid(bool old_format = false) override;
1736 const string& get_names_oid_prefix() override;
1737 const string& get_info_oid_prefix(bool old_format = false) override;
1738 const string& get_predefined_name(CephContext *cct) override;
1739
1740 using RGWSystemMetaObj::read_id; // expose as public for radosgw-admin
1741
1742 void dump(Formatter *f) const;
1743 void decode_json(JSONObj *obj);
1744
1745 const string& get_current_period() const {
1746 return current_period;
1747 }
1748 int set_current_period(RGWPeriod& period);
  // Resets the in-memory current period id and epoch; nothing is written here.
1749 void clear_current_period_and_epoch() {
1750 current_period.clear();
1751 epoch = 0;
1752 }
1753 epoch_t get_epoch() const { return epoch; }
1754
1755 string get_control_oid();
1756 /// send a notify on the realm control object
1757 int notify_zone(bufferlist& bl);
1758 /// notify the zone of a new period
1759 int notify_new_period(const RGWPeriod& period);
1760};
1761WRITE_CLASS_ENCODER(RGWRealm)
1762
// Encodable wrapper around a single epoch value; RGWPeriod's
// read_latest_epoch() fills one of these.
1763struct RGWPeriodLatestEpochInfo {
  // zero-initialized so a default-constructed object never encodes or dumps
  // an indeterminate value
1764 epoch_t epoch = 0;
1765
  // Serialize as struct version 1 (compat 1).
1766 void encode(bufferlist& bl) const {
1767 ENCODE_START(1, 1, bl);
1768 ::encode(epoch, bl);
1769 ENCODE_FINISH(bl);
1770 }
1771
  // Inverse of encode().
1772 void decode(bufferlist::iterator& bl) {
1773 DECODE_START(1, bl);
1774 ::decode(epoch, bl);
1775 DECODE_FINISH(bl);
1776 }
1777
1778 void dump(Formatter *f) const;
1779 void decode_json(JSONObj *obj);
1780};
1781WRITE_CLASS_ENCODER(RGWPeriodLatestEpochInfo)
1782
// A period: an identified, epoch-versioned snapshot holding the period map
// (zonegroups), the period config (quotas), the master zonegroup/zone and the
// realm it belongs to. cct and store are runtime handles and are not part of
// the encoded form.
1783class RGWPeriod
1784{
1785 string id;
1786 epoch_t epoch;
1787 string predecessor_uuid;
1788 std::vector<std::string> sync_status;
1789 RGWPeriodMap period_map;
1790 RGWPeriodConfig period_config;
1791 string master_zonegroup;
1792 string master_zone;
1793
1794 string realm_id;
1795 string realm_name;
1796 epoch_t realm_epoch{1}; //< realm epoch when period was made current
1797
1798 CephContext *cct;
1799 RGWRados *store;
1800
1801 int read_info();
224ce89b
WB
1802 int read_latest_epoch(RGWPeriodLatestEpochInfo& epoch_info,
1803 RGWObjVersionTracker *objv = nullptr);
7c673cae
FG
1804 int use_latest_epoch();
1805 int use_current_period();
1806
1807 const string get_period_oid();
1808 const string get_period_oid_prefix();
1809
1810 // gather the metadata sync status for each shard; only for use on master zone
1811 int update_sync_status(const RGWPeriod &current_period,
1812 std::ostream& error_stream, bool force_if_stale);
1813
1814public:
1815 RGWPeriod() : epoch(0), cct(NULL), store(NULL) {}
1816
1817 RGWPeriod(const string& period_id, epoch_t _epoch = 0)
1818 : id(period_id), epoch(_epoch),
1819 cct(NULL), store(NULL) {}
1820
1821 const string& get_id() const { return id; }
1822 epoch_t get_epoch() const { return epoch; }
1823 epoch_t get_realm_epoch() const { return realm_epoch; }
1824 const string& get_predecessor() const { return predecessor_uuid; }
1825 const string& get_master_zone() const { return master_zone; }
1826 const string& get_master_zonegroup() const { return master_zonegroup; }
1827 const string& get_realm() const { return realm_id; }
1828 const RGWPeriodMap& get_map() const { return period_map; }
1829 RGWPeriodConfig& get_config() { return period_config; }
1830 const RGWPeriodConfig& get_config() const { return period_config; }
1831 const std::vector<std::string>& get_sync_status() const { return sync_status; }
1832 rgw_pool get_pool(CephContext *cct);
1833 const string& get_latest_epoch_oid();
1834 const string& get_info_oid_prefix();
1835
  // The quota setters only copy from their argument, so they take it by const
  // reference; callers passing a mutable RGWQuotaInfo still bind.
1836 void set_user_quota(const RGWQuotaInfo& user_quota) {
1837 period_config.user_quota = user_quota;
1838 }
1839
1840 void set_bucket_quota(const RGWQuotaInfo& bucket_quota) {
1841 period_config.bucket_quota = bucket_quota;
1842 }
1843
  // Keeps the period map's id in step with the period id.
1844 void set_id(const string& id) {
1845 this->id = id;
1846 period_map.id = id;
1847 }
1848 void set_epoch(epoch_t epoch) { this->epoch = epoch; }
1849 void set_realm_epoch(epoch_t epoch) { realm_epoch = epoch; }
1850
1851 void set_predecessor(const string& predecessor)
1852 {
1853 predecessor_uuid = predecessor;
1854 }
1855
1856 void set_realm_id(const string& _realm_id) {
1857 realm_id = _realm_id;
1858 }
1859
1860 int reflect();
1861
1862 int get_zonegroup(RGWZoneGroup& zonegroup,
1863 const string& zonegroup_id);
1864
  // True when the period map holds exactly one zonegroup.
3efd9988 1865 bool is_single_zonegroup() const
224ce89b
WB
1866 {
1867 return (period_map.zonegroups.size() == 1);
1868 }
1869
1870 /*
1871 returns true if there are several zone groups with at least one zone
1872 */
  // i.e. true as soon as a second zonegroup with a non-empty zones map is
  // seen. Read-only, hence const like is_single_zonegroup().
1873 bool is_multi_zonegroups_with_zones() const
1874 {
1875 int count = 0;
1876 for (const auto& zg: period_map.zonegroups) {
1877 if (zg.second.zones.size() > 0) {
1878 if (count++ > 0) {
1879 return true;
1880 }
1881 }
1882 }
1883 return false;
1884 }
7c673cae
FG
1885
1886 int get_latest_epoch(epoch_t& epoch);
224ce89b
WB
1887 int set_latest_epoch(epoch_t epoch, bool exclusive = false,
1888 RGWObjVersionTracker *objv = nullptr);
1889 // update latest_epoch if the given epoch is higher, else return -EEXIST
1890 int update_latest_epoch(epoch_t epoch);
7c673cae
FG
1891
1892 int init(CephContext *_cct, RGWRados *_store, const string &period_realm_id, const string &period_realm_name = "",
1893 bool setup_obj = true);
1894 int init(CephContext *_cct, RGWRados *_store, bool setup_obj = true);
7c673cae
FG
1895
1896 int create(bool exclusive = true);
1897 int delete_obj();
1898 int store_info(bool exclusive);
1899 int add_zonegroup(const RGWZoneGroup& zonegroup);
1900
1901 void fork();
1902 int update();
1903
1904 // commit a staging period; only for use on master zone
1905 int commit(RGWRealm& realm, const RGWPeriod &current_period,
1906 std::ostream& error_stream, bool force_if_stale = false);
1907
  // Serialize as struct version 1 (compat 1). Statement order is the on-wire
  // field order and differs from declaration order (e.g. master_zone precedes
  // master_zonegroup); decode() must mirror it exactly.
1908 void encode(bufferlist& bl) const {
1909 ENCODE_START(1, 1, bl);
1910 ::encode(id, bl);
1911 ::encode(epoch, bl);
1912 ::encode(realm_epoch, bl);
1913 ::encode(predecessor_uuid, bl);
1914 ::encode(sync_status, bl);
1915 ::encode(period_map, bl);
1916 ::encode(master_zone, bl);
1917 ::encode(master_zonegroup, bl);
1918 ::encode(period_config, bl);
1919 ::encode(realm_id, bl);
1920 ::encode(realm_name, bl);
1921 ENCODE_FINISH(bl);
1922 }
1923
  // Inverse of encode().
1924 void decode(bufferlist::iterator& bl) {
1925 DECODE_START(1, bl);
1926 ::decode(id, bl);
1927 ::decode(epoch, bl);
1928 ::decode(realm_epoch, bl);
1929 ::decode(predecessor_uuid, bl);
1930 ::decode(sync_status, bl);
1931 ::decode(period_map, bl);
1932 ::decode(master_zone, bl);
1933 ::decode(master_zonegroup, bl);
1934 ::decode(period_config, bl);
1935 ::decode(realm_id, bl);
1936 ::decode(realm_name, bl);
1937 DECODE_FINISH(bl);
1938 }
1939 void dump(Formatter *f) const;
1940 void decode_json(JSONObj *obj);
1941
  // The staging period id of a realm is "<realm_id>:staging".
1942 static string get_staging_id(const string& realm_id) {
1943 return realm_id + ":staging";
1944 }
1945};
1946WRITE_CLASS_ENCODER(RGWPeriod)
1947
1948class RGWDataChangesLog;
1949class RGWMetaSyncStatusManager;
1950class RGWDataSyncStatusManager;
1951class RGWReplicaLogger;
1952class RGWCoroutinesManagerRegistry;
1953
// Sharded state log for one module. Entries are addressed by
// (client_id, op_id, object); all operations are implemented out of line.
1954class RGWStateLog {
1955 RGWRados *store;
1956 int num_shards;
1957 string module_name;
1958
1959 void oid_str(int shard, string& oid);
1960 int get_shard_num(const string& object);
1961 string get_oid(const string& object);
1962 int open_ioctx(librados::IoCtx& ioctx);
1963
  // Cursor state for listing: a shard range, a marker and the filter fields.
  // NOTE(review): presumably this is what the opaque void* handle of
  // init_list_entries()/list_entries() points to -- confirm in the .cc file.
1964 struct list_state {
1965 int cur_shard;
1966 int max_shard;
1967 string marker;
1968 string client_id;
1969 string op_id;
1970 string object;
1971
1972 list_state() : cur_shard(0), max_shard(0) {}
1973 };
1974
1975protected:
  // Hook for subclasses; this base implementation does nothing and returns
  // false.
1976 virtual bool dump_entry_internal(const cls_statelog_entry& entry, Formatter *f) {
1977 return false;
1978 }
1979
1980public:
1981 RGWStateLog(RGWRados *_store, int _num_shards, const string& _module_name) :
1982 store(_store), num_shards(_num_shards), module_name(_module_name) {}
1983 virtual ~RGWStateLog() {}
1984
1985 int store_entry(const string& client_id, const string& op_id, const string& object,
1986 uint32_t state, bufferlist *bl, uint32_t *check_state);
1987
1988 int remove_entry(const string& client_id, const string& op_id, const string& object);
1989
  // Listing protocol: init_list_entries() hands back an opaque handle,
  // list_entries() is called with it, and finish_list_entries() takes it
  // back.
1990 void init_list_entries(const string& client_id, const string& op_id, const string& object,
1991 void **handle);
1992
1993 int list_entries(void *handle, int max_entries, list<cls_statelog_entry>& entries, bool *done);
1994
1995 void finish_list_entries(void *handle);
1996
1997 virtual void dump_entry(const cls_statelog_entry& entry, Formatter *f);
1998};
1999
2000/*
2001 * state transitions:
2002 *
2003 * unknown -> in-progress -> complete
2004 * -> error
2005 *
2006 * user can try setting the 'abort' state, and it can only succeed if state is
2007 * in-progress.
2008 *
2009 * state renewal cannot switch state (stays in the same state)
2010 *
2011 * rgw can switch from in-progress to complete
2012 * rgw can switch from in-progress to error
2013 *
2014 * rgw can switch from abort to cancelled
2015 *
2016 */
2017
// RGWStateLog specialization that tracks operation states (OpState values).
2018class RGWOpState : public RGWStateLog {
2019protected:
2020 bool dump_entry_internal(const cls_statelog_entry& entry, Formatter *f) override;
2021public:
2022
  // Numeric values are spelled out explicitly; keep them stable when adding
  // new states.
2023 enum OpState {
2024 OPSTATE_UNKNOWN = 0,
2025 OPSTATE_IN_PROGRESS = 1,
2026 OPSTATE_COMPLETE = 2,
2027 OPSTATE_ERROR = 3,
2028 OPSTATE_ABORT = 4,
2029 OPSTATE_CANCELLED = 5,
2030 };
2031
2032 explicit RGWOpState(RGWRados *_store);
2033
2034 int state_from_str(const string& s, OpState *state);
2035 int set_state(const string& client_id, const string& op_id, const string& object, OpState state);
2036 int renew_state(const string& client_id, const string& op_id, const string& object, OpState state);
2037};
2038
// Binds an RGWOpState to one fixed (client_id, op_id, object) triple and
// keeps the current state and last update time for it.
2039class RGWOpStateSingleOp
2040{
2041 RGWOpState os;
2042 string client_id;
2043 string op_id;
2044 string object;
2045
2046 CephContext *cct;
2047
2048 RGWOpState::OpState cur_state;
2049 ceph::real_time last_update;
2050
2051public:
2052 RGWOpStateSingleOp(RGWRados *store, const string& cid, const string& oid, const string& obj);
2053
2054 int set_state(RGWOpState::OpState state);
2055 int renew_state();
2056};
2057
// Ref-counted callback interface for a bucket-stats request: subclasses
// implement handle_response().
2058class RGWGetBucketStats_CB : public RefCountedObject {
2059protected:
2060 rgw_bucket bucket;
  // Only the pointer is stored (see set_response()); the map itself is not
  // copied, so it must stay valid for as long as this callback reads it.
2061 map<RGWObjCategory, RGWStorageStats> *stats;
2062public:
224ce89b 2063 explicit RGWGetBucketStats_CB(const rgw_bucket& _bucket) : bucket(_bucket), stats(NULL) {}
7c673cae
FG
2064 ~RGWGetBucketStats_CB() override {}
  // r is the result code of the request.
2065 virtual void handle_response(int r) = 0;
2066 virtual void set_response(map<RGWObjCategory, RGWStorageStats> *_stats) {
2067 stats = _stats;
2068 }
2069};
2070
// Ref-counted callback interface for a user-stats request: subclasses
// implement handle_response().
2071class RGWGetUserStats_CB : public RefCountedObject {
2072protected:
2073 rgw_user user;
  // Unlike RGWGetBucketStats_CB, the stats are stored by value.
2074 RGWStorageStats stats;
2075public:
2076 explicit RGWGetUserStats_CB(const rgw_user& _user) : user(_user) {}
2077 ~RGWGetUserStats_CB() override {}
  // r is the result code of the request.
2078 virtual void handle_response(int r) = 0;
  // Copies _stats into this object.
2079 virtual void set_response(RGWStorageStats& _stats) {
2080 stats = _stats;
2081 }
2082};
2083
2084class RGWGetDirHeader_CB;
2085class RGWGetUserHeader_CB;
2086
// Bundles a pool, an oid and a key with a librados::IoCtx.
// NOTE(review): presumably the IoCtx is the one opened on `pool` -- confirm
// where this struct is filled in.
2087struct rgw_rados_ref {
2088 rgw_pool pool;
2089 string oid;
2090 string key;
2091 librados::IoCtx ioctx;
2092};
2093
// Interface for a cache that receives keyed callbacks (chain_cb) and keyed or
// global invalidations.
2094class RGWChainedCache {
2095public:
2096 virtual ~RGWChainedCache() {}
2097 virtual void chain_cb(const string& key, void *data) = 0;
2098 virtual void invalidate(const string& key) = 0;
2099 virtual void invalidate_all() = 0;
2100
  // Bundle of (cache, key, data). key is held by reference, not copied, so an
  // Entry must not outlive the string it was constructed from.
2101 struct Entry {
2102 RGWChainedCache *cache;
2103 const string& key;
2104 void *data;
2105
2106 Entry(RGWChainedCache *_c, const string& _k, void *_d) : cache(_c), key(_k), data(_d) {}
2107 };
2108};
2109
// Map from an object identifier T to its cached state S, guarded by an
// RWLock. S must provide the is_atomic and prefetch_data members used below.
2110template <class T, class S>
2111class RGWObjectCtxImpl {
2112 RGWRados *store;
2113 std::map<T, S> objs_state;
2114 RWLock lock;
2115
2116public:
2117 RGWObjectCtxImpl(RGWRados *_store) : store(_store), lock("RGWObjectCtxImpl") {}
2118
  // Returns a pointer to the state stored for obj, default-constructing it on
  // first use. The lookup runs under the read lock; on a miss the read lock
  // is dropped and the write lock taken, and operator[] then either inserts
  // the entry or finds one that another thread inserted in between. The
  // pointer is returned after the lock has been released.
2119 S *get_state(const T& obj) {
2120 S *result;
2121 typename std::map<T, S>::iterator iter;
2122 lock.get_read();
2123 assert (!obj.empty());
2124 iter = objs_state.find(obj);
2125 if (iter != objs_state.end()) {
2126 result = &iter->second;
2127 lock.unlock();
2128 } else {
2129 lock.unlock();
2130 lock.get_write();
2131 result = &objs_state[obj];
2132 lock.unlock();
2133 }
2134 return result;
2135 }
2136
  // Flags obj's state as atomic, creating the state entry if needed.
2137 void set_atomic(T& obj) {
2138 RWLock::WLocker wl(lock);
2139 assert (!obj.empty());
2140 objs_state[obj].is_atomic = true;
2141 }
  // Flags obj's state for data prefetch, creating the state entry if needed.
2142 void set_prefetch_data(T& obj) {
2143 RWLock::WLocker wl(lock);
2144 assert (!obj.empty());
2145 objs_state[obj].prefetch_data = true;
2146 }
  // Drops the cached state for obj. If either flag was set, a fresh
  // default-constructed entry is re-created carrying just those two flags, so
  // they survive the invalidation.
2147 void invalidate(T& obj) {
2148 RWLock::WLocker wl(lock);
2149 auto iter = objs_state.find(obj);
2150 if (iter == objs_state.end()) {
2151 return;
2152 }
2153 bool is_atomic = iter->second.is_atomic;
2154 bool prefetch_data = iter->second.prefetch_data;
2155
2156 objs_state.erase(iter);
2157
2158 if (is_atomic || prefetch_data) {
2159 auto& s = objs_state[obj];
2160 s.is_atomic = is_atomic;
2161 s.prefetch_data = prefetch_data;
2162 }
2163 }
2164};
2165
2166template<>
2167void RGWObjectCtxImpl<rgw_obj, RGWObjState>::invalidate(rgw_obj& obj);
2168
2169template<>
2170void RGWObjectCtxImpl<rgw_raw_obj, RGWRawObjState>::invalidate(rgw_raw_obj& obj);
2171
// Pairs the two object-state caches (rgw_obj and rgw_raw_obj) with the store
// they belong to and an opaque user context pointer.
2172struct RGWObjectCtx {
2173 RGWRados *store;
2174 void *user_ctx;
2175
2176 RGWObjectCtxImpl<rgw_obj, RGWObjState> obj;
2177 RGWObjectCtxImpl<rgw_raw_obj, RGWRawObjState> raw;
2178
  // obj and raw are initialized from the member `store`, which is declared
  // (and therefore initialized) before them.
2179 explicit RGWObjectCtx(RGWRados *_store) : store(_store), user_ctx(NULL), obj(store), raw(store) { }
2180 RGWObjectCtx(RGWRados *_store, void *_user_ctx) : store(_store), user_ctx(_user_ctx), obj(store), raw(store) { }
2181};
2182
2183class Finisher;
2184class RGWAsyncRadosProcessor;
2185
2186template <class T>
2187class RGWChainedCacheImpl;
2188
// Bundle of a bucket's info, a modification time and its attribute map.
2189struct bucket_info_entry {
2190 RGWBucketInfo info;
2191 real_time mtime;
2192 map<string, bufferlist> attrs;
2193};
2194
// Snapshot of the mtime, zone short id and pg version taken from an
// RGWObjState.
2195struct tombstone_entry {
2196 ceph::real_time mtime;
  // Default to 0 so that a default-constructed entry (`tombstone_entry e;`)
  // never exposes indeterminate values.
2197 uint32_t zone_short_id = 0;
2198 uint64_t pg_ver = 0;
2199
2200 tombstone_entry() = default;
  // Copies the three tracked fields out of state.
2201 tombstone_entry(const RGWObjState& state)
2202 : mtime(state.mtime), zone_short_id(state.zone_short_id),
2203 pg_ver(state.pg_ver) {}
2204};
2205
31f18b77
FG
2206class RGWIndexCompletionManager;
2207
3a9019d9 2208class RGWRados : public AdminSocketHook
7c673cae
FG
2209{
2210 friend class RGWGC;
2211 friend class RGWMetaNotifier;
2212 friend class RGWDataNotifier;
2213 friend class RGWLC;
2214 friend class RGWObjectExpirer;
2215 friend class RGWMetaSyncProcessorThread;
2216 friend class RGWDataSyncProcessorThread;
2217 friend class RGWStateLog;
2218 friend class RGWReplicaLogger;
31f18b77
FG
2219 friend class RGWReshard;
2220 friend class RGWBucketReshard;
2221 friend class BucketIndexLockGuard;
d2e6a577 2222 friend class RGWCompleteMultipart;
7c673cae 2223
3a9019d9
FG
2224 static const char* admin_commands[4][3];
2225
7c673cae
FG
2226 /** Open the pool used as root for this gateway */
2227 int open_root_pool_ctx();
2228 int open_gc_pool_ctx();
2229 int open_lc_pool_ctx();
2230 int open_objexp_pool_ctx();
31f18b77 2231 int open_reshard_pool_ctx();
7c673cae
FG
2232
2233 int open_pool_ctx(const rgw_pool& pool, librados::IoCtx& io_ctx);
2234 int open_bucket_index_ctx(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx);
2235 int open_bucket_index(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx, string& bucket_oid);
2236 int open_bucket_index_base(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
2237 string& bucket_oid_base);
2238 int open_bucket_index_shard(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
2239 const string& obj_key, string *bucket_obj, int *shard_id);
2240 int open_bucket_index_shard(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
2241 int shard_id, string *bucket_obj);
2242 int open_bucket_index(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
2243 map<int, string>& bucket_objs, int shard_id = -1, map<int, string> *bucket_instance_ids = NULL);
2244 template<typename T>
2245 int open_bucket_index(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
2246 map<int, string>& oids, map<int, T>& bucket_objs,
2247 int shard_id = -1, map<int, string> *bucket_instance_ids = NULL);
2248 void build_bucket_index_marker(const string& shard_id_str, const string& shard_marker,
2249 string *marker);
2250
2251 void get_bucket_instance_ids(const RGWBucketInfo& bucket_info, int shard_id, map<int, string> *result);
2252
2253 std::atomic<int64_t> max_req_id = { 0 };
2254 Mutex lock;
2255 Mutex watchers_lock;
2256 SafeTimer *timer;
2257
2258 RGWGC *gc;
2259 RGWLC *lc;
2260 RGWObjectExpirer *obj_expirer;
2261 bool use_gc_thread;
2262 bool use_lc_thread;
2263 bool quota_threads;
2264 bool run_sync_thread;
31f18b77 2265 bool run_reshard_thread;
7c673cae
FG
2266
2267 RGWAsyncRadosProcessor* async_rados;
2268
2269 RGWMetaNotifier *meta_notifier;
2270 RGWDataNotifier *data_notifier;
2271 RGWMetaSyncProcessorThread *meta_sync_processor_thread;
2272 map<string, RGWDataSyncProcessorThread *> data_sync_processor_threads;
2273
b32b8144 2274 boost::optional<rgw::BucketTrimManager> bucket_trim;
7c673cae
FG
2275 RGWSyncLogTrimThread *sync_log_trimmer{nullptr};
2276
2277 Mutex meta_sync_thread_lock;
2278 Mutex data_sync_thread_lock;
2279
2280 int num_watchers;
2281 RGWWatcher **watchers;
2282 std::set<int> watchers_set;
2283 librados::IoCtx root_pool_ctx; // .rgw
2284 librados::IoCtx control_pool_ctx; // .rgw.control
2285 bool watch_initialized;
2286
2287 friend class RGWWatcher;
2288
2289 Mutex bucket_id_lock;
2290
2291 // This field represents the number of bucket index object shards
2292 uint32_t bucket_index_max_shards;
2293
2294 int get_obj_head_ioctx(const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::IoCtx *ioctx);
2295 int get_obj_head_ref(const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_rados_ref *ref);
224ce89b 2296 int get_system_obj_ref(const rgw_raw_obj& obj, rgw_rados_ref *ref);
7c673cae
FG
2297 uint64_t max_bucket_id;
2298
2299 int get_olh_target_state(RGWObjectCtx& rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj,
2300 RGWObjState *olh_state, RGWObjState **target_state);
2301 int get_system_obj_state_impl(RGWObjectCtx *rctx, rgw_raw_obj& obj, RGWRawObjState **state, RGWObjVersionTracker *objv_tracker);
2302 int get_obj_state_impl(RGWObjectCtx *rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWObjState **state,
2303 bool follow_olh, bool assume_noent = false);
2304 int append_atomic_test(RGWObjectCtx *rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj,
2305 librados::ObjectOperation& op, RGWObjState **state);
2306
2307 int update_placement_map();
2308 int store_bucket_info(RGWBucketInfo& info, map<string, bufferlist> *pattrs, RGWObjVersionTracker *objv_tracker, bool exclusive);
2309
2310 void remove_rgw_head_obj(librados::ObjectWriteOperation& op);
2311 void cls_obj_check_prefix_exist(librados::ObjectOperation& op, const string& prefix, bool fail_if_exist);
2312 void cls_obj_check_mtime(librados::ObjectOperation& op, const real_time& mtime, bool high_precision_time, RGWCheckMTimeType type);
2313protected:
2314 CephContext *cct;
2315
2316 std::vector<librados::Rados> rados;
2317 uint32_t next_rados_handle;
2318 RWLock handle_lock;
2319 std::map<pthread_t, int> rados_map;
2320
2321 using RGWChainedCacheImpl_bucket_info_entry = RGWChainedCacheImpl<bucket_info_entry>;
2322 RGWChainedCacheImpl_bucket_info_entry *binfo_cache;
2323
2324 using tombstone_cache_t = lru_map<rgw_obj, tombstone_entry>;
2325 tombstone_cache_t *obj_tombstone_cache;
2326
2327 librados::IoCtx gc_pool_ctx; // .rgw.gc
2328 librados::IoCtx lc_pool_ctx; // .rgw.lc
2329 librados::IoCtx objexp_pool_ctx;
31f18b77 2330 librados::IoCtx reshard_pool_ctx;
7c673cae
FG
2331
2332 bool pools_initialized;
2333
7c673cae
FG
2334 string trans_id_suffix;
2335
2336 RGWQuotaHandler *quota_handler;
2337
2338 Finisher *finisher;
31f18b77 2339
7c673cae
FG
2340 RGWCoroutinesManagerRegistry *cr_registry;
2341
2342 RGWSyncModulesManager *sync_modules_manager{nullptr};
2343 RGWSyncModuleInstanceRef sync_module;
2344 bool writeable_zone{false};
2345
2346 RGWZoneGroup zonegroup;
2347 RGWZone zone_public_config; /* external zone params, e.g., entrypoints, log flags, etc. */
2348 RGWZoneParams zone_params; /* internal zone params, e.g., rados pools */
2349 uint32_t zone_short_id;
2350
2351 RGWPeriod current_period;
31f18b77
FG
2352
2353 RGWIndexCompletionManager *index_completion_manager{nullptr};
7c673cae
FG
2354public:
2355 RGWRados() : lock("rados_timer_lock"), watchers_lock("watchers_lock"), timer(NULL),
2356 gc(NULL), lc(NULL), obj_expirer(NULL), use_gc_thread(false), use_lc_thread(false), quota_threads(false),
31f18b77 2357 run_sync_thread(false), run_reshard_thread(false), async_rados(nullptr), meta_notifier(NULL),
7c673cae
FG
2358 data_notifier(NULL), meta_sync_processor_thread(NULL),
2359 meta_sync_thread_lock("meta_sync_thread_lock"), data_sync_thread_lock("data_sync_thread_lock"),
2360 num_watchers(0), watchers(NULL),
2361 watch_initialized(false),
2362 bucket_id_lock("rados_bucket_id"),
2363 bucket_index_max_shards(0),
2364 max_bucket_id(0), cct(NULL),
2365 next_rados_handle(0),
2366 handle_lock("rados_handle_lock"),
2367 binfo_cache(NULL), obj_tombstone_cache(nullptr),
2368 pools_initialized(false),
2369 quota_handler(NULL),
2370 finisher(NULL),
2371 cr_registry(NULL),
2372 zone_short_id(0),
2373 rest_master_conn(NULL),
31f18b77 2374 meta_mgr(NULL), data_log(NULL), reshard(NULL) {}
7c673cae
FG
2375
2376 uint64_t get_new_req_id() {
2377 return ++max_req_id;
2378 }
2379
2380 librados::IoCtx* get_lc_pool_ctx() {
2381 return &lc_pool_ctx;
2382 }
2383 void set_context(CephContext *_cct) {
2384 cct = _cct;
2385 }
2386
2387 /**
2388 * AmazonS3 errors contain a HostId string, but is an opaque base64 blob; we
2389 * try to be more transparent. This has a wrapper so we can update it when zonegroup/zone are changed.
2390 */
2391 void init_host_id() {
2392 /* uint64_t needs 16, two '-' separators and a trailing null */
2393 const string& zone_name = get_zone().name;
2394 const string& zonegroup_name = zonegroup.get_name();
2395 char charbuf[16 + zone_name.size() + zonegroup_name.size() + 2 + 1];
2396 snprintf(charbuf, sizeof(charbuf), "%llx-%s-%s", (unsigned long long)instance_id(), zone_name.c_str(), zonegroup_name.c_str());
2397 string s(charbuf);
2398 host_id = s;
2399 }
2400
2401 string host_id;
2402
2403 RGWRealm realm;
2404
2405 RGWRESTConn *rest_master_conn;
2406 map<string, RGWRESTConn *> zone_conn_map;
2407 map<string, RGWRESTConn *> zone_data_sync_from_map;
2408 map<string, RGWRESTConn *> zone_data_notify_to_map;
2409 map<string, RGWRESTConn *> zonegroup_conn_map;
2410
2411 map<string, string> zone_id_by_name;
2412 map<string, RGWZone> zone_by_id;
2413
2414 RGWRESTConn *get_zone_conn_by_id(const string& id) {
2415 auto citer = zone_conn_map.find(id);
2416 if (citer == zone_conn_map.end()) {
2417 return NULL;
2418 }
2419
2420 return citer->second;
2421 }
2422
2423 RGWRESTConn *get_zone_conn_by_name(const string& name) {
2424 auto i = zone_id_by_name.find(name);
2425 if (i == zone_id_by_name.end()) {
2426 return NULL;
2427 }
2428
2429 return get_zone_conn_by_id(i->second);
2430 }
2431
2432 bool find_zone_id_by_name(const string& name, string *id) {
2433 auto i = zone_id_by_name.find(name);
2434 if (i == zone_id_by_name.end()) {
2435 return false;
2436 }
2437 *id = i->second;
2438 return true;
2439 }
2440
2441 int get_zonegroup(const string& id, RGWZoneGroup& zonegroup) {
2442 int ret = 0;
2443 if (id == get_zonegroup().get_id()) {
2444 zonegroup = get_zonegroup();
2445 } else if (!current_period.get_id().empty()) {
2446 ret = current_period.get_zonegroup(zonegroup, id);
2447 }
2448 return ret;
2449 }
2450
2451 RGWRealm& get_realm() {
2452 return realm;
2453 }
2454
2455 RGWZoneParams& get_zone_params() { return zone_params; }
2456 RGWZoneGroup& get_zonegroup() {
2457 return zonegroup;
2458 }
2459 RGWZone& get_zone() {
2460 return zone_public_config;
2461 }
2462
2463 bool zone_is_writeable() {
2464 return writeable_zone && !get_zone().is_read_only();
2465 }
2466
2467 uint32_t get_zone_short_id() const {
2468 return zone_short_id;
2469 }
2470
2471 bool zone_syncs_from(RGWZone& target_zone, RGWZone& source_zone);
2472
2473 const RGWQuotaInfo& get_bucket_quota() {
2474 return current_period.get_config().bucket_quota;
2475 }
2476
2477 const RGWQuotaInfo& get_user_quota() {
2478 return current_period.get_config().user_quota;
2479 }
2480
2481 const string& get_current_period_id() {
2482 return current_period.get_id();
2483 }
31f18b77
FG
2484
2485 bool has_zonegroup_api(const std::string& api) const {
2486 if (!current_period.get_id().empty()) {
2487 const auto& zonegroups_by_api = current_period.get_map().zonegroups_by_api;
2488 if (zonegroups_by_api.find(api) != zonegroups_by_api.end())
2489 return true;
2490 }
2491 return false;
2492 }
2493
7c673cae
FG
2494 // pulls missing periods for period_history
2495 std::unique_ptr<RGWPeriodPuller> period_puller;
2496 // maintains a connected history of periods
2497 std::unique_ptr<RGWPeriodHistory> period_history;
2498
2499 RGWAsyncRadosProcessor* get_async_rados() const { return async_rados; };
2500
2501 RGWMetadataManager *meta_mgr;
2502
2503 RGWDataChangesLog *data_log;
2504
31f18b77
FG
2505 RGWReshard *reshard;
2506 std::shared_ptr<RGWReshardWait> reshard_wait;
2507
7c673cae
FG
2508 virtual ~RGWRados() = default;
2509
2510 tombstone_cache_t *get_tombstone_cache() {
2511 return obj_tombstone_cache;
2512 }
2513
2514 RGWSyncModulesManager *get_sync_modules_manager() {
2515 return sync_modules_manager;
2516 }
2517 const RGWSyncModuleInstanceRef& get_sync_module() {
2518 return sync_module;
2519 }
2520
2521 int get_required_alignment(const rgw_pool& pool, uint64_t *alignment);
2522 int get_max_chunk_size(const rgw_pool& pool, uint64_t *max_chunk_size);
2523 int get_max_chunk_size(const string& placement_rule, const rgw_obj& obj, uint64_t *max_chunk_size);
2524
2525 uint32_t get_max_bucket_shards() {
31f18b77 2526 return rgw_shards_max();
7c673cae
FG
2527 }
2528
181888fb 2529
224ce89b 2530 int get_raw_obj_ref(const rgw_raw_obj& obj, rgw_rados_ref *ref);
7c673cae 2531
181888fb
FG
2532 int list_raw_objects_init(const rgw_pool& pool, const string& marker, RGWListRawObjsCtx *ctx);
2533 int list_raw_objects_next(const string& prefix_filter, int max,
2534 RGWListRawObjsCtx& ctx, list<string>& oids,
2535 bool *is_truncated);
7c673cae
FG
2536 int list_raw_objects(const rgw_pool& pool, const string& prefix_filter, int max,
2537 RGWListRawObjsCtx& ctx, list<string>& oids,
2538 bool *is_truncated);
181888fb 2539 string list_raw_objs_get_cursor(RGWListRawObjsCtx& ctx);
7c673cae
FG
2540
2541 int list_raw_prefixed_objs(const rgw_pool& pool, const string& prefix, list<string>& result);
2542 int list_zonegroups(list<string>& zonegroups);
2543 int list_regions(list<string>& regions);
2544 int list_zones(list<string>& zones);
2545 int list_realms(list<string>& realms);
2546 int list_periods(list<string>& periods);
2547 int list_periods(const string& current_period, list<string>& periods);
2548 void tick();
2549
2550 CephContext *ctx() { return cct; }
2551 /** do all necessary setup of the storage device */
31f18b77 2552 int initialize(CephContext *_cct, bool _use_gc_thread, bool _use_lc_thread, bool _quota_threads, bool _run_sync_thread, bool _run_reshard_thread) {
7c673cae
FG
2553 set_context(_cct);
2554 use_gc_thread = _use_gc_thread;
2555 use_lc_thread = _use_lc_thread;
2556 quota_threads = _quota_threads;
2557 run_sync_thread = _run_sync_thread;
31f18b77 2558 run_reshard_thread = _run_reshard_thread;
7c673cae
FG
2559 return initialize();
2560 }
2561 /** Initialize the RADOS instance and prepare to do other ops */
2562 virtual int init_rados();
2563 int init_zg_from_period(bool *initialized);
2564 int init_zg_from_local(bool *creating_defaults);
2565 int init_complete();
2566 int replace_region_with_zonegroup();
2567 int convert_regionmap();
2568 int initialize();
2569 void finalize();
2570
224ce89b
WB
2571 int register_to_service_map(const string& daemon_type, const map<string, string>& meta);
2572
7c673cae
FG
2573 void schedule_context(Context *c);
2574
2575 /** set up a bucket listing. handle is filled in. */
2576 int list_buckets_init(RGWAccessHandle *handle);
2577 /**
2578 * get the next bucket in the listing. obj is filled in,
2579 * handle is updated.
2580 */
2581 int list_buckets_next(rgw_bucket_dir_entry& obj, RGWAccessHandle *handle);
2582
2583 /// list logs
2584 int log_list_init(const string& prefix, RGWAccessHandle *handle);
2585 int log_list_next(RGWAccessHandle handle, string *name);
2586
2587 /// remove log
2588 int log_remove(const string& name);
2589
2590 /// show log
2591 int log_show_init(const string& name, RGWAccessHandle *handle);
2592 int log_show_next(RGWAccessHandle handle, rgw_log_entry *entry);
2593
2594 // log bandwidth info
2595 int log_usage(map<rgw_user_bucket, RGWUsageBatch>& usage_info);
2596 int read_usage(const rgw_user& user, uint64_t start_epoch, uint64_t end_epoch, uint32_t max_entries,
2597 bool *is_truncated, RGWUsageIter& read_iter, map<rgw_user_bucket, rgw_usage_log_entry>& usage);
2598 int trim_usage(rgw_user& user, uint64_t start_epoch, uint64_t end_epoch);
2599
2600 int create_pool(const rgw_pool& pool);
2601
7c673cae
FG
2602 int init_bucket_index(RGWBucketInfo& bucket_info, int num_shards);
2603 int select_bucket_placement(RGWUserInfo& user_info, const string& zonegroup_id, const string& rule,
2604 string *pselected_rule_name, RGWZonePlacementInfo *rule_info);
2605 int select_legacy_bucket_placement(RGWZonePlacementInfo *rule_info);
2606 int select_new_bucket_location(RGWUserInfo& user_info, const string& zonegroup_id, const string& rule,
2607 string *pselected_rule_name, RGWZonePlacementInfo *rule_info);
2608 int select_bucket_location_by_rule(const string& location_rule, RGWZonePlacementInfo *rule_info);
2609 void create_bucket_id(string *bucket_id);
2610
2611 bool get_obj_data_pool(const string& placement_rule, const rgw_obj& obj, rgw_pool *pool);
2612 bool obj_to_raw(const string& placement_rule, const rgw_obj& obj, rgw_raw_obj *raw_obj);
2613
2614 int create_bucket(RGWUserInfo& owner, rgw_bucket& bucket,
2615 const string& zonegroup_id,
2616 const string& placement_rule,
2617 const string& swift_ver_location,
2618 const RGWQuotaInfo * pquota_info,
2619 map<std::string,bufferlist>& attrs,
2620 RGWBucketInfo& bucket_info,
2621 obj_version *pobjv,
2622 obj_version *pep_objv,
2623 ceph::real_time creation_time,
2624 rgw_bucket *master_bucket,
2625 uint32_t *master_num_shards,
2626 bool exclusive = true);
2627 int add_bucket_placement(const rgw_pool& new_pool);
2628 int remove_bucket_placement(const rgw_pool& new_pool);
2629 int list_placement_set(set<rgw_pool>& names);
2630 int create_pools(vector<rgw_pool>& pools, vector<int>& retcodes);
2631
2632 RGWCoroutinesManagerRegistry *get_cr_registry() { return cr_registry; }
2633
2634 class SystemObject {
2635 RGWRados *store;
2636 RGWObjectCtx& ctx;
2637 rgw_raw_obj obj;
2638
2639 RGWObjState *state;
2640
2641 protected:
2642 int get_state(RGWRawObjState **pstate, RGWObjVersionTracker *objv_tracker);
2643
2644 public:
2645 SystemObject(RGWRados *_store, RGWObjectCtx& _ctx, rgw_raw_obj& _obj) : store(_store), ctx(_ctx), obj(_obj), state(NULL) {}
2646
2647 void invalidate_state();
2648
2649 RGWRados *get_store() { return store; }
2650 rgw_raw_obj& get_obj() { return obj; }
2651 RGWObjectCtx& get_ctx() { return ctx; }
2652
2653 struct Read {
2654 RGWRados::SystemObject *source;
2655
2656 struct GetObjState {
2657 rgw_rados_ref ref;
2658 bool has_ref{false};
2659 uint64_t last_ver{0};
2660
2661 GetObjState() {}
2662
2663 int get_ref(RGWRados *store, rgw_raw_obj& obj, rgw_rados_ref **pref);
2664 } state;
2665
2666 struct StatParams {
2667 ceph::real_time *lastmod;
2668 uint64_t *obj_size;
2669 map<string, bufferlist> *attrs;
7c673cae 2670
31f18b77 2671 StatParams() : lastmod(NULL), obj_size(NULL), attrs(NULL) {}
7c673cae
FG
2672 } stat_params;
2673
2674 struct ReadParams {
224ce89b 2675 rgw_cache_entry_info *cache_info{nullptr};
7c673cae
FG
2676 map<string, bufferlist> *attrs;
2677
2678 ReadParams() : attrs(NULL) {}
2679 } read_params;
2680
2681 explicit Read(RGWRados::SystemObject *_source) : source(_source) {}
2682
2683 int stat(RGWObjVersionTracker *objv_tracker);
b32b8144
FG
2684 int read(int64_t ofs, int64_t end, bufferlist& bl, RGWObjVersionTracker *objv_tracker,
2685 boost::optional<obj_version> refresh_version = boost::none);
7c673cae
FG
2686 int get_attr(const char *name, bufferlist& dest);
2687 };
2688 };
2689
2690 struct BucketShard {
2691 RGWRados *store;
2692 rgw_bucket bucket;
2693 int shard_id;
2694 librados::IoCtx index_ctx;
2695 string bucket_obj;
2696
2697 explicit BucketShard(RGWRados *_store) : store(_store), shard_id(-1) {}
2698 int init(const rgw_bucket& _bucket, const rgw_obj& obj);
2699 int init(const rgw_bucket& _bucket, int sid);
b32b8144 2700 int init(const RGWBucketInfo& bucket_info, int sid);
7c673cae
FG
2701 };
2702
2703 class Object {
2704 RGWRados *store;
2705 RGWBucketInfo bucket_info;
2706 RGWObjectCtx& ctx;
2707 rgw_obj obj;
2708
2709 BucketShard bs;
2710
2711 RGWObjState *state;
2712
2713 bool versioning_disabled;
2714
2715 bool bs_initialized;
2716
2717 protected:
2718 int get_state(RGWObjState **pstate, bool follow_olh, bool assume_noent = false);
2719 void invalidate_state();
2720
2721 int prepare_atomic_modification(librados::ObjectWriteOperation& op, bool reset_obj, const string *ptag,
181888fb 2722 const char *ifmatch, const char *ifnomatch, bool removal_op, bool modify_tail);
7c673cae
FG
2723 int complete_atomic_modification();
2724
2725 public:
2726 Object(RGWRados *_store, const RGWBucketInfo& _bucket_info, RGWObjectCtx& _ctx, const rgw_obj& _obj) : store(_store), bucket_info(_bucket_info),
2727 ctx(_ctx), obj(_obj), bs(store),
2728 state(NULL), versioning_disabled(false),
2729 bs_initialized(false) {}
2730
2731 RGWRados *get_store() { return store; }
2732 rgw_obj& get_obj() { return obj; }
2733 RGWObjectCtx& get_ctx() { return ctx; }
2734 RGWBucketInfo& get_bucket_info() { return bucket_info; }
2735 int get_manifest(RGWObjManifest **pmanifest);
2736
2737 int get_bucket_shard(BucketShard **pbs) {
2738 if (!bs_initialized) {
2739 int r = bs.init(bucket_info.bucket, obj);
2740 if (r < 0) {
2741 return r;
2742 }
2743 bs_initialized = true;
2744 }
2745 *pbs = &bs;
2746 return 0;
2747 }
2748
2749 void set_versioning_disabled(bool status) {
2750 versioning_disabled = status;
2751 }
2752
2753 bool versioning_enabled() {
2754 return (!versioning_disabled && bucket_info.versioning_enabled());
2755 }
2756
2757 struct Read {
2758 RGWRados::Object *source;
2759
2760 struct GetObjState {
2761 librados::IoCtx io_ctx;
2762 rgw_obj obj;
2763 rgw_raw_obj head_obj;
2764 } state;
2765
2766 struct ConditionParams {
2767 const ceph::real_time *mod_ptr;
2768 const ceph::real_time *unmod_ptr;
2769 bool high_precision_time;
2770 uint32_t mod_zone_id;
2771 uint64_t mod_pg_ver;
2772 const char *if_match;
2773 const char *if_nomatch;
2774
2775 ConditionParams() :
2776 mod_ptr(NULL), unmod_ptr(NULL), high_precision_time(false), mod_zone_id(0), mod_pg_ver(0),
2777 if_match(NULL), if_nomatch(NULL) {}
2778 } conds;
2779
2780 struct Params {
2781 ceph::real_time *lastmod;
2782 uint64_t *obj_size;
2783 map<string, bufferlist> *attrs;
7c673cae 2784
31f18b77 2785 Params() : lastmod(NULL), obj_size(NULL), attrs(NULL) {}
7c673cae
FG
2786 } params;
2787
2788 explicit Read(RGWRados::Object *_source) : source(_source) {}
2789
2790 int prepare();
2791 static int range_to_ofs(uint64_t obj_size, int64_t &ofs, int64_t &end);
2792 int read(int64_t ofs, int64_t end, bufferlist& bl);
2793 int iterate(int64_t ofs, int64_t end, RGWGetDataCB *cb);
2794 int get_attr(const char *name, bufferlist& dest);
2795 };
2796
2797 struct Write {
2798 RGWRados::Object *target;
2799
2800 struct MetaParams {
2801 ceph::real_time *mtime;
2802 map<std::string, bufferlist>* rmattrs;
2803 const bufferlist *data;
2804 RGWObjManifest *manifest;
2805 const string *ptag;
2806 list<rgw_obj_index_key> *remove_objs;
2807 ceph::real_time set_mtime;
2808 rgw_user owner;
2809 RGWObjCategory category;
2810 int flags;
2811 const char *if_match;
2812 const char *if_nomatch;
2813 uint64_t olh_epoch;
2814 ceph::real_time delete_at;
2815 bool canceled;
2816 const string *user_data;
31f18b77 2817 rgw_zone_set *zones_trace;
181888fb 2818 bool modify_tail;
3efd9988 2819 bool completeMultipart;
7c673cae
FG
2820
2821 MetaParams() : mtime(NULL), rmattrs(NULL), data(NULL), manifest(NULL), ptag(NULL),
2822 remove_objs(NULL), category(RGW_OBJ_CATEGORY_MAIN), flags(0),
181888fb 2823 if_match(NULL), if_nomatch(NULL), olh_epoch(0), canceled(false), user_data(nullptr), zones_trace(nullptr),
3efd9988 2824 modify_tail(false), completeMultipart(false) {}
7c673cae
FG
2825 } meta;
2826
2827 explicit Write(RGWRados::Object *_target) : target(_target) {}
2828
2829 int _do_write_meta(uint64_t size, uint64_t accounted_size,
2830 map<std::string, bufferlist>& attrs,
181888fb 2831 bool modify_tail, bool assume_noent,
7c673cae
FG
2832 void *index_op);
2833 int write_meta(uint64_t size, uint64_t accounted_size,
2834 map<std::string, bufferlist>& attrs);
2835 int write_data(const char *data, uint64_t ofs, uint64_t len, bool exclusive);
2836 };
2837
2838 struct Delete {
2839 RGWRados::Object *target;
2840
2841 struct DeleteParams {
2842 rgw_user bucket_owner;
2843 int versioning_status;
2844 ACLOwner obj_owner; /* needed for creation of deletion marker */
2845 uint64_t olh_epoch;
2846 string marker_version_id;
2847 uint32_t bilog_flags;
2848 list<rgw_obj_index_key> *remove_objs;
2849 ceph::real_time expiration_time;
2850 ceph::real_time unmod_since;
2851 ceph::real_time mtime; /* for setting delete marker mtime */
2852 bool high_precision_time;
31f18b77 2853 rgw_zone_set *zones_trace;
7c673cae 2854
31f18b77 2855 DeleteParams() : versioning_status(0), olh_epoch(0), bilog_flags(0), remove_objs(NULL), high_precision_time(false), zones_trace(nullptr) {}
7c673cae
FG
2856 } params;
2857
2858 struct DeleteResult {
2859 bool delete_marker;
2860 string version_id;
2861
2862 DeleteResult() : delete_marker(false) {}
2863 } result;
2864
2865 explicit Delete(RGWRados::Object *_target) : target(_target) {}
2866
2867 int delete_obj();
2868 };
2869
2870 struct Stat {
2871 RGWRados::Object *source;
2872
2873 struct Result {
2874 rgw_obj obj;
2875 RGWObjManifest manifest;
2876 bool has_manifest;
2877 uint64_t size;
2878 struct timespec mtime;
2879 map<string, bufferlist> attrs;
2880
2881 Result() : has_manifest(false), size(0) {}
2882 } result;
2883
2884 struct State {
2885 librados::IoCtx io_ctx;
2886 librados::AioCompletion *completion;
2887 int ret;
2888
2889 State() : completion(NULL), ret(0) {}
2890 } state;
2891
2892
2893 explicit Stat(RGWRados::Object *_source) : source(_source) {}
2894
2895 int stat_async();
2896 int wait();
2897 int stat();
2898 private:
2899 int finish();
2900 };
2901 };
2902
2903 class Bucket {
2904 RGWRados *store;
2905 RGWBucketInfo bucket_info;
2906 rgw_bucket& bucket;
2907 int shard_id;
2908
2909 public:
2910 Bucket(RGWRados *_store, const RGWBucketInfo& _bucket_info) : store(_store), bucket_info(_bucket_info), bucket(bucket_info.bucket),
2911 shard_id(RGW_NO_SHARD) {}
2912 RGWRados *get_store() { return store; }
2913 rgw_bucket& get_bucket() { return bucket; }
2914 RGWBucketInfo& get_bucket_info() { return bucket_info; }
2915
31f18b77
FG
2916 int update_bucket_id(const string& new_bucket_id);
2917
7c673cae
FG
2918 int get_shard_id() { return shard_id; }
2919 void set_shard_id(int id) {
2920 shard_id = id;
2921 }
2922
2923 class UpdateIndex {
2924 RGWRados::Bucket *target;
2925 string optag;
2926 rgw_obj obj;
2927 uint16_t bilog_flags{0};
2928 BucketShard bs;
2929 bool bs_initialized{false};
2930 bool blind;
2931 bool prepared{false};
31f18b77
FG
2932 rgw_zone_set *zones_trace{nullptr};
2933
2934 int init_bs() {
2935 int r = bs.init(target->get_bucket(), obj);
2936 if (r < 0) {
2937 return r;
2938 }
2939 bs_initialized = true;
2940 return 0;
2941 }
2942
2943 void invalidate_bs() {
2944 bs_initialized = false;
2945 }
2946
2947 int guard_reshard(BucketShard **pbs, std::function<int(BucketShard *)> call);
7c673cae
FG
2948 public:
2949
2950 UpdateIndex(RGWRados::Bucket *_target, const rgw_obj& _obj) : target(_target), obj(_obj),
2951 bs(target->get_store()) {
2952 blind = (target->get_bucket_info().index_type == RGWBIType_Indexless);
2953 }
2954
2955 int get_bucket_shard(BucketShard **pbs) {
2956 if (!bs_initialized) {
31f18b77 2957 int r = init_bs();
7c673cae
FG
2958 if (r < 0) {
2959 return r;
2960 }
7c673cae
FG
2961 }
2962 *pbs = &bs;
2963 return 0;
2964 }
2965
2966 void set_bilog_flags(uint16_t flags) {
2967 bilog_flags = flags;
2968 }
31f18b77
FG
2969
2970 void set_zones_trace(rgw_zone_set *_zones_trace) {
2971 zones_trace = _zones_trace;
2972 }
7c673cae
FG
2973
2974 int prepare(RGWModifyOp, const string *write_tag);
2975 int complete(int64_t poolid, uint64_t epoch, uint64_t size,
2976 uint64_t accounted_size, ceph::real_time& ut,
2977 const string& etag, const string& content_type,
2978 bufferlist *acl_bl, RGWObjCategory category,
2979 list<rgw_obj_index_key> *remove_objs, const string *user_data = nullptr);
2980 int complete_del(int64_t poolid, uint64_t epoch,
2981 ceph::real_time& removed_mtime, /* mtime of removed object */
2982 list<rgw_obj_index_key> *remove_objs);
2983 int cancel();
2984
2985 const string *get_optag() { return &optag; }
2986
2987 bool is_prepared() { return prepared; }
2988 };
2989
2990 struct List {
2991 RGWRados::Bucket *target;
2992 rgw_obj_key next_marker;
2993
2994 struct Params {
2995 string prefix;
2996 string delim;
2997 rgw_obj_key marker;
2998 rgw_obj_key end_marker;
2999 string ns;
3000 bool enforce_ns;
3001 RGWAccessListFilter *filter;
3002 bool list_versions;
3003
3004 Params() : enforce_ns(true), filter(NULL), list_versions(false) {}
3005 } params;
3006
3007 public:
3008 explicit List(RGWRados::Bucket *_target) : target(_target) {}
3009
c07f9fc5 3010 int list_objects(int64_t max, vector<rgw_bucket_dir_entry> *result, map<string, bool> *common_prefixes, bool *is_truncated);
7c673cae
FG
3011 rgw_obj_key& get_next_marker() {
3012 return next_marker;
3013 }
3014 };
3015 };
3016
3017 /** Write/overwrite an object to the bucket storage. */
3018 virtual int put_system_obj_impl(rgw_raw_obj& obj, uint64_t size, ceph::real_time *mtime,
3019 map<std::string, bufferlist>& attrs, int flags,
3020 bufferlist& data,
3021 RGWObjVersionTracker *objv_tracker,
3022 ceph::real_time set_mtime /* 0 for don't set */);
3023
3024 virtual int put_system_obj_data(void *ctx, rgw_raw_obj& obj, bufferlist& bl,
3025 off_t ofs, bool exclusive,
3026 RGWObjVersionTracker *objv_tracker = nullptr);
3027 int aio_put_obj_data(void *ctx, rgw_raw_obj& obj, bufferlist& bl,
3028 off_t ofs, bool exclusive, void **handle);
3029
3030 int put_system_obj(void *ctx, rgw_raw_obj& obj, const char *data, size_t len, bool exclusive,
3031 ceph::real_time *mtime, map<std::string, bufferlist>& attrs, RGWObjVersionTracker *objv_tracker,
3032 ceph::real_time set_mtime) {
3033 bufferlist bl;
3034 bl.append(data, len);
3035 int flags = PUT_OBJ_CREATE;
3036 if (exclusive)
3037 flags |= PUT_OBJ_EXCL;
3038
3039 return put_system_obj_impl(obj, len, mtime, attrs, flags, bl, objv_tracker, set_mtime);
3040 }
3041 int aio_wait(void *handle);
3042 bool aio_completed(void *handle);
3043
3044 int on_last_entry_in_listing(RGWBucketInfo& bucket_info,
3045 const std::string& obj_prefix,
3046 const std::string& obj_delim,
3047 std::function<int(const rgw_bucket_dir_entry&)> handler);
3048
3049 bool swift_versioning_enabled(const RGWBucketInfo& bucket_info) const {
3050 return bucket_info.has_swift_versioning() &&
3051 bucket_info.swift_ver_location.size();
3052 }
3053
3054 int swift_versioning_copy(RGWObjectCtx& obj_ctx, /* in/out */
3055 const rgw_user& user, /* in */
3056 RGWBucketInfo& bucket_info, /* in */
3057 rgw_obj& obj); /* in */
3058 int swift_versioning_restore(RGWObjectCtx& obj_ctx, /* in/out */
3059 const rgw_user& user, /* in */
3060 RGWBucketInfo& bucket_info, /* in */
3061 rgw_obj& obj, /* in */
3062 bool& restored); /* out */
3063 int copy_obj_to_remote_dest(RGWObjState *astate,
3064 map<string, bufferlist>& src_attrs,
3065 RGWRados::Object::Read& read_op,
3066 const rgw_user& user_id,
3067 rgw_obj& dest_obj,
3068 ceph::real_time *mtime);
3069
3070 enum AttrsMod {
3071 ATTRSMOD_NONE = 0,
3072 ATTRSMOD_REPLACE = 1,
3073 ATTRSMOD_MERGE = 2
3074 };
3075
3076 int rewrite_obj(RGWBucketInfo& dest_bucket_info, rgw_obj& obj);
3077
3078 int stat_remote_obj(RGWObjectCtx& obj_ctx,
3079 const rgw_user& user_id,
3080 const string& client_id,
3081 req_info *info,
3082 const string& source_zone,
3083 rgw_obj& src_obj,
3084 RGWBucketInfo& src_bucket_info,
3085 real_time *src_mtime,
3086 uint64_t *psize,
3087 const real_time *mod_ptr,
3088 const real_time *unmod_ptr,
3089 bool high_precision_time,
3090 const char *if_match,
3091 const char *if_nomatch,
3092 map<string, bufferlist> *pattrs,
3093 string *version_id,
3094 string *ptag,
3095 string *petag);
3096
3097 int fetch_remote_obj(RGWObjectCtx& obj_ctx,
3098 const rgw_user& user_id,
3099 const string& client_id,
3100 const string& op_id,
3101 bool record_op_state,
3102 req_info *info,
3103 const string& source_zone,
3104 rgw_obj& dest_obj,
3105 rgw_obj& src_obj,
3106 RGWBucketInfo& dest_bucket_info,
3107 RGWBucketInfo& src_bucket_info,
3108 ceph::real_time *src_mtime,
3109 ceph::real_time *mtime,
3110 const ceph::real_time *mod_ptr,
3111 const ceph::real_time *unmod_ptr,
3112 bool high_precision_time,
3113 const char *if_match,
3114 const char *if_nomatch,
3115 AttrsMod attrs_mod,
3116 bool copy_if_newer,
3117 map<string, bufferlist>& attrs,
3118 RGWObjCategory category,
3119 uint64_t olh_epoch,
3120 ceph::real_time delete_at,
3121 string *version_id,
3122 string *ptag,
3123 ceph::buffer::list *petag,
7c673cae 3124 void (*progress_cb)(off_t, void *),
31f18b77
FG
3125 void *progress_data,
3126 rgw_zone_set *zones_trace= nullptr);
7c673cae
FG
3127 /**
3128 * Copy an object.
3129 * dest_obj: the object to copy into
3130 * src_obj: the object to copy from
3131 * attrs: usage depends on attrs_mod parameter
3132 * attrs_mod: the modification mode of the attrs, may have the following values:
3133 * ATTRSMOD_NONE - the attributes of the source object will be
3134 * copied without modifications, attrs parameter is ignored;
3135 * ATTRSMOD_REPLACE - new object will have the attributes provided by attrs
3136 * parameter, source object attributes are not copied;
3137 * ATTRSMOD_MERGE - any conflicting meta keys on the source object's attributes
3138 * are overwritten by values contained in attrs parameter.
7c673cae
FG
3139 * Returns: 0 on success, -ERR# otherwise.
3140 */
3141 int copy_obj(RGWObjectCtx& obj_ctx,
3142 const rgw_user& user_id,
3143 const string& client_id,
3144 const string& op_id,
3145 req_info *info,
3146 const string& source_zone,
3147 rgw_obj& dest_obj,
3148 rgw_obj& src_obj,
3149 RGWBucketInfo& dest_bucket_info,
3150 RGWBucketInfo& src_bucket_info,
3151 ceph::real_time *src_mtime,
3152 ceph::real_time *mtime,
3153 const ceph::real_time *mod_ptr,
3154 const ceph::real_time *unmod_ptr,
3155 bool high_precision_time,
3156 const char *if_match,
3157 const char *if_nomatch,
3158 AttrsMod attrs_mod,
3159 bool copy_if_newer,
3160 map<std::string, bufferlist>& attrs,
3161 RGWObjCategory category,
3162 uint64_t olh_epoch,
3163 ceph::real_time delete_at,
3164 string *version_id,
3165 string *ptag,
3166 ceph::buffer::list *petag,
7c673cae
FG
3167 void (*progress_cb)(off_t, void *),
3168 void *progress_data);
3169
3170 int copy_obj_data(RGWObjectCtx& obj_ctx,
3171 RGWBucketInfo& dest_bucket_info,
3172 RGWRados::Object::Read& read_op, off_t end,
3173 rgw_obj& dest_obj,
3174 rgw_obj& src_obj,
3175 uint64_t max_chunk_size,
3176 ceph::real_time *mtime,
3177 ceph::real_time set_mtime,
3178 map<string, bufferlist>& attrs,
3179 RGWObjCategory category,
3180 uint64_t olh_epoch,
3181 ceph::real_time delete_at,
3182 string *version_id,
3183 string *ptag,
31f18b77 3184 ceph::buffer::list *petag);
7c673cae
FG
3185
3186 int check_bucket_empty(RGWBucketInfo& bucket_info);
3187
3188 /**
3189 * Delete a bucket.
3190 * bucket: the name of the bucket to delete
3191 * Returns 0 on success, -ERR# otherwise.
3192 */
3193 int delete_bucket(RGWBucketInfo& bucket_info, RGWObjVersionTracker& objv_tracker, bool check_empty = true);
3194
3195 bool is_meta_master();
3196
3197 /**
3198 * Check to see if the bucket metadata is synced
3199 */
3200 bool is_syncing_bucket_meta(const rgw_bucket& bucket);
3201 void wakeup_meta_sync_shards(set<int>& shard_ids);
3202 void wakeup_data_sync_shards(const string& source_zone, map<int, set<string> >& shard_ids);
3203
3204 RGWMetaSyncStatusManager* get_meta_sync_manager();
3205 RGWDataSyncStatusManager* get_data_sync_manager(const std::string& source_zone);
3206
3207 int set_bucket_owner(rgw_bucket& bucket, ACLOwner& owner);
3208 int set_buckets_enabled(std::vector<rgw_bucket>& buckets, bool enabled);
3209 int bucket_suspended(rgw_bucket& bucket, bool *suspended);
3210
3211 /** Delete an object.*/
3212 int delete_obj(RGWObjectCtx& obj_ctx,
3213 const RGWBucketInfo& bucket_owner,
3214 const rgw_obj& src_obj,
3215 int versioning_status,
3216 uint16_t bilog_flags = 0,
31f18b77
FG
3217 const ceph::real_time& expiration_time = ceph::real_time(),
3218 rgw_zone_set *zones_trace = nullptr);
7c673cae
FG
3219
3220 /** Delete a raw object.*/
3221 int delete_raw_obj(const rgw_raw_obj& obj);
3222
3223 /* Delete a system object */
3224 virtual int delete_system_obj(rgw_raw_obj& src_obj, RGWObjVersionTracker *objv_tracker = NULL);
3225
3226 /** Remove an object from the bucket index */
3227 int delete_obj_index(const rgw_obj& obj);
3228
3229 /**
31f18b77
FG
3230 * Get an attribute for a system object.
3231 * obj: the object to get attr
7c673cae
FG
3232 * name: name of the attr to retrieve
3233 * dest: bufferlist to store the result in
3234 * Returns: 0 on success, -ERR# otherwise.
3235 */
3236 virtual int system_obj_get_attr(rgw_raw_obj& obj, const char *name, bufferlist& dest);
3237
3238 int system_obj_set_attr(void *ctx, rgw_raw_obj& obj, const char *name, bufferlist& bl,
3239 RGWObjVersionTracker *objv_tracker);
3240 virtual int system_obj_set_attrs(void *ctx, rgw_raw_obj& obj,
3241 map<string, bufferlist>& attrs,
3242 map<string, bufferlist>* rmattrs,
3243 RGWObjVersionTracker *objv_tracker);
3244
3245 /**
3246 * Set an attr on an object.
3247 * bucket: name of the bucket holding the object
3248 * obj: name of the object to set the attr on
3249 * name: the attr to set
3250 * bl: the contents of the attr
3251 * Returns: 0 on success, -ERR# otherwise.
3252 */
3253 int set_attr(void *ctx, const RGWBucketInfo& bucket_info, rgw_obj& obj, const char *name, bufferlist& bl);
3254
3255 int set_attrs(void *ctx, const RGWBucketInfo& bucket_info, rgw_obj& obj,
3256 map<string, bufferlist>& attrs,
3257 map<string, bufferlist>* rmattrs);
3258
3259 int get_system_obj_state(RGWObjectCtx *rctx, rgw_raw_obj& obj, RGWRawObjState **state, RGWObjVersionTracker *objv_tracker);
3260 int get_obj_state(RGWObjectCtx *rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWObjState **state,
3261 bool follow_olh, bool assume_noent = false);
3262 int get_obj_state(RGWObjectCtx *rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWObjState **state) {
3263 return get_obj_state(rctx, bucket_info, obj, state, true);
3264 }
3265
3266 virtual int stat_system_obj(RGWObjectCtx& obj_ctx,
3267 RGWRados::SystemObject::Read::GetObjState& state,
3268 rgw_raw_obj& obj,
3269 map<string, bufferlist> *attrs,
3270 ceph::real_time *lastmod,
3271 uint64_t *obj_size,
3272 RGWObjVersionTracker *objv_tracker);
3273
3274 virtual int get_system_obj(RGWObjectCtx& obj_ctx, RGWRados::SystemObject::Read::GetObjState& read_state,
3275 RGWObjVersionTracker *objv_tracker, rgw_raw_obj& obj,
3276 bufferlist& bl, off_t ofs, off_t end,
3277 map<string, bufferlist> *attrs,
b32b8144
FG
3278 rgw_cache_entry_info *cache_info,
3279 boost::optional<obj_version> refresh_version =
3280 boost::none);
7c673cae
FG
3281
3282 virtual void register_chained_cache(RGWChainedCache *cache) {}
3283 virtual bool chain_cache_entry(list<rgw_cache_entry_info *>& cache_info_entries, RGWChainedCache::Entry *chained_entry) { return false; }
3284
3285 int iterate_obj(RGWObjectCtx& ctx,
3286 const RGWBucketInfo& bucket_info, const rgw_obj& obj,
3287 off_t ofs, off_t end,
3288 uint64_t max_chunk_size,
3289 int (*iterate_obj_cb)(const RGWBucketInfo& bucket_info, const rgw_obj& obj, const rgw_raw_obj&, off_t, off_t, off_t, bool, RGWObjState *, void *),
3290 void *arg);
3291
3292 int flush_read_list(struct get_obj_data *d);
3293
3294 int get_obj_iterate_cb(RGWObjectCtx *ctx, RGWObjState *astate,
3295 const RGWBucketInfo& bucket_info, const rgw_obj& obj,
3296 const rgw_raw_obj& read_obj,
3297 off_t obj_ofs, off_t read_ofs, off_t len,
3298 bool is_head_obj, void *arg);
3299
3300 void get_obj_aio_completion_cb(librados::completion_t cb, void *arg);
3301
3302 /**
3303 * a simple object read without keeping state
3304 */
3305
3306 virtual int raw_obj_stat(rgw_raw_obj& obj, uint64_t *psize, ceph::real_time *pmtime, uint64_t *epoch,
3307 map<string, bufferlist> *attrs, bufferlist *first_chunk,
3308 RGWObjVersionTracker *objv_tracker);
3309
3310 int obj_operate(const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::ObjectWriteOperation *op);
3311 int obj_operate(const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::ObjectReadOperation *op);
3312
31f18b77
FG
3313 int guard_reshard(BucketShard *bs, const rgw_obj& obj_instance, std::function<int(BucketShard *)> call);
3314 int block_while_resharding(RGWRados::BucketShard *bs, string *new_bucket_id);
3315
7c673cae
FG
3316 void bucket_index_guard_olh_op(RGWObjState& olh_state, librados::ObjectOperation& op);
3317 int olh_init_modification(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& olh_obj, string *op_tag);
3318 int olh_init_modification_impl(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& olh_obj, string *op_tag);
3319 int bucket_index_link_olh(const RGWBucketInfo& bucket_info, RGWObjState& olh_state,
3320 const rgw_obj& obj_instance, bool delete_marker,
3321 const string& op_tag, struct rgw_bucket_dir_entry_meta *meta,
3322 uint64_t olh_epoch,
31f18b77
FG
3323 ceph::real_time unmod_since, bool high_precision_time, rgw_zone_set *zones_trace = nullptr);
3324 int bucket_index_unlink_instance(const RGWBucketInfo& bucket_info, const rgw_obj& obj_instance, const string& op_tag, const string& olh_tag, uint64_t olh_epoch, rgw_zone_set *zones_trace = nullptr);
7c673cae
FG
3325 int bucket_index_read_olh_log(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& obj_instance, uint64_t ver_marker,
3326 map<uint64_t, vector<rgw_bucket_olh_log_entry> > *log, bool *is_truncated);
3327 int bucket_index_trim_olh_log(const RGWBucketInfo& bucket_info, RGWObjState& obj_state, const rgw_obj& obj_instance, uint64_t ver);
3328 int bucket_index_clear_olh(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& obj_instance);
3329 int apply_olh_log(RGWObjectCtx& ctx, RGWObjState& obj_state, const RGWBucketInfo& bucket_info, const rgw_obj& obj,
3330 bufferlist& obj_tag, map<uint64_t, vector<rgw_bucket_olh_log_entry> >& log,
31f18b77
FG
3331 uint64_t *plast_ver, rgw_zone_set *zones_trace = nullptr);
3332 int update_olh(RGWObjectCtx& obj_ctx, RGWObjState *state, const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_zone_set *zones_trace = nullptr);
7c673cae 3333 int set_olh(RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info, const rgw_obj& target_obj, bool delete_marker, rgw_bucket_dir_entry_meta *meta,
31f18b77 3334 uint64_t olh_epoch, ceph::real_time unmod_since, bool high_precision_time, rgw_zone_set *zones_trace = nullptr);
7c673cae 3335 int unlink_obj_instance(RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info, const rgw_obj& target_obj,
31f18b77 3336 uint64_t olh_epoch, rgw_zone_set *zones_trace = nullptr);
7c673cae
FG
3337
3338 void check_pending_olh_entries(map<string, bufferlist>& pending_entries, map<string, bufferlist> *rm_pending_entries);
3339 int remove_olh_pending_entries(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& olh_obj, map<string, bufferlist>& pending_attrs);
3340 int follow_olh(const RGWBucketInfo& bucket_info, RGWObjectCtx& ctx, RGWObjState *state, const rgw_obj& olh_obj, rgw_obj *target);
3341 int get_olh(const RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWOLHInfo *olh);
3342
3343 void gen_rand_obj_instance_name(rgw_obj *target);
3344
3345 int omap_get_vals(rgw_raw_obj& obj, bufferlist& header, const std::string& marker, uint64_t count, std::map<string, bufferlist>& m);
3346 int omap_get_all(rgw_raw_obj& obj, bufferlist& header, std::map<string, bufferlist>& m);
3347 int omap_set(rgw_raw_obj& obj, const std::string& key, bufferlist& bl);
3348 int omap_set(rgw_raw_obj& obj, map<std::string, bufferlist>& m);
3349 int omap_del(rgw_raw_obj& obj, const std::string& key);
3350 int update_containers_stats(map<string, RGWBucketEnt>& m);
3351 int append_async(rgw_raw_obj& obj, size_t size, bufferlist& bl);
3352
3353 int watch(const string& oid, uint64_t *watch_handle, librados::WatchCtx2 *ctx);
3354 int unwatch(uint64_t watch_handle);
3355 void add_watcher(int i);
3356 void remove_watcher(int i);
3357 virtual bool need_watch_notify() { return false; }
3358 int init_watch();
3359 void finalize_watch();
3360 int distribute(const string& key, bufferlist& bl);
3361 virtual int watch_cb(uint64_t notify_id,
3362 uint64_t cookie,
3363 uint64_t notifier_id,
3364 bufferlist& bl) { return 0; }
3365 void pick_control_oid(const string& key, string& notify_oid);
3366
3367 virtual void set_cache_enabled(bool state) {}
3368
3369 void set_atomic(void *ctx, rgw_obj& obj) {
3370 RGWObjectCtx *rctx = static_cast<RGWObjectCtx *>(ctx);
3371 rctx->obj.set_atomic(obj);
3372 }
3373 void set_prefetch_data(void *ctx, rgw_obj& obj) {
3374 RGWObjectCtx *rctx = static_cast<RGWObjectCtx *>(ctx);
3375 rctx->obj.set_prefetch_data(obj);
3376 }
3377 void set_prefetch_data(void *ctx, rgw_raw_obj& obj) {
3378 RGWObjectCtx *rctx = static_cast<RGWObjectCtx *>(ctx);
3379 rctx->raw.set_prefetch_data(obj);
3380 }
3381
3382 int decode_policy(bufferlist& bl, ACLOwner *owner);
3383 int get_bucket_stats(RGWBucketInfo& bucket_info, int shard_id, string *bucket_ver, string *master_ver,
c07f9fc5 3384 map<RGWObjCategory, RGWStorageStats>& stats, string *max_marker, bool* syncstopped = NULL);
7c673cae
FG
3385 int get_bucket_stats_async(RGWBucketInfo& bucket_info, int shard_id, RGWGetBucketStats_CB *cb);
3386 int get_user_stats(const rgw_user& user, RGWStorageStats& stats);
3387 int get_user_stats_async(const rgw_user& user, RGWGetUserStats_CB *cb);
3388 void get_bucket_instance_obj(const rgw_bucket& bucket, rgw_raw_obj& obj);
3389 void get_bucket_meta_oid(const rgw_bucket& bucket, string& oid);
3390
3391 int put_bucket_entrypoint_info(const string& tenant_name, const string& bucket_name, RGWBucketEntryPoint& entry_point,
3392 bool exclusive, RGWObjVersionTracker& objv_tracker, ceph::real_time mtime,
3393 map<string, bufferlist> *pattrs);
3394 int put_bucket_instance_info(RGWBucketInfo& info, bool exclusive, ceph::real_time mtime, map<string, bufferlist> *pattrs);
3395 int get_bucket_entrypoint_info(RGWObjectCtx& obj_ctx, const string& tenant_name, const string& bucket_name,
3396 RGWBucketEntryPoint& entry_point, RGWObjVersionTracker *objv_tracker,
b32b8144
FG
3397 ceph::real_time *pmtime, map<string, bufferlist> *pattrs, rgw_cache_entry_info *cache_info = NULL,
3398 boost::optional<obj_version> refresh_version = boost::none);
7c673cae
FG
3399 int get_bucket_instance_info(RGWObjectCtx& obj_ctx, const string& meta_key, RGWBucketInfo& info, ceph::real_time *pmtime, map<string, bufferlist> *pattrs);
3400 int get_bucket_instance_info(RGWObjectCtx& obj_ctx, const rgw_bucket& bucket, RGWBucketInfo& info, ceph::real_time *pmtime, map<string, bufferlist> *pattrs);
31f18b77 3401 int get_bucket_instance_from_oid(RGWObjectCtx& obj_ctx, const string& oid, RGWBucketInfo& info, ceph::real_time *pmtime, map<string, bufferlist> *pattrs,
b32b8144
FG
3402 rgw_cache_entry_info *cache_info = NULL,
3403 boost::optional<obj_version> refresh_version = boost::none);
7c673cae
FG
3404
3405 int convert_old_bucket_info(RGWObjectCtx& obj_ctx, const string& tenant_name, const string& bucket_name);
3406 static void make_bucket_entry_name(const string& tenant_name, const string& bucket_name, string& bucket_entry);
b32b8144
FG
3407
3408
3409private:
3410 int _get_bucket_info(RGWObjectCtx& obj_ctx, const string& tenant,
3411 const string& bucket_name, RGWBucketInfo& info,
3412 real_time *pmtime,
3413 map<string, bufferlist> *pattrs,
3414 boost::optional<obj_version> refresh_version);
3415public:
3416
3a9019d9
FG
3417 bool call(std::string command, cmdmap_t& cmdmap, std::string format,
3418 bufferlist& out) override final;
3419
3420 // Should really be protected, but some older GCCs don't handle
3421 // access control properly with lambdas defined in member functions
3422 // of child classes.
3423
3424 void cache_list_dump_helper(Formatter* f,
3425 const std::string& name,
3426 const ceph::real_time mtime,
3427 const std::uint64_t size) {
3428 f->dump_string("name", name);
3429 f->dump_string("mtime", ceph::to_iso_8601(mtime));
3430 f->dump_unsigned("size", size);
3431 }
3432
3433protected:
3434
3435 // `call_list` must iterate over all cache entries and call
3436 // `cache_list_dump_helper` with the supplied Formatter on any that
3437 // include `filter` as a substring.
3438 //
3439 virtual void call_list(const boost::optional<std::string>& filter,
3440 Formatter* format);
3441 // `call_inspect` must look up the requested target and, if found,
3442 // dump it to the supplied Formatter and return true. If not found,
3443 // it must return false.
3444 //
3445 virtual bool call_inspect(const std::string& target, Formatter* format);
3446
3447 // `call_erase` must erase the requested target and return true. If
3448 // the requested target does not exist, it should return false.
3449 virtual bool call_erase(const std::string& target);
3450
3451 // `call_zap` must erase the cache.
3452 virtual void call_zap();
3453public:
b32b8144 3454
7c673cae 3455 int get_bucket_info(RGWObjectCtx& obj_ctx,
b32b8144
FG
3456 const string& tenant_name, const string& bucket_name,
3457 RGWBucketInfo& info,
3458 ceph::real_time *pmtime, map<string, bufferlist> *pattrs = NULL);
3459
3460 // Returns true on successful refresh. Returns false if there was an
3461 // error or the version stored on the OSD is the same as that
3462 // presented in the BucketInfo structure.
3463 //
3464 int try_refresh_bucket_info(RGWBucketInfo& info,
3465 ceph::real_time *pmtime,
3466 map<string, bufferlist> *pattrs = nullptr);
3467
7c673cae 3468 int put_linked_bucket_info(RGWBucketInfo& info, bool exclusive, ceph::real_time mtime, obj_version *pep_objv,
b32b8144 3469 map<string, bufferlist> *pattrs, bool create_entry_point);
7c673cae
FG
3470
3471 int cls_rgw_init_index(librados::IoCtx& io_ctx, librados::ObjectWriteOperation& op, string& oid);
31f18b77
FG
3472 int cls_obj_prepare_op(BucketShard& bs, RGWModifyOp op, string& tag, rgw_obj& obj, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
3473 int cls_obj_complete_op(BucketShard& bs, const rgw_obj& obj, RGWModifyOp op, string& tag, int64_t pool, uint64_t epoch,
3474 rgw_bucket_dir_entry& ent, RGWObjCategory category, list<rgw_obj_index_key> *remove_objs, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
3475 int cls_obj_complete_add(BucketShard& bs, const rgw_obj& obj, string& tag, int64_t pool, uint64_t epoch, rgw_bucket_dir_entry& ent,
3476 RGWObjCategory category, list<rgw_obj_index_key> *remove_objs, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
7c673cae 3477 int cls_obj_complete_del(BucketShard& bs, string& tag, int64_t pool, uint64_t epoch, rgw_obj& obj,
31f18b77
FG
3478 ceph::real_time& removed_mtime, list<rgw_obj_index_key> *remove_objs, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
3479 int cls_obj_complete_cancel(BucketShard& bs, string& tag, rgw_obj& obj, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
7c673cae
FG
3480 int cls_obj_set_bucket_tag_timeout(RGWBucketInfo& bucket_info, uint64_t timeout);
3481 int cls_bucket_list(RGWBucketInfo& bucket_info, int shard_id, rgw_obj_index_key& start, const string& prefix,
3482 uint32_t num_entries, bool list_versions, map<string, rgw_bucket_dir_entry>& m,
3483 bool *is_truncated, rgw_obj_index_key *last_entry,
3484 bool (*force_check_filter)(const string& name) = NULL);
3485 int cls_bucket_head(const RGWBucketInfo& bucket_info, int shard_id, map<string, struct rgw_bucket_dir_header>& headers, map<int, string> *bucket_instance_ids = NULL);
3486 int cls_bucket_head_async(const RGWBucketInfo& bucket_info, int shard_id, RGWGetDirHeader_CB *ctx, int *num_aio);
3487 int list_bi_log_entries(RGWBucketInfo& bucket_info, int shard_id, string& marker, uint32_t max, std::list<rgw_bi_log_entry>& result, bool *truncated);
3488 int trim_bi_log_entries(RGWBucketInfo& bucket_info, int shard_id, string& marker, string& end_marker);
c07f9fc5
FG
3489 int resync_bi_log_entries(RGWBucketInfo& bucket_info, int shard_id);
3490 int stop_bi_log_entries(RGWBucketInfo& bucket_info, int shard_id);
7c673cae
FG
3491 int get_bi_log_status(RGWBucketInfo& bucket_info, int shard_id, map<int, string>& max_marker);
3492
3493 int bi_get_instance(const RGWBucketInfo& bucket_info, rgw_obj& obj, rgw_bucket_dir_entry *dirent);
3494 int bi_get(rgw_bucket& bucket, rgw_obj& obj, BIIndexType index_type, rgw_cls_bi_entry *entry);
3495 void bi_put(librados::ObjectWriteOperation& op, BucketShard& bs, rgw_cls_bi_entry& entry);
3496 int bi_put(BucketShard& bs, rgw_cls_bi_entry& entry);
3497 int bi_put(rgw_bucket& bucket, rgw_obj& obj, rgw_cls_bi_entry& entry);
3498 int bi_list(rgw_bucket& bucket, int shard_id, const string& filter_obj, const string& marker, uint32_t max, list<rgw_cls_bi_entry> *entries, bool *is_truncated);
3499 int bi_list(BucketShard& bs, const string& filter_obj, const string& marker, uint32_t max, list<rgw_cls_bi_entry> *entries, bool *is_truncated);
3500 int bi_list(rgw_bucket& bucket, const string& obj_name, const string& marker, uint32_t max,
3501 list<rgw_cls_bi_entry> *entries, bool *is_truncated);
3502 int bi_remove(BucketShard& bs);
3503
3504 int cls_obj_usage_log_add(const string& oid, rgw_usage_log_info& info);
3505 int cls_obj_usage_log_read(string& oid, string& user, uint64_t start_epoch, uint64_t end_epoch, uint32_t max_entries,
3506 string& read_iter, map<rgw_user_bucket, rgw_usage_log_entry>& usage, bool *is_truncated);
3507 int cls_obj_usage_log_trim(string& oid, string& user, uint64_t start_epoch, uint64_t end_epoch);
3508
3509 int key_to_shard_id(const string& key, int max_shards);
3510 void shard_name(const string& prefix, unsigned max_shards, const string& key, string& name, int *shard_id);
3511 void shard_name(const string& prefix, unsigned max_shards, const string& section, const string& key, string& name);
3512 void shard_name(const string& prefix, unsigned shard_id, string& name);
3513 int get_target_shard_id(const RGWBucketInfo& bucket_info, const string& obj_key, int *shard_id);
3514 void time_log_prepare_entry(cls_log_entry& entry, const ceph::real_time& ut, const string& section, const string& key, bufferlist& bl);
3515 int time_log_add_init(librados::IoCtx& io_ctx);
3516 int time_log_add(const string& oid, list<cls_log_entry>& entries,
3517 librados::AioCompletion *completion, bool monotonic_inc = true);
3518 int time_log_add(const string& oid, const ceph::real_time& ut, const string& section, const string& key, bufferlist& bl);
3519 int time_log_list(const string& oid, const ceph::real_time& start_time, const ceph::real_time& end_time,
3520 int max_entries, list<cls_log_entry>& entries,
3521 const string& marker, string *out_marker, bool *truncated);
3522 int time_log_info(const string& oid, cls_log_header *header);
3523 int time_log_info_async(librados::IoCtx& io_ctx, const string& oid, cls_log_header *header, librados::AioCompletion *completion);
3524 int time_log_trim(const string& oid, const ceph::real_time& start_time, const ceph::real_time& end_time,
3525 const string& from_marker, const string& to_marker,
3526 librados::AioCompletion *completion = nullptr);
3527
3528 string objexp_hint_get_shardname(int shard_num);
3529 int objexp_key_shard(const rgw_obj_index_key& key);
3530 void objexp_get_shard(int shard_num,
3531 string& shard); /* out */
3532 int objexp_hint_add(const ceph::real_time& delete_at,
3533 const string& tenant_name,
3534 const string& bucket_name,
3535 const string& bucket_id,
3536 const rgw_obj_index_key& obj_key);
3537 int objexp_hint_list(const string& oid,
3538 const ceph::real_time& start_time,
3539 const ceph::real_time& end_time,
3540 const int max_entries,
3541 const string& marker,
3542 list<cls_timeindex_entry>& entries, /* out */
3543 string *out_marker, /* out */
3544 bool *truncated); /* out */
3545 int objexp_hint_parse(cls_timeindex_entry &ti_entry,
3546 objexp_hint_entry& hint_entry); /* out */
3547 int objexp_hint_trim(const string& oid,
3548 const ceph::real_time& start_time,
3549 const ceph::real_time& end_time,
3550 const string& from_marker = std::string(),
3551 const string& to_marker = std::string());
3552
3553 int lock_exclusive(rgw_pool& pool, const string& oid, ceph::timespan& duration, string& zone_id, string& owner_id);
3554 int unlock(rgw_pool& pool, const string& oid, string& zone_id, string& owner_id);
3555
3556 void update_gc_chain(rgw_obj& head_obj, RGWObjManifest& manifest, cls_rgw_obj_chain *chain);
3557 int send_chain_to_gc(cls_rgw_obj_chain& chain, const string& tag, bool sync);
3558 int gc_operate(string& oid, librados::ObjectWriteOperation *op);
3559 int gc_aio_operate(string& oid, librados::ObjectWriteOperation *op);
3560 int gc_operate(string& oid, librados::ObjectReadOperation *op, bufferlist *pbl);
3561
3562 int list_gc_objs(int *index, string& marker, uint32_t max, bool expired_only, std::list<cls_rgw_gc_obj_info>& result, bool *truncated);
3563 int process_gc();
3564 int process_expire_objects();
3565 int defer_gc(void *ctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj);
3566
3567 int process_lc();
3568 int list_lc_progress(const string& marker, uint32_t max_entries, map<string, int> *progress_map);
3569
3570 int bucket_check_index(RGWBucketInfo& bucket_info,
3571 map<RGWObjCategory, RGWStorageStats> *existing_stats,
3572 map<RGWObjCategory, RGWStorageStats> *calculated_stats);
3573 int bucket_rebuild_index(RGWBucketInfo& bucket_info);
31f18b77 3574 int bucket_set_reshard(RGWBucketInfo& bucket_info, const cls_rgw_bucket_instance_entry& entry);
7c673cae
FG
3575 int remove_objs_from_index(RGWBucketInfo& bucket_info, list<rgw_obj_index_key>& oid_list);
3576 int move_rados_obj(librados::IoCtx& src_ioctx,
3577 const string& src_oid, const string& src_locator,
3578 librados::IoCtx& dst_ioctx,
3579 const string& dst_oid, const string& dst_locator);
3580 int fix_head_obj_locator(const RGWBucketInfo& bucket_info, bool copy_obj, bool remove_bad, rgw_obj_key& key);
3581 int fix_tail_obj_locator(const RGWBucketInfo& bucket_info, rgw_obj_key& key, bool fix, bool *need_fix);
3582
3583 int cls_user_get_header(const string& user_id, cls_user_header *header);
94b18763 3584 int cls_user_reset_stats(const string& user_id);
7c673cae
FG
3585 int cls_user_get_header_async(const string& user_id, RGWGetUserHeader_CB *ctx);
3586 int cls_user_sync_bucket_stats(rgw_raw_obj& user_obj, const RGWBucketInfo& bucket_info);
3587 int cls_user_list_buckets(rgw_raw_obj& obj,
3588 const string& in_marker,
3589 const string& end_marker,
3590 int max_entries,
3591 list<cls_user_bucket_entry>& entries,
3592 string *out_marker,
3593 bool *truncated);
3594 int cls_user_add_bucket(rgw_raw_obj& obj, const cls_user_bucket_entry& entry);
3595 int cls_user_update_buckets(rgw_raw_obj& obj, list<cls_user_bucket_entry>& entries, bool add);
3596 int cls_user_complete_stats_sync(rgw_raw_obj& obj);
3597 int complete_sync_user_stats(const rgw_user& user_id);
3598 int cls_user_add_bucket(rgw_raw_obj& obj, list<cls_user_bucket_entry>& entries);
3599 int cls_user_remove_bucket(rgw_raw_obj& obj, const cls_user_bucket& bucket);
c07f9fc5 3600 int cls_user_get_bucket_stats(const rgw_bucket& bucket, cls_user_bucket_entry& entry);
7c673cae
FG
3601
3602 int check_quota(const rgw_user& bucket_owner, rgw_bucket& bucket,
3603 RGWQuotaInfo& user_quota, RGWQuotaInfo& bucket_quota, uint64_t obj_size);
3604
224ce89b 3605 int check_bucket_shards(const RGWBucketInfo& bucket_info, const rgw_bucket& bucket,
31f18b77
FG
3606 RGWQuotaInfo& bucket_quota);
3607
3608 int add_bucket_to_reshard(const RGWBucketInfo& bucket_info, uint32_t new_num_shards);
3609
7c673cae 3610 uint64_t instance_id();
224ce89b
WB
3611 const string& zone_name() {
3612 return get_zone_params().get_name();
3613 }
7c673cae
FG
3614 const string& zone_id() {
3615 return get_zone_params().get_id();
3616 }
3617 string unique_id(uint64_t unique_num) {
3618 char buf[32];
3619 snprintf(buf, sizeof(buf), ".%llu.%llu", (unsigned long long)instance_id(), (unsigned long long)unique_num);
3620 string s = get_zone_params().get_id() + buf;
3621 return s;
3622 }
3623
3624 void init_unique_trans_id_deps() {
3625 char buf[16 + 2 + 1]; /* uint64_t needs 16, 2 hyphens add further 2 */
3626
3627 snprintf(buf, sizeof(buf), "-%llx-", (unsigned long long)instance_id());
3628 url_encode(string(buf) + get_zone_params().get_name(), trans_id_suffix);
3629 }
3630
3631 /* In order to preserve compability with Swift API, transaction ID
3632 * should contain at least 32 characters satisfying following spec:
3633 * - first 21 chars must be in range [0-9a-f]. Swift uses this
3634 * space for storing fragment of UUID obtained through a call to
3635 * uuid4() function of Python's uuid module;
3636 * - char no. 22 must be a hyphen;
3637 * - at least 10 next characters constitute hex-formatted timestamp
3638 * padded with zeroes if necessary. All bytes must be in [0-9a-f]
3639 * range;
3640 * - last, optional part of transaction ID is any url-encoded string
3641 * without restriction on length. */
3642 string unique_trans_id(const uint64_t unique_num) {
3643 char buf[41]; /* 2 + 21 + 1 + 16 (timestamp can consume up to 16) + 1 */
3644 time_t timestamp = time(NULL);
3645
3646 snprintf(buf, sizeof(buf), "tx%021llx-%010llx",
3647 (unsigned long long)unique_num,
3648 (unsigned long long)timestamp);
3649
3650 return string(buf) + trans_id_suffix;
3651 }
3652
3653 void get_log_pool(rgw_pool& pool) {
3654 pool = get_zone_params().log_pool;
3655 }
3656
3657 bool need_to_log_data() {
3658 return get_zone().log_data;
3659 }
3660
3661 bool need_to_log_metadata() {
224ce89b
WB
3662 return is_meta_master() &&
3663 (get_zonegroup().zones.size() > 1 || current_period.is_multi_zonegroups_with_zones());
7c673cae
FG
3664 }
3665
3efd9988
FG
3666 bool can_reshard() const {
3667 return current_period.get_id().empty() ||
3668 (zonegroup.zones.size() == 1 && current_period.is_single_zonegroup());
3669 }
3670
7c673cae
FG
3671 librados::Rados* get_rados_handle();
3672
3673 int delete_raw_obj_aio(const rgw_raw_obj& obj, list<librados::AioCompletion *>& handles);
3674 int delete_obj_aio(const rgw_obj& obj, RGWBucketInfo& info, RGWObjState *astate,
3675 list<librados::AioCompletion *>& handles, bool keep_index_consistent);
3676 private:
3677 /**
3678 * This is a helper method, it generates a list of bucket index objects with the given
3679 * bucket base oid and number of shards.
3680 *
3681 * bucket_oid_base [in] - base name of the bucket index object;
3682 * num_shards [in] - number of bucket index object shards.
3683 * bucket_objs [out] - filled by this method, a list of bucket index objects.
3684 */
3685 void get_bucket_index_objects(const string& bucket_oid_base, uint32_t num_shards,
3686 map<int, string>& bucket_objs, int shard_id = -1);
3687
3688 /**
3689 * Get the bucket index object with the given base bucket index object and object key,
3690 * and the number of bucket index shards.
3691 *
3692 * bucket_oid_base [in] - bucket object base name.
3693 * obj_key [in] - object key.
3694 * num_shards [in] - number of bucket index shards.
3695 * hash_type [in] - type of hash to find the shard ID.
3696 * bucket_obj [out] - the bucket index object for the given object.
3697 *
3698 * Return 0 on success, a failure code otherwise.
3699 */
3700 int get_bucket_index_object(const string& bucket_oid_base, const string& obj_key,
3701 uint32_t num_shards, RGWBucketInfo::BIShardsHashType hash_type, string *bucket_obj, int *shard);
3702
3703 void get_bucket_index_object(const string& bucket_oid_base, uint32_t num_shards,
3704 int shard_id, string *bucket_obj);
3705
3706 /**
3707 * Check the actual on-disk state of the object specified
3708 * by list_state, and fill in the time and size of object.
3709 * Then append any changes to suggested_updates for
3710 * the rgw class' dir_suggest_changes function.
3711 *
3712 * Note that this can maul list_state; don't use it afterwards. Also
3713 * it expects object to already be filled in from list_state; it only
3714 * sets the size and mtime.
3715 *
3716 * Returns 0 on success, -ENOENT if the object doesn't exist on disk,
3717 * and -errno on other failures. (-ENOENT is not a failure, and it
3718 * will encode that info as a suggested update.)
3719 */
3720 int check_disk_state(librados::IoCtx io_ctx,
3721 const RGWBucketInfo& bucket_info,
3722 rgw_bucket_dir_entry& list_state,
3723 rgw_bucket_dir_entry& object,
3724 bufferlist& suggested_updates);
3725
3726 /**
3727 * Init pool iteration
31f18b77 3728 * pool: pool to use for the ctx initialization
7c673cae
FG
3729 * ctx: context object to use for the iteration
3730 * Returns: 0 on success, -ERR# otherwise.
3731 */
3732 int pool_iterate_begin(const rgw_pool& pool, RGWPoolIterCtx& ctx);
31f18b77 3733
181888fb
FG
3734 /**
3735 * Init pool iteration
3736 * pool: pool to use
3737 * cursor: position to start iteration
3738 * ctx: context object to use for the iteration
3739 * Returns: 0 on success, -ERR# otherwise.
3740 */
3741 int pool_iterate_begin(const rgw_pool& pool, const string& cursor, RGWPoolIterCtx& ctx);
3742
3743 /**
3744 * Get pool iteration position
3745 * ctx: context object to use for the iteration
3746 * Returns: string representation of position
3747 */
3748 string pool_iterate_get_cursor(RGWPoolIterCtx& ctx);
3749
7c673cae
FG
3750 /**
3751 * Iterate over pool return object names, use optional filter
3752 * ctx: iteration context, initialized with pool_iterate_begin()
3753 * num: max number of objects to return
3754 * objs: a vector that the results will append into
3755 * is_truncated: if not NULL, will hold true iff more entries remain (the iteration is not yet complete)
3756 * filter: if not NULL, will be used to filter returned objects
3757 * Returns: 0 on success, -ERR# otherwise.
3758 */
3759 int pool_iterate(RGWPoolIterCtx& ctx, uint32_t num, vector<rgw_bucket_dir_entry>& objs,
3760 bool *is_truncated, RGWAccessListFilter *filter);
3761
3762 uint64_t next_bucket_id();
3763};
3764
3765class RGWStoreManager {
3766public:
3767 RGWStoreManager() {}
28e407b8
AA
3768 static RGWRados *get_storage(CephContext *cct, bool use_gc_thread, bool use_lc_thread, bool quota_threads,
3769 bool run_sync_thread, bool run_reshard_thread, bool use_cache = true) {
31f18b77 3770 RGWRados *store = init_storage_provider(cct, use_gc_thread, use_lc_thread, quota_threads, run_sync_thread,
28e407b8 3771 run_reshard_thread, use_cache);
7c673cae
FG
3772 return store;
3773 }
3774 static RGWRados *get_raw_storage(CephContext *cct) {
3775 RGWRados *store = init_raw_storage_provider(cct);
3776 return store;
3777 }
28e407b8 3778 static RGWRados *init_storage_provider(CephContext *cct, bool use_gc_thread, bool use_lc_thread, bool quota_threads, bool run_sync_thread, bool run_reshard_thread, bool use_metadata_cache);
7c673cae
FG
3779 static RGWRados *init_raw_storage_provider(CephContext *cct);
3780 static void close_storage(RGWRados *store);
3781
3782};
3783
3784template <class T>
3785class RGWChainedCacheImpl : public RGWChainedCache {
b32b8144 3786 ceph::timespan expiry;
7c673cae
FG
3787 RWLock lock;
3788
b32b8144 3789 map<string, std::pair<T, ceph::coarse_mono_time>> entries;
7c673cae
FG
3790
3791public:
3792 RGWChainedCacheImpl() : lock("RGWChainedCacheImpl::lock") {}
3793
3794 void init(RGWRados *store) {
3795 store->register_chained_cache(this);
b32b8144
FG
3796 expiry = std::chrono::seconds(store->ctx()->_conf->get_val<uint64_t>(
3797 "rgw_cache_expiry_interval"));
7c673cae
FG
3798 }
3799
3800 bool find(const string& key, T *entry) {
3801 RWLock::RLocker rl(lock);
b32b8144 3802 auto iter = entries.find(key);
7c673cae
FG
3803 if (iter == entries.end()) {
3804 return false;
3805 }
b32b8144
FG
3806 if (expiry.count() &&
3807 (ceph::coarse_mono_clock::now() - iter->second.second) > expiry) {
3808 return false;
3809 }
7c673cae 3810
b32b8144 3811 *entry = iter->second.first;
7c673cae
FG
3812 return true;
3813 }
3814
3815 bool put(RGWRados *store, const string& key, T *entry, list<rgw_cache_entry_info *>& cache_info_entries) {
3816 Entry chain_entry(this, key, entry);
3817
3818 /* we need the store cache to call us under its lock to maintain lock ordering */
3819 return store->chain_cache_entry(cache_info_entries, &chain_entry);
3820 }
3821
3822 void chain_cb(const string& key, void *data) override {
3823 T *entry = static_cast<T *>(data);
3824 RWLock::WLocker wl(lock);
b32b8144
FG
3825 entries[key].first = *entry;
3826 if (expiry.count() > 0) {
3827 entries[key].second = ceph::coarse_mono_clock::now();
3828 }
7c673cae
FG
3829 }
3830
3831 void invalidate(const string& key) override {
3832 RWLock::WLocker wl(lock);
3833 entries.erase(key);
3834 }
3835
3836 void invalidate_all() override {
3837 RWLock::WLocker wl(lock);
3838 entries.clear();
3839 }
3840}; /* RGWChainedCacheImpl */
3841
3842/**
3843 * Base of PUT operation.
3844 * Allow to create chained data transformers like compresors and encryptors.
3845 */
3846class RGWPutObjDataProcessor
3847{
3848public:
3849 RGWPutObjDataProcessor(){}
3850 virtual ~RGWPutObjDataProcessor(){}
3851 virtual int handle_data(bufferlist& bl, off_t ofs, void **phandle, rgw_raw_obj *pobj, bool *again) = 0;
3852 virtual int throttle_data(void *handle, const rgw_raw_obj& obj, uint64_t size, bool need_to_wait) = 0;
3853}; /* RGWPutObjDataProcessor */
3854
3855
3856class RGWPutObjProcessor : public RGWPutObjDataProcessor
3857{
3858protected:
3859 RGWRados *store;
3860 RGWObjectCtx& obj_ctx;
3861 bool is_complete;
3862 RGWBucketInfo bucket_info;
3863 bool canceled;
3864
3865 virtual int do_complete(size_t accounted_size, const string& etag,
3866 ceph::real_time *mtime, ceph::real_time set_mtime,
3867 map<string, bufferlist>& attrs, ceph::real_time delete_at,
31f18b77
FG
3868 const char *if_match, const char *if_nomatch, const string *user_data,
3869 rgw_zone_set* zones_trace = nullptr) = 0;
7c673cae
FG
3870
3871public:
3872 RGWPutObjProcessor(RGWObjectCtx& _obj_ctx, RGWBucketInfo& _bi) : store(NULL),
3873 obj_ctx(_obj_ctx),
3874 is_complete(false),
3875 bucket_info(_bi),
3876 canceled(false) {}
3877 ~RGWPutObjProcessor() override {}
3878 virtual int prepare(RGWRados *_store, string *oid_rand) {
3879 store = _store;
3880 return 0;
3881 }
3882
3883 int complete(size_t accounted_size, const string& etag,
3884 ceph::real_time *mtime, ceph::real_time set_mtime,
3885 map<string, bufferlist>& attrs, ceph::real_time delete_at,
31f18b77
FG
3886 const char *if_match = NULL, const char *if_nomatch = NULL, const string *user_data = nullptr,
3887 rgw_zone_set *zones_trace = nullptr);
7c673cae
FG
3888
3889 CephContext *ctx();
3890
3891 bool is_canceled() { return canceled; }
3892}; /* RGWPutObjProcessor */
3893
3894struct put_obj_aio_info {
3895 void *handle;
3896 rgw_raw_obj obj;
3897 uint64_t size;
3898};
3899
3900#define RGW_PUT_OBJ_MIN_WINDOW_SIZE_DEFAULT (16 * 1024 * 1024)
3901
3902class RGWPutObjProcessor_Aio : public RGWPutObjProcessor
3903{
3904 list<struct put_obj_aio_info> pending;
3905 uint64_t window_size{RGW_PUT_OBJ_MIN_WINDOW_SIZE_DEFAULT};
3906 uint64_t pending_size{0};
3907
3908 struct put_obj_aio_info pop_pending();
3909 int wait_pending_front();
3910 bool pending_has_completed();
3911
3912 rgw_raw_obj last_written_obj;
3913
3914protected:
3915 uint64_t obj_len{0};
3916
3917 set<rgw_raw_obj> written_objs;
3918 rgw_obj head_obj;
3919
3920 void add_written_obj(const rgw_raw_obj& obj) {
3921 written_objs.insert(obj);
3922 }
3923
3924 int drain_pending();
3925 int handle_obj_data(rgw_raw_obj& obj, bufferlist& bl, off_t ofs, off_t abs_ofs, void **phandle, bool exclusive);
3926
3927public:
3928 int prepare(RGWRados *store, string *oid_rand) override;
3929 int throttle_data(void *handle, const rgw_raw_obj& obj, uint64_t size, bool need_to_wait) override;
3930
3931 RGWPutObjProcessor_Aio(RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info) : RGWPutObjProcessor(obj_ctx, bucket_info) {}
3932 ~RGWPutObjProcessor_Aio() override;
3933}; /* RGWPutObjProcessor_Aio */
3934
3935class RGWPutObjProcessor_Atomic : public RGWPutObjProcessor_Aio
3936{
3937 bufferlist first_chunk;
3938 uint64_t part_size;
3939 off_t cur_part_ofs;
3940 off_t next_part_ofs;
3941 int cur_part_id;
3942 off_t data_ofs;
3943
3944 bufferlist pending_data_bl;
3945 uint64_t max_chunk_size;
3946
3947 bool versioned_object;
3948 uint64_t olh_epoch;
3949 string version_id;
3950
3951protected:
3952 rgw_bucket bucket;
3953 string obj_str;
3954
3955 string unique_tag;
3956
3957 rgw_raw_obj cur_obj;
3958 RGWObjManifest manifest;
3959 RGWObjManifest::generator manifest_gen;
3960
3961 int write_data(bufferlist& bl, off_t ofs, void **phandle, rgw_raw_obj *pobj, bool exclusive);
3962 int do_complete(size_t accounted_size, const string& etag,
3963 ceph::real_time *mtime, ceph::real_time set_mtime,
3964 map<string, bufferlist>& attrs, ceph::real_time delete_at,
31f18b77 3965 const char *if_match, const char *if_nomatch, const string *user_data, rgw_zone_set *zones_trace) override;
7c673cae
FG
3966
3967 int prepare_next_part(off_t ofs);
3968 int complete_parts();
3969 int complete_writing_data();
3970
3971 int prepare_init(RGWRados *store, string *oid_rand);
3972
3973public:
3974 ~RGWPutObjProcessor_Atomic() override {}
3975 RGWPutObjProcessor_Atomic(RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info,
3976 rgw_bucket& _b, const string& _o, uint64_t _p, const string& _t, bool versioned) :
3977 RGWPutObjProcessor_Aio(obj_ctx, bucket_info),
3978 part_size(_p),
3979 cur_part_ofs(0),
3980 next_part_ofs(_p),
3981 cur_part_id(0),
3982 data_ofs(0),
3983 max_chunk_size(0),
3984 versioned_object(versioned),
3985 olh_epoch(0),
3986 bucket(_b),
3987 obj_str(_o),
3988 unique_tag(_t) {}
3989 int prepare(RGWRados *store, string *oid_rand) override;
3990 virtual bool immutable_head() { return false; }
3991 int handle_data(bufferlist& bl, off_t ofs, void **phandle, rgw_raw_obj *pobj, bool *again) override;
3992
3993 void set_olh_epoch(uint64_t epoch) {
3994 olh_epoch = epoch;
3995 }
3996
3997 void set_version_id(const string& vid) {
3998 version_id = vid;
3999 }
4000}; /* RGWPutObjProcessor_Atomic */
4001
#define MP_META_SUFFIX ".meta"

/**
 * Builds and parses the object names used by one multipart upload.
 *
 * For key <oid> and upload id <upload_id>:
 *   meta object : <oid>.<upload_id>.meta
 *   part object : <oid>.<part_unique_str>.<part>
 * part_unique_str is the upload id unless the three-argument init() is used.
 */
class RGWMPObj {
  std::string oid;        // object key the upload targets
  std::string prefix;     // "<oid>.<part_unique_str>", base of every part name
  std::string meta;       // "<oid>.<upload_id>" followed by MP_META_SUFFIX
  std::string upload_id;
public:
  RGWMPObj() {}
  RGWMPObj(const std::string& _oid, const std::string& _upload_id) {
    init(_oid, _upload_id, _upload_id);
  }

  /** Initialise with the upload id doubling as the part-name component. */
  void init(const std::string& _oid, const std::string& _upload_id) {
    init(_oid, _upload_id, _upload_id);
  }

  /**
   * Initialise all derived names. An empty _oid resets the object to the
   * cleared state instead.
   */
  void init(const std::string& _oid, const std::string& _upload_id, const std::string& part_unique_str) {
    if (_oid.empty()) {
      clear();
      return;
    }
    oid = _oid;
    upload_id = _upload_id;
    prefix = oid + ".";
    meta = prefix + upload_id + MP_META_SUFFIX;
    prefix.append(part_unique_str);
  }

  std::string& get_meta() { return meta; }

  /** Name of part number num: "<prefix>.<num>". */
  std::string get_part(int num) const {
    return prefix + "." + std::to_string(num);
  }

  /** Name of the part identified by the string part: "<prefix>.<part>". */
  std::string get_part(const std::string& part) const {
    return prefix + "." + part;
  }

  std::string& get_upload_id() {
    return upload_id;
  }
  std::string& get_key() {
    return oid;
  }

  /**
   * Parse a meta object name of the form "<key>.<upload_id>.<suffix>" and
   * re-initialise this object from it. The key may itself contain dots; the
   * last two dots delimit the upload id.
   *
   * Returns false, leaving the object untouched, when the name does not
   * contain two dots that can delimit an upload id.
   */
  bool from_meta(const std::string& meta) {
    const std::string::size_type end_pos = meta.rfind('.'); // search for ".meta"
    // end_pos == 0 leaves no room for "<key>.<upload_id>"; without this
    // guard `end_pos - 1` wraps around and rfind() re-finds the same dot.
    if (end_pos == std::string::npos || end_pos == 0)
      return false;
    const std::string::size_type mid_pos = meta.rfind('.', end_pos - 1); // <key>.<upload_id>
    if (mid_pos == std::string::npos)
      return false;
    // Extract into locals first: the argument may refer to one of our members.
    const std::string key = meta.substr(0, mid_pos);
    const std::string id = meta.substr(mid_pos + 1, end_pos - mid_pos - 1);
    init(key, id, id);
    return true;
  }

  /** Reset every name to the empty string. */
  void clear() {
    oid.clear();
    prefix.clear();
    meta.clear();
    upload_id.clear();
  }
};
4067
4068class RGWPutObjProcessor_Multipart : public RGWPutObjProcessor_Atomic
4069{
4070 string part_num;
4071 RGWMPObj mp;
4072 req_state *s;
4073 string upload_id;
4074
4075protected:
4076 int prepare(RGWRados *store, string *oid_rand);
4077 int do_complete(size_t accounted_size, const string& etag,
4078 ceph::real_time *mtime, ceph::real_time set_mtime,
4079 map<string, bufferlist>& attrs, ceph::real_time delete_at,
31f18b77
FG
4080 const char *if_match, const char *if_nomatch, const string *user_data,
4081 rgw_zone_set *zones_trace) override;
7c673cae
FG
4082public:
4083 bool immutable_head() { return true; }
4084 RGWPutObjProcessor_Multipart(RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info, uint64_t _p, req_state *_s) :
4085 RGWPutObjProcessor_Atomic(obj_ctx, bucket_info, _s->bucket, _s->object.name, _p, _s->req_id, false), s(_s) {}
4086 void get_mp(RGWMPObj** _mp);
4087}; /* RGWPutObjProcessor_Multipart */
4088#endif