]> git.proxmox.com Git - ceph.git/blob - ceph/src/rgw/rgw_rados.h
update sources to v12.2.3
[ceph.git] / ceph / src / rgw / rgw_rados.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #ifndef CEPH_RGWRADOS_H
5 #define CEPH_RGWRADOS_H
6
7 #include <functional>
8
9 #include "include/rados/librados.hpp"
10 #include "include/Context.h"
11 #include "common/RefCountedObj.h"
12 #include "common/RWLock.h"
13 #include "common/ceph_time.h"
14 #include "common/lru_map.h"
15 #include "rgw_common.h"
16 #include "cls/rgw/cls_rgw_types.h"
17 #include "cls/version/cls_version_types.h"
18 #include "cls/log/cls_log_types.h"
19 #include "cls/statelog/cls_statelog_types.h"
20 #include "cls/timeindex/cls_timeindex_types.h"
21 #include "rgw_log.h"
22 #include "rgw_metadata.h"
23 #include "rgw_meta_sync_status.h"
24 #include "rgw_period_puller.h"
25 #include "rgw_sync_module.h"
26 #include "rgw_sync_log_trim.h"
27
28 class RGWWatcher;
29 class SafeTimer;
30 class ACLOwner;
31 class RGWGC;
32 class RGWMetaNotifier;
33 class RGWDataNotifier;
34 class RGWLC;
35 class RGWObjectExpirer;
36 class RGWMetaSyncProcessorThread;
37 class RGWDataSyncProcessorThread;
38 class RGWSyncLogTrimThread;
39 class RGWRESTConn;
40 struct RGWZoneGroup;
41 struct RGWZoneParams;
42 class RGWReshard;
43 class RGWReshardWait;
44
45 /* flags for put_obj_meta() */
46 #define PUT_OBJ_CREATE 0x01
47 #define PUT_OBJ_EXCL 0x02
48 #define PUT_OBJ_CREATE_EXCL (PUT_OBJ_CREATE | PUT_OBJ_EXCL)
49
50 #define RGW_OBJ_NS_MULTIPART "multipart"
51 #define RGW_OBJ_NS_SHADOW "shadow"
52
53 #define RGW_BUCKET_INSTANCE_MD_PREFIX ".bucket.meta."
54
55 #define RGW_NO_SHARD -1
56
/* Primes applied to a hash value before it is reduced to a shard index. */
#define RGW_SHARDS_PRIME_0 7877
#define RGW_SHARDS_PRIME_1 65521

/*
 * Reduce the hash value hval to a shard index.
 *
 * The value is first taken modulo a prime (RGW_SHARDS_PRIME_0 when max_shards
 * does not exceed it, RGW_SHARDS_PRIME_1 otherwise) and then modulo
 * max_shards. max_shards must be non-zero: the final modulo divides by it.
 */
static inline int rgw_shards_mod(unsigned hval, int max_shards)
{
  const unsigned prime = (max_shards <= RGW_SHARDS_PRIME_0) ? RGW_SHARDS_PRIME_0
                                                            : RGW_SHARDS_PRIME_1;
  return hval % prime % max_shards;
}
67
68 static inline int rgw_shards_hash(const string& key, int max_shards)
69 {
70 return rgw_shards_mod(ceph_str_hash_linux(key.c_str(), key.size()), max_shards);
71 }
72
73 static inline int rgw_shards_max()
74 {
75 return RGW_SHARDS_PRIME_1;
76 }
77
78 static inline void prepend_bucket_marker(const rgw_bucket& bucket, const string& orig_oid, string& oid)
79 {
80 if (bucket.marker.empty() || orig_oid.empty()) {
81 oid = orig_oid;
82 } else {
83 oid = bucket.marker;
84 oid.append("_");
85 oid.append(orig_oid);
86 }
87 }
88
89 static inline void get_obj_bucket_and_oid_loc(const rgw_obj& obj, string& oid, string& locator)
90 {
91 const rgw_bucket& bucket = obj.bucket;
92 prepend_bucket_marker(bucket, obj.get_oid(), oid);
93 const string& loc = obj.key.get_loc();
94 if (!loc.empty()) {
95 prepend_bucket_marker(bucket, loc, locator);
96 } else {
97 locator.clear();
98 }
99 }
100
101 int rgw_init_ioctx(librados::Rados *rados, const rgw_pool& pool, librados::IoCtx& ioctx, bool create = false);
102
103 int rgw_policy_from_attrset(CephContext *cct, map<string, bufferlist>& attrset, RGWAccessControlPolicy *policy);
104
105 static inline bool rgw_raw_obj_to_obj(const rgw_bucket& bucket, const rgw_raw_obj& raw_obj, rgw_obj *obj)
106 {
107 ssize_t pos = raw_obj.oid.find('_');
108 if (pos < 0) {
109 return false;
110 }
111
112 if (!rgw_obj_key::parse_raw_oid(raw_obj.oid.substr(pos + 1), &obj->key)) {
113 return false;
114 }
115 obj->bucket = bucket;
116
117 return true;
118 }
119
120 struct rgw_bucket_placement {
121 string placement_rule;
122 rgw_bucket bucket;
123
124 void dump(Formatter *f) const;
125 };
126
127 class rgw_obj_select {
128 string placement_rule;
129 rgw_obj obj;
130 rgw_raw_obj raw_obj;
131 bool is_raw;
132
133 public:
134 rgw_obj_select() : is_raw(false) {}
135 rgw_obj_select(const rgw_obj& _obj) : obj(_obj), is_raw(false) {}
136 rgw_obj_select(const rgw_raw_obj& _raw_obj) : raw_obj(_raw_obj), is_raw(true) {}
137 rgw_obj_select(const rgw_obj_select& rhs) {
138 placement_rule = rhs.placement_rule;
139 is_raw = rhs.is_raw;
140 if (is_raw) {
141 raw_obj = rhs.raw_obj;
142 } else {
143 obj = rhs.obj;
144 }
145 }
146
147 rgw_raw_obj get_raw_obj(const RGWZoneGroup& zonegroup, const RGWZoneParams& zone_params) const;
148 rgw_raw_obj get_raw_obj(RGWRados *store) const;
149
150 rgw_obj_select& operator=(const rgw_obj& rhs) {
151 obj = rhs;
152 is_raw = false;
153 return *this;
154 }
155
156 rgw_obj_select& operator=(const rgw_raw_obj& rhs) {
157 raw_obj = rhs;
158 is_raw = true;
159 return *this;
160 }
161
162 void set_placement_rule(const string& rule) {
163 placement_rule = rule;
164 }
165 };
166
167 struct compression_block {
168 uint64_t old_ofs;
169 uint64_t new_ofs;
170 uint64_t len;
171
172 void encode(bufferlist& bl) const {
173 ENCODE_START(1, 1, bl);
174 ::encode(old_ofs, bl);
175 ::encode(new_ofs, bl);
176 ::encode(len, bl);
177 ENCODE_FINISH(bl);
178 }
179
180 void decode(bufferlist::iterator& bl) {
181 DECODE_START(1, bl);
182 ::decode(old_ofs, bl);
183 ::decode(new_ofs, bl);
184 ::decode(len, bl);
185 DECODE_FINISH(bl);
186 }
187 };
188 WRITE_CLASS_ENCODER(compression_block)
189
190 struct RGWCompressionInfo {
191 string compression_type;
192 uint64_t orig_size;
193 vector<compression_block> blocks;
194
195 RGWCompressionInfo() : compression_type("none"), orig_size(0) {}
196 RGWCompressionInfo(const RGWCompressionInfo& cs_info) : compression_type(cs_info.compression_type),
197 orig_size(cs_info.orig_size),
198 blocks(cs_info.blocks) {}
199
200 void encode(bufferlist& bl) const {
201 ENCODE_START(1, 1, bl);
202 ::encode(compression_type, bl);
203 ::encode(orig_size, bl);
204 ::encode(blocks, bl);
205 ENCODE_FINISH(bl);
206 }
207
208 void decode(bufferlist::iterator& bl) {
209 DECODE_START(1, bl);
210 ::decode(compression_type, bl);
211 ::decode(orig_size, bl);
212 ::decode(blocks, bl);
213 DECODE_FINISH(bl);
214 }
215 };
216 WRITE_CLASS_ENCODER(RGWCompressionInfo)
217
218 int rgw_compression_info_from_attrset(map<string, bufferlist>& attrs, bool& need_decompress, RGWCompressionInfo& cs_info);
219
220 struct RGWOLHInfo {
221 rgw_obj target;
222 bool removed;
223
224 RGWOLHInfo() : removed(false) {}
225
226 void encode(bufferlist& bl) const {
227 ENCODE_START(1, 1, bl);
228 ::encode(target, bl);
229 ::encode(removed, bl);
230 ENCODE_FINISH(bl);
231 }
232
233 void decode(bufferlist::iterator& bl) {
234 DECODE_START(1, bl);
235 ::decode(target, bl);
236 ::decode(removed, bl);
237 DECODE_FINISH(bl);
238 }
239 static void generate_test_instances(list<RGWOLHInfo*>& o);
240 void dump(Formatter *f) const;
241 };
242 WRITE_CLASS_ENCODER(RGWOLHInfo)
243
244 struct RGWOLHPendingInfo {
245 ceph::real_time time;
246
247 RGWOLHPendingInfo() {}
248
249 void encode(bufferlist& bl) const {
250 ENCODE_START(1, 1, bl);
251 ::encode(time, bl);
252 ENCODE_FINISH(bl);
253 }
254
255 void decode(bufferlist::iterator& bl) {
256 DECODE_START(1, bl);
257 ::decode(time, bl);
258 DECODE_FINISH(bl);
259 }
260
261 void dump(Formatter *f) const;
262 };
263 WRITE_CLASS_ENCODER(RGWOLHPendingInfo)
264
265 struct RGWUsageBatch {
266 map<ceph::real_time, rgw_usage_log_entry> m;
267
268 void insert(ceph::real_time& t, rgw_usage_log_entry& entry, bool *account) {
269 bool exists = m.find(t) != m.end();
270 *account = !exists;
271 m[t].aggregate(entry);
272 }
273 };
274
275 struct RGWUsageIter {
276 string read_iter;
277 uint32_t index;
278
279 RGWUsageIter() : index(0) {}
280 };
281
282 class RGWGetDataCB {
283 protected:
284 uint64_t extra_data_len;
285 public:
286 virtual int handle_data(bufferlist& bl, off_t bl_ofs, off_t bl_len) = 0;
287 RGWGetDataCB() : extra_data_len(0) {}
288 virtual ~RGWGetDataCB() {}
289 virtual void set_extra_data_len(uint64_t len) {
290 extra_data_len = len;
291 }
292 /**
293 * Flushes any cached data. Used by RGWGetObjFilter.
294 * Return logic same as handle_data.
295 */
296 virtual int flush() {
297 return 0;
298 }
299 /**
300 * Allows to extend fetch range of RGW object. Used by RGWGetObjFilter.
301 */
302 virtual int fixup_range(off_t& bl_ofs, off_t& bl_end) {
303 return 0;
304 }
305 };
306
307 class RGWAccessListFilter {
308 public:
309 virtual ~RGWAccessListFilter() {}
310 virtual bool filter(string& name, string& key) = 0;
311 };
312
313 struct RGWCloneRangeInfo {
314 rgw_obj src;
315 off_t src_ofs;
316 off_t dst_ofs;
317 uint64_t len;
318 };
319
320 struct RGWObjManifestPart {
321 rgw_obj loc; /* the object where the data is located */
322 uint64_t loc_ofs; /* the offset at that object where the data is located */
323 uint64_t size; /* the part size */
324
325 RGWObjManifestPart() : loc_ofs(0), size(0) {}
326
327 void encode(bufferlist& bl) const {
328 ENCODE_START(2, 2, bl);
329 ::encode(loc, bl);
330 ::encode(loc_ofs, bl);
331 ::encode(size, bl);
332 ENCODE_FINISH(bl);
333 }
334
335 void decode(bufferlist::iterator& bl) {
336 DECODE_START_LEGACY_COMPAT_LEN_32(2, 2, 2, bl);
337 ::decode(loc, bl);
338 ::decode(loc_ofs, bl);
339 ::decode(size, bl);
340 DECODE_FINISH(bl);
341 }
342
343 void dump(Formatter *f) const;
344 static void generate_test_instances(list<RGWObjManifestPart*>& o);
345 };
346 WRITE_CLASS_ENCODER(RGWObjManifestPart)
347
348 /*
349 The manifest defines a set of rules for structuring the object parts.
350 There are a few terms to note:
351 - head: the head part of the object, which is the part that contains
352 the first chunk of data. An object might not have a head (as in the
353 case of multipart-part objects).
354 - stripe: data portion of a single rgw object that resides on a single
355 rados object.
356 - part: a collection of stripes that make a contiguous part of an
357 object. A regular object will only have one part (although might have
358 many stripes), a multipart object might have many parts. Each part
359 has a fixed stripe size, although the last stripe of a part might
360 be smaller than that. Consecutive parts may be merged if their stripe
361 value is the same.
362 */
363
364 struct RGWObjManifestRule {
365 uint32_t start_part_num;
366 uint64_t start_ofs;
367 uint64_t part_size; /* each part size, 0 if there's no part size, meaning it's unlimited */
368 uint64_t stripe_max_size; /* underlying obj max size */
369 string override_prefix;
370
371 RGWObjManifestRule() : start_part_num(0), start_ofs(0), part_size(0), stripe_max_size(0) {}
372 RGWObjManifestRule(uint32_t _start_part_num, uint64_t _start_ofs, uint64_t _part_size, uint64_t _stripe_max_size) :
373 start_part_num(_start_part_num), start_ofs(_start_ofs), part_size(_part_size), stripe_max_size(_stripe_max_size) {}
374
375 void encode(bufferlist& bl) const {
376 ENCODE_START(2, 1, bl);
377 ::encode(start_part_num, bl);
378 ::encode(start_ofs, bl);
379 ::encode(part_size, bl);
380 ::encode(stripe_max_size, bl);
381 ::encode(override_prefix, bl);
382 ENCODE_FINISH(bl);
383 }
384
385 void decode(bufferlist::iterator& bl) {
386 DECODE_START(2, bl);
387 ::decode(start_part_num, bl);
388 ::decode(start_ofs, bl);
389 ::decode(part_size, bl);
390 ::decode(stripe_max_size, bl);
391 if (struct_v >= 2)
392 ::decode(override_prefix, bl);
393 DECODE_FINISH(bl);
394 }
395 void dump(Formatter *f) const;
396 };
397 WRITE_CLASS_ENCODER(RGWObjManifestRule)
398
399 class RGWObjManifest {
400 protected:
401 bool explicit_objs; /* old manifest? */
402 map<uint64_t, RGWObjManifestPart> objs;
403
404 uint64_t obj_size;
405
406 rgw_obj obj;
407 uint64_t head_size;
408 string head_placement_rule;
409
410 uint64_t max_head_size;
411 string prefix;
412 rgw_bucket_placement tail_placement; /* might be different than the original bucket,
413 as object might have been copied across pools */
414 map<uint64_t, RGWObjManifestRule> rules;
415
416 string tail_instance; /* tail object's instance */
417
418 void convert_to_explicit(const RGWZoneGroup& zonegroup, const RGWZoneParams& zone_params);
419 int append_explicit(RGWObjManifest& m, const RGWZoneGroup& zonegroup, const RGWZoneParams& zone_params);
420 void append_rules(RGWObjManifest& m, map<uint64_t, RGWObjManifestRule>::iterator& iter, string *override_prefix);
421
422 void update_iterators() {
423 begin_iter.seek(0);
424 end_iter.seek(obj_size);
425 }
426 public:
427
428 RGWObjManifest() : explicit_objs(false), obj_size(0), head_size(0), max_head_size(0),
429 begin_iter(this), end_iter(this) {}
430 RGWObjManifest(const RGWObjManifest& rhs) {
431 *this = rhs;
432 }
433 RGWObjManifest& operator=(const RGWObjManifest& rhs) {
434 explicit_objs = rhs.explicit_objs;
435 objs = rhs.objs;
436 obj_size = rhs.obj_size;
437 obj = rhs.obj;
438 head_size = rhs.head_size;
439 max_head_size = rhs.max_head_size;
440 prefix = rhs.prefix;
441 tail_placement = rhs.tail_placement;
442 rules = rhs.rules;
443 tail_instance = rhs.tail_instance;
444
445 begin_iter.set_manifest(this);
446 end_iter.set_manifest(this);
447
448 begin_iter.seek(rhs.begin_iter.get_ofs());
449 end_iter.seek(rhs.end_iter.get_ofs());
450
451 return *this;
452 }
453
454 map<uint64_t, RGWObjManifestPart>& get_explicit_objs() {
455 return objs;
456 }
457
458
459 void set_explicit(uint64_t _size, map<uint64_t, RGWObjManifestPart>& _objs) {
460 explicit_objs = true;
461 obj_size = _size;
462 objs.swap(_objs);
463 }
464
465 void get_implicit_location(uint64_t cur_part_id, uint64_t cur_stripe, uint64_t ofs, string *override_prefix, rgw_obj_select *location);
466
467 void set_trivial_rule(uint64_t tail_ofs, uint64_t stripe_max_size) {
468 RGWObjManifestRule rule(0, tail_ofs, 0, stripe_max_size);
469 rules[0] = rule;
470 max_head_size = tail_ofs;
471 }
472
473 void set_multipart_part_rule(uint64_t stripe_max_size, uint64_t part_num) {
474 RGWObjManifestRule rule(0, 0, 0, stripe_max_size);
475 rule.start_part_num = part_num;
476 rules[0] = rule;
477 max_head_size = 0;
478 }
479
480 void encode(bufferlist& bl) const {
481 ENCODE_START(7, 6, bl);
482 ::encode(obj_size, bl);
483 ::encode(objs, bl);
484 ::encode(explicit_objs, bl);
485 ::encode(obj, bl);
486 ::encode(head_size, bl);
487 ::encode(max_head_size, bl);
488 ::encode(prefix, bl);
489 ::encode(rules, bl);
490 bool encode_tail_bucket = !(tail_placement.bucket == obj.bucket);
491 ::encode(encode_tail_bucket, bl);
492 if (encode_tail_bucket) {
493 ::encode(tail_placement.bucket, bl);
494 }
495 bool encode_tail_instance = (tail_instance != obj.key.instance);
496 ::encode(encode_tail_instance, bl);
497 if (encode_tail_instance) {
498 ::encode(tail_instance, bl);
499 }
500 ::encode(head_placement_rule, bl);
501 ::encode(tail_placement.placement_rule, bl);
502 ENCODE_FINISH(bl);
503 }
504
505 void decode(bufferlist::iterator& bl) {
506 DECODE_START_LEGACY_COMPAT_LEN_32(7, 2, 2, bl);
507 ::decode(obj_size, bl);
508 ::decode(objs, bl);
509 if (struct_v >= 3) {
510 ::decode(explicit_objs, bl);
511 ::decode(obj, bl);
512 ::decode(head_size, bl);
513 ::decode(max_head_size, bl);
514 ::decode(prefix, bl);
515 ::decode(rules, bl);
516 } else {
517 explicit_objs = true;
518 if (!objs.empty()) {
519 map<uint64_t, RGWObjManifestPart>::iterator iter = objs.begin();
520 obj = iter->second.loc;
521 head_size = iter->second.size;
522 max_head_size = head_size;
523 }
524 }
525
526 if (explicit_objs && head_size > 0 && !objs.empty()) {
527 /* patch up manifest due to issue 16435:
528 * the first object in the explicit objs list might not be the one we need to access, use the
529 * head object instead if set. This would happen if we had an old object that was created
530 * when the explicit objs manifest was around, and it got copied.
531 */
532 rgw_obj& obj_0 = objs[0].loc;
533 if (!obj_0.get_oid().empty() && obj_0.key.ns.empty()) {
534 objs[0].loc = obj;
535 objs[0].size = head_size;
536 }
537 }
538
539 if (struct_v >= 4) {
540 if (struct_v < 6) {
541 ::decode(tail_placement.bucket, bl);
542 } else {
543 bool need_to_decode;
544 ::decode(need_to_decode, bl);
545 if (need_to_decode) {
546 ::decode(tail_placement.bucket, bl);
547 } else {
548 tail_placement.bucket = obj.bucket;
549 }
550 }
551 }
552
553 if (struct_v >= 5) {
554 if (struct_v < 6) {
555 ::decode(tail_instance, bl);
556 } else {
557 bool need_to_decode;
558 ::decode(need_to_decode, bl);
559 if (need_to_decode) {
560 ::decode(tail_instance, bl);
561 } else {
562 tail_instance = obj.key.instance;
563 }
564 }
565 } else { // old object created before 'tail_instance' field added to manifest
566 tail_instance = obj.key.instance;
567 }
568
569 if (struct_v >= 7) {
570 ::decode(head_placement_rule, bl);
571 ::decode(tail_placement.placement_rule, bl);
572 }
573
574 update_iterators();
575 DECODE_FINISH(bl);
576 }
577
578 void dump(Formatter *f) const;
579 static void generate_test_instances(list<RGWObjManifest*>& o);
580
581 int append(RGWObjManifest& m, RGWZoneGroup& zonegroup, RGWZoneParams& zone_params);
582 int append(RGWObjManifest& m, RGWRados *store);
583
584 bool get_rule(uint64_t ofs, RGWObjManifestRule *rule);
585
586 bool empty() {
587 if (explicit_objs)
588 return objs.empty();
589 return rules.empty();
590 }
591
592 bool has_explicit_objs() {
593 return explicit_objs;
594 }
595
596 bool has_tail() {
597 if (explicit_objs) {
598 if (objs.size() == 1) {
599 map<uint64_t, RGWObjManifestPart>::iterator iter = objs.begin();
600 rgw_obj& o = iter->second.loc;
601 return !(obj == o);
602 }
603 return (objs.size() >= 2);
604 }
605 return (obj_size > head_size);
606 }
607
608 void set_head(const string& placement_rule, const rgw_obj& _o, uint64_t _s) {
609 head_placement_rule = placement_rule;
610 obj = _o;
611 head_size = _s;
612
613 if (explicit_objs && head_size > 0) {
614 objs[0].loc = obj;
615 objs[0].size = head_size;
616 }
617 }
618
619 const rgw_obj& get_obj() {
620 return obj;
621 }
622
623 void set_tail_placement(const string& placement_rule, const rgw_bucket& _b) {
624 tail_placement.placement_rule = placement_rule;
625 tail_placement.bucket = _b;
626 }
627
628 const rgw_bucket_placement& get_tail_placement() {
629 return tail_placement;
630 }
631
632 const string& get_head_placement_rule() {
633 return head_placement_rule;
634 }
635
636 void set_prefix(const string& _p) {
637 prefix = _p;
638 }
639
640 const string& get_prefix() {
641 return prefix;
642 }
643
644 void set_tail_instance(const string& _ti) {
645 tail_instance = _ti;
646 }
647
648 const string& get_tail_instance() {
649 return tail_instance;
650 }
651
652 void set_head_size(uint64_t _s) {
653 head_size = _s;
654 }
655
656 void set_obj_size(uint64_t s) {
657 obj_size = s;
658
659 update_iterators();
660 }
661
662 uint64_t get_obj_size() {
663 return obj_size;
664 }
665
666 uint64_t get_head_size() {
667 return head_size;
668 }
669
670 void set_max_head_size(uint64_t s) {
671 max_head_size = s;
672 }
673
674 uint64_t get_max_head_size() {
675 return max_head_size;
676 }
677
678 class obj_iterator {
679 RGWObjManifest *manifest;
680 uint64_t part_ofs; /* where current part starts */
681 uint64_t stripe_ofs; /* where current stripe starts */
682 uint64_t ofs; /* current position within the object */
683 uint64_t stripe_size; /* current part size */
684
685 int cur_part_id;
686 int cur_stripe;
687 string cur_override_prefix;
688
689 rgw_obj_select location;
690
691 map<uint64_t, RGWObjManifestRule>::iterator rule_iter;
692 map<uint64_t, RGWObjManifestRule>::iterator next_rule_iter;
693
694 map<uint64_t, RGWObjManifestPart>::iterator explicit_iter;
695
696 void init() {
697 part_ofs = 0;
698 stripe_ofs = 0;
699 ofs = 0;
700 stripe_size = 0;
701 cur_part_id = 0;
702 cur_stripe = 0;
703 }
704
705 void update_explicit_pos();
706
707
708 protected:
709
710 void set_manifest(RGWObjManifest *m) {
711 manifest = m;
712 }
713
714 public:
715 obj_iterator() : manifest(NULL) {
716 init();
717 }
718 explicit obj_iterator(RGWObjManifest *_m) : manifest(_m) {
719 init();
720 if (!manifest->empty()) {
721 seek(0);
722 }
723 }
724 obj_iterator(RGWObjManifest *_m, uint64_t _ofs) : manifest(_m) {
725 init();
726 if (!manifest->empty()) {
727 seek(_ofs);
728 }
729 }
730 void seek(uint64_t ofs);
731
732 void operator++();
733 bool operator==(const obj_iterator& rhs) {
734 return (ofs == rhs.ofs);
735 }
736 bool operator!=(const obj_iterator& rhs) {
737 return (ofs != rhs.ofs);
738 }
739 const rgw_obj_select& get_location() {
740 return location;
741 }
742
743 /* start of current stripe */
744 uint64_t get_stripe_ofs() {
745 if (manifest->explicit_objs) {
746 return explicit_iter->first;
747 }
748 return stripe_ofs;
749 }
750
751 /* current ofs relative to start of rgw object */
752 uint64_t get_ofs() const {
753 return ofs;
754 }
755
756 /* stripe number */
757 int get_cur_stripe() const {
758 return cur_stripe;
759 }
760
761 /* current stripe size */
762 uint64_t get_stripe_size() {
763 if (manifest->explicit_objs) {
764 return explicit_iter->second.size;
765 }
766 return stripe_size;
767 }
768
769 /* offset where data starts within current stripe */
770 uint64_t location_ofs() {
771 if (manifest->explicit_objs) {
772 return explicit_iter->second.loc_ofs;
773 }
774 return 0; /* all stripes start at zero offset */
775 }
776
777 void update_location();
778
779 friend class RGWObjManifest;
780 };
781
782 const obj_iterator& obj_begin();
783 const obj_iterator& obj_end();
784 obj_iterator obj_find(uint64_t ofs);
785
786 obj_iterator begin_iter;
787 obj_iterator end_iter;
788
789 /*
790 * simple object generator. Using a simple single rule manifest.
791 */
792 class generator {
793 RGWObjManifest *manifest;
794 uint64_t last_ofs;
795 uint64_t cur_part_ofs;
796 int cur_part_id;
797 int cur_stripe;
798 uint64_t cur_stripe_size;
799 string cur_oid;
800
801 string oid_prefix;
802
803 rgw_obj_select cur_obj;
804
805 RGWObjManifestRule rule;
806
807 public:
808 generator() : manifest(NULL), last_ofs(0), cur_part_ofs(0), cur_part_id(0),
809 cur_stripe(0), cur_stripe_size(0) {}
810 int create_begin(CephContext *cct, RGWObjManifest *manifest, const string& placement_rule, rgw_bucket& bucket, rgw_obj& obj);
811
812 int create_next(uint64_t ofs);
813
814 rgw_raw_obj get_cur_obj(RGWZoneGroup& zonegroup, RGWZoneParams& zone_params) { return cur_obj.get_raw_obj(zonegroup, zone_params); }
815 rgw_raw_obj get_cur_obj(RGWRados *store) { return cur_obj.get_raw_obj(store); }
816
817 /* total max size of current stripe (including head obj) */
818 uint64_t cur_stripe_max_size() {
819 return cur_stripe_size;
820 }
821 };
822 };
823 WRITE_CLASS_ENCODER(RGWObjManifest)
824
825 struct RGWUploadPartInfo {
826 uint32_t num;
827 uint64_t size;
828 uint64_t accounted_size{0};
829 string etag;
830 ceph::real_time modified;
831 RGWObjManifest manifest;
832 RGWCompressionInfo cs_info;
833
834 RGWUploadPartInfo() : num(0), size(0) {}
835
836 void encode(bufferlist& bl) const {
837 ENCODE_START(4, 2, bl);
838 ::encode(num, bl);
839 ::encode(size, bl);
840 ::encode(etag, bl);
841 ::encode(modified, bl);
842 ::encode(manifest, bl);
843 ::encode(cs_info, bl);
844 ::encode(accounted_size, bl);
845 ENCODE_FINISH(bl);
846 }
847 void decode(bufferlist::iterator& bl) {
848 DECODE_START_LEGACY_COMPAT_LEN(4, 2, 2, bl);
849 ::decode(num, bl);
850 ::decode(size, bl);
851 ::decode(etag, bl);
852 ::decode(modified, bl);
853 if (struct_v >= 3)
854 ::decode(manifest, bl);
855 if (struct_v >= 4) {
856 ::decode(cs_info, bl);
857 ::decode(accounted_size, bl);
858 } else {
859 accounted_size = size;
860 }
861 DECODE_FINISH(bl);
862 }
863 void dump(Formatter *f) const;
864 static void generate_test_instances(list<RGWUploadPartInfo*>& o);
865 };
866 WRITE_CLASS_ENCODER(RGWUploadPartInfo)
867
868 struct RGWObjState {
869 rgw_obj obj;
870 bool is_atomic;
871 bool has_attrs;
872 bool exists;
873 uint64_t size; //< size of raw object
874 uint64_t accounted_size{0}; //< size before compression, encryption
875 ceph::real_time mtime;
876 uint64_t epoch;
877 bufferlist obj_tag;
878 bufferlist tail_tag;
879 string write_tag;
880 bool fake_tag;
881 RGWObjManifest manifest;
882 bool has_manifest;
883 string shadow_obj;
884 bool has_data;
885 bufferlist data;
886 bool prefetch_data;
887 bool keep_tail;
888 bool is_olh;
889 bufferlist olh_tag;
890 uint64_t pg_ver;
891 uint32_t zone_short_id;
892
893 /* important! don't forget to update copy constructor */
894
895 RGWObjVersionTracker objv_tracker;
896
897 map<string, bufferlist> attrset;
898 RGWObjState() : is_atomic(false), has_attrs(0), exists(false),
899 size(0), epoch(0), fake_tag(false), has_manifest(false),
900 has_data(false), prefetch_data(false), keep_tail(false), is_olh(false),
901 pg_ver(0), zone_short_id(0) {}
902 RGWObjState(const RGWObjState& rhs) : obj (rhs.obj) {
903 is_atomic = rhs.is_atomic;
904 has_attrs = rhs.has_attrs;
905 exists = rhs.exists;
906 size = rhs.size;
907 accounted_size = rhs.accounted_size;
908 mtime = rhs.mtime;
909 epoch = rhs.epoch;
910 if (rhs.obj_tag.length()) {
911 obj_tag = rhs.obj_tag;
912 }
913 if (rhs.tail_tag.length()) {
914 tail_tag = rhs.tail_tag;
915 }
916 write_tag = rhs.write_tag;
917 fake_tag = rhs.fake_tag;
918 if (rhs.has_manifest) {
919 manifest = rhs.manifest;
920 }
921 has_manifest = rhs.has_manifest;
922 shadow_obj = rhs.shadow_obj;
923 has_data = rhs.has_data;
924 if (rhs.data.length()) {
925 data = rhs.data;
926 }
927 prefetch_data = rhs.prefetch_data;
928 keep_tail = rhs.keep_tail;
929 is_olh = rhs.is_olh;
930 objv_tracker = rhs.objv_tracker;
931 pg_ver = rhs.pg_ver;
932 }
933
934 bool get_attr(string name, bufferlist& dest) {
935 map<string, bufferlist>::iterator iter = attrset.find(name);
936 if (iter != attrset.end()) {
937 dest = iter->second;
938 return true;
939 }
940 return false;
941 }
942 };
943
944 struct RGWRawObjState {
945 rgw_raw_obj obj;
946 bool has_attrs{false};
947 bool exists{false};
948 uint64_t size{0};
949 ceph::real_time mtime;
950 uint64_t epoch;
951 bufferlist obj_tag;
952 bool has_data{false};
953 bufferlist data;
954 bool prefetch_data{false};
955 uint64_t pg_ver{0};
956
957 /* important! don't forget to update copy constructor */
958
959 RGWObjVersionTracker objv_tracker;
960
961 map<string, bufferlist> attrset;
962 RGWRawObjState() {}
963 RGWRawObjState(const RGWRawObjState& rhs) : obj (rhs.obj) {
964 has_attrs = rhs.has_attrs;
965 exists = rhs.exists;
966 size = rhs.size;
967 mtime = rhs.mtime;
968 epoch = rhs.epoch;
969 if (rhs.obj_tag.length()) {
970 obj_tag = rhs.obj_tag;
971 }
972 has_data = rhs.has_data;
973 if (rhs.data.length()) {
974 data = rhs.data;
975 }
976 prefetch_data = rhs.prefetch_data;
977 pg_ver = rhs.pg_ver;
978 objv_tracker = rhs.objv_tracker;
979 }
980 };
981
982 struct RGWPoolIterCtx {
983 librados::IoCtx io_ctx;
984 librados::NObjectIterator iter;
985 };
986
987 struct RGWListRawObjsCtx {
988 bool initialized;
989 RGWPoolIterCtx iter_ctx;
990
991 RGWListRawObjsCtx() : initialized(false) {}
992 };
993
994 struct RGWDefaultSystemMetaObjInfo {
995 string default_id;
996
997 void encode(bufferlist& bl) const {
998 ENCODE_START(1, 1, bl);
999 ::encode(default_id, bl);
1000 ENCODE_FINISH(bl);
1001 }
1002
1003 void decode(bufferlist::iterator& bl) {
1004 DECODE_START(1, bl);
1005 ::decode(default_id, bl);
1006 DECODE_FINISH(bl);
1007 }
1008
1009 void dump(Formatter *f) const;
1010 void decode_json(JSONObj *obj);
1011 };
1012 WRITE_CLASS_ENCODER(RGWDefaultSystemMetaObjInfo)
1013
1014 struct RGWNameToId {
1015 string obj_id;
1016
1017 void encode(bufferlist& bl) const {
1018 ENCODE_START(1, 1, bl);
1019 ::encode(obj_id, bl);
1020 ENCODE_FINISH(bl);
1021 }
1022
1023 void decode(bufferlist::iterator& bl) {
1024 DECODE_START(1, bl);
1025 ::decode(obj_id, bl);
1026 DECODE_FINISH(bl);
1027 }
1028
1029 void dump(Formatter *f) const;
1030 void decode_json(JSONObj *obj);
1031 };
1032 WRITE_CLASS_ENCODER(RGWNameToId)
1033
1034 class RGWSystemMetaObj {
1035 protected:
1036 string id;
1037 string name;
1038
1039 CephContext *cct;
1040 RGWRados *store;
1041
1042 int store_name(bool exclusive);
1043 int store_info(bool exclusive);
1044 int read_info(const string& obj_id, bool old_format = false);
1045 int read_id(const string& obj_name, string& obj_id);
1046 int read_default(RGWDefaultSystemMetaObjInfo& default_info,
1047 const string& oid);
1048 /* read and use default id */
1049 int use_default(bool old_format = false);
1050
1051 public:
1052 RGWSystemMetaObj() : cct(NULL), store(NULL) {}
1053 RGWSystemMetaObj(const string& _name): name(_name), cct(NULL), store(NULL) {}
1054 RGWSystemMetaObj(const string& _id, const string& _name) : id(_id), name(_name), cct(NULL), store(NULL) {}
1055 RGWSystemMetaObj(CephContext *_cct, RGWRados *_store): cct(_cct), store(_store){}
1056 RGWSystemMetaObj(const string& _name, CephContext *_cct, RGWRados *_store): name(_name), cct(_cct), store(_store){}
1057 const string& get_name() const { return name; }
1058 const string& get_id() const { return id; }
1059
1060 void set_name(const string& _name) { name = _name;}
1061 void set_id(const string& _id) { id = _id;}
1062 void clear_id() { id.clear(); }
1063
1064 virtual ~RGWSystemMetaObj() {}
1065
1066 virtual void encode(bufferlist& bl) const {
1067 ENCODE_START(1, 1, bl);
1068 ::encode(id, bl);
1069 ::encode(name, bl);
1070 ENCODE_FINISH(bl);
1071 }
1072
1073 virtual void decode(bufferlist::iterator& bl) {
1074 DECODE_START(1, bl);
1075 ::decode(id, bl);
1076 ::decode(name, bl);
1077 DECODE_FINISH(bl);
1078 }
1079
1080 void reinit_instance(CephContext *_cct, RGWRados *_store) {
1081 cct = _cct;
1082 store = _store;
1083 }
1084 int init(CephContext *_cct, RGWRados *_store, bool setup_obj = true, bool old_format = false);
1085 virtual int read_default_id(string& default_id, bool old_format = false);
1086 virtual int set_as_default(bool exclusive = false);
1087 int delete_default();
1088 virtual int create(bool exclusive = true);
1089 int delete_obj(bool old_format = false);
1090 int rename(const string& new_name);
1091 int update() { return store_info(false);}
1092 int update_name() { return store_name(false);}
1093 int read();
1094 int write(bool exclusive);
1095
1096 virtual rgw_pool get_pool(CephContext *cct) = 0;
1097 virtual const string get_default_oid(bool old_format = false) = 0;
1098 virtual const string& get_names_oid_prefix() = 0;
1099 virtual const string& get_info_oid_prefix(bool old_format = false) = 0;
1100 virtual const string& get_predefined_name(CephContext *cct) = 0;
1101
1102 void dump(Formatter *f) const;
1103 void decode_json(JSONObj *obj);
1104 };
1105 WRITE_CLASS_ENCODER(RGWSystemMetaObj)
1106
1107 struct RGWZonePlacementInfo {
1108 rgw_pool index_pool;
1109 rgw_pool data_pool;
1110 rgw_pool data_extra_pool; /* if not set we should use data_pool */
1111 RGWBucketIndexType index_type;
1112 std::string compression_type;
1113
1114 RGWZonePlacementInfo() : index_type(RGWBIType_Normal) {}
1115
1116 void encode(bufferlist& bl) const {
1117 ENCODE_START(6, 1, bl);
1118 ::encode(index_pool.to_str(), bl);
1119 ::encode(data_pool.to_str(), bl);
1120 ::encode(data_extra_pool.to_str(), bl);
1121 ::encode((uint32_t)index_type, bl);
1122 ::encode(compression_type, bl);
1123 ENCODE_FINISH(bl);
1124 }
1125
1126 void decode(bufferlist::iterator& bl) {
1127 DECODE_START(6, bl);
1128 string index_pool_str;
1129 string data_pool_str;
1130 ::decode(index_pool_str, bl);
1131 index_pool = rgw_pool(index_pool_str);
1132 ::decode(data_pool_str, bl);
1133 data_pool = rgw_pool(data_pool_str);
1134 if (struct_v >= 4) {
1135 string data_extra_pool_str;
1136 ::decode(data_extra_pool_str, bl);
1137 data_extra_pool = rgw_pool(data_extra_pool_str);
1138 }
1139 if (struct_v >= 5) {
1140 uint32_t it;
1141 ::decode(it, bl);
1142 index_type = (RGWBucketIndexType)it;
1143 }
1144 if (struct_v >= 6) {
1145 ::decode(compression_type, bl);
1146 }
1147 DECODE_FINISH(bl);
1148 }
1149 const rgw_pool& get_data_extra_pool() const {
1150 if (data_extra_pool.empty()) {
1151 return data_pool;
1152 }
1153 return data_extra_pool;
1154 }
1155 void dump(Formatter *f) const;
1156 void decode_json(JSONObj *obj);
1157 };
1158 WRITE_CLASS_ENCODER(RGWZonePlacementInfo)
1159
1160 struct RGWZoneParams : RGWSystemMetaObj {
1161 rgw_pool domain_root;
1162 rgw_pool metadata_heap;
1163 rgw_pool control_pool;
1164 rgw_pool gc_pool;
1165 rgw_pool lc_pool;
1166 rgw_pool log_pool;
1167 rgw_pool intent_log_pool;
1168 rgw_pool usage_log_pool;
1169
1170 rgw_pool user_keys_pool;
1171 rgw_pool user_email_pool;
1172 rgw_pool user_swift_pool;
1173 rgw_pool user_uid_pool;
1174 rgw_pool roles_pool;
1175 rgw_pool reshard_pool;
1176
1177 RGWAccessKey system_key;
1178
1179 map<string, RGWZonePlacementInfo> placement_pools;
1180
1181 string realm_id;
1182
1183 map<string, string, ltstr_nocase> tier_config;
1184
1185 RGWZoneParams() : RGWSystemMetaObj() {}
1186 RGWZoneParams(const string& name) : RGWSystemMetaObj(name){}
1187 RGWZoneParams(const string& id, const string& name) : RGWSystemMetaObj(id, name) {}
1188 RGWZoneParams(const string& id, const string& name, const string& _realm_id)
1189 : RGWSystemMetaObj(id, name), realm_id(_realm_id) {}
1190
1191 rgw_pool get_pool(CephContext *cct);
1192 const string get_default_oid(bool old_format = false) override;
1193 const string& get_names_oid_prefix() override;
1194 const string& get_info_oid_prefix(bool old_format = false) override;
1195 const string& get_predefined_name(CephContext *cct) override;
1196
1197 int init(CephContext *_cct, RGWRados *_store, bool setup_obj = true,
1198 bool old_format = false);
1199 using RGWSystemMetaObj::init;
1200 int read_default_id(string& default_id, bool old_format = false) override;
1201 int set_as_default(bool exclusive = false) override;
1202 int create_default(bool old_format = false);
1203 int create(bool exclusive = true) override;
1204 int fix_pool_names();
1205
1206 const string& get_compression_type(const string& placement_rule) const;
1207
1208 void encode(bufferlist& bl) const override {
1209 ENCODE_START(10, 1, bl);
1210 ::encode(domain_root, bl);
1211 ::encode(control_pool, bl);
1212 ::encode(gc_pool, bl);
1213 ::encode(log_pool, bl);
1214 ::encode(intent_log_pool, bl);
1215 ::encode(usage_log_pool, bl);
1216 ::encode(user_keys_pool, bl);
1217 ::encode(user_email_pool, bl);
1218 ::encode(user_swift_pool, bl);
1219 ::encode(user_uid_pool, bl);
1220 RGWSystemMetaObj::encode(bl);
1221 ::encode(system_key, bl);
1222 ::encode(placement_pools, bl);
1223 ::encode(metadata_heap, bl);
1224 ::encode(realm_id, bl);
1225 ::encode(lc_pool, bl);
1226 ::encode(tier_config, bl);
1227 ::encode(roles_pool, bl);
1228 ::encode(reshard_pool, bl);
1229 ENCODE_FINISH(bl);
1230 }
1231
1232 void decode(bufferlist::iterator& bl) override {
1233 DECODE_START(10, bl);
1234 ::decode(domain_root, bl);
1235 ::decode(control_pool, bl);
1236 ::decode(gc_pool, bl);
1237 ::decode(log_pool, bl);
1238 ::decode(intent_log_pool, bl);
1239 ::decode(usage_log_pool, bl);
1240 ::decode(user_keys_pool, bl);
1241 ::decode(user_email_pool, bl);
1242 ::decode(user_swift_pool, bl);
1243 ::decode(user_uid_pool, bl);
1244 if (struct_v >= 6) {
1245 RGWSystemMetaObj::decode(bl);
1246 } else if (struct_v >= 2) {
1247 ::decode(name, bl);
1248 id = name;
1249 }
1250 if (struct_v >= 3)
1251 ::decode(system_key, bl);
1252 if (struct_v >= 4)
1253 ::decode(placement_pools, bl);
1254 if (struct_v >= 5)
1255 ::decode(metadata_heap, bl);
1256 if (struct_v >= 6) {
1257 ::decode(realm_id, bl);
1258 }
1259 if (struct_v >= 7) {
1260 ::decode(lc_pool, bl);
1261 } else {
1262 lc_pool = log_pool.name + ":lc";
1263 }
1264 if (struct_v >= 8) {
1265 ::decode(tier_config, bl);
1266 }
1267 if (struct_v >= 9) {
1268 ::decode(roles_pool, bl);
1269 } else {
1270 roles_pool = name + ".rgw.meta:roles";
1271 }
1272 if (struct_v >= 10) {
1273 ::decode(reshard_pool, bl);
1274 } else {
1275 reshard_pool = log_pool.name + ":reshard";
1276 }
1277 DECODE_FINISH(bl);
1278 }
1279 void dump(Formatter *f) const;
1280 void decode_json(JSONObj *obj);
1281 static void generate_test_instances(list<RGWZoneParams*>& o);
1282
1283 bool get_placement(const string& placement_id, RGWZonePlacementInfo *placement) const {
1284 auto iter = placement_pools.find(placement_id);
1285 if (iter == placement_pools.end()) {
1286 return false;
1287 }
1288 *placement = iter->second;
1289 return true;
1290 }
1291
1292 /*
1293 * return data pool of the head object
1294 */
1295 bool get_head_data_pool(const string& placement_id, const rgw_obj& obj, rgw_pool *pool) const {
1296 const rgw_data_placement_target& explicit_placement = obj.bucket.explicit_placement;
1297 if (!explicit_placement.data_pool.empty()) {
1298 if (!obj.in_extra_data) {
1299 *pool = explicit_placement.data_pool;
1300 } else {
1301 *pool = explicit_placement.get_data_extra_pool();
1302 }
1303 return true;
1304 }
1305 if (placement_id.empty()) {
1306 return false;
1307 }
1308 auto iter = placement_pools.find(placement_id);
1309 if (iter == placement_pools.end()) {
1310 return false;
1311 }
1312 if (!obj.in_extra_data) {
1313 *pool = iter->second.data_pool;
1314 } else {
1315 *pool = iter->second.get_data_extra_pool();
1316 }
1317 return true;
1318 }
1319 };
1320 WRITE_CLASS_ENCODER(RGWZoneParams)
1321
1322 struct RGWZone {
1323 string id;
1324 string name;
1325 list<string> endpoints;
1326 bool log_meta;
1327 bool log_data;
1328 bool read_only;
1329 string tier_type;
1330
1331 /**
1332 * Represents the number of shards for the bucket index object, a value of zero
1333 * indicates there is no sharding. By default (no sharding, the name of the object
1334 * is '.dir.{marker}', with sharding, the name is '.dir.{marker}.{sharding_id}',
1335 * sharding_id is zero-based value. It is not recommended to set a too large value
1336 * (e.g. thousand) as it increases the cost for bucket listing.
1337 */
1338 uint32_t bucket_index_max_shards;
1339
1340 bool sync_from_all;
1341 set<string> sync_from; /* list of zones to sync from */
1342
1343 RGWZone() : log_meta(false), log_data(false), read_only(false), bucket_index_max_shards(0),
1344 sync_from_all(true) {}
1345
1346 void encode(bufferlist& bl) const {
1347 ENCODE_START(6, 1, bl);
1348 ::encode(name, bl);
1349 ::encode(endpoints, bl);
1350 ::encode(log_meta, bl);
1351 ::encode(log_data, bl);
1352 ::encode(bucket_index_max_shards, bl);
1353 ::encode(id, bl);
1354 ::encode(read_only, bl);
1355 ::encode(tier_type, bl);
1356 ::encode(sync_from_all, bl);
1357 ::encode(sync_from, bl);
1358 ENCODE_FINISH(bl);
1359 }
1360
1361 void decode(bufferlist::iterator& bl) {
1362 DECODE_START(6, bl);
1363 ::decode(name, bl);
1364 if (struct_v < 4) {
1365 id = name;
1366 }
1367 ::decode(endpoints, bl);
1368 if (struct_v >= 2) {
1369 ::decode(log_meta, bl);
1370 ::decode(log_data, bl);
1371 }
1372 if (struct_v >= 3) {
1373 ::decode(bucket_index_max_shards, bl);
1374 }
1375 if (struct_v >= 4) {
1376 ::decode(id, bl);
1377 ::decode(read_only, bl);
1378 }
1379 if (struct_v >= 5) {
1380 ::decode(tier_type, bl);
1381 }
1382 if (struct_v >= 6) {
1383 ::decode(sync_from_all, bl);
1384 ::decode(sync_from, bl);
1385 }
1386 DECODE_FINISH(bl);
1387 }
1388 void dump(Formatter *f) const;
1389 void decode_json(JSONObj *obj);
1390 static void generate_test_instances(list<RGWZone*>& o);
1391
1392 bool is_read_only() { return read_only; }
1393
1394 bool syncs_from(const string& zone_id) {
1395 return (sync_from_all || sync_from.find(zone_id) != sync_from.end());
1396 }
1397 };
1398 WRITE_CLASS_ENCODER(RGWZone)
1399
1400 struct RGWDefaultZoneGroupInfo {
1401 string default_zonegroup;
1402
1403 void encode(bufferlist& bl) const {
1404 ENCODE_START(1, 1, bl);
1405 ::encode(default_zonegroup, bl);
1406 ENCODE_FINISH(bl);
1407 }
1408
1409 void decode(bufferlist::iterator& bl) {
1410 DECODE_START(1, bl);
1411 ::decode(default_zonegroup, bl);
1412 DECODE_FINISH(bl);
1413 }
1414 void dump(Formatter *f) const;
1415 void decode_json(JSONObj *obj);
1416 //todo: implement ceph-dencoder
1417 };
1418 WRITE_CLASS_ENCODER(RGWDefaultZoneGroupInfo)
1419
1420 struct RGWZoneGroupPlacementTarget {
1421 string name;
1422 set<string> tags;
1423
1424 bool user_permitted(list<string>& user_tags) const {
1425 if (tags.empty()) {
1426 return true;
1427 }
1428 for (auto& rule : user_tags) {
1429 if (tags.find(rule) != tags.end()) {
1430 return true;
1431 }
1432 }
1433 return false;
1434 }
1435
1436 void encode(bufferlist& bl) const {
1437 ENCODE_START(1, 1, bl);
1438 ::encode(name, bl);
1439 ::encode(tags, bl);
1440 ENCODE_FINISH(bl);
1441 }
1442
1443 void decode(bufferlist::iterator& bl) {
1444 DECODE_START(1, bl);
1445 ::decode(name, bl);
1446 ::decode(tags, bl);
1447 DECODE_FINISH(bl);
1448 }
1449 void dump(Formatter *f) const;
1450 void decode_json(JSONObj *obj);
1451 };
1452 WRITE_CLASS_ENCODER(RGWZoneGroupPlacementTarget)
1453
1454
1455 struct RGWZoneGroup : public RGWSystemMetaObj {
1456 string api_name;
1457 list<string> endpoints;
1458 bool is_master;
1459
1460 string master_zone;
1461 map<string, RGWZone> zones;
1462
1463 map<string, RGWZoneGroupPlacementTarget> placement_targets;
1464 string default_placement;
1465
1466 list<string> hostnames;
1467 list<string> hostnames_s3website;
1468 // TODO: Maybe convert hostnames to a map<string,list<string>> for
1469 // endpoint_type->hostnames
1470 /*
1471 20:05 < _robbat21irssi> maybe I do someting like: if (hostname_map.empty()) { populate all map keys from hostnames; };
1472 20:05 < _robbat21irssi> but that's a later compatability migration planning bit
1473 20:06 < yehudasa> more like if (!hostnames.empty()) {
1474 20:06 < yehudasa> for (list<string>::iterator iter = hostnames.begin(); iter != hostnames.end(); ++iter) {
1475 20:06 < yehudasa> hostname_map["s3"].append(iter->second);
1476 20:07 < yehudasa> hostname_map["s3website"].append(iter->second);
1477 20:07 < yehudasa> s/append/push_back/g
1478 20:08 < _robbat21irssi> inner loop over APIs
1479 20:08 < yehudasa> yeah, probably
1480 20:08 < _robbat21irssi> s3, s3website, swift, swith_auth, swift_website
1481 */
1482 map<string, list<string> > api_hostname_map;
1483 map<string, list<string> > api_endpoints_map;
1484
1485 string realm_id;
1486
1487 RGWZoneGroup(): is_master(false){}
1488 RGWZoneGroup(const std::string &id, const std::string &name):RGWSystemMetaObj(id, name) {}
1489 RGWZoneGroup(const std::string &_name):RGWSystemMetaObj(_name) {}
1490 RGWZoneGroup(const std::string &_name, bool _is_master, CephContext *cct, RGWRados* store,
1491 const string& _realm_id, const list<string>& _endpoints)
1492 : RGWSystemMetaObj(_name, cct , store), endpoints(_endpoints), is_master(_is_master),
1493 realm_id(_realm_id) {}
1494
1495 bool is_master_zonegroup() const { return is_master;}
1496 void update_master(bool _is_master) {
1497 is_master = _is_master;
1498 post_process_params();
1499 }
1500 void post_process_params();
1501
1502 void encode(bufferlist& bl) const override {
1503 ENCODE_START(4, 1, bl);
1504 ::encode(name, bl);
1505 ::encode(api_name, bl);
1506 ::encode(is_master, bl);
1507 ::encode(endpoints, bl);
1508 ::encode(master_zone, bl);
1509 ::encode(zones, bl);
1510 ::encode(placement_targets, bl);
1511 ::encode(default_placement, bl);
1512 ::encode(hostnames, bl);
1513 ::encode(hostnames_s3website, bl);
1514 RGWSystemMetaObj::encode(bl);
1515 ::encode(realm_id, bl);
1516 ENCODE_FINISH(bl);
1517 }
1518
1519 void decode(bufferlist::iterator& bl) override {
1520 DECODE_START(4, bl);
1521 ::decode(name, bl);
1522 ::decode(api_name, bl);
1523 ::decode(is_master, bl);
1524 ::decode(endpoints, bl);
1525 ::decode(master_zone, bl);
1526 ::decode(zones, bl);
1527 ::decode(placement_targets, bl);
1528 ::decode(default_placement, bl);
1529 if (struct_v >= 2) {
1530 ::decode(hostnames, bl);
1531 }
1532 if (struct_v >= 3) {
1533 ::decode(hostnames_s3website, bl);
1534 }
1535 if (struct_v >= 4) {
1536 RGWSystemMetaObj::decode(bl);
1537 ::decode(realm_id, bl);
1538 } else {
1539 id = name;
1540 }
1541 DECODE_FINISH(bl);
1542 }
1543
1544 int read_default_id(string& default_id, bool old_format = false) override;
1545 int set_as_default(bool exclusive = false) override;
1546 int create_default(bool old_format = false);
1547 int equals(const string& other_zonegroup) const;
1548 int add_zone(const RGWZoneParams& zone_params, bool *is_master, bool *read_only,
1549 const list<string>& endpoints, const string *ptier_type,
1550 bool *psync_from_all, list<string>& sync_from, list<string>& sync_from_rm);
1551 int remove_zone(const std::string& zone_id);
1552 int rename_zone(const RGWZoneParams& zone_params);
1553 rgw_pool get_pool(CephContext *cct);
1554 const string get_default_oid(bool old_region_format = false) override;
1555 const string& get_info_oid_prefix(bool old_region_format = false) override;
1556 const string& get_names_oid_prefix() override;
1557 const string& get_predefined_name(CephContext *cct) override;
1558
1559 void dump(Formatter *f) const;
1560 void decode_json(JSONObj *obj);
1561 static void generate_test_instances(list<RGWZoneGroup*>& o);
1562 };
1563 WRITE_CLASS_ENCODER(RGWZoneGroup)
1564
1565 struct RGWPeriodMap
1566 {
1567 string id;
1568 map<string, RGWZoneGroup> zonegroups;
1569 map<string, RGWZoneGroup> zonegroups_by_api;
1570 map<string, uint32_t> short_zone_ids;
1571
1572 string master_zonegroup;
1573
1574 void encode(bufferlist& bl) const;
1575 void decode(bufferlist::iterator& bl);
1576
1577 int update(const RGWZoneGroup& zonegroup, CephContext *cct);
1578
1579 void dump(Formatter *f) const;
1580 void decode_json(JSONObj *obj);
1581
1582 void reset() {
1583 zonegroups.clear();
1584 zonegroups_by_api.clear();
1585 master_zonegroup.clear();
1586 }
1587
1588 uint32_t get_zone_short_id(const string& zone_id) const;
1589 };
1590 WRITE_CLASS_ENCODER(RGWPeriodMap)
1591
1592 struct RGWPeriodConfig
1593 {
1594 RGWQuotaInfo bucket_quota;
1595 RGWQuotaInfo user_quota;
1596
1597 void encode(bufferlist& bl) const {
1598 ENCODE_START(1, 1, bl);
1599 ::encode(bucket_quota, bl);
1600 ::encode(user_quota, bl);
1601 ENCODE_FINISH(bl);
1602 }
1603
1604 void decode(bufferlist::iterator& bl) {
1605 DECODE_START(1, bl);
1606 ::decode(bucket_quota, bl);
1607 ::decode(user_quota, bl);
1608 DECODE_FINISH(bl);
1609 }
1610
1611 void dump(Formatter *f) const;
1612 void decode_json(JSONObj *obj);
1613
1614 // the period config must be stored in a local object outside of the period,
1615 // so that it can be used in a default configuration where no realm/period
1616 // exists
1617 int read(RGWRados *store, const std::string& realm_id);
1618 int write(RGWRados *store, const std::string& realm_id);
1619
1620 static std::string get_oid(const std::string& realm_id);
1621 static rgw_pool get_pool(CephContext *cct);
1622 };
1623 WRITE_CLASS_ENCODER(RGWPeriodConfig)
1624
1625 /* for backward comaptability */
1626 struct RGWRegionMap {
1627
1628 map<string, RGWZoneGroup> regions;
1629
1630 string master_region;
1631
1632 RGWQuotaInfo bucket_quota;
1633 RGWQuotaInfo user_quota;
1634
1635 void encode(bufferlist& bl) const;
1636 void decode(bufferlist::iterator& bl);
1637
1638 void dump(Formatter *f) const;
1639 void decode_json(JSONObj *obj);
1640 };
1641 WRITE_CLASS_ENCODER(RGWRegionMap)
1642
1643 struct RGWZoneGroupMap {
1644
1645 map<string, RGWZoneGroup> zonegroups;
1646 map<string, RGWZoneGroup> zonegroups_by_api;
1647
1648 string master_zonegroup;
1649
1650 RGWQuotaInfo bucket_quota;
1651 RGWQuotaInfo user_quota;
1652
1653 /* constract the map */
1654 int read(CephContext *cct, RGWRados *store);
1655
1656 void encode(bufferlist& bl) const;
1657 void decode(bufferlist::iterator& bl);
1658
1659 void dump(Formatter *f) const;
1660 void decode_json(JSONObj *obj);
1661 };
1662 WRITE_CLASS_ENCODER(RGWZoneGroupMap)
1663
1664 class RGWRealm;
1665
1666 struct objexp_hint_entry {
1667 string tenant;
1668 string bucket_name;
1669 string bucket_id;
1670 rgw_obj_key obj_key;
1671 ceph::real_time exp_time;
1672
1673 void encode(bufferlist& bl) const {
1674 ENCODE_START(2, 1, bl);
1675 ::encode(bucket_name, bl);
1676 ::encode(bucket_id, bl);
1677 ::encode(obj_key, bl);
1678 ::encode(exp_time, bl);
1679 ::encode(tenant, bl);
1680 ENCODE_FINISH(bl);
1681 }
1682
1683 void decode(bufferlist::iterator& bl) {
1684 // XXX Do we want DECODE_START_LEGACY_COMPAT_LEN(2, 1, 1, bl); ?
1685 DECODE_START(2, bl);
1686 ::decode(bucket_name, bl);
1687 ::decode(bucket_id, bl);
1688 ::decode(obj_key, bl);
1689 ::decode(exp_time, bl);
1690 if (struct_v >= 2) {
1691 ::decode(tenant, bl);
1692 } else {
1693 tenant.clear();
1694 }
1695 DECODE_FINISH(bl);
1696 }
1697 };
1698 WRITE_CLASS_ENCODER(objexp_hint_entry)
1699
1700 class RGWPeriod;
1701
1702 class RGWRealm : public RGWSystemMetaObj
1703 {
1704 string current_period;
1705 epoch_t epoch{0}; //< realm epoch, incremented for each new period
1706
1707 int create_control(bool exclusive);
1708 int delete_control();
1709 public:
1710 RGWRealm() {}
1711 RGWRealm(const string& _id, const string& _name = "") : RGWSystemMetaObj(_id, _name) {}
1712 RGWRealm(CephContext *_cct, RGWRados *_store): RGWSystemMetaObj(_cct, _store) {}
1713 RGWRealm(const string& _name, CephContext *_cct, RGWRados *_store): RGWSystemMetaObj(_name, _cct, _store){}
1714
1715 void encode(bufferlist& bl) const override {
1716 ENCODE_START(1, 1, bl);
1717 RGWSystemMetaObj::encode(bl);
1718 ::encode(current_period, bl);
1719 ::encode(epoch, bl);
1720 ENCODE_FINISH(bl);
1721 }
1722
1723 void decode(bufferlist::iterator& bl) override {
1724 DECODE_START(1, bl);
1725 RGWSystemMetaObj::decode(bl);
1726 ::decode(current_period, bl);
1727 ::decode(epoch, bl);
1728 DECODE_FINISH(bl);
1729 }
1730
1731 int create(bool exclusive = true) override;
1732 int delete_obj();
1733 rgw_pool get_pool(CephContext *cct);
1734 const string get_default_oid(bool old_format = false) override;
1735 const string& get_names_oid_prefix() override;
1736 const string& get_info_oid_prefix(bool old_format = false) override;
1737 const string& get_predefined_name(CephContext *cct) override;
1738
1739 using RGWSystemMetaObj::read_id; // expose as public for radosgw-admin
1740
1741 void dump(Formatter *f) const;
1742 void decode_json(JSONObj *obj);
1743
1744 const string& get_current_period() const {
1745 return current_period;
1746 }
1747 int set_current_period(RGWPeriod& period);
1748 void clear_current_period_and_epoch() {
1749 current_period.clear();
1750 epoch = 0;
1751 }
1752 epoch_t get_epoch() const { return epoch; }
1753
1754 string get_control_oid();
1755 /// send a notify on the realm control object
1756 int notify_zone(bufferlist& bl);
1757 /// notify the zone of a new period
1758 int notify_new_period(const RGWPeriod& period);
1759 };
1760 WRITE_CLASS_ENCODER(RGWRealm)
1761
1762 struct RGWPeriodLatestEpochInfo {
1763 epoch_t epoch;
1764
1765 void encode(bufferlist& bl) const {
1766 ENCODE_START(1, 1, bl);
1767 ::encode(epoch, bl);
1768 ENCODE_FINISH(bl);
1769 }
1770
1771 void decode(bufferlist::iterator& bl) {
1772 DECODE_START(1, bl);
1773 ::decode(epoch, bl);
1774 DECODE_FINISH(bl);
1775 }
1776
1777 void dump(Formatter *f) const;
1778 void decode_json(JSONObj *obj);
1779 };
1780 WRITE_CLASS_ENCODER(RGWPeriodLatestEpochInfo)
1781
1782 class RGWPeriod
1783 {
1784 string id;
1785 epoch_t epoch;
1786 string predecessor_uuid;
1787 std::vector<std::string> sync_status;
1788 RGWPeriodMap period_map;
1789 RGWPeriodConfig period_config;
1790 string master_zonegroup;
1791 string master_zone;
1792
1793 string realm_id;
1794 string realm_name;
1795 epoch_t realm_epoch{1}; //< realm epoch when period was made current
1796
1797 CephContext *cct;
1798 RGWRados *store;
1799
1800 int read_info();
1801 int read_latest_epoch(RGWPeriodLatestEpochInfo& epoch_info,
1802 RGWObjVersionTracker *objv = nullptr);
1803 int use_latest_epoch();
1804 int use_current_period();
1805
1806 const string get_period_oid();
1807 const string get_period_oid_prefix();
1808
1809 // gather the metadata sync status for each shard; only for use on master zone
1810 int update_sync_status(const RGWPeriod &current_period,
1811 std::ostream& error_stream, bool force_if_stale);
1812
1813 public:
1814 RGWPeriod() : epoch(0), cct(NULL), store(NULL) {}
1815
1816 RGWPeriod(const string& period_id, epoch_t _epoch = 0)
1817 : id(period_id), epoch(_epoch),
1818 cct(NULL), store(NULL) {}
1819
1820 const string& get_id() const { return id; }
1821 epoch_t get_epoch() const { return epoch; }
1822 epoch_t get_realm_epoch() const { return realm_epoch; }
1823 const string& get_predecessor() const { return predecessor_uuid; }
1824 const string& get_master_zone() const { return master_zone; }
1825 const string& get_master_zonegroup() const { return master_zonegroup; }
1826 const string& get_realm() const { return realm_id; }
1827 const RGWPeriodMap& get_map() const { return period_map; }
1828 RGWPeriodConfig& get_config() { return period_config; }
1829 const RGWPeriodConfig& get_config() const { return period_config; }
1830 const std::vector<std::string>& get_sync_status() const { return sync_status; }
1831 rgw_pool get_pool(CephContext *cct);
1832 const string& get_latest_epoch_oid();
1833 const string& get_info_oid_prefix();
1834
1835 void set_user_quota(RGWQuotaInfo& user_quota) {
1836 period_config.user_quota = user_quota;
1837 }
1838
1839 void set_bucket_quota(RGWQuotaInfo& bucket_quota) {
1840 period_config.bucket_quota = bucket_quota;
1841 }
1842
1843 void set_id(const string& id) {
1844 this->id = id;
1845 period_map.id = id;
1846 }
1847 void set_epoch(epoch_t epoch) { this->epoch = epoch; }
1848 void set_realm_epoch(epoch_t epoch) { realm_epoch = epoch; }
1849
1850 void set_predecessor(const string& predecessor)
1851 {
1852 predecessor_uuid = predecessor;
1853 }
1854
1855 void set_realm_id(const string& _realm_id) {
1856 realm_id = _realm_id;
1857 }
1858
1859 int reflect();
1860
1861 int get_zonegroup(RGWZoneGroup& zonegroup,
1862 const string& zonegroup_id);
1863
1864 bool is_single_zonegroup() const
1865 {
1866 return (period_map.zonegroups.size() == 1);
1867 }
1868
1869 /*
1870 returns true if there are several zone groups with a least one zone
1871 */
1872 bool is_multi_zonegroups_with_zones()
1873 {
1874 int count = 0;
1875 for (const auto& zg: period_map.zonegroups) {
1876 if (zg.second.zones.size() > 0) {
1877 if (count++ > 0) {
1878 return true;
1879 }
1880 }
1881 }
1882 return false;
1883 }
1884
1885 int get_latest_epoch(epoch_t& epoch);
1886 int set_latest_epoch(epoch_t epoch, bool exclusive = false,
1887 RGWObjVersionTracker *objv = nullptr);
1888 // update latest_epoch if the given epoch is higher, else return -EEXIST
1889 int update_latest_epoch(epoch_t epoch);
1890
1891 int init(CephContext *_cct, RGWRados *_store, const string &period_realm_id, const string &period_realm_name = "",
1892 bool setup_obj = true);
1893 int init(CephContext *_cct, RGWRados *_store, bool setup_obj = true);
1894
1895 int create(bool exclusive = true);
1896 int delete_obj();
1897 int store_info(bool exclusive);
1898 int add_zonegroup(const RGWZoneGroup& zonegroup);
1899
1900 void fork();
1901 int update();
1902
1903 // commit a staging period; only for use on master zone
1904 int commit(RGWRealm& realm, const RGWPeriod &current_period,
1905 std::ostream& error_stream, bool force_if_stale = false);
1906
1907 void encode(bufferlist& bl) const {
1908 ENCODE_START(1, 1, bl);
1909 ::encode(id, bl);
1910 ::encode(epoch, bl);
1911 ::encode(realm_epoch, bl);
1912 ::encode(predecessor_uuid, bl);
1913 ::encode(sync_status, bl);
1914 ::encode(period_map, bl);
1915 ::encode(master_zone, bl);
1916 ::encode(master_zonegroup, bl);
1917 ::encode(period_config, bl);
1918 ::encode(realm_id, bl);
1919 ::encode(realm_name, bl);
1920 ENCODE_FINISH(bl);
1921 }
1922
1923 void decode(bufferlist::iterator& bl) {
1924 DECODE_START(1, bl);
1925 ::decode(id, bl);
1926 ::decode(epoch, bl);
1927 ::decode(realm_epoch, bl);
1928 ::decode(predecessor_uuid, bl);
1929 ::decode(sync_status, bl);
1930 ::decode(period_map, bl);
1931 ::decode(master_zone, bl);
1932 ::decode(master_zonegroup, bl);
1933 ::decode(period_config, bl);
1934 ::decode(realm_id, bl);
1935 ::decode(realm_name, bl);
1936 DECODE_FINISH(bl);
1937 }
1938 void dump(Formatter *f) const;
1939 void decode_json(JSONObj *obj);
1940
1941 static string get_staging_id(const string& realm_id) {
1942 return realm_id + ":staging";
1943 }
1944 };
1945 WRITE_CLASS_ENCODER(RGWPeriod)
1946
1947 class RGWDataChangesLog;
1948 class RGWMetaSyncStatusManager;
1949 class RGWDataSyncStatusManager;
1950 class RGWReplicaLogger;
1951 class RGWCoroutinesManagerRegistry;
1952
1953 class RGWStateLog {
1954 RGWRados *store;
1955 int num_shards;
1956 string module_name;
1957
1958 void oid_str(int shard, string& oid);
1959 int get_shard_num(const string& object);
1960 string get_oid(const string& object);
1961 int open_ioctx(librados::IoCtx& ioctx);
1962
1963 struct list_state {
1964 int cur_shard;
1965 int max_shard;
1966 string marker;
1967 string client_id;
1968 string op_id;
1969 string object;
1970
1971 list_state() : cur_shard(0), max_shard(0) {}
1972 };
1973
1974 protected:
1975 virtual bool dump_entry_internal(const cls_statelog_entry& entry, Formatter *f) {
1976 return false;
1977 }
1978
1979 public:
1980 RGWStateLog(RGWRados *_store, int _num_shards, const string& _module_name) :
1981 store(_store), num_shards(_num_shards), module_name(_module_name) {}
1982 virtual ~RGWStateLog() {}
1983
1984 int store_entry(const string& client_id, const string& op_id, const string& object,
1985 uint32_t state, bufferlist *bl, uint32_t *check_state);
1986
1987 int remove_entry(const string& client_id, const string& op_id, const string& object);
1988
1989 void init_list_entries(const string& client_id, const string& op_id, const string& object,
1990 void **handle);
1991
1992 int list_entries(void *handle, int max_entries, list<cls_statelog_entry>& entries, bool *done);
1993
1994 void finish_list_entries(void *handle);
1995
1996 virtual void dump_entry(const cls_statelog_entry& entry, Formatter *f);
1997 };
1998
1999 /*
2000 * state transitions:
2001 *
2002 * unknown -> in-progress -> complete
2003 * -> error
2004 *
2005 * user can try setting the 'abort' state, and it can only succeed if state is
2006 * in-progress.
2007 *
2008 * state renewal cannot switch state (stays in the same state)
2009 *
2010 * rgw can switch from in-progress to complete
2011 * rgw can switch from in-progress to error
2012 *
2013 * rgw can switch from abort to cancelled
2014 *
2015 */
2016
2017 class RGWOpState : public RGWStateLog {
2018 protected:
2019 bool dump_entry_internal(const cls_statelog_entry& entry, Formatter *f) override;
2020 public:
2021
2022 enum OpState {
2023 OPSTATE_UNKNOWN = 0,
2024 OPSTATE_IN_PROGRESS = 1,
2025 OPSTATE_COMPLETE = 2,
2026 OPSTATE_ERROR = 3,
2027 OPSTATE_ABORT = 4,
2028 OPSTATE_CANCELLED = 5,
2029 };
2030
2031 explicit RGWOpState(RGWRados *_store);
2032
2033 int state_from_str(const string& s, OpState *state);
2034 int set_state(const string& client_id, const string& op_id, const string& object, OpState state);
2035 int renew_state(const string& client_id, const string& op_id, const string& object, OpState state);
2036 };
2037
2038 class RGWOpStateSingleOp
2039 {
2040 RGWOpState os;
2041 string client_id;
2042 string op_id;
2043 string object;
2044
2045 CephContext *cct;
2046
2047 RGWOpState::OpState cur_state;
2048 ceph::real_time last_update;
2049
2050 public:
2051 RGWOpStateSingleOp(RGWRados *store, const string& cid, const string& oid, const string& obj);
2052
2053 int set_state(RGWOpState::OpState state);
2054 int renew_state();
2055 };
2056
2057 class RGWGetBucketStats_CB : public RefCountedObject {
2058 protected:
2059 rgw_bucket bucket;
2060 map<RGWObjCategory, RGWStorageStats> *stats;
2061 public:
2062 explicit RGWGetBucketStats_CB(const rgw_bucket& _bucket) : bucket(_bucket), stats(NULL) {}
2063 ~RGWGetBucketStats_CB() override {}
2064 virtual void handle_response(int r) = 0;
2065 virtual void set_response(map<RGWObjCategory, RGWStorageStats> *_stats) {
2066 stats = _stats;
2067 }
2068 };
2069
2070 class RGWGetUserStats_CB : public RefCountedObject {
2071 protected:
2072 rgw_user user;
2073 RGWStorageStats stats;
2074 public:
2075 explicit RGWGetUserStats_CB(const rgw_user& _user) : user(_user) {}
2076 ~RGWGetUserStats_CB() override {}
2077 virtual void handle_response(int r) = 0;
2078 virtual void set_response(RGWStorageStats& _stats) {
2079 stats = _stats;
2080 }
2081 };
2082
2083 class RGWGetDirHeader_CB;
2084 class RGWGetUserHeader_CB;
2085
2086 struct rgw_rados_ref {
2087 rgw_pool pool;
2088 string oid;
2089 string key;
2090 librados::IoCtx ioctx;
2091 };
2092
/**
 * Abstract interface of a cache that can be chained: it receives entries
 * through chain_cb() and supports per-key and full invalidation.
 */
class RGWChainedCache {
public:
  virtual ~RGWChainedCache() {}
  virtual void chain_cb(const std::string& key, void *data) = 0;
  virtual void invalidate(const std::string& key) = 0;
  virtual void invalidate_all() = 0;

  /// A (cache, key, data) triple. key is held by reference and data is a
  /// raw pointer, so neither is owned by the Entry; the referenced string
  /// has to outlive it.
  struct Entry {
    RGWChainedCache *cache;
    const std::string& key;
    void *data;

    Entry(RGWChainedCache *_c, const std::string& _k, void *_d)
      : cache(_c), key(_k), data(_d) {}
  };
};
2108
2109 template <class T, class S>
2110 class RGWObjectCtxImpl {
2111 RGWRados *store;
2112 std::map<T, S> objs_state;
2113 RWLock lock;
2114
2115 public:
2116 RGWObjectCtxImpl(RGWRados *_store) : store(_store), lock("RGWObjectCtxImpl") {}
2117
2118 S *get_state(const T& obj) {
2119 S *result;
2120 typename std::map<T, S>::iterator iter;
2121 lock.get_read();
2122 assert (!obj.empty());
2123 iter = objs_state.find(obj);
2124 if (iter != objs_state.end()) {
2125 result = &iter->second;
2126 lock.unlock();
2127 } else {
2128 lock.unlock();
2129 lock.get_write();
2130 result = &objs_state[obj];
2131 lock.unlock();
2132 }
2133 return result;
2134 }
2135
2136 void set_atomic(T& obj) {
2137 RWLock::WLocker wl(lock);
2138 assert (!obj.empty());
2139 objs_state[obj].is_atomic = true;
2140 }
2141 void set_prefetch_data(T& obj) {
2142 RWLock::WLocker wl(lock);
2143 assert (!obj.empty());
2144 objs_state[obj].prefetch_data = true;
2145 }
2146 void invalidate(T& obj) {
2147 RWLock::WLocker wl(lock);
2148 auto iter = objs_state.find(obj);
2149 if (iter == objs_state.end()) {
2150 return;
2151 }
2152 bool is_atomic = iter->second.is_atomic;
2153 bool prefetch_data = iter->second.prefetch_data;
2154
2155 objs_state.erase(iter);
2156
2157 if (is_atomic || prefetch_data) {
2158 auto& s = objs_state[obj];
2159 s.is_atomic = is_atomic;
2160 s.prefetch_data = prefetch_data;
2161 }
2162 }
2163 };
2164
2165 template<>
2166 void RGWObjectCtxImpl<rgw_obj, RGWObjState>::invalidate(rgw_obj& obj);
2167
2168 template<>
2169 void RGWObjectCtxImpl<rgw_raw_obj, RGWRawObjState>::invalidate(rgw_raw_obj& obj);
2170
2171 struct RGWObjectCtx {
2172 RGWRados *store;
2173 void *user_ctx;
2174
2175 RGWObjectCtxImpl<rgw_obj, RGWObjState> obj;
2176 RGWObjectCtxImpl<rgw_raw_obj, RGWRawObjState> raw;
2177
2178 explicit RGWObjectCtx(RGWRados *_store) : store(_store), user_ctx(NULL), obj(store), raw(store) { }
2179 RGWObjectCtx(RGWRados *_store, void *_user_ctx) : store(_store), user_ctx(_user_ctx), obj(store), raw(store) { }
2180 };
2181
2182 class Finisher;
2183 class RGWAsyncRadosProcessor;
2184
2185 template <class T>
2186 class RGWChainedCacheImpl;
2187
2188 struct bucket_info_entry {
2189 RGWBucketInfo info;
2190 real_time mtime;
2191 map<string, bufferlist> attrs;
2192 };
2193
2194 struct tombstone_entry {
2195 ceph::real_time mtime;
2196 uint32_t zone_short_id;
2197 uint64_t pg_ver;
2198
2199 tombstone_entry() = default;
2200 tombstone_entry(const RGWObjState& state)
2201 : mtime(state.mtime), zone_short_id(state.zone_short_id),
2202 pg_ver(state.pg_ver) {}
2203 };
2204
2205 class RGWIndexCompletionManager;
2206
2207 class RGWRados
2208 {
2209 friend class RGWGC;
2210 friend class RGWMetaNotifier;
2211 friend class RGWDataNotifier;
2212 friend class RGWLC;
2213 friend class RGWObjectExpirer;
2214 friend class RGWMetaSyncProcessorThread;
2215 friend class RGWDataSyncProcessorThread;
2216 friend class RGWStateLog;
2217 friend class RGWReplicaLogger;
2218 friend class RGWReshard;
2219 friend class RGWBucketReshard;
2220 friend class BucketIndexLockGuard;
2221 friend class RGWCompleteMultipart;
2222
2223 /** Open the pool used as root for this gateway */
2224 int open_root_pool_ctx();
2225 int open_gc_pool_ctx();
2226 int open_lc_pool_ctx();
2227 int open_objexp_pool_ctx();
2228 int open_reshard_pool_ctx();
2229
2230 int open_pool_ctx(const rgw_pool& pool, librados::IoCtx& io_ctx);
2231 int open_bucket_index_ctx(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx);
2232 int open_bucket_index(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx, string& bucket_oid);
2233 int open_bucket_index_base(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
2234 string& bucket_oid_base);
2235 int open_bucket_index_shard(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
2236 const string& obj_key, string *bucket_obj, int *shard_id);
2237 int open_bucket_index_shard(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
2238 int shard_id, string *bucket_obj);
2239 int open_bucket_index(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
2240 map<int, string>& bucket_objs, int shard_id = -1, map<int, string> *bucket_instance_ids = NULL);
2241 template<typename T>
2242 int open_bucket_index(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
2243 map<int, string>& oids, map<int, T>& bucket_objs,
2244 int shard_id = -1, map<int, string> *bucket_instance_ids = NULL);
2245 void build_bucket_index_marker(const string& shard_id_str, const string& shard_marker,
2246 string *marker);
2247
2248 void get_bucket_instance_ids(const RGWBucketInfo& bucket_info, int shard_id, map<int, string> *result);
2249
2250 std::atomic<int64_t> max_req_id = { 0 };
2251 Mutex lock;
2252 Mutex watchers_lock;
2253 SafeTimer *timer;
2254
2255 RGWGC *gc;
2256 RGWLC *lc;
2257 RGWObjectExpirer *obj_expirer;
2258 bool use_gc_thread;
2259 bool use_lc_thread;
2260 bool quota_threads;
2261 bool run_sync_thread;
2262 bool run_reshard_thread;
2263
2264 RGWAsyncRadosProcessor* async_rados;
2265
2266 RGWMetaNotifier *meta_notifier;
2267 RGWDataNotifier *data_notifier;
2268 RGWMetaSyncProcessorThread *meta_sync_processor_thread;
2269 map<string, RGWDataSyncProcessorThread *> data_sync_processor_threads;
2270
2271 boost::optional<rgw::BucketTrimManager> bucket_trim;
2272 RGWSyncLogTrimThread *sync_log_trimmer{nullptr};
2273
2274 Mutex meta_sync_thread_lock;
2275 Mutex data_sync_thread_lock;
2276
2277 int num_watchers;
2278 RGWWatcher **watchers;
2279 std::set<int> watchers_set;
2280 librados::IoCtx root_pool_ctx; // .rgw
2281 librados::IoCtx control_pool_ctx; // .rgw.control
2282 bool watch_initialized;
2283
2284 friend class RGWWatcher;
2285
2286 Mutex bucket_id_lock;
2287
2288 // This field represents the number of bucket index object shards
2289 uint32_t bucket_index_max_shards;
2290
2291 int get_obj_head_ioctx(const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::IoCtx *ioctx);
2292 int get_obj_head_ref(const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_rados_ref *ref);
2293 int get_system_obj_ref(const rgw_raw_obj& obj, rgw_rados_ref *ref);
2294 uint64_t max_bucket_id;
2295
2296 int get_olh_target_state(RGWObjectCtx& rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj,
2297 RGWObjState *olh_state, RGWObjState **target_state);
2298 int get_system_obj_state_impl(RGWObjectCtx *rctx, rgw_raw_obj& obj, RGWRawObjState **state, RGWObjVersionTracker *objv_tracker);
2299 int get_obj_state_impl(RGWObjectCtx *rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWObjState **state,
2300 bool follow_olh, bool assume_noent = false);
2301 int append_atomic_test(RGWObjectCtx *rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj,
2302 librados::ObjectOperation& op, RGWObjState **state);
2303
2304 int update_placement_map();
2305 int store_bucket_info(RGWBucketInfo& info, map<string, bufferlist> *pattrs, RGWObjVersionTracker *objv_tracker, bool exclusive);
2306
2307 void remove_rgw_head_obj(librados::ObjectWriteOperation& op);
2308 void cls_obj_check_prefix_exist(librados::ObjectOperation& op, const string& prefix, bool fail_if_exist);
2309 void cls_obj_check_mtime(librados::ObjectOperation& op, const real_time& mtime, bool high_precision_time, RGWCheckMTimeType type);
2310 protected:
2311 CephContext *cct;
2312
2313 std::vector<librados::Rados> rados;
2314 uint32_t next_rados_handle;
2315 RWLock handle_lock;
2316 std::map<pthread_t, int> rados_map;
2317
2318 using RGWChainedCacheImpl_bucket_info_entry = RGWChainedCacheImpl<bucket_info_entry>;
2319 RGWChainedCacheImpl_bucket_info_entry *binfo_cache;
2320
2321 using tombstone_cache_t = lru_map<rgw_obj, tombstone_entry>;
2322 tombstone_cache_t *obj_tombstone_cache;
2323
2324 librados::IoCtx gc_pool_ctx; // .rgw.gc
2325 librados::IoCtx lc_pool_ctx; // .rgw.lc
2326 librados::IoCtx objexp_pool_ctx;
2327 librados::IoCtx reshard_pool_ctx;
2328
2329 bool pools_initialized;
2330
2331 string trans_id_suffix;
2332
2333 RGWQuotaHandler *quota_handler;
2334
2335 Finisher *finisher;
2336
2337 RGWCoroutinesManagerRegistry *cr_registry;
2338
2339 RGWSyncModulesManager *sync_modules_manager{nullptr};
2340 RGWSyncModuleInstanceRef sync_module;
2341 bool writeable_zone{false};
2342
2343 RGWZoneGroup zonegroup;
2344 RGWZone zone_public_config; /* external zone params, e.g., entrypoints, log flags, etc. */
2345 RGWZoneParams zone_params; /* internal zone params, e.g., rados pools */
2346 uint32_t zone_short_id;
2347
2348 RGWPeriod current_period;
2349
2350 RGWIndexCompletionManager *index_completion_manager{nullptr};
2351 public:
2352 RGWRados() : lock("rados_timer_lock"), watchers_lock("watchers_lock"), timer(NULL),
2353 gc(NULL), lc(NULL), obj_expirer(NULL), use_gc_thread(false), use_lc_thread(false), quota_threads(false),
2354 run_sync_thread(false), run_reshard_thread(false), async_rados(nullptr), meta_notifier(NULL),
2355 data_notifier(NULL), meta_sync_processor_thread(NULL),
2356 meta_sync_thread_lock("meta_sync_thread_lock"), data_sync_thread_lock("data_sync_thread_lock"),
2357 num_watchers(0), watchers(NULL),
2358 watch_initialized(false),
2359 bucket_id_lock("rados_bucket_id"),
2360 bucket_index_max_shards(0),
2361 max_bucket_id(0), cct(NULL),
2362 next_rados_handle(0),
2363 handle_lock("rados_handle_lock"),
2364 binfo_cache(NULL), obj_tombstone_cache(nullptr),
2365 pools_initialized(false),
2366 quota_handler(NULL),
2367 finisher(NULL),
2368 cr_registry(NULL),
2369 zone_short_id(0),
2370 rest_master_conn(NULL),
2371 meta_mgr(NULL), data_log(NULL), reshard(NULL) {}
2372
2373 uint64_t get_new_req_id() {
2374 return ++max_req_id;
2375 }
2376
2377 librados::IoCtx* get_lc_pool_ctx() {
2378 return &lc_pool_ctx;
2379 }
2380 void set_context(CephContext *_cct) {
2381 cct = _cct;
2382 }
2383
2384 /**
2385 * AmazonS3 errors contain a HostId string, but is an opaque base64 blob; we
2386 * try to be more transparent. This has a wrapper so we can update it when zonegroup/zone are changed.
2387 */
2388 void init_host_id() {
2389 /* uint64_t needs 16, two '-' separators and a trailing null */
2390 const string& zone_name = get_zone().name;
2391 const string& zonegroup_name = zonegroup.get_name();
2392 char charbuf[16 + zone_name.size() + zonegroup_name.size() + 2 + 1];
2393 snprintf(charbuf, sizeof(charbuf), "%llx-%s-%s", (unsigned long long)instance_id(), zone_name.c_str(), zonegroup_name.c_str());
2394 string s(charbuf);
2395 host_id = s;
2396 }
2397
2398 string host_id;
2399
2400 RGWRealm realm;
2401
2402 RGWRESTConn *rest_master_conn;
2403 map<string, RGWRESTConn *> zone_conn_map;
2404 map<string, RGWRESTConn *> zone_data_sync_from_map;
2405 map<string, RGWRESTConn *> zone_data_notify_to_map;
2406 map<string, RGWRESTConn *> zonegroup_conn_map;
2407
2408 map<string, string> zone_id_by_name;
2409 map<string, RGWZone> zone_by_id;
2410
2411 RGWRESTConn *get_zone_conn_by_id(const string& id) {
2412 auto citer = zone_conn_map.find(id);
2413 if (citer == zone_conn_map.end()) {
2414 return NULL;
2415 }
2416
2417 return citer->second;
2418 }
2419
2420 RGWRESTConn *get_zone_conn_by_name(const string& name) {
2421 auto i = zone_id_by_name.find(name);
2422 if (i == zone_id_by_name.end()) {
2423 return NULL;
2424 }
2425
2426 return get_zone_conn_by_id(i->second);
2427 }
2428
2429 bool find_zone_id_by_name(const string& name, string *id) {
2430 auto i = zone_id_by_name.find(name);
2431 if (i == zone_id_by_name.end()) {
2432 return false;
2433 }
2434 *id = i->second;
2435 return true;
2436 }
2437
2438 int get_zonegroup(const string& id, RGWZoneGroup& zonegroup) {
2439 int ret = 0;
2440 if (id == get_zonegroup().get_id()) {
2441 zonegroup = get_zonegroup();
2442 } else if (!current_period.get_id().empty()) {
2443 ret = current_period.get_zonegroup(zonegroup, id);
2444 }
2445 return ret;
2446 }
2447
2448 RGWRealm& get_realm() {
2449 return realm;
2450 }
2451
2452 RGWZoneParams& get_zone_params() { return zone_params; }
2453 RGWZoneGroup& get_zonegroup() {
2454 return zonegroup;
2455 }
2456 RGWZone& get_zone() {
2457 return zone_public_config;
2458 }
2459
2460 bool zone_is_writeable() {
2461 return writeable_zone && !get_zone().is_read_only();
2462 }
2463
2464 uint32_t get_zone_short_id() const {
2465 return zone_short_id;
2466 }
2467
2468 bool zone_syncs_from(RGWZone& target_zone, RGWZone& source_zone);
2469
2470 const RGWQuotaInfo& get_bucket_quota() {
2471 return current_period.get_config().bucket_quota;
2472 }
2473
2474 const RGWQuotaInfo& get_user_quota() {
2475 return current_period.get_config().user_quota;
2476 }
2477
2478 const string& get_current_period_id() {
2479 return current_period.get_id();
2480 }
2481
2482 bool has_zonegroup_api(const std::string& api) const {
2483 if (!current_period.get_id().empty()) {
2484 const auto& zonegroups_by_api = current_period.get_map().zonegroups_by_api;
2485 if (zonegroups_by_api.find(api) != zonegroups_by_api.end())
2486 return true;
2487 }
2488 return false;
2489 }
2490
2491 // pulls missing periods for period_history
2492 std::unique_ptr<RGWPeriodPuller> period_puller;
2493 // maintains a connected history of periods
2494 std::unique_ptr<RGWPeriodHistory> period_history;
2495
2496 RGWAsyncRadosProcessor* get_async_rados() const { return async_rados; };
2497
2498 RGWMetadataManager *meta_mgr;
2499
2500 RGWDataChangesLog *data_log;
2501
2502 RGWReshard *reshard;
2503 std::shared_ptr<RGWReshardWait> reshard_wait;
2504
2505 virtual ~RGWRados() = default;
2506
2507 tombstone_cache_t *get_tombstone_cache() {
2508 return obj_tombstone_cache;
2509 }
2510
2511 RGWSyncModulesManager *get_sync_modules_manager() {
2512 return sync_modules_manager;
2513 }
2514 const RGWSyncModuleInstanceRef& get_sync_module() {
2515 return sync_module;
2516 }
2517
2518 int get_required_alignment(const rgw_pool& pool, uint64_t *alignment);
2519 int get_max_chunk_size(const rgw_pool& pool, uint64_t *max_chunk_size);
2520 int get_max_chunk_size(const string& placement_rule, const rgw_obj& obj, uint64_t *max_chunk_size);
2521
2522 uint32_t get_max_bucket_shards() {
2523 return rgw_shards_max();
2524 }
2525
2526
2527 int get_raw_obj_ref(const rgw_raw_obj& obj, rgw_rados_ref *ref);
2528
2529 int list_raw_objects_init(const rgw_pool& pool, const string& marker, RGWListRawObjsCtx *ctx);
2530 int list_raw_objects_next(const string& prefix_filter, int max,
2531 RGWListRawObjsCtx& ctx, list<string>& oids,
2532 bool *is_truncated);
2533 int list_raw_objects(const rgw_pool& pool, const string& prefix_filter, int max,
2534 RGWListRawObjsCtx& ctx, list<string>& oids,
2535 bool *is_truncated);
2536 string list_raw_objs_get_cursor(RGWListRawObjsCtx& ctx);
2537
2538 int list_raw_prefixed_objs(const rgw_pool& pool, const string& prefix, list<string>& result);
2539 int list_zonegroups(list<string>& zonegroups);
2540 int list_regions(list<string>& regions);
2541 int list_zones(list<string>& zones);
2542 int list_realms(list<string>& realms);
2543 int list_periods(list<string>& periods);
2544 int list_periods(const string& current_period, list<string>& periods);
2545 void tick();
2546
2547 CephContext *ctx() { return cct; }
2548 /** do all necessary setup of the storage device */
2549 int initialize(CephContext *_cct, bool _use_gc_thread, bool _use_lc_thread, bool _quota_threads, bool _run_sync_thread, bool _run_reshard_thread) {
2550 set_context(_cct);
2551 use_gc_thread = _use_gc_thread;
2552 use_lc_thread = _use_lc_thread;
2553 quota_threads = _quota_threads;
2554 run_sync_thread = _run_sync_thread;
2555 run_reshard_thread = _run_reshard_thread;
2556 return initialize();
2557 }
2558 /** Initialize the RADOS instance and prepare to do other ops */
2559 virtual int init_rados();
2560 int init_zg_from_period(bool *initialized);
2561 int init_zg_from_local(bool *creating_defaults);
2562 int init_complete();
2563 int replace_region_with_zonegroup();
2564 int convert_regionmap();
2565 int initialize();
2566 void finalize();
2567
2568 int register_to_service_map(const string& daemon_type, const map<string, string>& meta);
2569
2570 void schedule_context(Context *c);
2571
2572 /** set up a bucket listing. handle is filled in. */
2573 int list_buckets_init(RGWAccessHandle *handle);
2574 /**
2575 * get the next bucket in the listing. obj is filled in,
2576 * handle is updated.
2577 */
2578 int list_buckets_next(rgw_bucket_dir_entry& obj, RGWAccessHandle *handle);
2579
2580 /// list logs
2581 int log_list_init(const string& prefix, RGWAccessHandle *handle);
2582 int log_list_next(RGWAccessHandle handle, string *name);
2583
2584 /// remove log
2585 int log_remove(const string& name);
2586
2587 /// show log
2588 int log_show_init(const string& name, RGWAccessHandle *handle);
2589 int log_show_next(RGWAccessHandle handle, rgw_log_entry *entry);
2590
2591 // log bandwidth info
2592 int log_usage(map<rgw_user_bucket, RGWUsageBatch>& usage_info);
2593 int read_usage(const rgw_user& user, uint64_t start_epoch, uint64_t end_epoch, uint32_t max_entries,
2594 bool *is_truncated, RGWUsageIter& read_iter, map<rgw_user_bucket, rgw_usage_log_entry>& usage);
2595 int trim_usage(rgw_user& user, uint64_t start_epoch, uint64_t end_epoch);
2596
2597 int create_pool(const rgw_pool& pool);
2598
2599 int init_bucket_index(RGWBucketInfo& bucket_info, int num_shards);
2600 int select_bucket_placement(RGWUserInfo& user_info, const string& zonegroup_id, const string& rule,
2601 string *pselected_rule_name, RGWZonePlacementInfo *rule_info);
2602 int select_legacy_bucket_placement(RGWZonePlacementInfo *rule_info);
2603 int select_new_bucket_location(RGWUserInfo& user_info, const string& zonegroup_id, const string& rule,
2604 string *pselected_rule_name, RGWZonePlacementInfo *rule_info);
2605 int select_bucket_location_by_rule(const string& location_rule, RGWZonePlacementInfo *rule_info);
2606 void create_bucket_id(string *bucket_id);
2607
2608 bool get_obj_data_pool(const string& placement_rule, const rgw_obj& obj, rgw_pool *pool);
2609 bool obj_to_raw(const string& placement_rule, const rgw_obj& obj, rgw_raw_obj *raw_obj);
2610
2611 int create_bucket(RGWUserInfo& owner, rgw_bucket& bucket,
2612 const string& zonegroup_id,
2613 const string& placement_rule,
2614 const string& swift_ver_location,
2615 const RGWQuotaInfo * pquota_info,
2616 map<std::string,bufferlist>& attrs,
2617 RGWBucketInfo& bucket_info,
2618 obj_version *pobjv,
2619 obj_version *pep_objv,
2620 ceph::real_time creation_time,
2621 rgw_bucket *master_bucket,
2622 uint32_t *master_num_shards,
2623 bool exclusive = true);
2624 int add_bucket_placement(const rgw_pool& new_pool);
2625 int remove_bucket_placement(const rgw_pool& new_pool);
2626 int list_placement_set(set<rgw_pool>& names);
2627 int create_pools(vector<rgw_pool>& pools, vector<int>& retcodes);
2628
2629 RGWCoroutinesManagerRegistry *get_cr_registry() { return cr_registry; }
2630
2631 class SystemObject {
2632 RGWRados *store;
2633 RGWObjectCtx& ctx;
2634 rgw_raw_obj obj;
2635
2636 RGWObjState *state;
2637
2638 protected:
2639 int get_state(RGWRawObjState **pstate, RGWObjVersionTracker *objv_tracker);
2640
2641 public:
2642 SystemObject(RGWRados *_store, RGWObjectCtx& _ctx, rgw_raw_obj& _obj) : store(_store), ctx(_ctx), obj(_obj), state(NULL) {}
2643
2644 void invalidate_state();
2645
2646 RGWRados *get_store() { return store; }
2647 rgw_raw_obj& get_obj() { return obj; }
2648 RGWObjectCtx& get_ctx() { return ctx; }
2649
2650 struct Read {
2651 RGWRados::SystemObject *source;
2652
2653 struct GetObjState {
2654 rgw_rados_ref ref;
2655 bool has_ref{false};
2656 uint64_t last_ver{0};
2657
2658 GetObjState() {}
2659
2660 int get_ref(RGWRados *store, rgw_raw_obj& obj, rgw_rados_ref **pref);
2661 } state;
2662
2663 struct StatParams {
2664 ceph::real_time *lastmod;
2665 uint64_t *obj_size;
2666 map<string, bufferlist> *attrs;
2667
2668 StatParams() : lastmod(NULL), obj_size(NULL), attrs(NULL) {}
2669 } stat_params;
2670
2671 struct ReadParams {
2672 rgw_cache_entry_info *cache_info{nullptr};
2673 map<string, bufferlist> *attrs;
2674
2675 ReadParams() : attrs(NULL) {}
2676 } read_params;
2677
2678 explicit Read(RGWRados::SystemObject *_source) : source(_source) {}
2679
2680 int stat(RGWObjVersionTracker *objv_tracker);
2681 int read(int64_t ofs, int64_t end, bufferlist& bl, RGWObjVersionTracker *objv_tracker,
2682 boost::optional<obj_version> refresh_version = boost::none);
2683 int get_attr(const char *name, bufferlist& dest);
2684 };
2685 };
2686
2687 struct BucketShard {
2688 RGWRados *store;
2689 rgw_bucket bucket;
2690 int shard_id;
2691 librados::IoCtx index_ctx;
2692 string bucket_obj;
2693
2694 explicit BucketShard(RGWRados *_store) : store(_store), shard_id(-1) {}
2695 int init(const rgw_bucket& _bucket, const rgw_obj& obj);
2696 int init(const rgw_bucket& _bucket, int sid);
2697 int init(const RGWBucketInfo& bucket_info, int sid);
2698 };
2699
2700 class Object {
2701 RGWRados *store;
2702 RGWBucketInfo bucket_info;
2703 RGWObjectCtx& ctx;
2704 rgw_obj obj;
2705
2706 BucketShard bs;
2707
2708 RGWObjState *state;
2709
2710 bool versioning_disabled;
2711
2712 bool bs_initialized;
2713
2714 protected:
2715 int get_state(RGWObjState **pstate, bool follow_olh, bool assume_noent = false);
2716 void invalidate_state();
2717
2718 int prepare_atomic_modification(librados::ObjectWriteOperation& op, bool reset_obj, const string *ptag,
2719 const char *ifmatch, const char *ifnomatch, bool removal_op, bool modify_tail);
2720 int complete_atomic_modification();
2721
2722 public:
2723 Object(RGWRados *_store, const RGWBucketInfo& _bucket_info, RGWObjectCtx& _ctx, const rgw_obj& _obj) : store(_store), bucket_info(_bucket_info),
2724 ctx(_ctx), obj(_obj), bs(store),
2725 state(NULL), versioning_disabled(false),
2726 bs_initialized(false) {}
2727
2728 RGWRados *get_store() { return store; }
2729 rgw_obj& get_obj() { return obj; }
2730 RGWObjectCtx& get_ctx() { return ctx; }
2731 RGWBucketInfo& get_bucket_info() { return bucket_info; }
2732 int get_manifest(RGWObjManifest **pmanifest);
2733
2734 int get_bucket_shard(BucketShard **pbs) {
2735 if (!bs_initialized) {
2736 int r = bs.init(bucket_info.bucket, obj);
2737 if (r < 0) {
2738 return r;
2739 }
2740 bs_initialized = true;
2741 }
2742 *pbs = &bs;
2743 return 0;
2744 }
2745
2746 void set_versioning_disabled(bool status) {
2747 versioning_disabled = status;
2748 }
2749
2750 bool versioning_enabled() {
2751 return (!versioning_disabled && bucket_info.versioning_enabled());
2752 }
2753
2754 struct Read {
2755 RGWRados::Object *source;
2756
2757 struct GetObjState {
2758 librados::IoCtx io_ctx;
2759 rgw_obj obj;
2760 rgw_raw_obj head_obj;
2761 } state;
2762
2763 struct ConditionParams {
2764 const ceph::real_time *mod_ptr;
2765 const ceph::real_time *unmod_ptr;
2766 bool high_precision_time;
2767 uint32_t mod_zone_id;
2768 uint64_t mod_pg_ver;
2769 const char *if_match;
2770 const char *if_nomatch;
2771
2772 ConditionParams() :
2773 mod_ptr(NULL), unmod_ptr(NULL), high_precision_time(false), mod_zone_id(0), mod_pg_ver(0),
2774 if_match(NULL), if_nomatch(NULL) {}
2775 } conds;
2776
2777 struct Params {
2778 ceph::real_time *lastmod;
2779 uint64_t *obj_size;
2780 map<string, bufferlist> *attrs;
2781
2782 Params() : lastmod(NULL), obj_size(NULL), attrs(NULL) {}
2783 } params;
2784
2785 explicit Read(RGWRados::Object *_source) : source(_source) {}
2786
2787 int prepare();
2788 static int range_to_ofs(uint64_t obj_size, int64_t &ofs, int64_t &end);
2789 int read(int64_t ofs, int64_t end, bufferlist& bl);
2790 int iterate(int64_t ofs, int64_t end, RGWGetDataCB *cb);
2791 int get_attr(const char *name, bufferlist& dest);
2792 };
2793
2794 struct Write {
2795 RGWRados::Object *target;
2796
2797 struct MetaParams {
2798 ceph::real_time *mtime;
2799 map<std::string, bufferlist>* rmattrs;
2800 const bufferlist *data;
2801 RGWObjManifest *manifest;
2802 const string *ptag;
2803 list<rgw_obj_index_key> *remove_objs;
2804 ceph::real_time set_mtime;
2805 rgw_user owner;
2806 RGWObjCategory category;
2807 int flags;
2808 const char *if_match;
2809 const char *if_nomatch;
2810 uint64_t olh_epoch;
2811 ceph::real_time delete_at;
2812 bool canceled;
2813 const string *user_data;
2814 rgw_zone_set *zones_trace;
2815 bool modify_tail;
2816 bool completeMultipart;
2817
2818 MetaParams() : mtime(NULL), rmattrs(NULL), data(NULL), manifest(NULL), ptag(NULL),
2819 remove_objs(NULL), category(RGW_OBJ_CATEGORY_MAIN), flags(0),
2820 if_match(NULL), if_nomatch(NULL), olh_epoch(0), canceled(false), user_data(nullptr), zones_trace(nullptr),
2821 modify_tail(false), completeMultipart(false) {}
2822 } meta;
2823
2824 explicit Write(RGWRados::Object *_target) : target(_target) {}
2825
2826 int _do_write_meta(uint64_t size, uint64_t accounted_size,
2827 map<std::string, bufferlist>& attrs,
2828 bool modify_tail, bool assume_noent,
2829 void *index_op);
2830 int write_meta(uint64_t size, uint64_t accounted_size,
2831 map<std::string, bufferlist>& attrs);
2832 int write_data(const char *data, uint64_t ofs, uint64_t len, bool exclusive);
2833 };
2834
2835 struct Delete {
2836 RGWRados::Object *target;
2837
2838 struct DeleteParams {
2839 rgw_user bucket_owner;
2840 int versioning_status;
2841 ACLOwner obj_owner; /* needed for creation of deletion marker */
2842 uint64_t olh_epoch;
2843 string marker_version_id;
2844 uint32_t bilog_flags;
2845 list<rgw_obj_index_key> *remove_objs;
2846 ceph::real_time expiration_time;
2847 ceph::real_time unmod_since;
2848 ceph::real_time mtime; /* for setting delete marker mtime */
2849 bool high_precision_time;
2850 rgw_zone_set *zones_trace;
2851
2852 DeleteParams() : versioning_status(0), olh_epoch(0), bilog_flags(0), remove_objs(NULL), high_precision_time(false), zones_trace(nullptr) {}
2853 } params;
2854
2855 struct DeleteResult {
2856 bool delete_marker;
2857 string version_id;
2858
2859 DeleteResult() : delete_marker(false) {}
2860 } result;
2861
2862 explicit Delete(RGWRados::Object *_target) : target(_target) {}
2863
2864 int delete_obj();
2865 };
2866
2867 struct Stat {
2868 RGWRados::Object *source;
2869
2870 struct Result {
2871 rgw_obj obj;
2872 RGWObjManifest manifest;
2873 bool has_manifest;
2874 uint64_t size;
2875 struct timespec mtime;
2876 map<string, bufferlist> attrs;
2877
2878 Result() : has_manifest(false), size(0) {}
2879 } result;
2880
2881 struct State {
2882 librados::IoCtx io_ctx;
2883 librados::AioCompletion *completion;
2884 int ret;
2885
2886 State() : completion(NULL), ret(0) {}
2887 } state;
2888
2889
2890 explicit Stat(RGWRados::Object *_source) : source(_source) {}
2891
2892 int stat_async();
2893 int wait();
2894 int stat();
2895 private:
2896 int finish();
2897 };
2898 };
2899
2900 class Bucket {
2901 RGWRados *store;
2902 RGWBucketInfo bucket_info;
2903 rgw_bucket& bucket;
2904 int shard_id;
2905
2906 public:
2907 Bucket(RGWRados *_store, const RGWBucketInfo& _bucket_info) : store(_store), bucket_info(_bucket_info), bucket(bucket_info.bucket),
2908 shard_id(RGW_NO_SHARD) {}
2909 RGWRados *get_store() { return store; }
2910 rgw_bucket& get_bucket() { return bucket; }
2911 RGWBucketInfo& get_bucket_info() { return bucket_info; }
2912
2913 int update_bucket_id(const string& new_bucket_id);
2914
2915 int get_shard_id() { return shard_id; }
2916 void set_shard_id(int id) {
2917 shard_id = id;
2918 }
2919
2920 class UpdateIndex {
2921 RGWRados::Bucket *target;
2922 string optag;
2923 rgw_obj obj;
2924 uint16_t bilog_flags{0};
2925 BucketShard bs;
2926 bool bs_initialized{false};
2927 bool blind;
2928 bool prepared{false};
2929 rgw_zone_set *zones_trace{nullptr};
2930
2931 int init_bs() {
2932 int r = bs.init(target->get_bucket(), obj);
2933 if (r < 0) {
2934 return r;
2935 }
2936 bs_initialized = true;
2937 return 0;
2938 }
2939
2940 void invalidate_bs() {
2941 bs_initialized = false;
2942 }
2943
2944 int guard_reshard(BucketShard **pbs, std::function<int(BucketShard *)> call);
2945 public:
2946
2947 UpdateIndex(RGWRados::Bucket *_target, const rgw_obj& _obj) : target(_target), obj(_obj),
2948 bs(target->get_store()) {
2949 blind = (target->get_bucket_info().index_type == RGWBIType_Indexless);
2950 }
2951
2952 int get_bucket_shard(BucketShard **pbs) {
2953 if (!bs_initialized) {
2954 int r = init_bs();
2955 if (r < 0) {
2956 return r;
2957 }
2958 }
2959 *pbs = &bs;
2960 return 0;
2961 }
2962
2963 void set_bilog_flags(uint16_t flags) {
2964 bilog_flags = flags;
2965 }
2966
2967 void set_zones_trace(rgw_zone_set *_zones_trace) {
2968 zones_trace = _zones_trace;
2969 }
2970
2971 int prepare(RGWModifyOp, const string *write_tag);
2972 int complete(int64_t poolid, uint64_t epoch, uint64_t size,
2973 uint64_t accounted_size, ceph::real_time& ut,
2974 const string& etag, const string& content_type,
2975 bufferlist *acl_bl, RGWObjCategory category,
2976 list<rgw_obj_index_key> *remove_objs, const string *user_data = nullptr);
2977 int complete_del(int64_t poolid, uint64_t epoch,
2978 ceph::real_time& removed_mtime, /* mtime of removed object */
2979 list<rgw_obj_index_key> *remove_objs);
2980 int cancel();
2981
2982 const string *get_optag() { return &optag; }
2983
2984 bool is_prepared() { return prepared; }
2985 };
2986
2987 struct List {
2988 RGWRados::Bucket *target;
2989 rgw_obj_key next_marker;
2990
2991 struct Params {
2992 string prefix;
2993 string delim;
2994 rgw_obj_key marker;
2995 rgw_obj_key end_marker;
2996 string ns;
2997 bool enforce_ns;
2998 RGWAccessListFilter *filter;
2999 bool list_versions;
3000
3001 Params() : enforce_ns(true), filter(NULL), list_versions(false) {}
3002 } params;
3003
3004 public:
3005 explicit List(RGWRados::Bucket *_target) : target(_target) {}
3006
3007 int list_objects(int64_t max, vector<rgw_bucket_dir_entry> *result, map<string, bool> *common_prefixes, bool *is_truncated);
3008 rgw_obj_key& get_next_marker() {
3009 return next_marker;
3010 }
3011 };
3012 };
3013
3014 /** Write/overwrite an object to the bucket storage. */
3015 virtual int put_system_obj_impl(rgw_raw_obj& obj, uint64_t size, ceph::real_time *mtime,
3016 map<std::string, bufferlist>& attrs, int flags,
3017 bufferlist& data,
3018 RGWObjVersionTracker *objv_tracker,
3019 ceph::real_time set_mtime /* 0 for don't set */);
3020
3021 virtual int put_system_obj_data(void *ctx, rgw_raw_obj& obj, bufferlist& bl,
3022 off_t ofs, bool exclusive,
3023 RGWObjVersionTracker *objv_tracker = nullptr);
3024 int aio_put_obj_data(void *ctx, rgw_raw_obj& obj, bufferlist& bl,
3025 off_t ofs, bool exclusive, void **handle);
3026
3027 int put_system_obj(void *ctx, rgw_raw_obj& obj, const char *data, size_t len, bool exclusive,
3028 ceph::real_time *mtime, map<std::string, bufferlist>& attrs, RGWObjVersionTracker *objv_tracker,
3029 ceph::real_time set_mtime) {
3030 bufferlist bl;
3031 bl.append(data, len);
3032 int flags = PUT_OBJ_CREATE;
3033 if (exclusive)
3034 flags |= PUT_OBJ_EXCL;
3035
3036 return put_system_obj_impl(obj, len, mtime, attrs, flags, bl, objv_tracker, set_mtime);
3037 }
3038 int aio_wait(void *handle);
3039 bool aio_completed(void *handle);
3040
3041 int on_last_entry_in_listing(RGWBucketInfo& bucket_info,
3042 const std::string& obj_prefix,
3043 const std::string& obj_delim,
3044 std::function<int(const rgw_bucket_dir_entry&)> handler);
3045
3046 bool swift_versioning_enabled(const RGWBucketInfo& bucket_info) const {
3047 return bucket_info.has_swift_versioning() &&
3048 bucket_info.swift_ver_location.size();
3049 }
3050
3051 int swift_versioning_copy(RGWObjectCtx& obj_ctx, /* in/out */
3052 const rgw_user& user, /* in */
3053 RGWBucketInfo& bucket_info, /* in */
3054 rgw_obj& obj); /* in */
3055 int swift_versioning_restore(RGWObjectCtx& obj_ctx, /* in/out */
3056 const rgw_user& user, /* in */
3057 RGWBucketInfo& bucket_info, /* in */
3058 rgw_obj& obj, /* in */
3059 bool& restored); /* out */
3060 int copy_obj_to_remote_dest(RGWObjState *astate,
3061 map<string, bufferlist>& src_attrs,
3062 RGWRados::Object::Read& read_op,
3063 const rgw_user& user_id,
3064 rgw_obj& dest_obj,
3065 ceph::real_time *mtime);
3066
/** How the attrs argument of the copy/fetch calls is combined with the source object's attributes. */
enum AttrsMod {
  /* source attributes are copied unmodified; the attrs parameter is ignored */
  ATTRSMOD_NONE    = 0,
  /* the new object gets the attrs parameter; source attributes are not copied */
  ATTRSMOD_REPLACE = 1,
  /* conflicting keys on the source are overwritten by the attrs parameter */
  ATTRSMOD_MERGE   = 2
};
3072
3073 int rewrite_obj(RGWBucketInfo& dest_bucket_info, rgw_obj& obj);
3074
3075 int stat_remote_obj(RGWObjectCtx& obj_ctx,
3076 const rgw_user& user_id,
3077 const string& client_id,
3078 req_info *info,
3079 const string& source_zone,
3080 rgw_obj& src_obj,
3081 RGWBucketInfo& src_bucket_info,
3082 real_time *src_mtime,
3083 uint64_t *psize,
3084 const real_time *mod_ptr,
3085 const real_time *unmod_ptr,
3086 bool high_precision_time,
3087 const char *if_match,
3088 const char *if_nomatch,
3089 map<string, bufferlist> *pattrs,
3090 string *version_id,
3091 string *ptag,
3092 string *petag);
3093
3094 int fetch_remote_obj(RGWObjectCtx& obj_ctx,
3095 const rgw_user& user_id,
3096 const string& client_id,
3097 const string& op_id,
3098 bool record_op_state,
3099 req_info *info,
3100 const string& source_zone,
3101 rgw_obj& dest_obj,
3102 rgw_obj& src_obj,
3103 RGWBucketInfo& dest_bucket_info,
3104 RGWBucketInfo& src_bucket_info,
3105 ceph::real_time *src_mtime,
3106 ceph::real_time *mtime,
3107 const ceph::real_time *mod_ptr,
3108 const ceph::real_time *unmod_ptr,
3109 bool high_precision_time,
3110 const char *if_match,
3111 const char *if_nomatch,
3112 AttrsMod attrs_mod,
3113 bool copy_if_newer,
3114 map<string, bufferlist>& attrs,
3115 RGWObjCategory category,
3116 uint64_t olh_epoch,
3117 ceph::real_time delete_at,
3118 string *version_id,
3119 string *ptag,
3120 ceph::buffer::list *petag,
3121 void (*progress_cb)(off_t, void *),
3122 void *progress_data,
3123 rgw_zone_set *zones_trace= nullptr);
3124 /**
3125 * Copy an object.
3126 * dest_obj: the object to copy into
3127 * src_obj: the object to copy from
3128 * attrs: usage depends on attrs_mod parameter
3129 * attrs_mod: the modification mode of the attrs, may have the following values:
3130 * ATTRSMOD_NONE - the attributes of the source object will be
3131 * copied without modifications, attrs parameter is ignored;
3132 * ATTRSMOD_REPLACE - new object will have the attributes provided by attrs
3133 * parameter, source object attributes are not copied;
3134 * ATTRSMOD_MERGE - any conflicting meta keys on the source object's attributes
3135 * are overwritten by values contained in attrs parameter.
3136 * Returns: 0 on success, -ERR# otherwise.
3137 */
3138 int copy_obj(RGWObjectCtx& obj_ctx,
3139 const rgw_user& user_id,
3140 const string& client_id,
3141 const string& op_id,
3142 req_info *info,
3143 const string& source_zone,
3144 rgw_obj& dest_obj,
3145 rgw_obj& src_obj,
3146 RGWBucketInfo& dest_bucket_info,
3147 RGWBucketInfo& src_bucket_info,
3148 ceph::real_time *src_mtime,
3149 ceph::real_time *mtime,
3150 const ceph::real_time *mod_ptr,
3151 const ceph::real_time *unmod_ptr,
3152 bool high_precision_time,
3153 const char *if_match,
3154 const char *if_nomatch,
3155 AttrsMod attrs_mod,
3156 bool copy_if_newer,
3157 map<std::string, bufferlist>& attrs,
3158 RGWObjCategory category,
3159 uint64_t olh_epoch,
3160 ceph::real_time delete_at,
3161 string *version_id,
3162 string *ptag,
3163 ceph::buffer::list *petag,
3164 void (*progress_cb)(off_t, void *),
3165 void *progress_data);
3166
3167 int copy_obj_data(RGWObjectCtx& obj_ctx,
3168 RGWBucketInfo& dest_bucket_info,
3169 RGWRados::Object::Read& read_op, off_t end,
3170 rgw_obj& dest_obj,
3171 rgw_obj& src_obj,
3172 uint64_t max_chunk_size,
3173 ceph::real_time *mtime,
3174 ceph::real_time set_mtime,
3175 map<string, bufferlist>& attrs,
3176 RGWObjCategory category,
3177 uint64_t olh_epoch,
3178 ceph::real_time delete_at,
3179 string *version_id,
3180 string *ptag,
3181 ceph::buffer::list *petag);
3182
3183 int check_bucket_empty(RGWBucketInfo& bucket_info);
3184
3185 /**
3186 * Delete a bucket.
3187 * bucket_info: info of the bucket to delete
3188 * Returns 0 on success, -ERR# otherwise.
3189 */
3190 int delete_bucket(RGWBucketInfo& bucket_info, RGWObjVersionTracker& objv_tracker, bool check_empty = true);
3191
3192 bool is_meta_master();
3193
3194 /**
3195 * Check to see if the bucket metadata is synced
3196 */
3197 bool is_syncing_bucket_meta(const rgw_bucket& bucket);
3198 void wakeup_meta_sync_shards(set<int>& shard_ids);
3199 void wakeup_data_sync_shards(const string& source_zone, map<int, set<string> >& shard_ids);
3200
3201 RGWMetaSyncStatusManager* get_meta_sync_manager();
3202 RGWDataSyncStatusManager* get_data_sync_manager(const std::string& source_zone);
3203
3204 int set_bucket_owner(rgw_bucket& bucket, ACLOwner& owner);
3205 int set_buckets_enabled(std::vector<rgw_bucket>& buckets, bool enabled);
3206 int bucket_suspended(rgw_bucket& bucket, bool *suspended);
3207
3208 /** Delete an object.*/
3209 int delete_obj(RGWObjectCtx& obj_ctx,
3210 const RGWBucketInfo& bucket_owner,
3211 const rgw_obj& src_obj,
3212 int versioning_status,
3213 uint16_t bilog_flags = 0,
3214 const ceph::real_time& expiration_time = ceph::real_time(),
3215 rgw_zone_set *zones_trace = nullptr);
3216
3217 /** Delete a raw object.*/
3218 int delete_raw_obj(const rgw_raw_obj& obj);
3219
3220 /* Delete a system object */
3221 virtual int delete_system_obj(rgw_raw_obj& src_obj, RGWObjVersionTracker *objv_tracker = NULL);
3222
3223 /** Remove an object from the bucket index */
3224 int delete_obj_index(const rgw_obj& obj);
3225
3226 /**
3227 * Get an attribute for a system object.
3228 * obj: the object to get attr
3229 * name: name of the attr to retrieve
3230 * dest: bufferlist to store the result in
3231 * Returns: 0 on success, -ERR# otherwise.
3232 */
3233 virtual int system_obj_get_attr(rgw_raw_obj& obj, const char *name, bufferlist& dest);
3234
3235 int system_obj_set_attr(void *ctx, rgw_raw_obj& obj, const char *name, bufferlist& bl,
3236 RGWObjVersionTracker *objv_tracker);
3237 virtual int system_obj_set_attrs(void *ctx, rgw_raw_obj& obj,
3238 map<string, bufferlist>& attrs,
3239 map<string, bufferlist>* rmattrs,
3240 RGWObjVersionTracker *objv_tracker);
3241
3242 /**
3243 * Set an attr on an object.
3244 * bucket_info: info of the bucket holding the object
3245 * obj: the object to set the attr on
3246 * name: the attr to set
3247 * bl: the contents of the attr
3248 * Returns: 0 on success, -ERR# otherwise.
3249 */
3250 int set_attr(void *ctx, const RGWBucketInfo& bucket_info, rgw_obj& obj, const char *name, bufferlist& bl);
3251
3252 int set_attrs(void *ctx, const RGWBucketInfo& bucket_info, rgw_obj& obj,
3253 map<string, bufferlist>& attrs,
3254 map<string, bufferlist>* rmattrs);
3255
3256 int get_system_obj_state(RGWObjectCtx *rctx, rgw_raw_obj& obj, RGWRawObjState **state, RGWObjVersionTracker *objv_tracker);
3257 int get_obj_state(RGWObjectCtx *rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWObjState **state,
3258 bool follow_olh, bool assume_noent = false);
3259 int get_obj_state(RGWObjectCtx *rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWObjState **state) {
3260 return get_obj_state(rctx, bucket_info, obj, state, true);
3261 }
3262
3263 virtual int stat_system_obj(RGWObjectCtx& obj_ctx,
3264 RGWRados::SystemObject::Read::GetObjState& state,
3265 rgw_raw_obj& obj,
3266 map<string, bufferlist> *attrs,
3267 ceph::real_time *lastmod,
3268 uint64_t *obj_size,
3269 RGWObjVersionTracker *objv_tracker);
3270
3271 virtual int get_system_obj(RGWObjectCtx& obj_ctx, RGWRados::SystemObject::Read::GetObjState& read_state,
3272 RGWObjVersionTracker *objv_tracker, rgw_raw_obj& obj,
3273 bufferlist& bl, off_t ofs, off_t end,
3274 map<string, bufferlist> *attrs,
3275 rgw_cache_entry_info *cache_info,
3276 boost::optional<obj_version> refresh_version =
3277 boost::none);
3278
3279 virtual void register_chained_cache(RGWChainedCache *cache) {}
3280 virtual bool chain_cache_entry(list<rgw_cache_entry_info *>& cache_info_entries, RGWChainedCache::Entry *chained_entry) { return false; }
3281
3282 int iterate_obj(RGWObjectCtx& ctx,
3283 const RGWBucketInfo& bucket_info, const rgw_obj& obj,
3284 off_t ofs, off_t end,
3285 uint64_t max_chunk_size,
3286 int (*iterate_obj_cb)(const RGWBucketInfo& bucket_info, const rgw_obj& obj, const rgw_raw_obj&, off_t, off_t, off_t, bool, RGWObjState *, void *),
3287 void *arg);
3288
3289 int flush_read_list(struct get_obj_data *d);
3290
3291 int get_obj_iterate_cb(RGWObjectCtx *ctx, RGWObjState *astate,
3292 const RGWBucketInfo& bucket_info, const rgw_obj& obj,
3293 const rgw_raw_obj& read_obj,
3294 off_t obj_ofs, off_t read_ofs, off_t len,
3295 bool is_head_obj, void *arg);
3296
3297 void get_obj_aio_completion_cb(librados::completion_t cb, void *arg);
3298
3299 /**
3300 * a simple object read without keeping state
3301 */
3302
3303 virtual int raw_obj_stat(rgw_raw_obj& obj, uint64_t *psize, ceph::real_time *pmtime, uint64_t *epoch,
3304 map<string, bufferlist> *attrs, bufferlist *first_chunk,
3305 RGWObjVersionTracker *objv_tracker);
3306
3307 int obj_operate(const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::ObjectWriteOperation *op);
3308 int obj_operate(const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::ObjectReadOperation *op);
3309
3310 int guard_reshard(BucketShard *bs, const rgw_obj& obj_instance, std::function<int(BucketShard *)> call);
3311 int block_while_resharding(RGWRados::BucketShard *bs, string *new_bucket_id);
3312
3313 void bucket_index_guard_olh_op(RGWObjState& olh_state, librados::ObjectOperation& op);
3314 int olh_init_modification(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& olh_obj, string *op_tag);
3315 int olh_init_modification_impl(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& olh_obj, string *op_tag);
3316 int bucket_index_link_olh(const RGWBucketInfo& bucket_info, RGWObjState& olh_state,
3317 const rgw_obj& obj_instance, bool delete_marker,
3318 const string& op_tag, struct rgw_bucket_dir_entry_meta *meta,
3319 uint64_t olh_epoch,
3320 ceph::real_time unmod_since, bool high_precision_time, rgw_zone_set *zones_trace = nullptr);
3321 int bucket_index_unlink_instance(const RGWBucketInfo& bucket_info, const rgw_obj& obj_instance, const string& op_tag, const string& olh_tag, uint64_t olh_epoch, rgw_zone_set *zones_trace = nullptr);
3322 int bucket_index_read_olh_log(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& obj_instance, uint64_t ver_marker,
3323 map<uint64_t, vector<rgw_bucket_olh_log_entry> > *log, bool *is_truncated);
3324 int bucket_index_trim_olh_log(const RGWBucketInfo& bucket_info, RGWObjState& obj_state, const rgw_obj& obj_instance, uint64_t ver);
3325 int bucket_index_clear_olh(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& obj_instance);
3326 int apply_olh_log(RGWObjectCtx& ctx, RGWObjState& obj_state, const RGWBucketInfo& bucket_info, const rgw_obj& obj,
3327 bufferlist& obj_tag, map<uint64_t, vector<rgw_bucket_olh_log_entry> >& log,
3328 uint64_t *plast_ver, rgw_zone_set *zones_trace = nullptr);
3329 int update_olh(RGWObjectCtx& obj_ctx, RGWObjState *state, const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_zone_set *zones_trace = nullptr);
3330 int set_olh(RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info, const rgw_obj& target_obj, bool delete_marker, rgw_bucket_dir_entry_meta *meta,
3331 uint64_t olh_epoch, ceph::real_time unmod_since, bool high_precision_time, rgw_zone_set *zones_trace = nullptr);
3332 int unlink_obj_instance(RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info, const rgw_obj& target_obj,
3333 uint64_t olh_epoch, rgw_zone_set *zones_trace = nullptr);
3334
3335 void check_pending_olh_entries(map<string, bufferlist>& pending_entries, map<string, bufferlist> *rm_pending_entries);
3336 int remove_olh_pending_entries(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& olh_obj, map<string, bufferlist>& pending_attrs);
3337 int follow_olh(const RGWBucketInfo& bucket_info, RGWObjectCtx& ctx, RGWObjState *state, const rgw_obj& olh_obj, rgw_obj *target);
3338 int get_olh(const RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWOLHInfo *olh);
3339
3340 void gen_rand_obj_instance_name(rgw_obj *target);
3341
3342 int omap_get_vals(rgw_raw_obj& obj, bufferlist& header, const std::string& marker, uint64_t count, std::map<string, bufferlist>& m);
3343 int omap_get_all(rgw_raw_obj& obj, bufferlist& header, std::map<string, bufferlist>& m);
3344 int omap_set(rgw_raw_obj& obj, const std::string& key, bufferlist& bl);
3345 int omap_set(rgw_raw_obj& obj, map<std::string, bufferlist>& m);
3346 int omap_del(rgw_raw_obj& obj, const std::string& key);
3347 int update_containers_stats(map<string, RGWBucketEnt>& m);
3348 int append_async(rgw_raw_obj& obj, size_t size, bufferlist& bl);
3349
3350 int watch(const string& oid, uint64_t *watch_handle, librados::WatchCtx2 *ctx);
3351 int unwatch(uint64_t watch_handle);
3352 void add_watcher(int i);
3353 void remove_watcher(int i);
3354 virtual bool need_watch_notify() { return false; }
3355 int init_watch();
3356 void finalize_watch();
3357 int distribute(const string& key, bufferlist& bl);
3358 virtual int watch_cb(uint64_t notify_id,
3359 uint64_t cookie,
3360 uint64_t notifier_id,
3361 bufferlist& bl) { return 0; }
3362 void pick_control_oid(const string& key, string& notify_oid);
3363
3364 virtual void set_cache_enabled(bool state) {}
3365
3366 void set_atomic(void *ctx, rgw_obj& obj) {
3367 RGWObjectCtx *rctx = static_cast<RGWObjectCtx *>(ctx);
3368 rctx->obj.set_atomic(obj);
3369 }
3370 void set_prefetch_data(void *ctx, rgw_obj& obj) {
3371 RGWObjectCtx *rctx = static_cast<RGWObjectCtx *>(ctx);
3372 rctx->obj.set_prefetch_data(obj);
3373 }
3374 void set_prefetch_data(void *ctx, rgw_raw_obj& obj) {
3375 RGWObjectCtx *rctx = static_cast<RGWObjectCtx *>(ctx);
3376 rctx->raw.set_prefetch_data(obj);
3377 }
3378
3379 int decode_policy(bufferlist& bl, ACLOwner *owner);
3380 int get_bucket_stats(RGWBucketInfo& bucket_info, int shard_id, string *bucket_ver, string *master_ver,
3381 map<RGWObjCategory, RGWStorageStats>& stats, string *max_marker, bool* syncstopped = NULL);
3382 int get_bucket_stats_async(RGWBucketInfo& bucket_info, int shard_id, RGWGetBucketStats_CB *cb);
3383 int get_user_stats(const rgw_user& user, RGWStorageStats& stats);
3384 int get_user_stats_async(const rgw_user& user, RGWGetUserStats_CB *cb);
3385 void get_bucket_instance_obj(const rgw_bucket& bucket, rgw_raw_obj& obj);
3386 void get_bucket_meta_oid(const rgw_bucket& bucket, string& oid);
3387
3388 int put_bucket_entrypoint_info(const string& tenant_name, const string& bucket_name, RGWBucketEntryPoint& entry_point,
3389 bool exclusive, RGWObjVersionTracker& objv_tracker, ceph::real_time mtime,
3390 map<string, bufferlist> *pattrs);
3391 int put_bucket_instance_info(RGWBucketInfo& info, bool exclusive, ceph::real_time mtime, map<string, bufferlist> *pattrs);
3392 int get_bucket_entrypoint_info(RGWObjectCtx& obj_ctx, const string& tenant_name, const string& bucket_name,
3393 RGWBucketEntryPoint& entry_point, RGWObjVersionTracker *objv_tracker,
3394 ceph::real_time *pmtime, map<string, bufferlist> *pattrs, rgw_cache_entry_info *cache_info = NULL,
3395 boost::optional<obj_version> refresh_version = boost::none);
3396 int get_bucket_instance_info(RGWObjectCtx& obj_ctx, const string& meta_key, RGWBucketInfo& info, ceph::real_time *pmtime, map<string, bufferlist> *pattrs);
3397 int get_bucket_instance_info(RGWObjectCtx& obj_ctx, const rgw_bucket& bucket, RGWBucketInfo& info, ceph::real_time *pmtime, map<string, bufferlist> *pattrs);
3398 int get_bucket_instance_from_oid(RGWObjectCtx& obj_ctx, const string& oid, RGWBucketInfo& info, ceph::real_time *pmtime, map<string, bufferlist> *pattrs,
3399 rgw_cache_entry_info *cache_info = NULL,
3400 boost::optional<obj_version> refresh_version = boost::none);
3401
3402 int convert_old_bucket_info(RGWObjectCtx& obj_ctx, const string& tenant_name, const string& bucket_name);
3403 static void make_bucket_entry_name(const string& tenant_name, const string& bucket_name, string& bucket_entry);
3404
3405
3406 private:
3407 int _get_bucket_info(RGWObjectCtx& obj_ctx, const string& tenant,
3408 const string& bucket_name, RGWBucketInfo& info,
3409 real_time *pmtime,
3410 map<string, bufferlist> *pattrs,
3411 boost::optional<obj_version> refresh_version);
3412 public:
3413
3414
3415 int get_bucket_info(RGWObjectCtx& obj_ctx,
3416 const string& tenant_name, const string& bucket_name,
3417 RGWBucketInfo& info,
3418 ceph::real_time *pmtime, map<string, bufferlist> *pattrs = NULL);
3419
3420 // Returns true on successful refresh. Returns false if there was an
3421 // error or the version stored on the OSD is the same as that
3422 // presented in the BucketInfo structure.
3423 // NOTE(review): the declared return type is int, not bool -- confirm how the status is encoded.
3424 int try_refresh_bucket_info(RGWBucketInfo& info,
3425 ceph::real_time *pmtime,
3426 map<string, bufferlist> *pattrs = nullptr);
3427
3428 int put_linked_bucket_info(RGWBucketInfo& info, bool exclusive, ceph::real_time mtime, obj_version *pep_objv,
3429 map<string, bufferlist> *pattrs, bool create_entry_point);
3430
3431 int cls_rgw_init_index(librados::IoCtx& io_ctx, librados::ObjectWriteOperation& op, string& oid);
3432 int cls_obj_prepare_op(BucketShard& bs, RGWModifyOp op, string& tag, rgw_obj& obj, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
3433 int cls_obj_complete_op(BucketShard& bs, const rgw_obj& obj, RGWModifyOp op, string& tag, int64_t pool, uint64_t epoch,
3434 rgw_bucket_dir_entry& ent, RGWObjCategory category, list<rgw_obj_index_key> *remove_objs, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
3435 int cls_obj_complete_add(BucketShard& bs, const rgw_obj& obj, string& tag, int64_t pool, uint64_t epoch, rgw_bucket_dir_entry& ent,
3436 RGWObjCategory category, list<rgw_obj_index_key> *remove_objs, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
3437 int cls_obj_complete_del(BucketShard& bs, string& tag, int64_t pool, uint64_t epoch, rgw_obj& obj,
3438 ceph::real_time& removed_mtime, list<rgw_obj_index_key> *remove_objs, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
3439 int cls_obj_complete_cancel(BucketShard& bs, string& tag, rgw_obj& obj, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
3440 int cls_obj_set_bucket_tag_timeout(RGWBucketInfo& bucket_info, uint64_t timeout);
3441 int cls_bucket_list(RGWBucketInfo& bucket_info, int shard_id, rgw_obj_index_key& start, const string& prefix,
3442 uint32_t num_entries, bool list_versions, map<string, rgw_bucket_dir_entry>& m,
3443 bool *is_truncated, rgw_obj_index_key *last_entry,
3444 bool (*force_check_filter)(const string& name) = NULL);
3445 int cls_bucket_head(const RGWBucketInfo& bucket_info, int shard_id, map<string, struct rgw_bucket_dir_header>& headers, map<int, string> *bucket_instance_ids = NULL);
3446 int cls_bucket_head_async(const RGWBucketInfo& bucket_info, int shard_id, RGWGetDirHeader_CB *ctx, int *num_aio);
3447 int list_bi_log_entries(RGWBucketInfo& bucket_info, int shard_id, string& marker, uint32_t max, std::list<rgw_bi_log_entry>& result, bool *truncated);
3448 int trim_bi_log_entries(RGWBucketInfo& bucket_info, int shard_id, string& marker, string& end_marker);
3449 int resync_bi_log_entries(RGWBucketInfo& bucket_info, int shard_id);
3450 int stop_bi_log_entries(RGWBucketInfo& bucket_info, int shard_id);
3451 int get_bi_log_status(RGWBucketInfo& bucket_info, int shard_id, map<int, string>& max_marker);
3452
3453 int bi_get_instance(const RGWBucketInfo& bucket_info, rgw_obj& obj, rgw_bucket_dir_entry *dirent);
3454 int bi_get(rgw_bucket& bucket, rgw_obj& obj, BIIndexType index_type, rgw_cls_bi_entry *entry);
3455 void bi_put(librados::ObjectWriteOperation& op, BucketShard& bs, rgw_cls_bi_entry& entry);
3456 int bi_put(BucketShard& bs, rgw_cls_bi_entry& entry);
3457 int bi_put(rgw_bucket& bucket, rgw_obj& obj, rgw_cls_bi_entry& entry);
3458 int bi_list(rgw_bucket& bucket, int shard_id, const string& filter_obj, const string& marker, uint32_t max, list<rgw_cls_bi_entry> *entries, bool *is_truncated);
3459 int bi_list(BucketShard& bs, const string& filter_obj, const string& marker, uint32_t max, list<rgw_cls_bi_entry> *entries, bool *is_truncated);
3460 int bi_list(rgw_bucket& bucket, const string& obj_name, const string& marker, uint32_t max,
3461 list<rgw_cls_bi_entry> *entries, bool *is_truncated);
3462 int bi_remove(BucketShard& bs);
3463
3464 int cls_obj_usage_log_add(const string& oid, rgw_usage_log_info& info);
3465 int cls_obj_usage_log_read(string& oid, string& user, uint64_t start_epoch, uint64_t end_epoch, uint32_t max_entries,
3466 string& read_iter, map<rgw_user_bucket, rgw_usage_log_entry>& usage, bool *is_truncated);
3467 int cls_obj_usage_log_trim(string& oid, string& user, uint64_t start_epoch, uint64_t end_epoch);
3468
3469 int key_to_shard_id(const string& key, int max_shards);
3470 void shard_name(const string& prefix, unsigned max_shards, const string& key, string& name, int *shard_id);
3471 void shard_name(const string& prefix, unsigned max_shards, const string& section, const string& key, string& name);
3472 void shard_name(const string& prefix, unsigned shard_id, string& name);
3473 int get_target_shard_id(const RGWBucketInfo& bucket_info, const string& obj_key, int *shard_id);
3474 void time_log_prepare_entry(cls_log_entry& entry, const ceph::real_time& ut, const string& section, const string& key, bufferlist& bl);
3475 int time_log_add_init(librados::IoCtx& io_ctx);
3476 int time_log_add(const string& oid, list<cls_log_entry>& entries,
3477 librados::AioCompletion *completion, bool monotonic_inc = true);
3478 int time_log_add(const string& oid, const ceph::real_time& ut, const string& section, const string& key, bufferlist& bl);
3479 int time_log_list(const string& oid, const ceph::real_time& start_time, const ceph::real_time& end_time,
3480 int max_entries, list<cls_log_entry>& entries,
3481 const string& marker, string *out_marker, bool *truncated);
3482 int time_log_info(const string& oid, cls_log_header *header);
3483 int time_log_info_async(librados::IoCtx& io_ctx, const string& oid, cls_log_header *header, librados::AioCompletion *completion);
3484 int time_log_trim(const string& oid, const ceph::real_time& start_time, const ceph::real_time& end_time,
3485 const string& from_marker, const string& to_marker,
3486 librados::AioCompletion *completion = nullptr);
3487
3488 string objexp_hint_get_shardname(int shard_num);
3489 int objexp_key_shard(const rgw_obj_index_key& key);
3490 void objexp_get_shard(int shard_num,
3491 string& shard); /* out */
3492 int objexp_hint_add(const ceph::real_time& delete_at,
3493 const string& tenant_name,
3494 const string& bucket_name,
3495 const string& bucket_id,
3496 const rgw_obj_index_key& obj_key);
3497 int objexp_hint_list(const string& oid,
3498 const ceph::real_time& start_time,
3499 const ceph::real_time& end_time,
3500 const int max_entries,
3501 const string& marker,
3502 list<cls_timeindex_entry>& entries, /* out */
3503 string *out_marker, /* out */
3504 bool *truncated); /* out */
3505 int objexp_hint_parse(cls_timeindex_entry &ti_entry,
3506 objexp_hint_entry& hint_entry); /* out */
3507 int objexp_hint_trim(const string& oid,
3508 const ceph::real_time& start_time,
3509 const ceph::real_time& end_time,
3510 const string& from_marker = std::string(),
3511 const string& to_marker = std::string());
3512
3513 int lock_exclusive(rgw_pool& pool, const string& oid, ceph::timespan& duration, string& zone_id, string& owner_id);
3514 int unlock(rgw_pool& pool, const string& oid, string& zone_id, string& owner_id);
3515
3516 void update_gc_chain(rgw_obj& head_obj, RGWObjManifest& manifest, cls_rgw_obj_chain *chain);
3517 int send_chain_to_gc(cls_rgw_obj_chain& chain, const string& tag, bool sync);
3518 int gc_operate(string& oid, librados::ObjectWriteOperation *op);
3519 int gc_aio_operate(string& oid, librados::ObjectWriteOperation *op);
3520 int gc_operate(string& oid, librados::ObjectReadOperation *op, bufferlist *pbl);
3521
3522 int list_gc_objs(int *index, string& marker, uint32_t max, bool expired_only, std::list<cls_rgw_gc_obj_info>& result, bool *truncated);
3523 int process_gc();
3524 int process_expire_objects();
3525 int defer_gc(void *ctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj);
3526
3527 int process_lc();
3528 int list_lc_progress(const string& marker, uint32_t max_entries, map<string, int> *progress_map);
3529
3530 int bucket_check_index(RGWBucketInfo& bucket_info,
3531 map<RGWObjCategory, RGWStorageStats> *existing_stats,
3532 map<RGWObjCategory, RGWStorageStats> *calculated_stats);
3533 int bucket_rebuild_index(RGWBucketInfo& bucket_info);
3534 int bucket_set_reshard(RGWBucketInfo& bucket_info, const cls_rgw_bucket_instance_entry& entry);
3535 int remove_objs_from_index(RGWBucketInfo& bucket_info, list<rgw_obj_index_key>& oid_list);
3536 int move_rados_obj(librados::IoCtx& src_ioctx,
3537 const string& src_oid, const string& src_locator,
3538 librados::IoCtx& dst_ioctx,
3539 const string& dst_oid, const string& dst_locator);
3540 int fix_head_obj_locator(const RGWBucketInfo& bucket_info, bool copy_obj, bool remove_bad, rgw_obj_key& key);
3541 int fix_tail_obj_locator(const RGWBucketInfo& bucket_info, rgw_obj_key& key, bool fix, bool *need_fix);
3542
3543 int cls_user_get_header(const string& user_id, cls_user_header *header);
3544 int cls_user_get_header_async(const string& user_id, RGWGetUserHeader_CB *ctx);
3545 int cls_user_sync_bucket_stats(rgw_raw_obj& user_obj, const RGWBucketInfo& bucket_info);
3546 int cls_user_list_buckets(rgw_raw_obj& obj,
3547 const string& in_marker,
3548 const string& end_marker,
3549 int max_entries,
3550 list<cls_user_bucket_entry>& entries,
3551 string *out_marker,
3552 bool *truncated);
3553 int cls_user_add_bucket(rgw_raw_obj& obj, const cls_user_bucket_entry& entry);
3554 int cls_user_update_buckets(rgw_raw_obj& obj, list<cls_user_bucket_entry>& entries, bool add);
3555 int cls_user_complete_stats_sync(rgw_raw_obj& obj);
3556 int complete_sync_user_stats(const rgw_user& user_id);
3557 int cls_user_add_bucket(rgw_raw_obj& obj, list<cls_user_bucket_entry>& entries);
3558 int cls_user_remove_bucket(rgw_raw_obj& obj, const cls_user_bucket& bucket);
3559 int cls_user_get_bucket_stats(const rgw_bucket& bucket, cls_user_bucket_entry& entry);
3560
3561 int check_quota(const rgw_user& bucket_owner, rgw_bucket& bucket,
3562 RGWQuotaInfo& user_quota, RGWQuotaInfo& bucket_quota, uint64_t obj_size);
3563
3564 int check_bucket_shards(const RGWBucketInfo& bucket_info, const rgw_bucket& bucket,
3565 RGWQuotaInfo& bucket_quota);
3566
3567 int add_bucket_to_reshard(const RGWBucketInfo& bucket_info, uint32_t new_num_shards);
3568
3569 uint64_t instance_id();
3570 const string& zone_name() {
3571 return get_zone_params().get_name();
3572 }
3573 const string& zone_id() {
3574 return get_zone_params().get_id();
3575 }
3576 string unique_id(uint64_t unique_num) {
3577 char buf[32];
3578 snprintf(buf, sizeof(buf), ".%llu.%llu", (unsigned long long)instance_id(), (unsigned long long)unique_num);
3579 string s = get_zone_params().get_id() + buf;
3580 return s;
3581 }
3582
3583 void init_unique_trans_id_deps() {
3584 char buf[16 + 2 + 1]; /* uint64_t needs 16, 2 hyphens add further 2 */
3585
3586 snprintf(buf, sizeof(buf), "-%llx-", (unsigned long long)instance_id());
3587 url_encode(string(buf) + get_zone_params().get_name(), trans_id_suffix);
3588 }
3589
3590 /* In order to preserve compatibility with Swift API, transaction ID
3591 * should contain at least 32 characters satisfying following spec:
3592 * - first 21 chars must be in range [0-9a-f]. Swift uses this
3593 * space for storing fragment of UUID obtained through a call to
3594 * uuid4() function of Python's uuid module;
3595 * - char no. 22 must be a hyphen;
3596 * - at least 10 next characters constitute hex-formatted timestamp
3597 * padded with zeroes if necessary. All bytes must be in [0-9a-f]
3598 * range;
3599 * - last, optional part of transaction ID is any url-encoded string
3600 * without restriction on length. */
3601 string unique_trans_id(const uint64_t unique_num) {
3602 char buf[41]; /* 2 + 21 + 1 + 16 (timestamp can consume up to 16) + 1 */
3603 time_t timestamp = time(NULL);
3604
3605 snprintf(buf, sizeof(buf), "tx%021llx-%010llx",
3606 (unsigned long long)unique_num,
3607 (unsigned long long)timestamp);
3608
3609 return string(buf) + trans_id_suffix;
3610 }
3611
3612 void get_log_pool(rgw_pool& pool) {
3613 pool = get_zone_params().log_pool;
3614 }
3615
3616 bool need_to_log_data() {
3617 return get_zone().log_data;
3618 }
3619
3620 bool need_to_log_metadata() {
3621 return is_meta_master() &&
3622 (get_zonegroup().zones.size() > 1 || current_period.is_multi_zonegroups_with_zones());
3623 }
3624
3625 bool can_reshard() const {
3626 return current_period.get_id().empty() ||
3627 (zonegroup.zones.size() == 1 && current_period.is_single_zonegroup());
3628 }
3629
3630 librados::Rados* get_rados_handle();
3631
3632 int delete_raw_obj_aio(const rgw_raw_obj& obj, list<librados::AioCompletion *>& handles);
3633 int delete_obj_aio(const rgw_obj& obj, RGWBucketInfo& info, RGWObjState *astate,
3634 list<librados::AioCompletion *>& handles, bool keep_index_consistent);
3635 private:
3636 /**
3637 * This is a helper method, it generates a list of bucket index objects with the given
3638 * bucket base oid and number of shards.
3639 *
3640 * bucket_oid_base [in] - base name of the bucket index object;
3641 * num_shards [in] - number of bucket index object shards.
3642 * bucket_objs [out] - filled by this method, a list of bucket index objects.
3643 */
3644 void get_bucket_index_objects(const string& bucket_oid_base, uint32_t num_shards,
3645 map<int, string>& bucket_objs, int shard_id = -1);
3646
3647 /**
3648 * Get the bucket index object with the given base bucket index object and object key,
3649 * and the number of bucket index shards.
3650 *
3651 * bucket_oid_base [in] - bucket object base name.
3652 * obj_key [in] - object key.
3653 * num_shards [in] - number of bucket index shards.
3654 * hash_type [in] - type of hash to find the shard ID.
3655 * bucket_obj [out] - the bucket index object for the given object.
3656 *
3657 * Return 0 on success, a failure code otherwise.
3658 */
3659 int get_bucket_index_object(const string& bucket_oid_base, const string& obj_key,
3660 uint32_t num_shards, RGWBucketInfo::BIShardsHashType hash_type, string *bucket_obj, int *shard);
3661
3662 void get_bucket_index_object(const string& bucket_oid_base, uint32_t num_shards,
3663 int shard_id, string *bucket_obj);
3664
3665 /**
3666 * Check the actual on-disk state of the object specified
3667 * by list_state, and fill in the time and size of object.
3668 * Then append any changes to suggested_updates for
3669 * the rgw class' dir_suggest_changes function.
3670 *
3671 * Note that this can maul list_state; don't use it afterwards. Also
3672 * it expects object to already be filled in from list_state; it only
3673 * sets the size and mtime.
3674 *
3675 * Returns 0 on success, -ENOENT if the object doesn't exist on disk,
3676 * and -errno on other failures. (-ENOENT is not a failure, and it
3677 * will encode that info as a suggested update.)
3678 */
3679 int check_disk_state(librados::IoCtx io_ctx,
3680 const RGWBucketInfo& bucket_info,
3681 rgw_bucket_dir_entry& list_state,
3682 rgw_bucket_dir_entry& object,
3683 bufferlist& suggested_updates);
3684
3685 /**
3686 * Init pool iteration
3687 * pool: pool to use for the ctx initialization
3688 * ctx: context object to use for the iteration
3689 * Returns: 0 on success, -ERR# otherwise.
3690 */
3691 int pool_iterate_begin(const rgw_pool& pool, RGWPoolIterCtx& ctx);
3692
3693 /**
3694 * Init pool iteration
3695 * pool: pool to use
3696 * cursor: position to start iteration
3697 * ctx: context object to use for the iteration
3698 * Returns: 0 on success, -ERR# otherwise.
3699 */
3700 int pool_iterate_begin(const rgw_pool& pool, const string& cursor, RGWPoolIterCtx& ctx);
3701
3702 /**
3703 * Get pool iteration position
3704 * ctx: context object to use for the iteration
3705 * Returns: string representation of position
3706 */
3707 string pool_iterate_get_cursor(RGWPoolIterCtx& ctx);
3708
3709 /**
3710 * Iterate over pool return object names, use optional filter
3711 * ctx: iteration context, initialized with pool_iterate_begin()
3712 * num: max number of objects to return
3713 * objs: a vector that the results will append into
3714 * is_truncated: if not NULL, will hold true iff iteration is complete
3715 * filter: if not NULL, will be used to filter returned objects
3716 * Returns: 0 on success, -ERR# otherwise.
3717 */
3718 int pool_iterate(RGWPoolIterCtx& ctx, uint32_t num, vector<rgw_bucket_dir_entry>& objs,
3719 bool *is_truncated, RGWAccessListFilter *filter);
3720
3721 uint64_t next_bucket_id();
3722 };
3723
3724 class RGWStoreManager {
3725 public:
3726 RGWStoreManager() {}
3727 static RGWRados *get_storage(CephContext *cct, bool use_gc_thread, bool use_lc_thread, bool quota_threads, bool run_sync_thread, bool run_reshard_thread) {
3728 RGWRados *store = init_storage_provider(cct, use_gc_thread, use_lc_thread, quota_threads, run_sync_thread,
3729 run_reshard_thread);
3730 return store;
3731 }
3732 static RGWRados *get_raw_storage(CephContext *cct) {
3733 RGWRados *store = init_raw_storage_provider(cct);
3734 return store;
3735 }
3736 static RGWRados *init_storage_provider(CephContext *cct, bool use_gc_thread, bool use_lc_thread, bool quota_threads, bool run_sync_thread, bool run_reshard_thread);
3737 static RGWRados *init_raw_storage_provider(CephContext *cct);
3738 static void close_storage(RGWRados *store);
3739
3740 };
3741
3742 template <class T>
3743 class RGWChainedCacheImpl : public RGWChainedCache {
3744 ceph::timespan expiry;
3745 RWLock lock;
3746
3747 map<string, std::pair<T, ceph::coarse_mono_time>> entries;
3748
3749 public:
3750 RGWChainedCacheImpl() : lock("RGWChainedCacheImpl::lock") {}
3751
3752 void init(RGWRados *store) {
3753 store->register_chained_cache(this);
3754 expiry = std::chrono::seconds(store->ctx()->_conf->get_val<uint64_t>(
3755 "rgw_cache_expiry_interval"));
3756 }
3757
3758 bool find(const string& key, T *entry) {
3759 RWLock::RLocker rl(lock);
3760 auto iter = entries.find(key);
3761 if (iter == entries.end()) {
3762 return false;
3763 }
3764 if (expiry.count() &&
3765 (ceph::coarse_mono_clock::now() - iter->second.second) > expiry) {
3766 return false;
3767 }
3768
3769 *entry = iter->second.first;
3770 return true;
3771 }
3772
3773 bool put(RGWRados *store, const string& key, T *entry, list<rgw_cache_entry_info *>& cache_info_entries) {
3774 Entry chain_entry(this, key, entry);
3775
3776 /* we need the store cache to call us under its lock to maintain lock ordering */
3777 return store->chain_cache_entry(cache_info_entries, &chain_entry);
3778 }
3779
3780 void chain_cb(const string& key, void *data) override {
3781 T *entry = static_cast<T *>(data);
3782 RWLock::WLocker wl(lock);
3783 entries[key].first = *entry;
3784 if (expiry.count() > 0) {
3785 entries[key].second = ceph::coarse_mono_clock::now();
3786 }
3787 }
3788
3789 void invalidate(const string& key) override {
3790 RWLock::WLocker wl(lock);
3791 entries.erase(key);
3792 }
3793
3794 void invalidate_all() override {
3795 RWLock::WLocker wl(lock);
3796 entries.clear();
3797 }
3798 }; /* RGWChainedCacheImpl */
3799
3800 /**
3801 * Base of PUT operation.
3802 * Allow to create chained data transformers like compresors and encryptors.
3803 */
3804 class RGWPutObjDataProcessor
3805 {
3806 public:
3807 RGWPutObjDataProcessor(){}
3808 virtual ~RGWPutObjDataProcessor(){}
3809 virtual int handle_data(bufferlist& bl, off_t ofs, void **phandle, rgw_raw_obj *pobj, bool *again) = 0;
3810 virtual int throttle_data(void *handle, const rgw_raw_obj& obj, uint64_t size, bool need_to_wait) = 0;
3811 }; /* RGWPutObjDataProcessor */
3812
3813
3814 class RGWPutObjProcessor : public RGWPutObjDataProcessor
3815 {
3816 protected:
3817 RGWRados *store;
3818 RGWObjectCtx& obj_ctx;
3819 bool is_complete;
3820 RGWBucketInfo bucket_info;
3821 bool canceled;
3822
3823 virtual int do_complete(size_t accounted_size, const string& etag,
3824 ceph::real_time *mtime, ceph::real_time set_mtime,
3825 map<string, bufferlist>& attrs, ceph::real_time delete_at,
3826 const char *if_match, const char *if_nomatch, const string *user_data,
3827 rgw_zone_set* zones_trace = nullptr) = 0;
3828
3829 public:
3830 RGWPutObjProcessor(RGWObjectCtx& _obj_ctx, RGWBucketInfo& _bi) : store(NULL),
3831 obj_ctx(_obj_ctx),
3832 is_complete(false),
3833 bucket_info(_bi),
3834 canceled(false) {}
3835 ~RGWPutObjProcessor() override {}
3836 virtual int prepare(RGWRados *_store, string *oid_rand) {
3837 store = _store;
3838 return 0;
3839 }
3840
3841 int complete(size_t accounted_size, const string& etag,
3842 ceph::real_time *mtime, ceph::real_time set_mtime,
3843 map<string, bufferlist>& attrs, ceph::real_time delete_at,
3844 const char *if_match = NULL, const char *if_nomatch = NULL, const string *user_data = nullptr,
3845 rgw_zone_set *zones_trace = nullptr);
3846
3847 CephContext *ctx();
3848
3849 bool is_canceled() { return canceled; }
3850 }; /* RGWPutObjProcessor */
3851
3852 struct put_obj_aio_info {
3853 void *handle;
3854 rgw_raw_obj obj;
3855 uint64_t size;
3856 };
3857
3858 #define RGW_PUT_OBJ_MIN_WINDOW_SIZE_DEFAULT (16 * 1024 * 1024)
3859
3860 class RGWPutObjProcessor_Aio : public RGWPutObjProcessor
3861 {
3862 list<struct put_obj_aio_info> pending;
3863 uint64_t window_size{RGW_PUT_OBJ_MIN_WINDOW_SIZE_DEFAULT};
3864 uint64_t pending_size{0};
3865
3866 struct put_obj_aio_info pop_pending();
3867 int wait_pending_front();
3868 bool pending_has_completed();
3869
3870 rgw_raw_obj last_written_obj;
3871
3872 protected:
3873 uint64_t obj_len{0};
3874
3875 set<rgw_raw_obj> written_objs;
3876 rgw_obj head_obj;
3877
3878 void add_written_obj(const rgw_raw_obj& obj) {
3879 written_objs.insert(obj);
3880 }
3881
3882 int drain_pending();
3883 int handle_obj_data(rgw_raw_obj& obj, bufferlist& bl, off_t ofs, off_t abs_ofs, void **phandle, bool exclusive);
3884
3885 public:
3886 int prepare(RGWRados *store, string *oid_rand) override;
3887 int throttle_data(void *handle, const rgw_raw_obj& obj, uint64_t size, bool need_to_wait) override;
3888
3889 RGWPutObjProcessor_Aio(RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info) : RGWPutObjProcessor(obj_ctx, bucket_info) {}
3890 ~RGWPutObjProcessor_Aio() override;
3891 }; /* RGWPutObjProcessor_Aio */
3892
3893 class RGWPutObjProcessor_Atomic : public RGWPutObjProcessor_Aio
3894 {
3895 bufferlist first_chunk;
3896 uint64_t part_size;
3897 off_t cur_part_ofs;
3898 off_t next_part_ofs;
3899 int cur_part_id;
3900 off_t data_ofs;
3901
3902 bufferlist pending_data_bl;
3903 uint64_t max_chunk_size;
3904
3905 bool versioned_object;
3906 uint64_t olh_epoch;
3907 string version_id;
3908
3909 protected:
3910 rgw_bucket bucket;
3911 string obj_str;
3912
3913 string unique_tag;
3914
3915 rgw_raw_obj cur_obj;
3916 RGWObjManifest manifest;
3917 RGWObjManifest::generator manifest_gen;
3918
3919 int write_data(bufferlist& bl, off_t ofs, void **phandle, rgw_raw_obj *pobj, bool exclusive);
3920 int do_complete(size_t accounted_size, const string& etag,
3921 ceph::real_time *mtime, ceph::real_time set_mtime,
3922 map<string, bufferlist>& attrs, ceph::real_time delete_at,
3923 const char *if_match, const char *if_nomatch, const string *user_data, rgw_zone_set *zones_trace) override;
3924
3925 int prepare_next_part(off_t ofs);
3926 int complete_parts();
3927 int complete_writing_data();
3928
3929 int prepare_init(RGWRados *store, string *oid_rand);
3930
3931 public:
3932 ~RGWPutObjProcessor_Atomic() override {}
3933 RGWPutObjProcessor_Atomic(RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info,
3934 rgw_bucket& _b, const string& _o, uint64_t _p, const string& _t, bool versioned) :
3935 RGWPutObjProcessor_Aio(obj_ctx, bucket_info),
3936 part_size(_p),
3937 cur_part_ofs(0),
3938 next_part_ofs(_p),
3939 cur_part_id(0),
3940 data_ofs(0),
3941 max_chunk_size(0),
3942 versioned_object(versioned),
3943 olh_epoch(0),
3944 bucket(_b),
3945 obj_str(_o),
3946 unique_tag(_t) {}
3947 int prepare(RGWRados *store, string *oid_rand) override;
3948 virtual bool immutable_head() { return false; }
3949 int handle_data(bufferlist& bl, off_t ofs, void **phandle, rgw_raw_obj *pobj, bool *again) override;
3950
3951 void set_olh_epoch(uint64_t epoch) {
3952 olh_epoch = epoch;
3953 }
3954
3955 void set_version_id(const string& vid) {
3956 version_id = vid;
3957 }
3958 }; /* RGWPutObjProcessor_Atomic */
3959
#define MP_META_SUFFIX ".meta"

/**
 * Builds and parses the names used for one multipart upload.
 *
 * For a key <oid> and upload id <upload_id>:
 *   - meta name:  "<oid>.<upload_id>" MP_META_SUFFIX
 *   - part name:  "<oid>.<part_unique_str>.<part>"
 * where part_unique_str defaults to the upload id.
 */
class RGWMPObj {
  std::string oid;
  std::string prefix;     // "<oid>.<part_unique_str>"
  std::string meta;       // "<oid>.<upload_id>.meta"
  std::string upload_id;
public:
  RGWMPObj() {}
  RGWMPObj(const std::string& _oid, const std::string& _upload_id) {
    init(_oid, _upload_id, _upload_id);
  }

  /** Initialize using the upload id as the part prefix string as well. */
  void init(const std::string& _oid, const std::string& _upload_id) {
    init(_oid, _upload_id, _upload_id);
  }

  /**
   * Set key, upload id, meta name and part prefix.
   * An empty _oid resets the object to the cleared state instead.
   */
  void init(const std::string& _oid, const std::string& _upload_id,
            const std::string& part_unique_str) {
    if (_oid.empty()) {
      clear();
      return;
    }
    oid = _oid;
    upload_id = _upload_id;
    prefix = oid + ".";
    meta = prefix + upload_id + MP_META_SUFFIX;
    prefix.append(part_unique_str);
  }

  std::string& get_meta() { return meta; }

  /** Part name for a numeric part: "<prefix>.<num>". */
  std::string get_part(int num) {
    return prefix + "." + std::to_string(num);
  }

  /** Part name for a textual part id: "<prefix>.<part>". */
  std::string get_part(const std::string& part) {
    std::string s = prefix;
    s.append(".");
    s.append(part);
    return s;
  }

  std::string& get_upload_id() {
    return upload_id;
  }

  std::string& get_key() {
    return oid;
  }

  /**
   * Parse a meta name of the form "<key>.<upload_id>.<suffix>".
   *
   * The text after the last '.' is treated as the suffix (it is not
   * compared against MP_META_SUFFIX); the text between the last two dots is
   * the upload id and everything before is the key.
   *
   * @return false, leaving the object untouched, when fewer than two dots
   *         are present; true otherwise.  If the parsed key is empty the
   *         object ends up cleared (see init()) and true is still returned.
   */
  bool from_meta(const std::string& meta) {
    // Note: the parameter intentionally keeps its historical name and
    // shadows the member; the member is only written through init().
    const std::string::size_type end_pos = meta.rfind('.'); // search for ".meta"
    if (end_pos == std::string::npos || end_pos == 0) {
      // end_pos == 0: nothing precedes the dot, and "end_pos - 1" would
      // wrap around to npos and make the next rfind match the same dot.
      return false;
    }
    const std::string::size_type mid_pos = meta.rfind('.', end_pos - 1); // <key>.<upload_id>
    if (mid_pos == std::string::npos) {
      return false;
    }
    // Copy out the pieces first: `meta` may alias this object's own members.
    const std::string key = meta.substr(0, mid_pos);
    const std::string id = meta.substr(mid_pos + 1, end_pos - mid_pos - 1);
    init(key, id, id);
    return true;
  }

  /** Reset every name to the empty string. */
  void clear() {
    oid.clear();
    prefix.clear();
    meta.clear();
    upload_id.clear();
  }
};
4025
4026 class RGWPutObjProcessor_Multipart : public RGWPutObjProcessor_Atomic
4027 {
4028 string part_num;
4029 RGWMPObj mp;
4030 req_state *s;
4031 string upload_id;
4032
4033 protected:
4034 int prepare(RGWRados *store, string *oid_rand);
4035 int do_complete(size_t accounted_size, const string& etag,
4036 ceph::real_time *mtime, ceph::real_time set_mtime,
4037 map<string, bufferlist>& attrs, ceph::real_time delete_at,
4038 const char *if_match, const char *if_nomatch, const string *user_data,
4039 rgw_zone_set *zones_trace) override;
4040 public:
4041 bool immutable_head() { return true; }
4042 RGWPutObjProcessor_Multipart(RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info, uint64_t _p, req_state *_s) :
4043 RGWPutObjProcessor_Atomic(obj_ctx, bucket_info, _s->bucket, _s->object.name, _p, _s->req_id, false), s(_s) {}
4044 void get_mp(RGWMPObj** _mp);
4045 }; /* RGWPutObjProcessor_Multipart */
4046 #endif