]> git.proxmox.com Git - ceph.git/blob - ceph/src/rgw/rgw_rados.h
update sources to v12.1.1
[ceph.git] / ceph / src / rgw / rgw_rados.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #ifndef CEPH_RGWRADOS_H
5 #define CEPH_RGWRADOS_H
6
7 #include <functional>
8
9 #include "include/rados/librados.hpp"
10 #include "include/Context.h"
11 #include "common/RefCountedObj.h"
12 #include "common/RWLock.h"
13 #include "common/ceph_time.h"
14 #include "common/lru_map.h"
15 #include "rgw_common.h"
16 #include "cls/rgw/cls_rgw_types.h"
17 #include "cls/version/cls_version_types.h"
18 #include "cls/log/cls_log_types.h"
19 #include "cls/statelog/cls_statelog_types.h"
20 #include "cls/timeindex/cls_timeindex_types.h"
21 #include "rgw_log.h"
22 #include "rgw_metadata.h"
23 #include "rgw_meta_sync_status.h"
24 #include "rgw_period_puller.h"
25 #include "rgw_sync_module.h"
26
27 class RGWWatcher;
28 class SafeTimer;
29 class ACLOwner;
30 class RGWGC;
31 class RGWMetaNotifier;
32 class RGWDataNotifier;
33 class RGWLC;
34 class RGWObjectExpirer;
35 class RGWMetaSyncProcessorThread;
36 class RGWDataSyncProcessorThread;
37 class RGWSyncLogTrimThread;
38 class RGWRESTConn;
39 struct RGWZoneGroup;
40 struct RGWZoneParams;
41 class RGWReshard;
42 class RGWReshardWait;
43
44 /* flags for put_obj_meta() */
45 #define PUT_OBJ_CREATE 0x01
46 #define PUT_OBJ_EXCL 0x02
47 #define PUT_OBJ_CREATE_EXCL (PUT_OBJ_CREATE | PUT_OBJ_EXCL)
48
49 #define RGW_OBJ_NS_MULTIPART "multipart"
50 #define RGW_OBJ_NS_SHADOW "shadow"
51
52 #define RGW_BUCKET_INSTANCE_MD_PREFIX ".bucket.meta."
53
54 #define RGW_NO_SHARD -1
55
56 #define RGW_SHARDS_PRIME_0 7877
57 #define RGW_SHARDS_PRIME_1 65521
58
59 static inline int rgw_shards_mod(unsigned hval, int max_shards)
60 {
61 if (max_shards <= RGW_SHARDS_PRIME_0) {
62 return hval % RGW_SHARDS_PRIME_0 % max_shards;
63 }
64 return hval % RGW_SHARDS_PRIME_1 % max_shards;
65 }
66
67 static inline int rgw_shards_hash(const string& key, int max_shards)
68 {
69 return rgw_shards_mod(ceph_str_hash_linux(key.c_str(), key.size()), max_shards);
70 }
71
72 static inline int rgw_shards_max()
73 {
74 return RGW_SHARDS_PRIME_1;
75 }
76
77 static inline void prepend_bucket_marker(const rgw_bucket& bucket, const string& orig_oid, string& oid)
78 {
79 if (bucket.marker.empty() || orig_oid.empty()) {
80 oid = orig_oid;
81 } else {
82 oid = bucket.marker;
83 oid.append("_");
84 oid.append(orig_oid);
85 }
86 }
87
88 static inline void get_obj_bucket_and_oid_loc(const rgw_obj& obj, string& oid, string& locator)
89 {
90 const rgw_bucket& bucket = obj.bucket;
91 prepend_bucket_marker(bucket, obj.get_oid(), oid);
92 const string& loc = obj.key.get_loc();
93 if (!loc.empty()) {
94 prepend_bucket_marker(bucket, loc, locator);
95 } else {
96 locator.clear();
97 }
98 }
99
100 int rgw_init_ioctx(librados::Rados *rados, const rgw_pool& pool, librados::IoCtx& ioctx, bool create = false);
101
102 int rgw_policy_from_attrset(CephContext *cct, map<string, bufferlist>& attrset, RGWAccessControlPolicy *policy);
103
104 static inline bool rgw_raw_obj_to_obj(const rgw_bucket& bucket, const rgw_raw_obj& raw_obj, rgw_obj *obj)
105 {
106 ssize_t pos = raw_obj.oid.find('_');
107 if (pos < 0) {
108 return false;
109 }
110
111 if (!rgw_obj_key::parse_raw_oid(raw_obj.oid.substr(pos + 1), &obj->key)) {
112 return false;
113 }
114 obj->bucket = bucket;
115
116 return true;
117 }
118
119 struct rgw_bucket_placement {
120 string placement_rule;
121 rgw_bucket bucket;
122
123 void dump(Formatter *f) const;
124 };
125
126 class rgw_obj_select {
127 string placement_rule;
128 rgw_obj obj;
129 rgw_raw_obj raw_obj;
130 bool is_raw;
131
132 public:
133 rgw_obj_select() : is_raw(false) {}
134 rgw_obj_select(const rgw_obj& _obj) : obj(_obj), is_raw(false) {}
135 rgw_obj_select(const rgw_raw_obj& _raw_obj) : raw_obj(_raw_obj), is_raw(true) {}
136 rgw_obj_select(const rgw_obj_select& rhs) {
137 is_raw = rhs.is_raw;
138 if (is_raw) {
139 raw_obj = rhs.raw_obj;
140 } else {
141 obj = rhs.obj;
142 }
143 }
144
145 rgw_raw_obj get_raw_obj(const RGWZoneGroup& zonegroup, const RGWZoneParams& zone_params) const;
146 rgw_raw_obj get_raw_obj(RGWRados *store) const;
147
148 rgw_obj_select& operator=(const rgw_obj& rhs) {
149 obj = rhs;
150 is_raw = false;
151 return *this;
152 }
153
154 rgw_obj_select& operator=(const rgw_raw_obj& rhs) {
155 raw_obj = rhs;
156 is_raw = true;
157 return *this;
158 }
159
160 void set_placement_rule(const string& rule) {
161 placement_rule = rule;
162 }
163 };
164
165 struct compression_block {
166 uint64_t old_ofs;
167 uint64_t new_ofs;
168 uint64_t len;
169
170 void encode(bufferlist& bl) const {
171 ENCODE_START(1, 1, bl);
172 ::encode(old_ofs, bl);
173 ::encode(new_ofs, bl);
174 ::encode(len, bl);
175 ENCODE_FINISH(bl);
176 }
177
178 void decode(bufferlist::iterator& bl) {
179 DECODE_START(1, bl);
180 ::decode(old_ofs, bl);
181 ::decode(new_ofs, bl);
182 ::decode(len, bl);
183 DECODE_FINISH(bl);
184 }
185 };
186 WRITE_CLASS_ENCODER(compression_block)
187
188 struct RGWCompressionInfo {
189 string compression_type;
190 uint64_t orig_size;
191 vector<compression_block> blocks;
192
193 RGWCompressionInfo() : compression_type("none"), orig_size(0) {}
194 RGWCompressionInfo(const RGWCompressionInfo& cs_info) : compression_type(cs_info.compression_type),
195 orig_size(cs_info.orig_size),
196 blocks(cs_info.blocks) {}
197
198 void encode(bufferlist& bl) const {
199 ENCODE_START(1, 1, bl);
200 ::encode(compression_type, bl);
201 ::encode(orig_size, bl);
202 ::encode(blocks, bl);
203 ENCODE_FINISH(bl);
204 }
205
206 void decode(bufferlist::iterator& bl) {
207 DECODE_START(1, bl);
208 ::decode(compression_type, bl);
209 ::decode(orig_size, bl);
210 ::decode(blocks, bl);
211 DECODE_FINISH(bl);
212 }
213 };
214 WRITE_CLASS_ENCODER(RGWCompressionInfo)
215
216 int rgw_compression_info_from_attrset(map<string, bufferlist>& attrs, bool& need_decompress, RGWCompressionInfo& cs_info);
217
218 struct RGWOLHInfo {
219 rgw_obj target;
220 bool removed;
221
222 RGWOLHInfo() : removed(false) {}
223
224 void encode(bufferlist& bl) const {
225 ENCODE_START(1, 1, bl);
226 ::encode(target, bl);
227 ::encode(removed, bl);
228 ENCODE_FINISH(bl);
229 }
230
231 void decode(bufferlist::iterator& bl) {
232 DECODE_START(1, bl);
233 ::decode(target, bl);
234 ::decode(removed, bl);
235 DECODE_FINISH(bl);
236 }
237 static void generate_test_instances(list<RGWOLHInfo*>& o);
238 void dump(Formatter *f) const;
239 };
240 WRITE_CLASS_ENCODER(RGWOLHInfo)
241
242 struct RGWOLHPendingInfo {
243 ceph::real_time time;
244
245 RGWOLHPendingInfo() {}
246
247 void encode(bufferlist& bl) const {
248 ENCODE_START(1, 1, bl);
249 ::encode(time, bl);
250 ENCODE_FINISH(bl);
251 }
252
253 void decode(bufferlist::iterator& bl) {
254 DECODE_START(1, bl);
255 ::decode(time, bl);
256 DECODE_FINISH(bl);
257 }
258
259 void dump(Formatter *f) const;
260 };
261 WRITE_CLASS_ENCODER(RGWOLHPendingInfo)
262
263 struct RGWUsageBatch {
264 map<ceph::real_time, rgw_usage_log_entry> m;
265
266 void insert(ceph::real_time& t, rgw_usage_log_entry& entry, bool *account) {
267 bool exists = m.find(t) != m.end();
268 *account = !exists;
269 m[t].aggregate(entry);
270 }
271 };
272
/* Iteration state for reading usage: a read marker string and an index. */
struct RGWUsageIter {
  string read_iter;
  uint32_t index{0};

  /* starts with an empty marker and index 0 */
  RGWUsageIter() {}
};
279
280 class RGWGetDataCB {
281 protected:
282 uint64_t extra_data_len;
283 public:
284 virtual int handle_data(bufferlist& bl, off_t bl_ofs, off_t bl_len) = 0;
285 RGWGetDataCB() : extra_data_len(0) {}
286 virtual ~RGWGetDataCB() {}
287 virtual void set_extra_data_len(uint64_t len) {
288 extra_data_len = len;
289 }
290 /**
291 * Flushes any cached data. Used by RGWGetObjFilter.
292 * Return logic same as handle_data.
293 */
294 virtual int flush() {
295 return 0;
296 }
297 /**
298 * Allows to extend fetch range of RGW object. Used by RGWGetObjFilter.
299 */
300 virtual int fixup_range(off_t& bl_ofs, off_t& bl_end) {
301 return 0;
302 }
303 };
304
305 class RGWAccessListFilter {
306 public:
307 virtual ~RGWAccessListFilter() {}
308 virtual bool filter(string& name, string& key) = 0;
309 };
310
311 struct RGWCloneRangeInfo {
312 rgw_obj src;
313 off_t src_ofs;
314 off_t dst_ofs;
315 uint64_t len;
316 };
317
318 struct RGWObjManifestPart {
319 rgw_obj loc; /* the object where the data is located */
320 uint64_t loc_ofs; /* the offset at that object where the data is located */
321 uint64_t size; /* the part size */
322
323 RGWObjManifestPart() : loc_ofs(0), size(0) {}
324
325 void encode(bufferlist& bl) const {
326 ENCODE_START(2, 2, bl);
327 ::encode(loc, bl);
328 ::encode(loc_ofs, bl);
329 ::encode(size, bl);
330 ENCODE_FINISH(bl);
331 }
332
333 void decode(bufferlist::iterator& bl) {
334 DECODE_START_LEGACY_COMPAT_LEN_32(2, 2, 2, bl);
335 ::decode(loc, bl);
336 ::decode(loc_ofs, bl);
337 ::decode(size, bl);
338 DECODE_FINISH(bl);
339 }
340
341 void dump(Formatter *f) const;
342 static void generate_test_instances(list<RGWObjManifestPart*>& o);
343 };
344 WRITE_CLASS_ENCODER(RGWObjManifestPart)
345
346 /*
347 The manifest defines a set of rules for structuring the object parts.
348 There are a few terms to note:
349 - head: the head part of the object, which is the part that contains
350 the first chunk of data. An object might not have a head (as in the
351 case of multipart-part objects).
352 - stripe: data portion of a single rgw object that resides on a single
353 rados object.
354 - part: a collection of stripes that make a contiguous part of an
355 object. A regular object will only have one part (although might have
356 many stripes), a multipart object might have many parts. Each part
357 has a fixed stripe size, although the last stripe of a part might
358 be smaller than that. Consecutive parts may be merged if their stripe
359 value is the same.
360 */
361
362 struct RGWObjManifestRule {
363 uint32_t start_part_num;
364 uint64_t start_ofs;
365 uint64_t part_size; /* each part size, 0 if there's no part size, meaning it's unlimited */
366 uint64_t stripe_max_size; /* underlying obj max size */
367 string override_prefix;
368
369 RGWObjManifestRule() : start_part_num(0), start_ofs(0), part_size(0), stripe_max_size(0) {}
370 RGWObjManifestRule(uint32_t _start_part_num, uint64_t _start_ofs, uint64_t _part_size, uint64_t _stripe_max_size) :
371 start_part_num(_start_part_num), start_ofs(_start_ofs), part_size(_part_size), stripe_max_size(_stripe_max_size) {}
372
373 void encode(bufferlist& bl) const {
374 ENCODE_START(2, 1, bl);
375 ::encode(start_part_num, bl);
376 ::encode(start_ofs, bl);
377 ::encode(part_size, bl);
378 ::encode(stripe_max_size, bl);
379 ::encode(override_prefix, bl);
380 ENCODE_FINISH(bl);
381 }
382
383 void decode(bufferlist::iterator& bl) {
384 DECODE_START(2, bl);
385 ::decode(start_part_num, bl);
386 ::decode(start_ofs, bl);
387 ::decode(part_size, bl);
388 ::decode(stripe_max_size, bl);
389 if (struct_v >= 2)
390 ::decode(override_prefix, bl);
391 DECODE_FINISH(bl);
392 }
393 void dump(Formatter *f) const;
394 };
395 WRITE_CLASS_ENCODER(RGWObjManifestRule)
396
397 class RGWObjManifest {
398 protected:
399 bool explicit_objs; /* old manifest? */
400 map<uint64_t, RGWObjManifestPart> objs;
401
402 uint64_t obj_size;
403
404 rgw_obj obj;
405 uint64_t head_size;
406 string head_placement_rule;
407
408 uint64_t max_head_size;
409 string prefix;
410 rgw_bucket_placement tail_placement; /* might be different than the original bucket,
411 as object might have been copied across pools */
412 map<uint64_t, RGWObjManifestRule> rules;
413
414 string tail_instance; /* tail object's instance */
415
416 void convert_to_explicit(const RGWZoneGroup& zonegroup, const RGWZoneParams& zone_params);
417 int append_explicit(RGWObjManifest& m, const RGWZoneGroup& zonegroup, const RGWZoneParams& zone_params);
418 void append_rules(RGWObjManifest& m, map<uint64_t, RGWObjManifestRule>::iterator& iter, string *override_prefix);
419
420 void update_iterators() {
421 begin_iter.seek(0);
422 end_iter.seek(obj_size);
423 }
424 public:
425
426 RGWObjManifest() : explicit_objs(false), obj_size(0), head_size(0), max_head_size(0),
427 begin_iter(this), end_iter(this) {}
428 RGWObjManifest(const RGWObjManifest& rhs) {
429 *this = rhs;
430 }
431 RGWObjManifest& operator=(const RGWObjManifest& rhs) {
432 explicit_objs = rhs.explicit_objs;
433 objs = rhs.objs;
434 obj_size = rhs.obj_size;
435 obj = rhs.obj;
436 head_size = rhs.head_size;
437 max_head_size = rhs.max_head_size;
438 prefix = rhs.prefix;
439 tail_placement = rhs.tail_placement;
440 rules = rhs.rules;
441 tail_instance = rhs.tail_instance;
442
443 begin_iter.set_manifest(this);
444 end_iter.set_manifest(this);
445
446 begin_iter.seek(rhs.begin_iter.get_ofs());
447 end_iter.seek(rhs.end_iter.get_ofs());
448
449 return *this;
450 }
451
452 map<uint64_t, RGWObjManifestPart>& get_explicit_objs() {
453 return objs;
454 }
455
456
457 void set_explicit(uint64_t _size, map<uint64_t, RGWObjManifestPart>& _objs) {
458 explicit_objs = true;
459 obj_size = _size;
460 objs.swap(_objs);
461 }
462
463 void get_implicit_location(uint64_t cur_part_id, uint64_t cur_stripe, uint64_t ofs, string *override_prefix, rgw_obj_select *location);
464
465 void set_trivial_rule(uint64_t tail_ofs, uint64_t stripe_max_size) {
466 RGWObjManifestRule rule(0, tail_ofs, 0, stripe_max_size);
467 rules[0] = rule;
468 max_head_size = tail_ofs;
469 }
470
471 void set_multipart_part_rule(uint64_t stripe_max_size, uint64_t part_num) {
472 RGWObjManifestRule rule(0, 0, 0, stripe_max_size);
473 rule.start_part_num = part_num;
474 rules[0] = rule;
475 max_head_size = 0;
476 }
477
478 void encode(bufferlist& bl) const {
479 ENCODE_START(7, 6, bl);
480 ::encode(obj_size, bl);
481 ::encode(objs, bl);
482 ::encode(explicit_objs, bl);
483 ::encode(obj, bl);
484 ::encode(head_size, bl);
485 ::encode(max_head_size, bl);
486 ::encode(prefix, bl);
487 ::encode(rules, bl);
488 bool encode_tail_bucket = !(tail_placement.bucket == obj.bucket);
489 ::encode(encode_tail_bucket, bl);
490 if (encode_tail_bucket) {
491 ::encode(tail_placement.bucket, bl);
492 }
493 bool encode_tail_instance = (tail_instance != obj.key.instance);
494 ::encode(encode_tail_instance, bl);
495 if (encode_tail_instance) {
496 ::encode(tail_instance, bl);
497 }
498 ::encode(head_placement_rule, bl);
499 ::encode(tail_placement.placement_rule, bl);
500 ENCODE_FINISH(bl);
501 }
502
503 void decode(bufferlist::iterator& bl) {
504 DECODE_START_LEGACY_COMPAT_LEN_32(7, 2, 2, bl);
505 ::decode(obj_size, bl);
506 ::decode(objs, bl);
507 if (struct_v >= 3) {
508 ::decode(explicit_objs, bl);
509 ::decode(obj, bl);
510 ::decode(head_size, bl);
511 ::decode(max_head_size, bl);
512 ::decode(prefix, bl);
513 ::decode(rules, bl);
514 } else {
515 explicit_objs = true;
516 if (!objs.empty()) {
517 map<uint64_t, RGWObjManifestPart>::iterator iter = objs.begin();
518 obj = iter->second.loc;
519 head_size = iter->second.size;
520 max_head_size = head_size;
521 }
522 }
523
524 if (explicit_objs && head_size > 0 && !objs.empty()) {
525 /* patch up manifest due to issue 16435:
526 * the first object in the explicit objs list might not be the one we need to access, use the
527 * head object instead if set. This would happen if we had an old object that was created
528 * when the explicit objs manifest was around, and it got copied.
529 */
530 rgw_obj& obj_0 = objs[0].loc;
531 if (!obj_0.get_oid().empty() && obj_0.key.ns.empty()) {
532 objs[0].loc = obj;
533 objs[0].size = head_size;
534 }
535 }
536
537 if (struct_v >= 4) {
538 if (struct_v < 6) {
539 ::decode(tail_placement.bucket, bl);
540 } else {
541 bool need_to_decode;
542 ::decode(need_to_decode, bl);
543 if (need_to_decode) {
544 ::decode(tail_placement.bucket, bl);
545 } else {
546 tail_placement.bucket = obj.bucket;
547 }
548 }
549 }
550
551 if (struct_v >= 5) {
552 if (struct_v < 6) {
553 ::decode(tail_instance, bl);
554 } else {
555 bool need_to_decode;
556 ::decode(need_to_decode, bl);
557 if (need_to_decode) {
558 ::decode(tail_instance, bl);
559 } else {
560 tail_instance = obj.key.instance;
561 }
562 }
563 } else { // old object created before 'tail_instance' field added to manifest
564 tail_instance = obj.key.instance;
565 }
566
567 if (struct_v >= 7) {
568 ::decode(head_placement_rule, bl);
569 ::decode(tail_placement.placement_rule, bl);
570 }
571
572 update_iterators();
573 DECODE_FINISH(bl);
574 }
575
576 void dump(Formatter *f) const;
577 static void generate_test_instances(list<RGWObjManifest*>& o);
578
579 int append(RGWObjManifest& m, RGWZoneGroup& zonegroup, RGWZoneParams& zone_params);
580 int append(RGWObjManifest& m, RGWRados *store);
581
582 bool get_rule(uint64_t ofs, RGWObjManifestRule *rule);
583
584 bool empty() {
585 if (explicit_objs)
586 return objs.empty();
587 return rules.empty();
588 }
589
590 bool has_explicit_objs() {
591 return explicit_objs;
592 }
593
594 bool has_tail() {
595 if (explicit_objs) {
596 if (objs.size() == 1) {
597 map<uint64_t, RGWObjManifestPart>::iterator iter = objs.begin();
598 rgw_obj& o = iter->second.loc;
599 return !(obj == o);
600 }
601 return (objs.size() >= 2);
602 }
603 return (obj_size > head_size);
604 }
605
606 void set_head(const string& placement_rule, const rgw_obj& _o, uint64_t _s) {
607 head_placement_rule = placement_rule;
608 obj = _o;
609 head_size = _s;
610
611 if (explicit_objs && head_size > 0) {
612 objs[0].loc = obj;
613 objs[0].size = head_size;
614 }
615 }
616
617 const rgw_obj& get_obj() {
618 return obj;
619 }
620
621 void set_tail_placement(const string& placement_rule, const rgw_bucket& _b) {
622 tail_placement.placement_rule = placement_rule;
623 tail_placement.bucket = _b;
624 }
625
626 const rgw_bucket_placement& get_tail_placement() {
627 return tail_placement;
628 }
629
630 const string& get_head_placement_rule() {
631 return head_placement_rule;
632 }
633
634 void set_prefix(const string& _p) {
635 prefix = _p;
636 }
637
638 const string& get_prefix() {
639 return prefix;
640 }
641
642 void set_tail_instance(const string& _ti) {
643 tail_instance = _ti;
644 }
645
646 const string& get_tail_instance() {
647 return tail_instance;
648 }
649
650 void set_head_size(uint64_t _s) {
651 head_size = _s;
652 }
653
654 void set_obj_size(uint64_t s) {
655 obj_size = s;
656
657 update_iterators();
658 }
659
660 uint64_t get_obj_size() {
661 return obj_size;
662 }
663
664 uint64_t get_head_size() {
665 return head_size;
666 }
667
668 void set_max_head_size(uint64_t s) {
669 max_head_size = s;
670 }
671
672 uint64_t get_max_head_size() {
673 return max_head_size;
674 }
675
676 class obj_iterator {
677 RGWObjManifest *manifest;
678 uint64_t part_ofs; /* where current part starts */
679 uint64_t stripe_ofs; /* where current stripe starts */
680 uint64_t ofs; /* current position within the object */
681 uint64_t stripe_size; /* current part size */
682
683 int cur_part_id;
684 int cur_stripe;
685 string cur_override_prefix;
686
687 rgw_obj_select location;
688
689 map<uint64_t, RGWObjManifestRule>::iterator rule_iter;
690 map<uint64_t, RGWObjManifestRule>::iterator next_rule_iter;
691
692 map<uint64_t, RGWObjManifestPart>::iterator explicit_iter;
693
694 void init() {
695 part_ofs = 0;
696 stripe_ofs = 0;
697 ofs = 0;
698 stripe_size = 0;
699 cur_part_id = 0;
700 cur_stripe = 0;
701 }
702
703 void update_explicit_pos();
704
705
706 protected:
707
708 void set_manifest(RGWObjManifest *m) {
709 manifest = m;
710 }
711
712 public:
713 obj_iterator() : manifest(NULL) {
714 init();
715 }
716 explicit obj_iterator(RGWObjManifest *_m) : manifest(_m) {
717 init();
718 if (!manifest->empty()) {
719 seek(0);
720 }
721 }
722 obj_iterator(RGWObjManifest *_m, uint64_t _ofs) : manifest(_m) {
723 init();
724 if (!manifest->empty()) {
725 seek(_ofs);
726 }
727 }
728 void seek(uint64_t ofs);
729
730 void operator++();
731 bool operator==(const obj_iterator& rhs) {
732 return (ofs == rhs.ofs);
733 }
734 bool operator!=(const obj_iterator& rhs) {
735 return (ofs != rhs.ofs);
736 }
737 const rgw_obj_select& get_location() {
738 return location;
739 }
740
741 /* start of current stripe */
742 uint64_t get_stripe_ofs() {
743 if (manifest->explicit_objs) {
744 return explicit_iter->first;
745 }
746 return stripe_ofs;
747 }
748
749 /* current ofs relative to start of rgw object */
750 uint64_t get_ofs() const {
751 return ofs;
752 }
753
754 /* stripe number */
755 int get_cur_stripe() const {
756 return cur_stripe;
757 }
758
759 /* current stripe size */
760 uint64_t get_stripe_size() {
761 if (manifest->explicit_objs) {
762 return explicit_iter->second.size;
763 }
764 return stripe_size;
765 }
766
767 /* offset where data starts within current stripe */
768 uint64_t location_ofs() {
769 if (manifest->explicit_objs) {
770 return explicit_iter->second.loc_ofs;
771 }
772 return 0; /* all stripes start at zero offset */
773 }
774
775 void update_location();
776
777 friend class RGWObjManifest;
778 };
779
780 const obj_iterator& obj_begin();
781 const obj_iterator& obj_end();
782 obj_iterator obj_find(uint64_t ofs);
783
784 obj_iterator begin_iter;
785 obj_iterator end_iter;
786
787 /*
788 * simple object generator. Using a simple single rule manifest.
789 */
790 class generator {
791 RGWObjManifest *manifest;
792 uint64_t last_ofs;
793 uint64_t cur_part_ofs;
794 int cur_part_id;
795 int cur_stripe;
796 uint64_t cur_stripe_size;
797 string cur_oid;
798
799 string oid_prefix;
800
801 rgw_obj_select cur_obj;
802
803 RGWObjManifestRule rule;
804
805 public:
806 generator() : manifest(NULL), last_ofs(0), cur_part_ofs(0), cur_part_id(0),
807 cur_stripe(0), cur_stripe_size(0) {}
808 int create_begin(CephContext *cct, RGWObjManifest *manifest, const string& placement_rule, rgw_bucket& bucket, rgw_obj& obj);
809
810 int create_next(uint64_t ofs);
811
812 rgw_raw_obj get_cur_obj(RGWZoneGroup& zonegroup, RGWZoneParams& zone_params) { return cur_obj.get_raw_obj(zonegroup, zone_params); }
813 rgw_raw_obj get_cur_obj(RGWRados *store) { return cur_obj.get_raw_obj(store); }
814
815 /* total max size of current stripe (including head obj) */
816 uint64_t cur_stripe_max_size() {
817 return cur_stripe_size;
818 }
819 };
820 };
821 WRITE_CLASS_ENCODER(RGWObjManifest)
822
823 struct RGWUploadPartInfo {
824 uint32_t num;
825 uint64_t size;
826 uint64_t accounted_size{0};
827 string etag;
828 ceph::real_time modified;
829 RGWObjManifest manifest;
830 RGWCompressionInfo cs_info;
831
832 RGWUploadPartInfo() : num(0), size(0) {}
833
834 void encode(bufferlist& bl) const {
835 ENCODE_START(4, 2, bl);
836 ::encode(num, bl);
837 ::encode(size, bl);
838 ::encode(etag, bl);
839 ::encode(modified, bl);
840 ::encode(manifest, bl);
841 ::encode(cs_info, bl);
842 ::encode(accounted_size, bl);
843 ENCODE_FINISH(bl);
844 }
845 void decode(bufferlist::iterator& bl) {
846 DECODE_START_LEGACY_COMPAT_LEN(4, 2, 2, bl);
847 ::decode(num, bl);
848 ::decode(size, bl);
849 ::decode(etag, bl);
850 ::decode(modified, bl);
851 if (struct_v >= 3)
852 ::decode(manifest, bl);
853 if (struct_v >= 4) {
854 ::decode(cs_info, bl);
855 ::decode(accounted_size, bl);
856 } else {
857 accounted_size = size;
858 }
859 DECODE_FINISH(bl);
860 }
861 void dump(Formatter *f) const;
862 static void generate_test_instances(list<RGWUploadPartInfo*>& o);
863 };
864 WRITE_CLASS_ENCODER(RGWUploadPartInfo)
865
866 struct RGWObjState {
867 rgw_obj obj;
868 bool is_atomic;
869 bool has_attrs;
870 bool exists;
871 uint64_t size; //< size of raw object
872 uint64_t accounted_size{0}; //< size before compression, encryption
873 ceph::real_time mtime;
874 uint64_t epoch;
875 bufferlist obj_tag;
876 string write_tag;
877 bool fake_tag;
878 RGWObjManifest manifest;
879 bool has_manifest;
880 string shadow_obj;
881 bool has_data;
882 bufferlist data;
883 bool prefetch_data;
884 bool keep_tail;
885 bool is_olh;
886 bufferlist olh_tag;
887 uint64_t pg_ver;
888 uint32_t zone_short_id;
889
890 /* important! don't forget to update copy constructor */
891
892 RGWObjVersionTracker objv_tracker;
893
894 map<string, bufferlist> attrset;
895 RGWObjState() : is_atomic(false), has_attrs(0), exists(false),
896 size(0), epoch(0), fake_tag(false), has_manifest(false),
897 has_data(false), prefetch_data(false), keep_tail(false), is_olh(false),
898 pg_ver(0), zone_short_id(0) {}
899 RGWObjState(const RGWObjState& rhs) : obj (rhs.obj) {
900 is_atomic = rhs.is_atomic;
901 has_attrs = rhs.has_attrs;
902 exists = rhs.exists;
903 size = rhs.size;
904 accounted_size = rhs.accounted_size;
905 mtime = rhs.mtime;
906 epoch = rhs.epoch;
907 if (rhs.obj_tag.length()) {
908 obj_tag = rhs.obj_tag;
909 }
910 write_tag = rhs.write_tag;
911 fake_tag = rhs.fake_tag;
912 if (rhs.has_manifest) {
913 manifest = rhs.manifest;
914 }
915 has_manifest = rhs.has_manifest;
916 shadow_obj = rhs.shadow_obj;
917 has_data = rhs.has_data;
918 if (rhs.data.length()) {
919 data = rhs.data;
920 }
921 prefetch_data = rhs.prefetch_data;
922 keep_tail = rhs.keep_tail;
923 is_olh = rhs.is_olh;
924 objv_tracker = rhs.objv_tracker;
925 pg_ver = rhs.pg_ver;
926 }
927
928 bool get_attr(string name, bufferlist& dest) {
929 map<string, bufferlist>::iterator iter = attrset.find(name);
930 if (iter != attrset.end()) {
931 dest = iter->second;
932 return true;
933 }
934 return false;
935 }
936 };
937
938 struct RGWRawObjState {
939 rgw_raw_obj obj;
940 bool has_attrs{false};
941 bool exists{false};
942 uint64_t size{0};
943 ceph::real_time mtime;
944 uint64_t epoch;
945 bufferlist obj_tag;
946 bool has_data{false};
947 bufferlist data;
948 bool prefetch_data{false};
949 uint64_t pg_ver{0};
950
951 /* important! don't forget to update copy constructor */
952
953 RGWObjVersionTracker objv_tracker;
954
955 map<string, bufferlist> attrset;
956 RGWRawObjState() {}
957 RGWRawObjState(const RGWRawObjState& rhs) : obj (rhs.obj) {
958 has_attrs = rhs.has_attrs;
959 exists = rhs.exists;
960 size = rhs.size;
961 mtime = rhs.mtime;
962 epoch = rhs.epoch;
963 if (rhs.obj_tag.length()) {
964 obj_tag = rhs.obj_tag;
965 }
966 has_data = rhs.has_data;
967 if (rhs.data.length()) {
968 data = rhs.data;
969 }
970 prefetch_data = rhs.prefetch_data;
971 pg_ver = rhs.pg_ver;
972 objv_tracker = rhs.objv_tracker;
973 }
974 };
975
976 struct RGWPoolIterCtx {
977 librados::IoCtx io_ctx;
978 librados::NObjectIterator iter;
979 };
980
981 struct RGWListRawObjsCtx {
982 bool initialized;
983 RGWPoolIterCtx iter_ctx;
984
985 RGWListRawObjsCtx() : initialized(false) {}
986 };
987
988 struct RGWDefaultSystemMetaObjInfo {
989 string default_id;
990
991 void encode(bufferlist& bl) const {
992 ENCODE_START(1, 1, bl);
993 ::encode(default_id, bl);
994 ENCODE_FINISH(bl);
995 }
996
997 void decode(bufferlist::iterator& bl) {
998 DECODE_START(1, bl);
999 ::decode(default_id, bl);
1000 DECODE_FINISH(bl);
1001 }
1002
1003 void dump(Formatter *f) const;
1004 void decode_json(JSONObj *obj);
1005 };
1006 WRITE_CLASS_ENCODER(RGWDefaultSystemMetaObjInfo)
1007
1008 struct RGWNameToId {
1009 string obj_id;
1010
1011 void encode(bufferlist& bl) const {
1012 ENCODE_START(1, 1, bl);
1013 ::encode(obj_id, bl);
1014 ENCODE_FINISH(bl);
1015 }
1016
1017 void decode(bufferlist::iterator& bl) {
1018 DECODE_START(1, bl);
1019 ::decode(obj_id, bl);
1020 DECODE_FINISH(bl);
1021 }
1022
1023 void dump(Formatter *f) const;
1024 void decode_json(JSONObj *obj);
1025 };
1026 WRITE_CLASS_ENCODER(RGWNameToId)
1027
1028 class RGWSystemMetaObj {
1029 protected:
1030 string id;
1031 string name;
1032
1033 CephContext *cct;
1034 RGWRados *store;
1035
1036 int store_name(bool exclusive);
1037 int store_info(bool exclusive);
1038 int read_info(const string& obj_id, bool old_format = false);
1039 int read_id(const string& obj_name, string& obj_id);
1040 int read_default(RGWDefaultSystemMetaObjInfo& default_info,
1041 const string& oid);
1042 /* read and use default id */
1043 int use_default(bool old_format = false);
1044
1045 public:
1046 RGWSystemMetaObj() : cct(NULL), store(NULL) {}
1047 RGWSystemMetaObj(const string& _name): name(_name), cct(NULL), store(NULL) {}
1048 RGWSystemMetaObj(const string& _id, const string& _name) : id(_id), name(_name), cct(NULL), store(NULL) {}
1049 RGWSystemMetaObj(CephContext *_cct, RGWRados *_store): cct(_cct), store(_store){}
1050 RGWSystemMetaObj(const string& _name, CephContext *_cct, RGWRados *_store): name(_name), cct(_cct), store(_store){}
1051 const string& get_name() const { return name; }
1052 const string& get_id() const { return id; }
1053
1054 void set_name(const string& _name) { name = _name;}
1055 void set_id(const string& _id) { id = _id;}
1056 void clear_id() { id.clear(); }
1057
1058 virtual ~RGWSystemMetaObj() {}
1059
1060 virtual void encode(bufferlist& bl) const {
1061 ENCODE_START(1, 1, bl);
1062 ::encode(id, bl);
1063 ::encode(name, bl);
1064 ENCODE_FINISH(bl);
1065 }
1066
1067 virtual void decode(bufferlist::iterator& bl) {
1068 DECODE_START(1, bl);
1069 ::decode(id, bl);
1070 ::decode(name, bl);
1071 DECODE_FINISH(bl);
1072 }
1073
1074 void reinit_instance(CephContext *_cct, RGWRados *_store) {
1075 cct = _cct;
1076 store = _store;
1077 }
1078 int init(CephContext *_cct, RGWRados *_store, bool setup_obj = true, bool old_format = false);
1079 virtual int read_default_id(string& default_id, bool old_format = false);
1080 virtual int set_as_default(bool exclusive = false);
1081 int delete_default();
1082 virtual int create(bool exclusive = true);
1083 int delete_obj(bool old_format = false);
1084 int rename(const string& new_name);
1085 int update() { return store_info(false);}
1086 int update_name() { return store_name(false);}
1087 int read();
1088 int write(bool exclusive);
1089
1090 virtual rgw_pool get_pool(CephContext *cct) = 0;
1091 virtual const string get_default_oid(bool old_format = false) = 0;
1092 virtual const string& get_names_oid_prefix() = 0;
1093 virtual const string& get_info_oid_prefix(bool old_format = false) = 0;
1094 virtual const string& get_predefined_name(CephContext *cct) = 0;
1095
1096 void dump(Formatter *f) const;
1097 void decode_json(JSONObj *obj);
1098 };
1099 WRITE_CLASS_ENCODER(RGWSystemMetaObj)
1100
1101 struct RGWZonePlacementInfo {
1102 rgw_pool index_pool;
1103 rgw_pool data_pool;
1104 rgw_pool data_extra_pool; /* if not set we should use data_pool */
1105 RGWBucketIndexType index_type;
1106 std::string compression_type;
1107
1108 RGWZonePlacementInfo() : index_type(RGWBIType_Normal) {}
1109
1110 void encode(bufferlist& bl) const {
1111 ENCODE_START(6, 1, bl);
1112 ::encode(index_pool.to_str(), bl);
1113 ::encode(data_pool.to_str(), bl);
1114 ::encode(data_extra_pool.to_str(), bl);
1115 ::encode((uint32_t)index_type, bl);
1116 ::encode(compression_type, bl);
1117 ENCODE_FINISH(bl);
1118 }
1119
1120 void decode(bufferlist::iterator& bl) {
1121 DECODE_START(6, bl);
1122 string index_pool_str;
1123 string data_pool_str;
1124 ::decode(index_pool_str, bl);
1125 index_pool = rgw_pool(index_pool_str);
1126 ::decode(data_pool_str, bl);
1127 data_pool = rgw_pool(data_pool_str);
1128 if (struct_v >= 4) {
1129 string data_extra_pool_str;
1130 ::decode(data_extra_pool_str, bl);
1131 data_extra_pool = rgw_pool(data_extra_pool_str);
1132 }
1133 if (struct_v >= 5) {
1134 uint32_t it;
1135 ::decode(it, bl);
1136 index_type = (RGWBucketIndexType)it;
1137 }
1138 if (struct_v >= 6) {
1139 ::decode(compression_type, bl);
1140 }
1141 DECODE_FINISH(bl);
1142 }
1143 const rgw_pool& get_data_extra_pool() const {
1144 if (data_extra_pool.empty()) {
1145 return data_pool;
1146 }
1147 return data_extra_pool;
1148 }
1149 void dump(Formatter *f) const;
1150 void decode_json(JSONObj *obj);
1151 };
1152 WRITE_CLASS_ENCODER(RGWZonePlacementInfo)
1153
1154 struct RGWZoneParams : RGWSystemMetaObj {
1155 rgw_pool domain_root;
1156 rgw_pool metadata_heap;
1157 rgw_pool control_pool;
1158 rgw_pool gc_pool;
1159 rgw_pool lc_pool;
1160 rgw_pool log_pool;
1161 rgw_pool intent_log_pool;
1162 rgw_pool usage_log_pool;
1163
1164 rgw_pool user_keys_pool;
1165 rgw_pool user_email_pool;
1166 rgw_pool user_swift_pool;
1167 rgw_pool user_uid_pool;
1168 rgw_pool roles_pool;
1169 rgw_pool reshard_pool;
1170
1171 RGWAccessKey system_key;
1172
1173 map<string, RGWZonePlacementInfo> placement_pools;
1174
1175 string realm_id;
1176
1177 map<string, string, ltstr_nocase> tier_config;
1178
1179 RGWZoneParams() : RGWSystemMetaObj() {}
1180 RGWZoneParams(const string& name) : RGWSystemMetaObj(name){}
1181 RGWZoneParams(const string& id, const string& name) : RGWSystemMetaObj(id, name) {}
1182 RGWZoneParams(const string& id, const string& name, const string& _realm_id)
1183 : RGWSystemMetaObj(id, name), realm_id(_realm_id) {}
1184
1185 rgw_pool get_pool(CephContext *cct);
1186 const string get_default_oid(bool old_format = false) override;
1187 const string& get_names_oid_prefix() override;
1188 const string& get_info_oid_prefix(bool old_format = false) override;
1189 const string& get_predefined_name(CephContext *cct) override;
1190
1191 int init(CephContext *_cct, RGWRados *_store, bool setup_obj = true,
1192 bool old_format = false);
1193 using RGWSystemMetaObj::init;
1194 int read_default_id(string& default_id, bool old_format = false) override;
1195 int set_as_default(bool exclusive = false) override;
1196 int create_default(bool old_format = false);
1197 int create(bool exclusive = true) override;
1198 int fix_pool_names();
1199
1200 const string& get_compression_type(const string& placement_rule) const;
1201
1202 void encode(bufferlist& bl) const override {
1203 ENCODE_START(10, 1, bl);
1204 ::encode(domain_root, bl);
1205 ::encode(control_pool, bl);
1206 ::encode(gc_pool, bl);
1207 ::encode(log_pool, bl);
1208 ::encode(intent_log_pool, bl);
1209 ::encode(usage_log_pool, bl);
1210 ::encode(user_keys_pool, bl);
1211 ::encode(user_email_pool, bl);
1212 ::encode(user_swift_pool, bl);
1213 ::encode(user_uid_pool, bl);
1214 RGWSystemMetaObj::encode(bl);
1215 ::encode(system_key, bl);
1216 ::encode(placement_pools, bl);
1217 ::encode(metadata_heap, bl);
1218 ::encode(realm_id, bl);
1219 ::encode(lc_pool, bl);
1220 ::encode(tier_config, bl);
1221 ::encode(roles_pool, bl);
1222 ::encode(reshard_pool, bl);
1223 ENCODE_FINISH(bl);
1224 }
1225
1226 void decode(bufferlist::iterator& bl) override {
1227 DECODE_START(10, bl);
1228 ::decode(domain_root, bl);
1229 ::decode(control_pool, bl);
1230 ::decode(gc_pool, bl);
1231 ::decode(log_pool, bl);
1232 ::decode(intent_log_pool, bl);
1233 ::decode(usage_log_pool, bl);
1234 ::decode(user_keys_pool, bl);
1235 ::decode(user_email_pool, bl);
1236 ::decode(user_swift_pool, bl);
1237 ::decode(user_uid_pool, bl);
1238 if (struct_v >= 6) {
1239 RGWSystemMetaObj::decode(bl);
1240 } else if (struct_v >= 2) {
1241 ::decode(name, bl);
1242 id = name;
1243 }
1244 if (struct_v >= 3)
1245 ::decode(system_key, bl);
1246 if (struct_v >= 4)
1247 ::decode(placement_pools, bl);
1248 if (struct_v >= 5)
1249 ::decode(metadata_heap, bl);
1250 if (struct_v >= 6) {
1251 ::decode(realm_id, bl);
1252 }
1253 if (struct_v >= 7) {
1254 ::decode(lc_pool, bl);
1255 } else {
1256 lc_pool.init(name + ".rgw.lc");
1257 }
1258 if (struct_v >= 8) {
1259 ::decode(tier_config, bl);
1260 }
1261 if (struct_v >= 9) {
1262 ::decode(roles_pool, bl);
1263 } else {
1264 roles_pool = name + ".rgw.roles";
1265 }
1266 if (struct_v >= 10) {
1267 ::decode(reshard_pool, bl);
1268 } else {
1269 reshard_pool = name + ".rgw.reshard";
1270 }
1271 DECODE_FINISH(bl);
1272 }
1273 void dump(Formatter *f) const;
1274 void decode_json(JSONObj *obj);
1275 static void generate_test_instances(list<RGWZoneParams*>& o);
1276
1277 bool find_placement(const rgw_data_placement_target& placement, string *placement_id) {
1278 for (const auto& pp : placement_pools) {
1279 const RGWZonePlacementInfo& info = pp.second;
1280 if (info.index_pool == placement.index_pool.to_str() &&
1281 info.data_pool == placement.data_pool.to_str() &&
1282 info.data_extra_pool == placement.data_extra_pool.to_str()) {
1283 *placement_id = pp.first;
1284 return true;
1285 }
1286 }
1287 return false;
1288 }
1289
1290 bool get_placement(const string& placement_id, RGWZonePlacementInfo *placement) const {
1291 auto iter = placement_pools.find(placement_id);
1292 if (iter == placement_pools.end()) {
1293 return false;
1294 }
1295 *placement = iter->second;
1296 return true;
1297 }
1298
1299 /*
1300 * return data pool of the head object
1301 */
1302 bool get_head_data_pool(const string& placement_id, const rgw_obj& obj, rgw_pool *pool) const {
1303 const rgw_data_placement_target& explicit_placement = obj.bucket.explicit_placement;
1304 if (!explicit_placement.data_pool.empty()) {
1305 if (!obj.in_extra_data) {
1306 *pool = explicit_placement.data_pool;
1307 } else {
1308 *pool = explicit_placement.get_data_extra_pool();
1309 }
1310 return true;
1311 }
1312 if (placement_id.empty()) {
1313 return false;
1314 }
1315 auto iter = placement_pools.find(placement_id);
1316 if (iter == placement_pools.end()) {
1317 return false;
1318 }
1319 if (!obj.in_extra_data) {
1320 *pool = iter->second.data_pool;
1321 } else {
1322 *pool = iter->second.get_data_extra_pool();
1323 }
1324 return true;
1325 }
1326 };
1327 WRITE_CLASS_ENCODER(RGWZoneParams)
1328
1329 struct RGWZone {
1330 string id;
1331 string name;
1332 list<string> endpoints;
1333 bool log_meta;
1334 bool log_data;
1335 bool read_only;
1336 string tier_type;
1337
1338 /**
1339 * Represents the number of shards for the bucket index object, a value of zero
1340 * indicates there is no sharding. By default (no sharding, the name of the object
1341 * is '.dir.{marker}', with sharding, the name is '.dir.{marker}.{sharding_id}',
1342 * sharding_id is zero-based value. It is not recommended to set a too large value
1343 * (e.g. thousand) as it increases the cost for bucket listing.
1344 */
1345 uint32_t bucket_index_max_shards;
1346
1347 bool sync_from_all;
1348 set<string> sync_from; /* list of zones to sync from */
1349
1350 RGWZone() : log_meta(false), log_data(false), read_only(false), bucket_index_max_shards(0),
1351 sync_from_all(true) {}
1352
1353 void encode(bufferlist& bl) const {
1354 ENCODE_START(6, 1, bl);
1355 ::encode(name, bl);
1356 ::encode(endpoints, bl);
1357 ::encode(log_meta, bl);
1358 ::encode(log_data, bl);
1359 ::encode(bucket_index_max_shards, bl);
1360 ::encode(id, bl);
1361 ::encode(read_only, bl);
1362 ::encode(tier_type, bl);
1363 ::encode(sync_from_all, bl);
1364 ::encode(sync_from, bl);
1365 ENCODE_FINISH(bl);
1366 }
1367
1368 void decode(bufferlist::iterator& bl) {
1369 DECODE_START(6, bl);
1370 ::decode(name, bl);
1371 if (struct_v < 4) {
1372 id = name;
1373 }
1374 ::decode(endpoints, bl);
1375 if (struct_v >= 2) {
1376 ::decode(log_meta, bl);
1377 ::decode(log_data, bl);
1378 }
1379 if (struct_v >= 3) {
1380 ::decode(bucket_index_max_shards, bl);
1381 }
1382 if (struct_v >= 4) {
1383 ::decode(id, bl);
1384 ::decode(read_only, bl);
1385 }
1386 if (struct_v >= 5) {
1387 ::decode(tier_type, bl);
1388 }
1389 if (struct_v >= 6) {
1390 ::decode(sync_from_all, bl);
1391 ::decode(sync_from, bl);
1392 }
1393 DECODE_FINISH(bl);
1394 }
1395 void dump(Formatter *f) const;
1396 void decode_json(JSONObj *obj);
1397 static void generate_test_instances(list<RGWZone*>& o);
1398
1399 bool is_read_only() { return read_only; }
1400
1401 bool syncs_from(const string& zone_id) {
1402 return (sync_from_all || sync_from.find(zone_id) != sync_from.end());
1403 }
1404 };
1405 WRITE_CLASS_ENCODER(RGWZone)
1406
1407 struct RGWDefaultZoneGroupInfo {
1408 string default_zonegroup;
1409
1410 void encode(bufferlist& bl) const {
1411 ENCODE_START(1, 1, bl);
1412 ::encode(default_zonegroup, bl);
1413 ENCODE_FINISH(bl);
1414 }
1415
1416 void decode(bufferlist::iterator& bl) {
1417 DECODE_START(1, bl);
1418 ::decode(default_zonegroup, bl);
1419 DECODE_FINISH(bl);
1420 }
1421 void dump(Formatter *f) const;
1422 void decode_json(JSONObj *obj);
1423 //todo: implement ceph-dencoder
1424 };
1425 WRITE_CLASS_ENCODER(RGWDefaultZoneGroupInfo)
1426
1427 struct RGWZoneGroupPlacementTarget {
1428 string name;
1429 set<string> tags;
1430
1431 bool user_permitted(list<string>& user_tags) {
1432 if (tags.empty()) {
1433 return true;
1434 }
1435 for (auto& rule : user_tags) {
1436 if (tags.find(rule) != tags.end()) {
1437 return true;
1438 }
1439 }
1440 return false;
1441 }
1442
1443 void encode(bufferlist& bl) const {
1444 ENCODE_START(1, 1, bl);
1445 ::encode(name, bl);
1446 ::encode(tags, bl);
1447 ENCODE_FINISH(bl);
1448 }
1449
1450 void decode(bufferlist::iterator& bl) {
1451 DECODE_START(1, bl);
1452 ::decode(name, bl);
1453 ::decode(tags, bl);
1454 DECODE_FINISH(bl);
1455 }
1456 void dump(Formatter *f) const;
1457 void decode_json(JSONObj *obj);
1458 };
1459 WRITE_CLASS_ENCODER(RGWZoneGroupPlacementTarget)
1460
1461
1462 struct RGWZoneGroup : public RGWSystemMetaObj {
1463 string api_name;
1464 list<string> endpoints;
1465 bool is_master;
1466
1467 string master_zone;
1468 map<string, RGWZone> zones;
1469
1470 map<string, RGWZoneGroupPlacementTarget> placement_targets;
1471 string default_placement;
1472
1473 list<string> hostnames;
1474 list<string> hostnames_s3website;
1475 // TODO: Maybe convert hostnames to a map<string,list<string>> for
1476 // endpoint_type->hostnames
1477 /*
1478 20:05 < _robbat21irssi> maybe I do someting like: if (hostname_map.empty()) { populate all map keys from hostnames; };
1479 20:05 < _robbat21irssi> but that's a later compatability migration planning bit
1480 20:06 < yehudasa> more like if (!hostnames.empty()) {
1481 20:06 < yehudasa> for (list<string>::iterator iter = hostnames.begin(); iter != hostnames.end(); ++iter) {
1482 20:06 < yehudasa> hostname_map["s3"].append(iter->second);
1483 20:07 < yehudasa> hostname_map["s3website"].append(iter->second);
1484 20:07 < yehudasa> s/append/push_back/g
1485 20:08 < _robbat21irssi> inner loop over APIs
1486 20:08 < yehudasa> yeah, probably
1487 20:08 < _robbat21irssi> s3, s3website, swift, swith_auth, swift_website
1488 */
1489 map<string, list<string> > api_hostname_map;
1490 map<string, list<string> > api_endpoints_map;
1491
1492 string realm_id;
1493
1494 RGWZoneGroup(): is_master(false){}
1495 RGWZoneGroup(const std::string &id, const std::string &name):RGWSystemMetaObj(id, name) {}
1496 RGWZoneGroup(const std::string &_name):RGWSystemMetaObj(_name) {}
1497 RGWZoneGroup(const std::string &_name, bool _is_master, CephContext *cct, RGWRados* store,
1498 const string& _realm_id, const list<string>& _endpoints)
1499 : RGWSystemMetaObj(_name, cct , store), endpoints(_endpoints), is_master(_is_master),
1500 realm_id(_realm_id) {}
1501
1502 bool is_master_zonegroup() const { return is_master;}
1503 void update_master(bool _is_master) {
1504 is_master = _is_master;
1505 post_process_params();
1506 }
1507 void post_process_params();
1508
1509 void encode(bufferlist& bl) const override {
1510 ENCODE_START(4, 1, bl);
1511 ::encode(name, bl);
1512 ::encode(api_name, bl);
1513 ::encode(is_master, bl);
1514 ::encode(endpoints, bl);
1515 ::encode(master_zone, bl);
1516 ::encode(zones, bl);
1517 ::encode(placement_targets, bl);
1518 ::encode(default_placement, bl);
1519 ::encode(hostnames, bl);
1520 ::encode(hostnames_s3website, bl);
1521 RGWSystemMetaObj::encode(bl);
1522 ::encode(realm_id, bl);
1523 ENCODE_FINISH(bl);
1524 }
1525
1526 void decode(bufferlist::iterator& bl) override {
1527 DECODE_START(4, bl);
1528 ::decode(name, bl);
1529 ::decode(api_name, bl);
1530 ::decode(is_master, bl);
1531 ::decode(endpoints, bl);
1532 ::decode(master_zone, bl);
1533 ::decode(zones, bl);
1534 ::decode(placement_targets, bl);
1535 ::decode(default_placement, bl);
1536 if (struct_v >= 2) {
1537 ::decode(hostnames, bl);
1538 }
1539 if (struct_v >= 3) {
1540 ::decode(hostnames_s3website, bl);
1541 }
1542 if (struct_v >= 4) {
1543 RGWSystemMetaObj::decode(bl);
1544 ::decode(realm_id, bl);
1545 } else {
1546 id = name;
1547 }
1548 DECODE_FINISH(bl);
1549 }
1550
1551 int read_default_id(string& default_id, bool old_format = false) override;
1552 int set_as_default(bool exclusive = false) override;
1553 int create_default(bool old_format = false);
1554 int equals(const string& other_zonegroup) const;
1555 int add_zone(const RGWZoneParams& zone_params, bool *is_master, bool *read_only,
1556 const list<string>& endpoints, const string *ptier_type,
1557 bool *psync_from_all, list<string>& sync_from, list<string>& sync_from_rm);
1558 int remove_zone(const std::string& zone_id);
1559 int rename_zone(const RGWZoneParams& zone_params);
1560 rgw_pool get_pool(CephContext *cct);
1561 const string get_default_oid(bool old_region_format = false) override;
1562 const string& get_info_oid_prefix(bool old_region_format = false) override;
1563 const string& get_names_oid_prefix() override;
1564 const string& get_predefined_name(CephContext *cct) override;
1565
1566 void dump(Formatter *f) const;
1567 void decode_json(JSONObj *obj);
1568 static void generate_test_instances(list<RGWZoneGroup*>& o);
1569 };
1570 WRITE_CLASS_ENCODER(RGWZoneGroup)
1571
1572 struct RGWPeriodMap
1573 {
1574 string id;
1575 map<string, RGWZoneGroup> zonegroups;
1576 map<string, RGWZoneGroup> zonegroups_by_api;
1577 map<string, uint32_t> short_zone_ids;
1578
1579 string master_zonegroup;
1580
1581 void encode(bufferlist& bl) const;
1582 void decode(bufferlist::iterator& bl);
1583
1584 int update(const RGWZoneGroup& zonegroup, CephContext *cct);
1585
1586 void dump(Formatter *f) const;
1587 void decode_json(JSONObj *obj);
1588
1589 void reset() {
1590 zonegroups.clear();
1591 zonegroups_by_api.clear();
1592 master_zonegroup.clear();
1593 }
1594
1595 uint32_t get_zone_short_id(const string& zone_id) const;
1596 };
1597 WRITE_CLASS_ENCODER(RGWPeriodMap)
1598
1599 struct RGWPeriodConfig
1600 {
1601 RGWQuotaInfo bucket_quota;
1602 RGWQuotaInfo user_quota;
1603
1604 void encode(bufferlist& bl) const {
1605 ENCODE_START(1, 1, bl);
1606 ::encode(bucket_quota, bl);
1607 ::encode(user_quota, bl);
1608 ENCODE_FINISH(bl);
1609 }
1610
1611 void decode(bufferlist::iterator& bl) {
1612 DECODE_START(1, bl);
1613 ::decode(bucket_quota, bl);
1614 ::decode(user_quota, bl);
1615 DECODE_FINISH(bl);
1616 }
1617
1618 void dump(Formatter *f) const;
1619 void decode_json(JSONObj *obj);
1620
1621 // the period config must be stored in a local object outside of the period,
1622 // so that it can be used in a default configuration where no realm/period
1623 // exists
1624 int read(RGWRados *store, const std::string& realm_id);
1625 int write(RGWRados *store, const std::string& realm_id);
1626
1627 static std::string get_oid(const std::string& realm_id);
1628 static rgw_pool get_pool(CephContext *cct);
1629 };
1630 WRITE_CLASS_ENCODER(RGWPeriodConfig)
1631
1632 /* for backward comaptability */
1633 struct RGWRegionMap {
1634
1635 map<string, RGWZoneGroup> regions;
1636
1637 string master_region;
1638
1639 RGWQuotaInfo bucket_quota;
1640 RGWQuotaInfo user_quota;
1641
1642 void encode(bufferlist& bl) const;
1643 void decode(bufferlist::iterator& bl);
1644
1645 void dump(Formatter *f) const;
1646 void decode_json(JSONObj *obj);
1647 };
1648 WRITE_CLASS_ENCODER(RGWRegionMap)
1649
1650 struct RGWZoneGroupMap {
1651
1652 map<string, RGWZoneGroup> zonegroups;
1653 map<string, RGWZoneGroup> zonegroups_by_api;
1654
1655 string master_zonegroup;
1656
1657 RGWQuotaInfo bucket_quota;
1658 RGWQuotaInfo user_quota;
1659
1660 /* constract the map */
1661 int read(CephContext *cct, RGWRados *store);
1662
1663 void encode(bufferlist& bl) const;
1664 void decode(bufferlist::iterator& bl);
1665
1666 void dump(Formatter *f) const;
1667 void decode_json(JSONObj *obj);
1668 };
1669 WRITE_CLASS_ENCODER(RGWZoneGroupMap)
1670
1671 class RGWRealm;
1672
1673 struct objexp_hint_entry {
1674 string tenant;
1675 string bucket_name;
1676 string bucket_id;
1677 rgw_obj_key obj_key;
1678 ceph::real_time exp_time;
1679
1680 void encode(bufferlist& bl) const {
1681 ENCODE_START(2, 1, bl);
1682 ::encode(bucket_name, bl);
1683 ::encode(bucket_id, bl);
1684 ::encode(obj_key, bl);
1685 ::encode(exp_time, bl);
1686 ::encode(tenant, bl);
1687 ENCODE_FINISH(bl);
1688 }
1689
1690 void decode(bufferlist::iterator& bl) {
1691 // XXX Do we want DECODE_START_LEGACY_COMPAT_LEN(2, 1, 1, bl); ?
1692 DECODE_START(2, bl);
1693 ::decode(bucket_name, bl);
1694 ::decode(bucket_id, bl);
1695 ::decode(obj_key, bl);
1696 ::decode(exp_time, bl);
1697 if (struct_v >= 2) {
1698 ::decode(tenant, bl);
1699 } else {
1700 tenant.clear();
1701 }
1702 DECODE_FINISH(bl);
1703 }
1704 };
1705 WRITE_CLASS_ENCODER(objexp_hint_entry)
1706
1707 class RGWPeriod;
1708
1709 class RGWRealm : public RGWSystemMetaObj
1710 {
1711 string current_period;
1712 epoch_t epoch{0}; //< realm epoch, incremented for each new period
1713
1714 int create_control(bool exclusive);
1715 int delete_control();
1716 public:
1717 RGWRealm() {}
1718 RGWRealm(const string& _id, const string& _name = "") : RGWSystemMetaObj(_id, _name) {}
1719 RGWRealm(CephContext *_cct, RGWRados *_store): RGWSystemMetaObj(_cct, _store) {}
1720 RGWRealm(const string& _name, CephContext *_cct, RGWRados *_store): RGWSystemMetaObj(_name, _cct, _store){}
1721
1722 void encode(bufferlist& bl) const override {
1723 ENCODE_START(1, 1, bl);
1724 RGWSystemMetaObj::encode(bl);
1725 ::encode(current_period, bl);
1726 ::encode(epoch, bl);
1727 ENCODE_FINISH(bl);
1728 }
1729
1730 void decode(bufferlist::iterator& bl) override {
1731 DECODE_START(1, bl);
1732 RGWSystemMetaObj::decode(bl);
1733 ::decode(current_period, bl);
1734 ::decode(epoch, bl);
1735 DECODE_FINISH(bl);
1736 }
1737
1738 int create(bool exclusive = true) override;
1739 int delete_obj();
1740 rgw_pool get_pool(CephContext *cct);
1741 const string get_default_oid(bool old_format = false) override;
1742 const string& get_names_oid_prefix() override;
1743 const string& get_info_oid_prefix(bool old_format = false) override;
1744 const string& get_predefined_name(CephContext *cct) override;
1745
1746 using RGWSystemMetaObj::read_id; // expose as public for radosgw-admin
1747
1748 void dump(Formatter *f) const;
1749 void decode_json(JSONObj *obj);
1750
1751 const string& get_current_period() const {
1752 return current_period;
1753 }
1754 int set_current_period(RGWPeriod& period);
1755 void clear_current_period_and_epoch() {
1756 current_period.clear();
1757 epoch = 0;
1758 }
1759 epoch_t get_epoch() const { return epoch; }
1760
1761 string get_control_oid();
1762 /// send a notify on the realm control object
1763 int notify_zone(bufferlist& bl);
1764 /// notify the zone of a new period
1765 int notify_new_period(const RGWPeriod& period);
1766 };
1767 WRITE_CLASS_ENCODER(RGWRealm)
1768
1769 struct RGWPeriodLatestEpochInfo {
1770 epoch_t epoch;
1771
1772 void encode(bufferlist& bl) const {
1773 ENCODE_START(1, 1, bl);
1774 ::encode(epoch, bl);
1775 ENCODE_FINISH(bl);
1776 }
1777
1778 void decode(bufferlist::iterator& bl) {
1779 DECODE_START(1, bl);
1780 ::decode(epoch, bl);
1781 DECODE_FINISH(bl);
1782 }
1783
1784 void dump(Formatter *f) const;
1785 void decode_json(JSONObj *obj);
1786 };
1787 WRITE_CLASS_ENCODER(RGWPeriodLatestEpochInfo)
1788
1789 class RGWPeriod
1790 {
1791 string id;
1792 epoch_t epoch;
1793 string predecessor_uuid;
1794 std::vector<std::string> sync_status;
1795 RGWPeriodMap period_map;
1796 RGWPeriodConfig period_config;
1797 string master_zonegroup;
1798 string master_zone;
1799
1800 string realm_id;
1801 string realm_name;
1802 epoch_t realm_epoch{1}; //< realm epoch when period was made current
1803
1804 CephContext *cct;
1805 RGWRados *store;
1806
1807 int read_info();
1808 int read_latest_epoch(RGWPeriodLatestEpochInfo& epoch_info,
1809 RGWObjVersionTracker *objv = nullptr);
1810 int use_latest_epoch();
1811 int use_current_period();
1812
1813 const string get_period_oid();
1814 const string get_period_oid_prefix();
1815
1816 // gather the metadata sync status for each shard; only for use on master zone
1817 int update_sync_status(const RGWPeriod &current_period,
1818 std::ostream& error_stream, bool force_if_stale);
1819
1820 public:
1821 RGWPeriod() : epoch(0), cct(NULL), store(NULL) {}
1822
1823 RGWPeriod(const string& period_id, epoch_t _epoch = 0)
1824 : id(period_id), epoch(_epoch),
1825 cct(NULL), store(NULL) {}
1826
1827 const string& get_id() const { return id; }
1828 epoch_t get_epoch() const { return epoch; }
1829 epoch_t get_realm_epoch() const { return realm_epoch; }
1830 const string& get_predecessor() const { return predecessor_uuid; }
1831 const string& get_master_zone() const { return master_zone; }
1832 const string& get_master_zonegroup() const { return master_zonegroup; }
1833 const string& get_realm() const { return realm_id; }
1834 const RGWPeriodMap& get_map() const { return period_map; }
1835 RGWPeriodConfig& get_config() { return period_config; }
1836 const RGWPeriodConfig& get_config() const { return period_config; }
1837 const std::vector<std::string>& get_sync_status() const { return sync_status; }
1838 rgw_pool get_pool(CephContext *cct);
1839 const string& get_latest_epoch_oid();
1840 const string& get_info_oid_prefix();
1841
1842 void set_user_quota(RGWQuotaInfo& user_quota) {
1843 period_config.user_quota = user_quota;
1844 }
1845
1846 void set_bucket_quota(RGWQuotaInfo& bucket_quota) {
1847 period_config.bucket_quota = bucket_quota;
1848 }
1849
1850 void set_id(const string& id) {
1851 this->id = id;
1852 period_map.id = id;
1853 }
1854 void set_epoch(epoch_t epoch) { this->epoch = epoch; }
1855 void set_realm_epoch(epoch_t epoch) { realm_epoch = epoch; }
1856
1857 void set_predecessor(const string& predecessor)
1858 {
1859 predecessor_uuid = predecessor;
1860 }
1861
1862 void set_realm_id(const string& _realm_id) {
1863 realm_id = _realm_id;
1864 }
1865
1866 int reflect();
1867
1868 int get_zonegroup(RGWZoneGroup& zonegroup,
1869 const string& zonegroup_id);
1870
1871 bool is_single_zonegroup()
1872 {
1873 return (period_map.zonegroups.size() == 1);
1874 }
1875
1876 /*
1877 returns true if there are several zone groups with a least one zone
1878 */
1879 bool is_multi_zonegroups_with_zones()
1880 {
1881 int count = 0;
1882 for (const auto& zg: period_map.zonegroups) {
1883 if (zg.second.zones.size() > 0) {
1884 if (count++ > 0) {
1885 return true;
1886 }
1887 }
1888 }
1889 return false;
1890 }
1891
1892 int get_latest_epoch(epoch_t& epoch);
1893 int set_latest_epoch(epoch_t epoch, bool exclusive = false,
1894 RGWObjVersionTracker *objv = nullptr);
1895 // update latest_epoch if the given epoch is higher, else return -EEXIST
1896 int update_latest_epoch(epoch_t epoch);
1897
1898 int init(CephContext *_cct, RGWRados *_store, const string &period_realm_id, const string &period_realm_name = "",
1899 bool setup_obj = true);
1900 int init(CephContext *_cct, RGWRados *_store, bool setup_obj = true);
1901
1902 int create(bool exclusive = true);
1903 int delete_obj();
1904 int store_info(bool exclusive);
1905 int add_zonegroup(const RGWZoneGroup& zonegroup);
1906
1907 void fork();
1908 int update();
1909
1910 // commit a staging period; only for use on master zone
1911 int commit(RGWRealm& realm, const RGWPeriod &current_period,
1912 std::ostream& error_stream, bool force_if_stale = false);
1913
1914 void encode(bufferlist& bl) const {
1915 ENCODE_START(1, 1, bl);
1916 ::encode(id, bl);
1917 ::encode(epoch, bl);
1918 ::encode(realm_epoch, bl);
1919 ::encode(predecessor_uuid, bl);
1920 ::encode(sync_status, bl);
1921 ::encode(period_map, bl);
1922 ::encode(master_zone, bl);
1923 ::encode(master_zonegroup, bl);
1924 ::encode(period_config, bl);
1925 ::encode(realm_id, bl);
1926 ::encode(realm_name, bl);
1927 ENCODE_FINISH(bl);
1928 }
1929
1930 void decode(bufferlist::iterator& bl) {
1931 DECODE_START(1, bl);
1932 ::decode(id, bl);
1933 ::decode(epoch, bl);
1934 ::decode(realm_epoch, bl);
1935 ::decode(predecessor_uuid, bl);
1936 ::decode(sync_status, bl);
1937 ::decode(period_map, bl);
1938 ::decode(master_zone, bl);
1939 ::decode(master_zonegroup, bl);
1940 ::decode(period_config, bl);
1941 ::decode(realm_id, bl);
1942 ::decode(realm_name, bl);
1943 DECODE_FINISH(bl);
1944 }
1945 void dump(Formatter *f) const;
1946 void decode_json(JSONObj *obj);
1947
1948 static string get_staging_id(const string& realm_id) {
1949 return realm_id + ":staging";
1950 }
1951 };
1952 WRITE_CLASS_ENCODER(RGWPeriod)
1953
1954 class RGWDataChangesLog;
1955 class RGWMetaSyncStatusManager;
1956 class RGWDataSyncStatusManager;
1957 class RGWReplicaLogger;
1958 class RGWCoroutinesManagerRegistry;
1959
1960 class RGWStateLog {
1961 RGWRados *store;
1962 int num_shards;
1963 string module_name;
1964
1965 void oid_str(int shard, string& oid);
1966 int get_shard_num(const string& object);
1967 string get_oid(const string& object);
1968 int open_ioctx(librados::IoCtx& ioctx);
1969
1970 struct list_state {
1971 int cur_shard;
1972 int max_shard;
1973 string marker;
1974 string client_id;
1975 string op_id;
1976 string object;
1977
1978 list_state() : cur_shard(0), max_shard(0) {}
1979 };
1980
1981 protected:
1982 virtual bool dump_entry_internal(const cls_statelog_entry& entry, Formatter *f) {
1983 return false;
1984 }
1985
1986 public:
1987 RGWStateLog(RGWRados *_store, int _num_shards, const string& _module_name) :
1988 store(_store), num_shards(_num_shards), module_name(_module_name) {}
1989 virtual ~RGWStateLog() {}
1990
1991 int store_entry(const string& client_id, const string& op_id, const string& object,
1992 uint32_t state, bufferlist *bl, uint32_t *check_state);
1993
1994 int remove_entry(const string& client_id, const string& op_id, const string& object);
1995
1996 void init_list_entries(const string& client_id, const string& op_id, const string& object,
1997 void **handle);
1998
1999 int list_entries(void *handle, int max_entries, list<cls_statelog_entry>& entries, bool *done);
2000
2001 void finish_list_entries(void *handle);
2002
2003 virtual void dump_entry(const cls_statelog_entry& entry, Formatter *f);
2004 };
2005
2006 /*
2007 * state transitions:
2008 *
2009 * unknown -> in-progress -> complete
2010 * -> error
2011 *
2012 * user can try setting the 'abort' state, and it can only succeed if state is
2013 * in-progress.
2014 *
2015 * state renewal cannot switch state (stays in the same state)
2016 *
2017 * rgw can switch from in-progress to complete
2018 * rgw can switch from in-progress to error
2019 *
2020 * rgw can switch from abort to cancelled
2021 *
2022 */
2023
2024 class RGWOpState : public RGWStateLog {
2025 protected:
2026 bool dump_entry_internal(const cls_statelog_entry& entry, Formatter *f) override;
2027 public:
2028
2029 enum OpState {
2030 OPSTATE_UNKNOWN = 0,
2031 OPSTATE_IN_PROGRESS = 1,
2032 OPSTATE_COMPLETE = 2,
2033 OPSTATE_ERROR = 3,
2034 OPSTATE_ABORT = 4,
2035 OPSTATE_CANCELLED = 5,
2036 };
2037
2038 explicit RGWOpState(RGWRados *_store);
2039
2040 int state_from_str(const string& s, OpState *state);
2041 int set_state(const string& client_id, const string& op_id, const string& object, OpState state);
2042 int renew_state(const string& client_id, const string& op_id, const string& object, OpState state);
2043 };
2044
2045 class RGWOpStateSingleOp
2046 {
2047 RGWOpState os;
2048 string client_id;
2049 string op_id;
2050 string object;
2051
2052 CephContext *cct;
2053
2054 RGWOpState::OpState cur_state;
2055 ceph::real_time last_update;
2056
2057 public:
2058 RGWOpStateSingleOp(RGWRados *store, const string& cid, const string& oid, const string& obj);
2059
2060 int set_state(RGWOpState::OpState state);
2061 int renew_state();
2062 };
2063
2064 class RGWGetBucketStats_CB : public RefCountedObject {
2065 protected:
2066 rgw_bucket bucket;
2067 map<RGWObjCategory, RGWStorageStats> *stats;
2068 public:
2069 explicit RGWGetBucketStats_CB(const rgw_bucket& _bucket) : bucket(_bucket), stats(NULL) {}
2070 ~RGWGetBucketStats_CB() override {}
2071 virtual void handle_response(int r) = 0;
2072 virtual void set_response(map<RGWObjCategory, RGWStorageStats> *_stats) {
2073 stats = _stats;
2074 }
2075 };
2076
2077 class RGWGetUserStats_CB : public RefCountedObject {
2078 protected:
2079 rgw_user user;
2080 RGWStorageStats stats;
2081 public:
2082 explicit RGWGetUserStats_CB(const rgw_user& _user) : user(_user) {}
2083 ~RGWGetUserStats_CB() override {}
2084 virtual void handle_response(int r) = 0;
2085 virtual void set_response(RGWStorageStats& _stats) {
2086 stats = _stats;
2087 }
2088 };
2089
2090 class RGWGetDirHeader_CB;
2091 class RGWGetUserHeader_CB;
2092
2093 struct rgw_rados_ref {
2094 rgw_pool pool;
2095 string oid;
2096 string key;
2097 librados::IoCtx ioctx;
2098 };
2099
/*
 * Interface for a cache that is notified through chain_cb() and can be
 * invalidated per key or as a whole.
 */
class RGWChainedCache {
public:
  virtual ~RGWChainedCache() {}

  virtual void chain_cb(const std::string& key, void *data) = 0;
  virtual void invalidate(const std::string& key) = 0;
  virtual void invalidate_all() = 0;

  /*
   * One (cache, key, data) triple.
   * NOTE: key is held by reference, not copied, so the string passed to the
   * constructor has to outlive the Entry.
   */
  struct Entry {
    RGWChainedCache *cache;
    const std::string& key;
    void *data;

    Entry(RGWChainedCache *_c, const std::string& _k, void *_d)
      : cache(_c), key(_k), data(_d) {}
  };
};
2115
2116 template <class T, class S>
2117 class RGWObjectCtxImpl {
2118 RGWRados *store;
2119 std::map<T, S> objs_state;
2120 RWLock lock;
2121
2122 public:
2123 RGWObjectCtxImpl(RGWRados *_store) : store(_store), lock("RGWObjectCtxImpl") {}
2124
2125 S *get_state(const T& obj) {
2126 S *result;
2127 typename std::map<T, S>::iterator iter;
2128 lock.get_read();
2129 assert (!obj.empty());
2130 iter = objs_state.find(obj);
2131 if (iter != objs_state.end()) {
2132 result = &iter->second;
2133 lock.unlock();
2134 } else {
2135 lock.unlock();
2136 lock.get_write();
2137 result = &objs_state[obj];
2138 lock.unlock();
2139 }
2140 return result;
2141 }
2142
2143 void set_atomic(T& obj) {
2144 RWLock::WLocker wl(lock);
2145 assert (!obj.empty());
2146 objs_state[obj].is_atomic = true;
2147 }
2148 void set_prefetch_data(T& obj) {
2149 RWLock::WLocker wl(lock);
2150 assert (!obj.empty());
2151 objs_state[obj].prefetch_data = true;
2152 }
2153 void invalidate(T& obj) {
2154 RWLock::WLocker wl(lock);
2155 auto iter = objs_state.find(obj);
2156 if (iter == objs_state.end()) {
2157 return;
2158 }
2159 bool is_atomic = iter->second.is_atomic;
2160 bool prefetch_data = iter->second.prefetch_data;
2161
2162 objs_state.erase(iter);
2163
2164 if (is_atomic || prefetch_data) {
2165 auto& s = objs_state[obj];
2166 s.is_atomic = is_atomic;
2167 s.prefetch_data = prefetch_data;
2168 }
2169 }
2170 };
2171
2172 template<>
2173 void RGWObjectCtxImpl<rgw_obj, RGWObjState>::invalidate(rgw_obj& obj);
2174
2175 template<>
2176 void RGWObjectCtxImpl<rgw_raw_obj, RGWRawObjState>::invalidate(rgw_raw_obj& obj);
2177
2178 struct RGWObjectCtx {
2179 RGWRados *store;
2180 void *user_ctx;
2181
2182 RGWObjectCtxImpl<rgw_obj, RGWObjState> obj;
2183 RGWObjectCtxImpl<rgw_raw_obj, RGWRawObjState> raw;
2184
2185 explicit RGWObjectCtx(RGWRados *_store) : store(_store), user_ctx(NULL), obj(store), raw(store) { }
2186 RGWObjectCtx(RGWRados *_store, void *_user_ctx) : store(_store), user_ctx(_user_ctx), obj(store), raw(store) { }
2187 };
2188
2189 class Finisher;
2190 class RGWAsyncRadosProcessor;
2191
2192 template <class T>
2193 class RGWChainedCacheImpl;
2194
2195 struct bucket_info_entry {
2196 RGWBucketInfo info;
2197 real_time mtime;
2198 map<string, bufferlist> attrs;
2199 };
2200
2201 struct tombstone_entry {
2202 ceph::real_time mtime;
2203 uint32_t zone_short_id;
2204 uint64_t pg_ver;
2205
2206 tombstone_entry() = default;
2207 tombstone_entry(const RGWObjState& state)
2208 : mtime(state.mtime), zone_short_id(state.zone_short_id),
2209 pg_ver(state.pg_ver) {}
2210 };
2211
2212 class RGWIndexCompletionManager;
2213
2214 class RGWRados
2215 {
2216 friend class RGWGC;
2217 friend class RGWMetaNotifier;
2218 friend class RGWDataNotifier;
2219 friend class RGWLC;
2220 friend class RGWObjectExpirer;
2221 friend class RGWMetaSyncProcessorThread;
2222 friend class RGWDataSyncProcessorThread;
2223 friend class RGWStateLog;
2224 friend class RGWReplicaLogger;
2225 friend class RGWReshard;
2226 friend class RGWBucketReshard;
2227 friend class BucketIndexLockGuard;
2228
2229 /** Open the pool used as root for this gateway */
2230 int open_root_pool_ctx();
2231 int open_gc_pool_ctx();
2232 int open_lc_pool_ctx();
2233 int open_objexp_pool_ctx();
2234 int open_reshard_pool_ctx();
2235
2236 int open_pool_ctx(const rgw_pool& pool, librados::IoCtx& io_ctx);
2237 int open_bucket_index_ctx(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx);
2238 int open_bucket_index(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx, string& bucket_oid);
2239 int open_bucket_index_base(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
2240 string& bucket_oid_base);
2241 int open_bucket_index_shard(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
2242 const string& obj_key, string *bucket_obj, int *shard_id);
2243 int open_bucket_index_shard(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
2244 int shard_id, string *bucket_obj);
2245 int open_bucket_index(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
2246 map<int, string>& bucket_objs, int shard_id = -1, map<int, string> *bucket_instance_ids = NULL);
2247 template<typename T>
2248 int open_bucket_index(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
2249 map<int, string>& oids, map<int, T>& bucket_objs,
2250 int shard_id = -1, map<int, string> *bucket_instance_ids = NULL);
2251 void build_bucket_index_marker(const string& shard_id_str, const string& shard_marker,
2252 string *marker);
2253
2254 void get_bucket_instance_ids(const RGWBucketInfo& bucket_info, int shard_id, map<int, string> *result);
2255
2256 std::atomic<int64_t> max_req_id = { 0 };
2257 Mutex lock;
2258 Mutex watchers_lock;
2259 SafeTimer *timer;
2260
2261 RGWGC *gc;
2262 RGWLC *lc;
2263 RGWObjectExpirer *obj_expirer;
2264 bool use_gc_thread;
2265 bool use_lc_thread;
2266 bool quota_threads;
2267 bool run_sync_thread;
2268 bool run_reshard_thread;
2269
2270 RGWAsyncRadosProcessor* async_rados;
2271
2272 RGWMetaNotifier *meta_notifier;
2273 RGWDataNotifier *data_notifier;
2274 RGWMetaSyncProcessorThread *meta_sync_processor_thread;
2275 map<string, RGWDataSyncProcessorThread *> data_sync_processor_threads;
2276
2277 RGWSyncLogTrimThread *sync_log_trimmer{nullptr};
2278
2279 Mutex meta_sync_thread_lock;
2280 Mutex data_sync_thread_lock;
2281
2282 int num_watchers;
2283 RGWWatcher **watchers;
2284 std::set<int> watchers_set;
2285 librados::IoCtx root_pool_ctx; // .rgw
2286 librados::IoCtx control_pool_ctx; // .rgw.control
2287 bool watch_initialized;
2288
2289 friend class RGWWatcher;
2290
2291 Mutex bucket_id_lock;
2292
2293 // This field represents the number of bucket index object shards
2294 uint32_t bucket_index_max_shards;
2295
2296 int get_obj_head_ioctx(const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::IoCtx *ioctx);
2297 int get_obj_head_ref(const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_rados_ref *ref);
2298 int get_system_obj_ref(const rgw_raw_obj& obj, rgw_rados_ref *ref);
2299 uint64_t max_bucket_id;
2300
2301 int get_olh_target_state(RGWObjectCtx& rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj,
2302 RGWObjState *olh_state, RGWObjState **target_state);
2303 int get_system_obj_state_impl(RGWObjectCtx *rctx, rgw_raw_obj& obj, RGWRawObjState **state, RGWObjVersionTracker *objv_tracker);
2304 int get_obj_state_impl(RGWObjectCtx *rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWObjState **state,
2305 bool follow_olh, bool assume_noent = false);
2306 int append_atomic_test(RGWObjectCtx *rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj,
2307 librados::ObjectOperation& op, RGWObjState **state);
2308
2309 int update_placement_map();
2310 int store_bucket_info(RGWBucketInfo& info, map<string, bufferlist> *pattrs, RGWObjVersionTracker *objv_tracker, bool exclusive);
2311
2312 void remove_rgw_head_obj(librados::ObjectWriteOperation& op);
2313 void cls_obj_check_prefix_exist(librados::ObjectOperation& op, const string& prefix, bool fail_if_exist);
2314 void cls_obj_check_mtime(librados::ObjectOperation& op, const real_time& mtime, bool high_precision_time, RGWCheckMTimeType type);
2315 protected:
2316 CephContext *cct;
2317
2318 std::vector<librados::Rados> rados;
2319 uint32_t next_rados_handle;
2320 RWLock handle_lock;
2321 std::map<pthread_t, int> rados_map;
2322
2323 using RGWChainedCacheImpl_bucket_info_entry = RGWChainedCacheImpl<bucket_info_entry>;
2324 RGWChainedCacheImpl_bucket_info_entry *binfo_cache;
2325
2326 using tombstone_cache_t = lru_map<rgw_obj, tombstone_entry>;
2327 tombstone_cache_t *obj_tombstone_cache;
2328
2329 librados::IoCtx gc_pool_ctx; // .rgw.gc
2330 librados::IoCtx lc_pool_ctx; // .rgw.lc
2331 librados::IoCtx objexp_pool_ctx;
2332 librados::IoCtx reshard_pool_ctx;
2333
2334 bool pools_initialized;
2335
2336 string trans_id_suffix;
2337
2338 RGWQuotaHandler *quota_handler;
2339
2340 Finisher *finisher;
2341
2342 RGWCoroutinesManagerRegistry *cr_registry;
2343
2344 RGWSyncModulesManager *sync_modules_manager{nullptr};
2345 RGWSyncModuleInstanceRef sync_module;
2346 bool writeable_zone{false};
2347
2348 RGWZoneGroup zonegroup;
2349 RGWZone zone_public_config; /* external zone params, e.g., entrypoints, log flags, etc. */
2350 RGWZoneParams zone_params; /* internal zone params, e.g., rados pools */
2351 uint32_t zone_short_id;
2352
2353 RGWPeriod current_period;
2354
2355 RGWIndexCompletionManager *index_completion_manager{nullptr};
2356 public:
2357 RGWRados() : lock("rados_timer_lock"), watchers_lock("watchers_lock"), timer(NULL),
2358 gc(NULL), lc(NULL), obj_expirer(NULL), use_gc_thread(false), use_lc_thread(false), quota_threads(false),
2359 run_sync_thread(false), run_reshard_thread(false), async_rados(nullptr), meta_notifier(NULL),
2360 data_notifier(NULL), meta_sync_processor_thread(NULL),
2361 meta_sync_thread_lock("meta_sync_thread_lock"), data_sync_thread_lock("data_sync_thread_lock"),
2362 num_watchers(0), watchers(NULL),
2363 watch_initialized(false),
2364 bucket_id_lock("rados_bucket_id"),
2365 bucket_index_max_shards(0),
2366 max_bucket_id(0), cct(NULL),
2367 next_rados_handle(0),
2368 handle_lock("rados_handle_lock"),
2369 binfo_cache(NULL), obj_tombstone_cache(nullptr),
2370 pools_initialized(false),
2371 quota_handler(NULL),
2372 finisher(NULL),
2373 cr_registry(NULL),
2374 zone_short_id(0),
2375 rest_master_conn(NULL),
2376 meta_mgr(NULL), data_log(NULL), reshard(NULL) {}
2377
2378 uint64_t get_new_req_id() {
2379 return ++max_req_id;
2380 }
2381
/* Returns the address of the lc_pool_ctx member. */
2382 librados::IoCtx* get_lc_pool_ctx() {
2383 return &lc_pool_ctx;
2384 }
/* Stores the given CephContext pointer in cct; nothing else is touched. */
2385 void set_context(CephContext *_cct) {
2386 cct = _cct;
2387 }
2388
2389 /**
2390 * AmazonS3 errors contain a HostId string, but is an opaque base64 blob; we
2391 * try to be more transparent. This has a wrapper so we can update it when zonegroup/zone are changed.
2392 */
2393 void init_host_id() {
2394 /* uint64_t needs 16, two '-' separators and a trailing null */
2395 const string& zone_name = get_zone().name;
2396 const string& zonegroup_name = zonegroup.get_name();
2397 char charbuf[16 + zone_name.size() + zonegroup_name.size() + 2 + 1];
2398 snprintf(charbuf, sizeof(charbuf), "%llx-%s-%s", (unsigned long long)instance_id(), zone_name.c_str(), zonegroup_name.c_str());
2399 string s(charbuf);
2400 host_id = s;
2401 }
2402
2403 string host_id;
2404
2405 RGWRealm realm;
2406
2407 RGWRESTConn *rest_master_conn;
2408 map<string, RGWRESTConn *> zone_conn_map;
2409 map<string, RGWRESTConn *> zone_data_sync_from_map;
2410 map<string, RGWRESTConn *> zone_data_notify_to_map;
2411 map<string, RGWRESTConn *> zonegroup_conn_map;
2412
2413 map<string, string> zone_id_by_name;
2414 map<string, RGWZone> zone_by_id;
2415
2416 RGWRESTConn *get_zone_conn_by_id(const string& id) {
2417 auto citer = zone_conn_map.find(id);
2418 if (citer == zone_conn_map.end()) {
2419 return NULL;
2420 }
2421
2422 return citer->second;
2423 }
2424
2425 RGWRESTConn *get_zone_conn_by_name(const string& name) {
2426 auto i = zone_id_by_name.find(name);
2427 if (i == zone_id_by_name.end()) {
2428 return NULL;
2429 }
2430
2431 return get_zone_conn_by_id(i->second);
2432 }
2433
2434 bool find_zone_id_by_name(const string& name, string *id) {
2435 auto i = zone_id_by_name.find(name);
2436 if (i == zone_id_by_name.end()) {
2437 return false;
2438 }
2439 *id = i->second;
2440 return true;
2441 }
2442
2443 int get_zonegroup(const string& id, RGWZoneGroup& zonegroup) {
2444 int ret = 0;
2445 if (id == get_zonegroup().get_id()) {
2446 zonegroup = get_zonegroup();
2447 } else if (!current_period.get_id().empty()) {
2448 ret = current_period.get_zonegroup(zonegroup, id);
2449 }
2450 return ret;
2451 }
2452
/* Mutable reference to the realm member. */
2453 RGWRealm& get_realm() {
2454 return realm;
2455 }
2456
/* Mutable reference to zone_params (commented at its declaration as the internal zone params). */
2457 RGWZoneParams& get_zone_params() { return zone_params; }
/* Mutable reference to the zonegroup member. */
2458 RGWZoneGroup& get_zonegroup() {
2459 return zonegroup;
2460 }
/* Mutable reference to zone_public_config (commented at its declaration as the external zone params). */
2461 RGWZone& get_zone() {
2462 return zone_public_config;
2463 }
2464
2465 bool zone_is_writeable() {
2466 return writeable_zone && !get_zone().is_read_only();
2467 }
2468
/* Returns the zone_short_id member. */
2469 uint32_t get_zone_short_id() const {
2470 return zone_short_id;
2471 }
2472
/* Declared here, defined elsewhere. */
2473 bool zone_syncs_from(RGWZone& target_zone, RGWZone& source_zone);
2474
/* Bucket quota taken from the config of current_period. */
2475 const RGWQuotaInfo& get_bucket_quota() {
2476 return current_period.get_config().bucket_quota;
2477 }
2478
/* User quota taken from the config of current_period. */
2479 const RGWQuotaInfo& get_user_quota() {
2480 return current_period.get_config().user_quota;
2481 }
2482
/* Id of current_period, as reported by its get_id(). */
2483 const string& get_current_period_id() {
2484 return current_period.get_id();
2485 }
2486
2487 bool has_zonegroup_api(const std::string& api) const {
2488 if (!current_period.get_id().empty()) {
2489 const auto& zonegroups_by_api = current_period.get_map().zonegroups_by_api;
2490 if (zonegroups_by_api.find(api) != zonegroups_by_api.end())
2491 return true;
2492 }
2493 return false;
2494 }
2495
2496 // pulls missing periods for period_history
2497 std::unique_ptr<RGWPeriodPuller> period_puller;
2498 // maintains a connected history of periods
2499 std::unique_ptr<RGWPeriodHistory> period_history;
2500
/* Returns the async_rados pointer (null after default construction). */
2501 RGWAsyncRadosProcessor* get_async_rados() const { return async_rados; };
2502
2503 RGWMetadataManager *meta_mgr;
2504
2505 RGWDataChangesLog *data_log;
2506
2507 RGWReshard *reshard;
2508 std::shared_ptr<RGWReshardWait> reshard_wait;
2509
2510 virtual ~RGWRados() = default;
2511
/* Returns the obj_tombstone_cache pointer (null after default construction). */
2512 tombstone_cache_t *get_tombstone_cache() {
2513 return obj_tombstone_cache;
2514 }
2515
/* Returns the sync_modules_manager pointer (null by default). */
2516 RGWSyncModulesManager *get_sync_modules_manager() {
2517 return sync_modules_manager;
2518 }
/* Returns a const reference to the sync_module member. */
2519 const RGWSyncModuleInstanceRef& get_sync_module() {
2520 return sync_module;
2521 }
2522
2523 int get_required_alignment(const rgw_pool& pool, uint64_t *alignment);
2524 int get_max_chunk_size(const rgw_pool& pool, uint64_t *max_chunk_size);
2525 int get_max_chunk_size(const string& placement_rule, const rgw_obj& obj, uint64_t *max_chunk_size);
2526
/* Forwards to rgw_shards_max(); the result is converted to uint32_t. */
2527 uint32_t get_max_bucket_shards() {
2528 return rgw_shards_max();
2529 }
2530
2531 int get_raw_obj_ref(const rgw_raw_obj& obj, rgw_rados_ref *ref);
2532
2533 int list_raw_objects(const rgw_pool& pool, const string& prefix_filter, int max,
2534 RGWListRawObjsCtx& ctx, list<string>& oids,
2535 bool *is_truncated);
2536
2537 int list_raw_prefixed_objs(const rgw_pool& pool, const string& prefix, list<string>& result);
2538 int list_zonegroups(list<string>& zonegroups);
2539 int list_regions(list<string>& regions);
2540 int list_zones(list<string>& zones);
2541 int list_realms(list<string>& realms);
2542 int list_periods(list<string>& periods);
2543 int list_periods(const string& current_period, list<string>& periods);
2544 void tick();
2545
/* Returns the cct pointer stored by set_context() (null after default construction). */
2546 CephContext *ctx() { return cct; }
2547 /** do all necessary setup of the storage device */
2548 int initialize(CephContext *_cct, bool _use_gc_thread, bool _use_lc_thread, bool _quota_threads, bool _run_sync_thread, bool _run_reshard_thread) {
2549 set_context(_cct);
2550 use_gc_thread = _use_gc_thread;
2551 use_lc_thread = _use_lc_thread;
2552 quota_threads = _quota_threads;
2553 run_sync_thread = _run_sync_thread;
2554 run_reshard_thread = _run_reshard_thread;
2555 return initialize();
2556 }
2557 /** Initialize the RADOS instance and prepare to do other ops */
2558 virtual int init_rados();
2559 int init_zg_from_period(bool *initialized);
2560 int init_zg_from_local(bool *creating_defaults);
2561 int init_complete();
2562 int replace_region_with_zonegroup();
2563 int convert_regionmap();
2564 int initialize();
2565 void finalize();
2566
2567 int register_to_service_map(const string& daemon_type, const map<string, string>& meta);
2568
2569 void schedule_context(Context *c);
2570
2571 /** set up a bucket listing. handle is filled in. */
2572 int list_buckets_init(RGWAccessHandle *handle);
2573 /**
2574 * get the next bucket in the listing. obj is filled in,
2575 * handle is updated.
2576 */
2577 int list_buckets_next(rgw_bucket_dir_entry& obj, RGWAccessHandle *handle);
2578
2579 /// list logs
2580 int log_list_init(const string& prefix, RGWAccessHandle *handle);
2581 int log_list_next(RGWAccessHandle handle, string *name);
2582
2583 /// remove log
2584 int log_remove(const string& name);
2585
2586 /// show log
2587 int log_show_init(const string& name, RGWAccessHandle *handle);
2588 int log_show_next(RGWAccessHandle handle, rgw_log_entry *entry);
2589
2590 // log bandwidth info
2591 int log_usage(map<rgw_user_bucket, RGWUsageBatch>& usage_info);
2592 int read_usage(const rgw_user& user, uint64_t start_epoch, uint64_t end_epoch, uint32_t max_entries,
2593 bool *is_truncated, RGWUsageIter& read_iter, map<rgw_user_bucket, rgw_usage_log_entry>& usage);
2594 int trim_usage(rgw_user& user, uint64_t start_epoch, uint64_t end_epoch);
2595
2596 int create_pool(const rgw_pool& pool);
2597
2598 /**
2599 * create a bucket with name bucket and the given list of attrs
2600 * returns 0 on success, -ERR# otherwise.
2601 */
2602 int init_bucket_index(RGWBucketInfo& bucket_info, int num_shards);
2603 int select_bucket_placement(RGWUserInfo& user_info, const string& zonegroup_id, const string& rule,
2604 string *pselected_rule_name, RGWZonePlacementInfo *rule_info);
2605 int select_legacy_bucket_placement(RGWZonePlacementInfo *rule_info);
2606 int select_new_bucket_location(RGWUserInfo& user_info, const string& zonegroup_id, const string& rule,
2607 string *pselected_rule_name, RGWZonePlacementInfo *rule_info);
2608 int select_bucket_location_by_rule(const string& location_rule, RGWZonePlacementInfo *rule_info);
2609 void create_bucket_id(string *bucket_id);
2610
2611 bool get_obj_data_pool(const string& placement_rule, const rgw_obj& obj, rgw_pool *pool);
2612 bool obj_to_raw(const string& placement_rule, const rgw_obj& obj, rgw_raw_obj *raw_obj);
2613
2614 int create_bucket(RGWUserInfo& owner, rgw_bucket& bucket,
2615 const string& zonegroup_id,
2616 const string& placement_rule,
2617 const string& swift_ver_location,
2618 const RGWQuotaInfo * pquota_info,
2619 map<std::string,bufferlist>& attrs,
2620 RGWBucketInfo& bucket_info,
2621 obj_version *pobjv,
2622 obj_version *pep_objv,
2623 ceph::real_time creation_time,
2624 rgw_bucket *master_bucket,
2625 uint32_t *master_num_shards,
2626 bool exclusive = true);
2627 int add_bucket_placement(const rgw_pool& new_pool);
2628 int remove_bucket_placement(const rgw_pool& new_pool);
2629 int list_placement_set(set<rgw_pool>& names);
2630 int create_pools(vector<rgw_pool>& pools, vector<int>& retcodes);
2631
/* Returns the cr_registry pointer (null after default construction). */
2632 RGWCoroutinesManagerRegistry *get_cr_registry() { return cr_registry; }
2633
2634 class SystemObject {
2635 RGWRados *store;
2636 RGWObjectCtx& ctx;
2637 rgw_raw_obj obj;
2638
2639 RGWObjState *state;
2640
2641 protected:
2642 int get_state(RGWRawObjState **pstate, RGWObjVersionTracker *objv_tracker);
2643
2644 public:
2645 SystemObject(RGWRados *_store, RGWObjectCtx& _ctx, rgw_raw_obj& _obj) : store(_store), ctx(_ctx), obj(_obj), state(NULL) {}
2646
2647 void invalidate_state();
2648
2649 RGWRados *get_store() { return store; }
2650 rgw_raw_obj& get_obj() { return obj; }
2651 RGWObjectCtx& get_ctx() { return ctx; }
2652
2653 struct Read {
2654 RGWRados::SystemObject *source;
2655
2656 struct GetObjState {
2657 rgw_rados_ref ref;
2658 bool has_ref{false};
2659 uint64_t last_ver{0};
2660
2661 GetObjState() {}
2662
2663 int get_ref(RGWRados *store, rgw_raw_obj& obj, rgw_rados_ref **pref);
2664 } state;
2665
2666 struct StatParams {
2667 ceph::real_time *lastmod;
2668 uint64_t *obj_size;
2669 map<string, bufferlist> *attrs;
2670
2671 StatParams() : lastmod(NULL), obj_size(NULL), attrs(NULL) {}
2672 } stat_params;
2673
2674 struct ReadParams {
2675 rgw_cache_entry_info *cache_info{nullptr};
2676 map<string, bufferlist> *attrs;
2677
2678 ReadParams() : attrs(NULL) {}
2679 } read_params;
2680
2681 explicit Read(RGWRados::SystemObject *_source) : source(_source) {}
2682
2683 int stat(RGWObjVersionTracker *objv_tracker);
2684 int read(int64_t ofs, int64_t end, bufferlist& bl, RGWObjVersionTracker *objv_tracker);
2685 int get_attr(const char *name, bufferlist& dest);
2686 };
2687 };
2688
2689 struct BucketShard {
2690 RGWRados *store;
2691 rgw_bucket bucket;
2692 int shard_id;
2693 librados::IoCtx index_ctx;
2694 string bucket_obj;
2695
2696 explicit BucketShard(RGWRados *_store) : store(_store), shard_id(-1) {}
2697 int init(const rgw_bucket& _bucket, const rgw_obj& obj);
2698 int init(const rgw_bucket& _bucket, int sid);
2699 };
2700
2701 class Object {
2702 RGWRados *store;
2703 RGWBucketInfo bucket_info;
2704 RGWObjectCtx& ctx;
2705 rgw_obj obj;
2706
2707 BucketShard bs;
2708
2709 RGWObjState *state;
2710
2711 bool versioning_disabled;
2712
2713 bool bs_initialized;
2714
2715 protected:
2716 int get_state(RGWObjState **pstate, bool follow_olh, bool assume_noent = false);
2717 void invalidate_state();
2718
2719 int prepare_atomic_modification(librados::ObjectWriteOperation& op, bool reset_obj, const string *ptag,
2720 const char *ifmatch, const char *ifnomatch, bool removal_op);
2721 int complete_atomic_modification();
2722
2723 public:
2724 Object(RGWRados *_store, const RGWBucketInfo& _bucket_info, RGWObjectCtx& _ctx, const rgw_obj& _obj) : store(_store), bucket_info(_bucket_info),
2725 ctx(_ctx), obj(_obj), bs(store),
2726 state(NULL), versioning_disabled(false),
2727 bs_initialized(false) {}
2728
2729 RGWRados *get_store() { return store; }
2730 rgw_obj& get_obj() { return obj; }
2731 RGWObjectCtx& get_ctx() { return ctx; }
2732 RGWBucketInfo& get_bucket_info() { return bucket_info; }
2733 int get_manifest(RGWObjManifest **pmanifest);
2734
2735 int get_bucket_shard(BucketShard **pbs) {
2736 if (!bs_initialized) {
2737 int r = bs.init(bucket_info.bucket, obj);
2738 if (r < 0) {
2739 return r;
2740 }
2741 bs_initialized = true;
2742 }
2743 *pbs = &bs;
2744 return 0;
2745 }
2746
2747 void set_versioning_disabled(bool status) {
2748 versioning_disabled = status;
2749 }
2750
2751 bool versioning_enabled() {
2752 return (!versioning_disabled && bucket_info.versioning_enabled());
2753 }
2754
2755 struct Read {
2756 RGWRados::Object *source;
2757
2758 struct GetObjState {
2759 librados::IoCtx io_ctx;
2760 rgw_obj obj;
2761 rgw_raw_obj head_obj;
2762 } state;
2763
2764 struct ConditionParams {
2765 const ceph::real_time *mod_ptr;
2766 const ceph::real_time *unmod_ptr;
2767 bool high_precision_time;
2768 uint32_t mod_zone_id;
2769 uint64_t mod_pg_ver;
2770 const char *if_match;
2771 const char *if_nomatch;
2772
2773 ConditionParams() :
2774 mod_ptr(NULL), unmod_ptr(NULL), high_precision_time(false), mod_zone_id(0), mod_pg_ver(0),
2775 if_match(NULL), if_nomatch(NULL) {}
2776 } conds;
2777
2778 struct Params {
2779 ceph::real_time *lastmod;
2780 uint64_t *obj_size;
2781 map<string, bufferlist> *attrs;
2782
2783 Params() : lastmod(NULL), obj_size(NULL), attrs(NULL) {}
2784 } params;
2785
2786 explicit Read(RGWRados::Object *_source) : source(_source) {}
2787
2788 int prepare();
2789 static int range_to_ofs(uint64_t obj_size, int64_t &ofs, int64_t &end);
2790 int read(int64_t ofs, int64_t end, bufferlist& bl);
2791 int iterate(int64_t ofs, int64_t end, RGWGetDataCB *cb);
2792 int get_attr(const char *name, bufferlist& dest);
2793 };
2794
2795 struct Write {
2796 RGWRados::Object *target;
2797
2798 struct MetaParams {
2799 ceph::real_time *mtime;
2800 map<std::string, bufferlist>* rmattrs;
2801 const bufferlist *data;
2802 RGWObjManifest *manifest;
2803 const string *ptag;
2804 list<rgw_obj_index_key> *remove_objs;
2805 ceph::real_time set_mtime;
2806 rgw_user owner;
2807 RGWObjCategory category;
2808 int flags;
2809 const char *if_match;
2810 const char *if_nomatch;
2811 uint64_t olh_epoch;
2812 ceph::real_time delete_at;
2813 bool canceled;
2814 const string *user_data;
2815 rgw_zone_set *zones_trace;
2816
2817 MetaParams() : mtime(NULL), rmattrs(NULL), data(NULL), manifest(NULL), ptag(NULL),
2818 remove_objs(NULL), category(RGW_OBJ_CATEGORY_MAIN), flags(0),
2819 if_match(NULL), if_nomatch(NULL), olh_epoch(0), canceled(false), user_data(nullptr), zones_trace(nullptr) {}
2820 } meta;
2821
2822 explicit Write(RGWRados::Object *_target) : target(_target) {}
2823
2824 int _do_write_meta(uint64_t size, uint64_t accounted_size,
2825 map<std::string, bufferlist>& attrs,
2826 bool assume_noent,
2827 void *index_op);
2828 int write_meta(uint64_t size, uint64_t accounted_size,
2829 map<std::string, bufferlist>& attrs);
2830 int write_data(const char *data, uint64_t ofs, uint64_t len, bool exclusive);
2831 };
2832
2833 struct Delete {
2834 RGWRados::Object *target;
2835
2836 struct DeleteParams {
2837 rgw_user bucket_owner;
2838 int versioning_status;
2839 ACLOwner obj_owner; /* needed for creation of deletion marker */
2840 uint64_t olh_epoch;
2841 string marker_version_id;
2842 uint32_t bilog_flags;
2843 list<rgw_obj_index_key> *remove_objs;
2844 ceph::real_time expiration_time;
2845 ceph::real_time unmod_since;
2846 ceph::real_time mtime; /* for setting delete marker mtime */
2847 bool high_precision_time;
2848 rgw_zone_set *zones_trace;
2849
2850 DeleteParams() : versioning_status(0), olh_epoch(0), bilog_flags(0), remove_objs(NULL), high_precision_time(false), zones_trace(nullptr) {}
2851 } params;
2852
2853 struct DeleteResult {
2854 bool delete_marker;
2855 string version_id;
2856
2857 DeleteResult() : delete_marker(false) {}
2858 } result;
2859
2860 explicit Delete(RGWRados::Object *_target) : target(_target) {}
2861
2862 int delete_obj();
2863 };
2864
2865 struct Stat {
2866 RGWRados::Object *source;
2867
2868 struct Result {
2869 rgw_obj obj;
2870 RGWObjManifest manifest;
2871 bool has_manifest;
2872 uint64_t size;
2873 struct timespec mtime;
2874 map<string, bufferlist> attrs;
2875
2876 Result() : has_manifest(false), size(0) {}
2877 } result;
2878
2879 struct State {
2880 librados::IoCtx io_ctx;
2881 librados::AioCompletion *completion;
2882 int ret;
2883
2884 State() : completion(NULL), ret(0) {}
2885 } state;
2886
2887
2888 explicit Stat(RGWRados::Object *_source) : source(_source) {}
2889
2890 int stat_async();
2891 int wait();
2892 int stat();
2893 private:
2894 int finish();
2895 };
2896 };
2897
2898 class Bucket {
2899 RGWRados *store;
2900 RGWBucketInfo bucket_info;
2901 rgw_bucket& bucket;
2902 int shard_id;
2903
2904 public:
2905 Bucket(RGWRados *_store, const RGWBucketInfo& _bucket_info) : store(_store), bucket_info(_bucket_info), bucket(bucket_info.bucket),
2906 shard_id(RGW_NO_SHARD) {}
2907 RGWRados *get_store() { return store; }
2908 rgw_bucket& get_bucket() { return bucket; }
2909 RGWBucketInfo& get_bucket_info() { return bucket_info; }
2910
2911 int update_bucket_id(const string& new_bucket_id);
2912
2913 int get_shard_id() { return shard_id; }
2914 void set_shard_id(int id) {
2915 shard_id = id;
2916 }
2917
2918 class UpdateIndex {
2919 RGWRados::Bucket *target;
2920 string optag;
2921 rgw_obj obj;
2922 uint16_t bilog_flags{0};
2923 BucketShard bs;
2924 bool bs_initialized{false};
2925 bool blind;
2926 bool prepared{false};
2927 rgw_zone_set *zones_trace{nullptr};
2928
2929 int init_bs() {
2930 int r = bs.init(target->get_bucket(), obj);
2931 if (r < 0) {
2932 return r;
2933 }
2934 bs_initialized = true;
2935 return 0;
2936 }
2937
2938 void invalidate_bs() {
2939 bs_initialized = false;
2940 }
2941
2942 int guard_reshard(BucketShard **pbs, std::function<int(BucketShard *)> call);
2943 public:
2944
2945 UpdateIndex(RGWRados::Bucket *_target, const rgw_obj& _obj) : target(_target), obj(_obj),
2946 bs(target->get_store()) {
2947 blind = (target->get_bucket_info().index_type == RGWBIType_Indexless);
2948 }
2949
2950 int get_bucket_shard(BucketShard **pbs) {
2951 if (!bs_initialized) {
2952 int r = init_bs();
2953 if (r < 0) {
2954 return r;
2955 }
2956 }
2957 *pbs = &bs;
2958 return 0;
2959 }
2960
2961 void set_bilog_flags(uint16_t flags) {
2962 bilog_flags = flags;
2963 }
2964
2965 void set_zones_trace(rgw_zone_set *_zones_trace) {
2966 zones_trace = _zones_trace;
2967 }
2968
2969 int prepare(RGWModifyOp, const string *write_tag);
2970 int complete(int64_t poolid, uint64_t epoch, uint64_t size,
2971 uint64_t accounted_size, ceph::real_time& ut,
2972 const string& etag, const string& content_type,
2973 bufferlist *acl_bl, RGWObjCategory category,
2974 list<rgw_obj_index_key> *remove_objs, const string *user_data = nullptr);
2975 int complete_del(int64_t poolid, uint64_t epoch,
2976 ceph::real_time& removed_mtime, /* mtime of removed object */
2977 list<rgw_obj_index_key> *remove_objs);
2978 int cancel();
2979
2980 const string *get_optag() { return &optag; }
2981
2982 bool is_prepared() { return prepared; }
2983 };
2984
2985 struct List {
2986 RGWRados::Bucket *target;
2987 rgw_obj_key next_marker;
2988
2989 struct Params {
2990 string prefix;
2991 string delim;
2992 rgw_obj_key marker;
2993 rgw_obj_key end_marker;
2994 string ns;
2995 bool enforce_ns;
2996 RGWAccessListFilter *filter;
2997 bool list_versions;
2998
2999 Params() : enforce_ns(true), filter(NULL), list_versions(false) {}
3000 } params;
3001
3002 public:
3003 explicit List(RGWRados::Bucket *_target) : target(_target) {}
3004
3005 int list_objects(int max, vector<rgw_bucket_dir_entry> *result, map<string, bool> *common_prefixes, bool *is_truncated);
3006 rgw_obj_key& get_next_marker() {
3007 return next_marker;
3008 }
3009 };
3010 };
3011
3012 /** Write/overwrite an object to the bucket storage. */
3013 virtual int put_system_obj_impl(rgw_raw_obj& obj, uint64_t size, ceph::real_time *mtime,
3014 map<std::string, bufferlist>& attrs, int flags,
3015 bufferlist& data,
3016 RGWObjVersionTracker *objv_tracker,
3017 ceph::real_time set_mtime /* 0 for don't set */);
3018
3019 virtual int put_system_obj_data(void *ctx, rgw_raw_obj& obj, bufferlist& bl,
3020 off_t ofs, bool exclusive,
3021 RGWObjVersionTracker *objv_tracker = nullptr);
3022 int aio_put_obj_data(void *ctx, rgw_raw_obj& obj, bufferlist& bl,
3023 off_t ofs, bool exclusive, void **handle);
3024
3025 int put_system_obj(void *ctx, rgw_raw_obj& obj, const char *data, size_t len, bool exclusive,
3026 ceph::real_time *mtime, map<std::string, bufferlist>& attrs, RGWObjVersionTracker *objv_tracker,
3027 ceph::real_time set_mtime) {
3028 bufferlist bl;
3029 bl.append(data, len);
3030 int flags = PUT_OBJ_CREATE;
3031 if (exclusive)
3032 flags |= PUT_OBJ_EXCL;
3033
3034 return put_system_obj_impl(obj, len, mtime, attrs, flags, bl, objv_tracker, set_mtime);
3035 }
3036 int aio_wait(void *handle);
3037 bool aio_completed(void *handle);
3038
3039 int on_last_entry_in_listing(RGWBucketInfo& bucket_info,
3040 const std::string& obj_prefix,
3041 const std::string& obj_delim,
3042 std::function<int(const rgw_bucket_dir_entry&)> handler);
3043
3044 bool swift_versioning_enabled(const RGWBucketInfo& bucket_info) const {
3045 return bucket_info.has_swift_versioning() &&
3046 bucket_info.swift_ver_location.size();
3047 }
3048
3049 int swift_versioning_copy(RGWObjectCtx& obj_ctx, /* in/out */
3050 const rgw_user& user, /* in */
3051 RGWBucketInfo& bucket_info, /* in */
3052 rgw_obj& obj); /* in */
3053 int swift_versioning_restore(RGWObjectCtx& obj_ctx, /* in/out */
3054 const rgw_user& user, /* in */
3055 RGWBucketInfo& bucket_info, /* in */
3056 rgw_obj& obj, /* in */
3057 bool& restored); /* out */
3058 int copy_obj_to_remote_dest(RGWObjState *astate,
3059 map<string, bufferlist>& src_attrs,
3060 RGWRados::Object::Read& read_op,
3061 const rgw_user& user_id,
3062 rgw_obj& dest_obj,
3063 ceph::real_time *mtime);
3064
/* How the `attrs` argument of the copy/fetch calls is combined with the source object's attributes (see the copy_obj() comment). */
3065 enum AttrsMod {
/* source attributes are copied unmodified; attrs is ignored */
3066 ATTRSMOD_NONE = 0,
/* the new object gets exactly attrs; source attributes are not copied */
3067 ATTRSMOD_REPLACE = 1,
/* source attributes are copied, with conflicting keys overwritten by attrs */
3068 ATTRSMOD_MERGE = 2
3069 };
3070
3071 int rewrite_obj(RGWBucketInfo& dest_bucket_info, rgw_obj& obj);
3072
3073 int stat_remote_obj(RGWObjectCtx& obj_ctx,
3074 const rgw_user& user_id,
3075 const string& client_id,
3076 req_info *info,
3077 const string& source_zone,
3078 rgw_obj& src_obj,
3079 RGWBucketInfo& src_bucket_info,
3080 real_time *src_mtime,
3081 uint64_t *psize,
3082 const real_time *mod_ptr,
3083 const real_time *unmod_ptr,
3084 bool high_precision_time,
3085 const char *if_match,
3086 const char *if_nomatch,
3087 map<string, bufferlist> *pattrs,
3088 string *version_id,
3089 string *ptag,
3090 string *petag);
3091
3092 int fetch_remote_obj(RGWObjectCtx& obj_ctx,
3093 const rgw_user& user_id,
3094 const string& client_id,
3095 const string& op_id,
3096 bool record_op_state,
3097 req_info *info,
3098 const string& source_zone,
3099 rgw_obj& dest_obj,
3100 rgw_obj& src_obj,
3101 RGWBucketInfo& dest_bucket_info,
3102 RGWBucketInfo& src_bucket_info,
3103 ceph::real_time *src_mtime,
3104 ceph::real_time *mtime,
3105 const ceph::real_time *mod_ptr,
3106 const ceph::real_time *unmod_ptr,
3107 bool high_precision_time,
3108 const char *if_match,
3109 const char *if_nomatch,
3110 AttrsMod attrs_mod,
3111 bool copy_if_newer,
3112 map<string, bufferlist>& attrs,
3113 RGWObjCategory category,
3114 uint64_t olh_epoch,
3115 ceph::real_time delete_at,
3116 string *version_id,
3117 string *ptag,
3118 ceph::buffer::list *petag,
3119 void (*progress_cb)(off_t, void *),
3120 void *progress_data,
3121 rgw_zone_set *zones_trace= nullptr);
3122 /**
3123 * Copy an object.
3124 * dest_obj: the object to copy into
3125 * src_obj: the object to copy from
3126 * attrs: usage depends on attrs_mod parameter
3127 * attrs_mod: the modification mode of the attrs, may have the following values:
3128 * ATTRSMOD_NONE - the attributes of the source object will be
3129 * copied without modifications, attrs parameter is ignored;
3130 * ATTRSMOD_REPLACE - new object will have the attributes provided by attrs
3131 * parameter, source object attributes are not copied;
3132 * ATTRSMOD_MERGE - any conflicting meta keys on the source object's attributes
3133 * are overwritten by values contained in attrs parameter.
3134 * Returns: 0 on success, -ERR# otherwise.
3135 */
3136 int copy_obj(RGWObjectCtx& obj_ctx,
3137 const rgw_user& user_id,
3138 const string& client_id,
3139 const string& op_id,
3140 req_info *info,
3141 const string& source_zone,
3142 rgw_obj& dest_obj,
3143 rgw_obj& src_obj,
3144 RGWBucketInfo& dest_bucket_info,
3145 RGWBucketInfo& src_bucket_info,
3146 ceph::real_time *src_mtime,
3147 ceph::real_time *mtime,
3148 const ceph::real_time *mod_ptr,
3149 const ceph::real_time *unmod_ptr,
3150 bool high_precision_time,
3151 const char *if_match,
3152 const char *if_nomatch,
3153 AttrsMod attrs_mod,
3154 bool copy_if_newer,
3155 map<std::string, bufferlist>& attrs,
3156 RGWObjCategory category,
3157 uint64_t olh_epoch,
3158 ceph::real_time delete_at,
3159 string *version_id,
3160 string *ptag,
3161 ceph::buffer::list *petag,
3162 void (*progress_cb)(off_t, void *),
3163 void *progress_data);
3164
3165 int copy_obj_data(RGWObjectCtx& obj_ctx,
3166 RGWBucketInfo& dest_bucket_info,
3167 RGWRados::Object::Read& read_op, off_t end,
3168 rgw_obj& dest_obj,
3169 rgw_obj& src_obj,
3170 uint64_t max_chunk_size,
3171 ceph::real_time *mtime,
3172 ceph::real_time set_mtime,
3173 map<string, bufferlist>& attrs,
3174 RGWObjCategory category,
3175 uint64_t olh_epoch,
3176 ceph::real_time delete_at,
3177 string *version_id,
3178 string *ptag,
3179 ceph::buffer::list *petag);
3180
3181 int check_bucket_empty(RGWBucketInfo& bucket_info);
3182
3183 /**
3184 * Delete a bucket.
3185 * bucket: the name of the bucket to delete
3186 * Returns 0 on success, -ERR# otherwise.
3187 */
3188 int delete_bucket(RGWBucketInfo& bucket_info, RGWObjVersionTracker& objv_tracker, bool check_empty = true);
3189
3190 bool is_meta_master();
3191
3192 /**
3193 * Check to see if the bucket metadata is synced
3194 */
3195 bool is_syncing_bucket_meta(const rgw_bucket& bucket);
3196 void wakeup_meta_sync_shards(set<int>& shard_ids);
3197 void wakeup_data_sync_shards(const string& source_zone, map<int, set<string> >& shard_ids);
3198
3199 RGWMetaSyncStatusManager* get_meta_sync_manager();
3200 RGWDataSyncStatusManager* get_data_sync_manager(const std::string& source_zone);
3201
3202 int set_bucket_owner(rgw_bucket& bucket, ACLOwner& owner);
3203 int set_buckets_enabled(std::vector<rgw_bucket>& buckets, bool enabled);
3204 int bucket_suspended(rgw_bucket& bucket, bool *suspended);
3205
3206 /** Delete an object.*/
3207 int delete_obj(RGWObjectCtx& obj_ctx,
3208 const RGWBucketInfo& bucket_owner,
3209 const rgw_obj& src_obj,
3210 int versioning_status,
3211 uint16_t bilog_flags = 0,
3212 const ceph::real_time& expiration_time = ceph::real_time(),
3213 rgw_zone_set *zones_trace = nullptr);
3214
3215 /** Delete a raw object.*/
3216 int delete_raw_obj(const rgw_raw_obj& obj);
3217
3218 /* Delete a system object */
3219 virtual int delete_system_obj(rgw_raw_obj& src_obj, RGWObjVersionTracker *objv_tracker = NULL);
3220
3221 /** Remove an object from the bucket index */
3222 int delete_obj_index(const rgw_obj& obj);
3223
3224 /**
3225 * Get an attribute for a system object.
3226 * obj: the object to get attr
3227 * name: name of the attr to retrieve
3228 * dest: bufferlist to store the result in
3229 * Returns: 0 on success, -ERR# otherwise.
3230 */
3231 virtual int system_obj_get_attr(rgw_raw_obj& obj, const char *name, bufferlist& dest);
3232
3233 int system_obj_set_attr(void *ctx, rgw_raw_obj& obj, const char *name, bufferlist& bl,
3234 RGWObjVersionTracker *objv_tracker);
3235 virtual int system_obj_set_attrs(void *ctx, rgw_raw_obj& obj,
3236 map<string, bufferlist>& attrs,
3237 map<string, bufferlist>* rmattrs,
3238 RGWObjVersionTracker *objv_tracker);
3239
3240 /**
3241 * Set an attr on an object.
3242 * bucket: name of the bucket holding the object
3243 * obj: name of the object to set the attr on
3244 * name: the attr to set
3245 * bl: the contents of the attr
3246 * Returns: 0 on success, -ERR# otherwise.
3247 */
3248 int set_attr(void *ctx, const RGWBucketInfo& bucket_info, rgw_obj& obj, const char *name, bufferlist& bl);
3249
3250 int set_attrs(void *ctx, const RGWBucketInfo& bucket_info, rgw_obj& obj,
3251 map<string, bufferlist>& attrs,
3252 map<string, bufferlist>* rmattrs);
3253
3254 int get_system_obj_state(RGWObjectCtx *rctx, rgw_raw_obj& obj, RGWRawObjState **state, RGWObjVersionTracker *objv_tracker);
3255 int get_obj_state(RGWObjectCtx *rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWObjState **state,
3256 bool follow_olh, bool assume_noent = false);
3257 int get_obj_state(RGWObjectCtx *rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWObjState **state) {
3258 return get_obj_state(rctx, bucket_info, obj, state, true);
3259 }
3260
3261 virtual int stat_system_obj(RGWObjectCtx& obj_ctx,
3262 RGWRados::SystemObject::Read::GetObjState& state,
3263 rgw_raw_obj& obj,
3264 map<string, bufferlist> *attrs,
3265 ceph::real_time *lastmod,
3266 uint64_t *obj_size,
3267 RGWObjVersionTracker *objv_tracker);
3268
3269 virtual int get_system_obj(RGWObjectCtx& obj_ctx, RGWRados::SystemObject::Read::GetObjState& read_state,
3270 RGWObjVersionTracker *objv_tracker, rgw_raw_obj& obj,
3271 bufferlist& bl, off_t ofs, off_t end,
3272 map<string, bufferlist> *attrs,
3273 rgw_cache_entry_info *cache_info);
3274
3275 virtual void register_chained_cache(RGWChainedCache *cache) {}
3276 virtual bool chain_cache_entry(list<rgw_cache_entry_info *>& cache_info_entries, RGWChainedCache::Entry *chained_entry) { return false; }
3277
3278 int iterate_obj(RGWObjectCtx& ctx,
3279 const RGWBucketInfo& bucket_info, const rgw_obj& obj,
3280 off_t ofs, off_t end,
3281 uint64_t max_chunk_size,
3282 int (*iterate_obj_cb)(const RGWBucketInfo& bucket_info, const rgw_obj& obj, const rgw_raw_obj&, off_t, off_t, off_t, bool, RGWObjState *, void *),
3283 void *arg);
3284
3285 int flush_read_list(struct get_obj_data *d);
3286
3287 int get_obj_iterate_cb(RGWObjectCtx *ctx, RGWObjState *astate,
3288 const RGWBucketInfo& bucket_info, const rgw_obj& obj,
3289 const rgw_raw_obj& read_obj,
3290 off_t obj_ofs, off_t read_ofs, off_t len,
3291 bool is_head_obj, void *arg);
3292
3293 void get_obj_aio_completion_cb(librados::completion_t cb, void *arg);
3294
3295 /**
3296 * a simple object read without keeping state
3297 */
3298
3299 virtual int raw_obj_stat(rgw_raw_obj& obj, uint64_t *psize, ceph::real_time *pmtime, uint64_t *epoch,
3300 map<string, bufferlist> *attrs, bufferlist *first_chunk,
3301 RGWObjVersionTracker *objv_tracker);
3302
3303 int obj_operate(const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::ObjectWriteOperation *op);
3304 int obj_operate(const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::ObjectReadOperation *op);
3305
3306 int guard_reshard(BucketShard *bs, const rgw_obj& obj_instance, std::function<int(BucketShard *)> call);
3307 int block_while_resharding(RGWRados::BucketShard *bs, string *new_bucket_id);
3308
3309 void bucket_index_guard_olh_op(RGWObjState& olh_state, librados::ObjectOperation& op);
3310 int olh_init_modification(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& olh_obj, string *op_tag);
3311 int olh_init_modification_impl(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& olh_obj, string *op_tag);
3312 int bucket_index_link_olh(const RGWBucketInfo& bucket_info, RGWObjState& olh_state,
3313 const rgw_obj& obj_instance, bool delete_marker,
3314 const string& op_tag, struct rgw_bucket_dir_entry_meta *meta,
3315 uint64_t olh_epoch,
3316 ceph::real_time unmod_since, bool high_precision_time, rgw_zone_set *zones_trace = nullptr);
3317 int bucket_index_unlink_instance(const RGWBucketInfo& bucket_info, const rgw_obj& obj_instance, const string& op_tag, const string& olh_tag, uint64_t olh_epoch, rgw_zone_set *zones_trace = nullptr);
3318 int bucket_index_read_olh_log(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& obj_instance, uint64_t ver_marker,
3319 map<uint64_t, vector<rgw_bucket_olh_log_entry> > *log, bool *is_truncated);
3320 int bucket_index_trim_olh_log(const RGWBucketInfo& bucket_info, RGWObjState& obj_state, const rgw_obj& obj_instance, uint64_t ver);
3321 int bucket_index_clear_olh(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& obj_instance);
3322 int apply_olh_log(RGWObjectCtx& ctx, RGWObjState& obj_state, const RGWBucketInfo& bucket_info, const rgw_obj& obj,
3323 bufferlist& obj_tag, map<uint64_t, vector<rgw_bucket_olh_log_entry> >& log,
3324 uint64_t *plast_ver, rgw_zone_set *zones_trace = nullptr);
3325 int update_olh(RGWObjectCtx& obj_ctx, RGWObjState *state, const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_zone_set *zones_trace = nullptr);
3326 int set_olh(RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info, const rgw_obj& target_obj, bool delete_marker, rgw_bucket_dir_entry_meta *meta,
3327 uint64_t olh_epoch, ceph::real_time unmod_since, bool high_precision_time, rgw_zone_set *zones_trace = nullptr);
3328 int unlink_obj_instance(RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info, const rgw_obj& target_obj,
3329 uint64_t olh_epoch, rgw_zone_set *zones_trace = nullptr);
3330
3331 void check_pending_olh_entries(map<string, bufferlist>& pending_entries, map<string, bufferlist> *rm_pending_entries);
3332 int remove_olh_pending_entries(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& olh_obj, map<string, bufferlist>& pending_attrs);
3333 int follow_olh(const RGWBucketInfo& bucket_info, RGWObjectCtx& ctx, RGWObjState *state, const rgw_obj& olh_obj, rgw_obj *target);
3334 int get_olh(const RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWOLHInfo *olh);
3335
3336 void gen_rand_obj_instance_name(rgw_obj *target);
3337
3338 int omap_get_vals(rgw_raw_obj& obj, bufferlist& header, const std::string& marker, uint64_t count, std::map<string, bufferlist>& m);
3339 int omap_get_all(rgw_raw_obj& obj, bufferlist& header, std::map<string, bufferlist>& m);
3340 int omap_set(rgw_raw_obj& obj, const std::string& key, bufferlist& bl);
3341 int omap_set(rgw_raw_obj& obj, map<std::string, bufferlist>& m);
3342 int omap_del(rgw_raw_obj& obj, const std::string& key);
3343 int update_containers_stats(map<string, RGWBucketEnt>& m);
3344 int append_async(rgw_raw_obj& obj, size_t size, bufferlist& bl);
3345
3346 int watch(const string& oid, uint64_t *watch_handle, librados::WatchCtx2 *ctx);
3347 int unwatch(uint64_t watch_handle);
3348 void add_watcher(int i);
3349 void remove_watcher(int i);
3350 virtual bool need_watch_notify() { return false; }
3351 int init_watch();
3352 void finalize_watch();
3353 int distribute(const string& key, bufferlist& bl);
3354 virtual int watch_cb(uint64_t notify_id,
3355 uint64_t cookie,
3356 uint64_t notifier_id,
3357 bufferlist& bl) { return 0; }
3358 void pick_control_oid(const string& key, string& notify_oid);
3359
3360 virtual void set_cache_enabled(bool state) {}
3361
3362 void set_atomic(void *ctx, rgw_obj& obj) {
3363 RGWObjectCtx *rctx = static_cast<RGWObjectCtx *>(ctx);
3364 rctx->obj.set_atomic(obj);
3365 }
3366 void set_prefetch_data(void *ctx, rgw_obj& obj) {
3367 RGWObjectCtx *rctx = static_cast<RGWObjectCtx *>(ctx);
3368 rctx->obj.set_prefetch_data(obj);
3369 }
3370 void set_prefetch_data(void *ctx, rgw_raw_obj& obj) {
3371 RGWObjectCtx *rctx = static_cast<RGWObjectCtx *>(ctx);
3372 rctx->raw.set_prefetch_data(obj);
3373 }
3374
3375 int decode_policy(bufferlist& bl, ACLOwner *owner);
3376 int get_bucket_stats(RGWBucketInfo& bucket_info, int shard_id, string *bucket_ver, string *master_ver,
3377 map<RGWObjCategory, RGWStorageStats>& stats, string *max_marker);
3378 int get_bucket_stats_async(RGWBucketInfo& bucket_info, int shard_id, RGWGetBucketStats_CB *cb);
3379 int get_user_stats(const rgw_user& user, RGWStorageStats& stats);
3380 int get_user_stats_async(const rgw_user& user, RGWGetUserStats_CB *cb);
3381 void get_bucket_instance_obj(const rgw_bucket& bucket, rgw_raw_obj& obj);
3382 void get_bucket_meta_oid(const rgw_bucket& bucket, string& oid);
3383
3384 int put_bucket_entrypoint_info(const string& tenant_name, const string& bucket_name, RGWBucketEntryPoint& entry_point,
3385 bool exclusive, RGWObjVersionTracker& objv_tracker, ceph::real_time mtime,
3386 map<string, bufferlist> *pattrs);
3387 int put_bucket_instance_info(RGWBucketInfo& info, bool exclusive, ceph::real_time mtime, map<string, bufferlist> *pattrs);
3388 int get_bucket_entrypoint_info(RGWObjectCtx& obj_ctx, const string& tenant_name, const string& bucket_name,
3389 RGWBucketEntryPoint& entry_point, RGWObjVersionTracker *objv_tracker,
3390 ceph::real_time *pmtime, map<string, bufferlist> *pattrs, rgw_cache_entry_info *cache_info = NULL);
3391 int get_bucket_instance_info(RGWObjectCtx& obj_ctx, const string& meta_key, RGWBucketInfo& info, ceph::real_time *pmtime, map<string, bufferlist> *pattrs);
3392 int get_bucket_instance_info(RGWObjectCtx& obj_ctx, const rgw_bucket& bucket, RGWBucketInfo& info, ceph::real_time *pmtime, map<string, bufferlist> *pattrs);
3393 int get_bucket_instance_from_oid(RGWObjectCtx& obj_ctx, const string& oid, RGWBucketInfo& info, ceph::real_time *pmtime, map<string, bufferlist> *pattrs,
3394 rgw_cache_entry_info *cache_info = NULL);
3395
3396 int convert_old_bucket_info(RGWObjectCtx& obj_ctx, const string& tenant_name, const string& bucket_name);
3397 static void make_bucket_entry_name(const string& tenant_name, const string& bucket_name, string& bucket_entry);
3398 int get_bucket_info(RGWObjectCtx& obj_ctx,
3399 const string& tenant_name, const string& bucket_name,
3400 RGWBucketInfo& info,
3401 ceph::real_time *pmtime, map<string, bufferlist> *pattrs = NULL);
3402 int put_linked_bucket_info(RGWBucketInfo& info, bool exclusive, ceph::real_time mtime, obj_version *pep_objv,
3403 map<string, bufferlist> *pattrs, bool create_entry_point);
3404
3405 int cls_rgw_init_index(librados::IoCtx& io_ctx, librados::ObjectWriteOperation& op, string& oid);
3406 int cls_obj_prepare_op(BucketShard& bs, RGWModifyOp op, string& tag, rgw_obj& obj, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
3407 int cls_obj_complete_op(BucketShard& bs, const rgw_obj& obj, RGWModifyOp op, string& tag, int64_t pool, uint64_t epoch,
3408 rgw_bucket_dir_entry& ent, RGWObjCategory category, list<rgw_obj_index_key> *remove_objs, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
3409 int cls_obj_complete_add(BucketShard& bs, const rgw_obj& obj, string& tag, int64_t pool, uint64_t epoch, rgw_bucket_dir_entry& ent,
3410 RGWObjCategory category, list<rgw_obj_index_key> *remove_objs, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
3411 int cls_obj_complete_del(BucketShard& bs, string& tag, int64_t pool, uint64_t epoch, rgw_obj& obj,
3412 ceph::real_time& removed_mtime, list<rgw_obj_index_key> *remove_objs, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
3413 int cls_obj_complete_cancel(BucketShard& bs, string& tag, rgw_obj& obj, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
3414 int cls_obj_set_bucket_tag_timeout(RGWBucketInfo& bucket_info, uint64_t timeout);
3415 int cls_bucket_list(RGWBucketInfo& bucket_info, int shard_id, rgw_obj_index_key& start, const string& prefix,
3416 uint32_t num_entries, bool list_versions, map<string, rgw_bucket_dir_entry>& m,
3417 bool *is_truncated, rgw_obj_index_key *last_entry,
3418 bool (*force_check_filter)(const string& name) = NULL);
3419 int cls_bucket_head(const RGWBucketInfo& bucket_info, int shard_id, map<string, struct rgw_bucket_dir_header>& headers, map<int, string> *bucket_instance_ids = NULL);
3420 int cls_bucket_head_async(const RGWBucketInfo& bucket_info, int shard_id, RGWGetDirHeader_CB *ctx, int *num_aio);
3421 int list_bi_log_entries(RGWBucketInfo& bucket_info, int shard_id, string& marker, uint32_t max, std::list<rgw_bi_log_entry>& result, bool *truncated);
3422 int trim_bi_log_entries(RGWBucketInfo& bucket_info, int shard_id, string& marker, string& end_marker);
3423 int get_bi_log_status(RGWBucketInfo& bucket_info, int shard_id, map<int, string>& max_marker);
3424
3425 int bi_get_instance(const RGWBucketInfo& bucket_info, rgw_obj& obj, rgw_bucket_dir_entry *dirent);
3426 int bi_get(rgw_bucket& bucket, rgw_obj& obj, BIIndexType index_type, rgw_cls_bi_entry *entry);
3427 void bi_put(librados::ObjectWriteOperation& op, BucketShard& bs, rgw_cls_bi_entry& entry);
3428 int bi_put(BucketShard& bs, rgw_cls_bi_entry& entry);
3429 int bi_put(rgw_bucket& bucket, rgw_obj& obj, rgw_cls_bi_entry& entry);
3430 int bi_list(rgw_bucket& bucket, int shard_id, const string& filter_obj, const string& marker, uint32_t max, list<rgw_cls_bi_entry> *entries, bool *is_truncated);
3431 int bi_list(BucketShard& bs, const string& filter_obj, const string& marker, uint32_t max, list<rgw_cls_bi_entry> *entries, bool *is_truncated);
3432 int bi_list(rgw_bucket& bucket, const string& obj_name, const string& marker, uint32_t max,
3433 list<rgw_cls_bi_entry> *entries, bool *is_truncated);
3434 int bi_remove(BucketShard& bs);
3435
3436 int cls_obj_usage_log_add(const string& oid, rgw_usage_log_info& info);
3437 int cls_obj_usage_log_read(string& oid, string& user, uint64_t start_epoch, uint64_t end_epoch, uint32_t max_entries,
3438 string& read_iter, map<rgw_user_bucket, rgw_usage_log_entry>& usage, bool *is_truncated);
3439 int cls_obj_usage_log_trim(string& oid, string& user, uint64_t start_epoch, uint64_t end_epoch);
3440
3441 int key_to_shard_id(const string& key, int max_shards);
3442 void shard_name(const string& prefix, unsigned max_shards, const string& key, string& name, int *shard_id);
3443 void shard_name(const string& prefix, unsigned max_shards, const string& section, const string& key, string& name);
3444 void shard_name(const string& prefix, unsigned shard_id, string& name);
3445 int get_target_shard_id(const RGWBucketInfo& bucket_info, const string& obj_key, int *shard_id);
3446 void time_log_prepare_entry(cls_log_entry& entry, const ceph::real_time& ut, const string& section, const string& key, bufferlist& bl);
3447 int time_log_add_init(librados::IoCtx& io_ctx);
3448 int time_log_add(const string& oid, list<cls_log_entry>& entries,
3449 librados::AioCompletion *completion, bool monotonic_inc = true);
3450 int time_log_add(const string& oid, const ceph::real_time& ut, const string& section, const string& key, bufferlist& bl);
3451 int time_log_list(const string& oid, const ceph::real_time& start_time, const ceph::real_time& end_time,
3452 int max_entries, list<cls_log_entry>& entries,
3453 const string& marker, string *out_marker, bool *truncated);
3454 int time_log_info(const string& oid, cls_log_header *header);
3455 int time_log_info_async(librados::IoCtx& io_ctx, const string& oid, cls_log_header *header, librados::AioCompletion *completion);
3456 int time_log_trim(const string& oid, const ceph::real_time& start_time, const ceph::real_time& end_time,
3457 const string& from_marker, const string& to_marker,
3458 librados::AioCompletion *completion = nullptr);
3459
3460 string objexp_hint_get_shardname(int shard_num);
3461 int objexp_key_shard(const rgw_obj_index_key& key);
3462 void objexp_get_shard(int shard_num,
3463 string& shard); /* out */
3464 int objexp_hint_add(const ceph::real_time& delete_at,
3465 const string& tenant_name,
3466 const string& bucket_name,
3467 const string& bucket_id,
3468 const rgw_obj_index_key& obj_key);
3469 int objexp_hint_list(const string& oid,
3470 const ceph::real_time& start_time,
3471 const ceph::real_time& end_time,
3472 const int max_entries,
3473 const string& marker,
3474 list<cls_timeindex_entry>& entries, /* out */
3475 string *out_marker, /* out */
3476 bool *truncated); /* out */
3477 int objexp_hint_parse(cls_timeindex_entry &ti_entry,
3478 objexp_hint_entry& hint_entry); /* out */
3479 int objexp_hint_trim(const string& oid,
3480 const ceph::real_time& start_time,
3481 const ceph::real_time& end_time,
3482 const string& from_marker = std::string(),
3483 const string& to_marker = std::string());
3484
3485 int lock_exclusive(rgw_pool& pool, const string& oid, ceph::timespan& duration, string& zone_id, string& owner_id);
3486 int unlock(rgw_pool& pool, const string& oid, string& zone_id, string& owner_id);
3487
3488 void update_gc_chain(rgw_obj& head_obj, RGWObjManifest& manifest, cls_rgw_obj_chain *chain);
3489 int send_chain_to_gc(cls_rgw_obj_chain& chain, const string& tag, bool sync);
3490 int gc_operate(string& oid, librados::ObjectWriteOperation *op);
3491 int gc_aio_operate(string& oid, librados::ObjectWriteOperation *op);
3492 int gc_operate(string& oid, librados::ObjectReadOperation *op, bufferlist *pbl);
3493
3494 int list_gc_objs(int *index, string& marker, uint32_t max, bool expired_only, std::list<cls_rgw_gc_obj_info>& result, bool *truncated);
3495 int process_gc();
3496 int process_expire_objects();
3497 int defer_gc(void *ctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj);
3498
3499 int process_lc();
3500 int list_lc_progress(const string& marker, uint32_t max_entries, map<string, int> *progress_map);
3501
3502 int bucket_check_index(RGWBucketInfo& bucket_info,
3503 map<RGWObjCategory, RGWStorageStats> *existing_stats,
3504 map<RGWObjCategory, RGWStorageStats> *calculated_stats);
3505 int bucket_rebuild_index(RGWBucketInfo& bucket_info);
3506 int bucket_set_reshard(RGWBucketInfo& bucket_info, const cls_rgw_bucket_instance_entry& entry);
3507 int remove_objs_from_index(RGWBucketInfo& bucket_info, list<rgw_obj_index_key>& oid_list);
3508 int move_rados_obj(librados::IoCtx& src_ioctx,
3509 const string& src_oid, const string& src_locator,
3510 librados::IoCtx& dst_ioctx,
3511 const string& dst_oid, const string& dst_locator);
3512 int fix_head_obj_locator(const RGWBucketInfo& bucket_info, bool copy_obj, bool remove_bad, rgw_obj_key& key);
3513 int fix_tail_obj_locator(const RGWBucketInfo& bucket_info, rgw_obj_key& key, bool fix, bool *need_fix);
3514
3515 int cls_user_get_header(const string& user_id, cls_user_header *header);
3516 int cls_user_get_header_async(const string& user_id, RGWGetUserHeader_CB *ctx);
3517 int cls_user_sync_bucket_stats(rgw_raw_obj& user_obj, const RGWBucketInfo& bucket_info);
3518 int cls_user_list_buckets(rgw_raw_obj& obj,
3519 const string& in_marker,
3520 const string& end_marker,
3521 int max_entries,
3522 list<cls_user_bucket_entry>& entries,
3523 string *out_marker,
3524 bool *truncated);
3525 int cls_user_add_bucket(rgw_raw_obj& obj, const cls_user_bucket_entry& entry);
3526 int cls_user_update_buckets(rgw_raw_obj& obj, list<cls_user_bucket_entry>& entries, bool add);
3527 int cls_user_complete_stats_sync(rgw_raw_obj& obj);
3528 int complete_sync_user_stats(const rgw_user& user_id);
3529 int cls_user_add_bucket(rgw_raw_obj& obj, list<cls_user_bucket_entry>& entries);
3530 int cls_user_remove_bucket(rgw_raw_obj& obj, const cls_user_bucket& bucket);
3531
3532 int check_quota(const rgw_user& bucket_owner, rgw_bucket& bucket,
3533 RGWQuotaInfo& user_quota, RGWQuotaInfo& bucket_quota, uint64_t obj_size);
3534
3535 int check_bucket_shards(const RGWBucketInfo& bucket_info, const rgw_bucket& bucket,
3536 RGWQuotaInfo& bucket_quota);
3537
3538 int add_bucket_to_reshard(const RGWBucketInfo& bucket_info, uint32_t new_num_shards);
3539
3540 uint64_t instance_id();
3541 const string& zone_name() {
3542 return get_zone_params().get_name();
3543 }
3544 const string& zone_id() {
3545 return get_zone_params().get_id();
3546 }
3547 string unique_id(uint64_t unique_num) {
3548 char buf[32];
3549 snprintf(buf, sizeof(buf), ".%llu.%llu", (unsigned long long)instance_id(), (unsigned long long)unique_num);
3550 string s = get_zone_params().get_id() + buf;
3551 return s;
3552 }
3553
3554 void init_unique_trans_id_deps() {
3555 char buf[16 + 2 + 1]; /* uint64_t needs 16, 2 hyphens add further 2 */
3556
3557 snprintf(buf, sizeof(buf), "-%llx-", (unsigned long long)instance_id());
3558 url_encode(string(buf) + get_zone_params().get_name(), trans_id_suffix);
3559 }
3560
3561 /* In order to preserve compability with Swift API, transaction ID
3562 * should contain at least 32 characters satisfying following spec:
3563 * - first 21 chars must be in range [0-9a-f]. Swift uses this
3564 * space for storing fragment of UUID obtained through a call to
3565 * uuid4() function of Python's uuid module;
3566 * - char no. 22 must be a hyphen;
3567 * - at least 10 next characters constitute hex-formatted timestamp
3568 * padded with zeroes if necessary. All bytes must be in [0-9a-f]
3569 * range;
3570 * - last, optional part of transaction ID is any url-encoded string
3571 * without restriction on length. */
3572 string unique_trans_id(const uint64_t unique_num) {
3573 char buf[41]; /* 2 + 21 + 1 + 16 (timestamp can consume up to 16) + 1 */
3574 time_t timestamp = time(NULL);
3575
3576 snprintf(buf, sizeof(buf), "tx%021llx-%010llx",
3577 (unsigned long long)unique_num,
3578 (unsigned long long)timestamp);
3579
3580 return string(buf) + trans_id_suffix;
3581 }
3582
3583 void get_log_pool(rgw_pool& pool) {
3584 pool = get_zone_params().log_pool;
3585 }
3586
3587 bool need_to_log_data() {
3588 return get_zone().log_data;
3589 }
3590
3591 bool need_to_log_metadata() {
3592 return is_meta_master() &&
3593 (get_zonegroup().zones.size() > 1 || current_period.is_multi_zonegroups_with_zones());
3594 }
3595
3596 librados::Rados* get_rados_handle();
3597
3598 int delete_raw_obj_aio(const rgw_raw_obj& obj, list<librados::AioCompletion *>& handles);
3599 int delete_obj_aio(const rgw_obj& obj, RGWBucketInfo& info, RGWObjState *astate,
3600 list<librados::AioCompletion *>& handles, bool keep_index_consistent);
3601 private:
3602 /**
3603 * This is a helper method, it generates a list of bucket index objects with the given
3604 * bucket base oid and number of shards.
3605 *
3606 * bucket_oid_base [in] - base name of the bucket index object;
3607 * num_shards [in] - number of bucket index object shards.
3608 * bucket_objs [out] - filled by this method, a list of bucket index objects.
3609 */
3610 void get_bucket_index_objects(const string& bucket_oid_base, uint32_t num_shards,
3611 map<int, string>& bucket_objs, int shard_id = -1);
3612
3613 /**
3614 * Get the bucket index object with the given base bucket index object and object key,
3615 * and the number of bucket index shards.
3616 *
3617 * bucket_oid_base [in] - bucket object base name.
3618 * obj_key [in] - object key.
3619 * num_shards [in] - number of bucket index shards.
3620 * hash_type [in] - type of hash to find the shard ID.
3621 * bucket_obj [out] - the bucket index object for the given object.
3622 *
3623 * Return 0 on success, a failure code otherwise.
3624 */
3625 int get_bucket_index_object(const string& bucket_oid_base, const string& obj_key,
3626 uint32_t num_shards, RGWBucketInfo::BIShardsHashType hash_type, string *bucket_obj, int *shard);
3627
3628 void get_bucket_index_object(const string& bucket_oid_base, uint32_t num_shards,
3629 int shard_id, string *bucket_obj);
3630
3631 /**
3632 * Check the actual on-disk state of the object specified
3633 * by list_state, and fill in the time and size of object.
3634 * Then append any changes to suggested_updates for
3635 * the rgw class' dir_suggest_changes function.
3636 *
3637 * Note that this can maul list_state; don't use it afterwards. Also
3638 * it expects object to already be filled in from list_state; it only
3639 * sets the size and mtime.
3640 *
3641 * Returns 0 on success, -ENOENT if the object doesn't exist on disk,
3642 * and -errno on other failures. (-ENOENT is not a failure, and it
3643 * will encode that info as a suggested update.)
3644 */
3645 int check_disk_state(librados::IoCtx io_ctx,
3646 const RGWBucketInfo& bucket_info,
3647 rgw_bucket_dir_entry& list_state,
3648 rgw_bucket_dir_entry& object,
3649 bufferlist& suggested_updates);
3650
3651 /**
3652 * Init pool iteration
3653 * pool: pool to use for the ctx initialization
3654 * ctx: context object to use for the iteration
3655 * Returns: 0 on success, -ERR# otherwise.
3656 */
3657 int pool_iterate_begin(const rgw_pool& pool, RGWPoolIterCtx& ctx);
3658
3659 /**
3660 * Iterate over pool return object names, use optional filter
3661 * ctx: iteration context, initialized with pool_iterate_begin()
3662 * num: max number of objects to return
3663 * objs: a vector that the results will append into
3664 * is_truncated: if not NULL, will hold true iff iteration is complete
3665 * filter: if not NULL, will be used to filter returned objects
3666 * Returns: 0 on success, -ERR# otherwise.
3667 */
3668 int pool_iterate(RGWPoolIterCtx& ctx, uint32_t num, vector<rgw_bucket_dir_entry>& objs,
3669 bool *is_truncated, RGWAccessListFilter *filter);
3670
/** Produce the next bucket id (defined out of line). */
uint64_t next_bucket_id();
3672 };
3673
3674 class RGWStoreManager {
3675 public:
3676 RGWStoreManager() {}
3677 static RGWRados *get_storage(CephContext *cct, bool use_gc_thread, bool use_lc_thread, bool quota_threads, bool run_sync_thread, bool run_reshard_thread) {
3678 RGWRados *store = init_storage_provider(cct, use_gc_thread, use_lc_thread, quota_threads, run_sync_thread,
3679 run_reshard_thread);
3680 return store;
3681 }
3682 static RGWRados *get_raw_storage(CephContext *cct) {
3683 RGWRados *store = init_raw_storage_provider(cct);
3684 return store;
3685 }
3686 static RGWRados *init_storage_provider(CephContext *cct, bool use_gc_thread, bool use_lc_thread, bool quota_threads, bool run_sync_thread, bool run_reshard_thread);
3687 static RGWRados *init_raw_storage_provider(CephContext *cct);
3688 static void close_storage(RGWRados *store);
3689
3690 };
3691
3692 template <class T>
3693 class RGWChainedCacheImpl : public RGWChainedCache {
3694 RWLock lock;
3695
3696 map<string, T> entries;
3697
3698 public:
3699 RGWChainedCacheImpl() : lock("RGWChainedCacheImpl::lock") {}
3700
3701 void init(RGWRados *store) {
3702 store->register_chained_cache(this);
3703 }
3704
3705 bool find(const string& key, T *entry) {
3706 RWLock::RLocker rl(lock);
3707 typename map<string, T>::iterator iter = entries.find(key);
3708 if (iter == entries.end()) {
3709 return false;
3710 }
3711
3712 *entry = iter->second;
3713 return true;
3714 }
3715
3716 bool put(RGWRados *store, const string& key, T *entry, list<rgw_cache_entry_info *>& cache_info_entries) {
3717 Entry chain_entry(this, key, entry);
3718
3719 /* we need the store cache to call us under its lock to maintain lock ordering */
3720 return store->chain_cache_entry(cache_info_entries, &chain_entry);
3721 }
3722
3723 void chain_cb(const string& key, void *data) override {
3724 T *entry = static_cast<T *>(data);
3725 RWLock::WLocker wl(lock);
3726 entries[key] = *entry;
3727 }
3728
3729 void invalidate(const string& key) override {
3730 RWLock::WLocker wl(lock);
3731 entries.erase(key);
3732 }
3733
3734 void invalidate_all() override {
3735 RWLock::WLocker wl(lock);
3736 entries.clear();
3737 }
3738 }; /* RGWChainedCacheImpl */
3739
3740 /**
3741 * Base of PUT operation.
3742 * Allow to create chained data transformers like compresors and encryptors.
3743 */
3744 class RGWPutObjDataProcessor
3745 {
3746 public:
3747 RGWPutObjDataProcessor(){}
3748 virtual ~RGWPutObjDataProcessor(){}
3749 virtual int handle_data(bufferlist& bl, off_t ofs, void **phandle, rgw_raw_obj *pobj, bool *again) = 0;
3750 virtual int throttle_data(void *handle, const rgw_raw_obj& obj, uint64_t size, bool need_to_wait) = 0;
3751 }; /* RGWPutObjDataProcessor */
3752
3753
3754 class RGWPutObjProcessor : public RGWPutObjDataProcessor
3755 {
3756 protected:
3757 RGWRados *store;
3758 RGWObjectCtx& obj_ctx;
3759 bool is_complete;
3760 RGWBucketInfo bucket_info;
3761 bool canceled;
3762
3763 virtual int do_complete(size_t accounted_size, const string& etag,
3764 ceph::real_time *mtime, ceph::real_time set_mtime,
3765 map<string, bufferlist>& attrs, ceph::real_time delete_at,
3766 const char *if_match, const char *if_nomatch, const string *user_data,
3767 rgw_zone_set* zones_trace = nullptr) = 0;
3768
3769 public:
3770 RGWPutObjProcessor(RGWObjectCtx& _obj_ctx, RGWBucketInfo& _bi) : store(NULL),
3771 obj_ctx(_obj_ctx),
3772 is_complete(false),
3773 bucket_info(_bi),
3774 canceled(false) {}
3775 ~RGWPutObjProcessor() override {}
3776 virtual int prepare(RGWRados *_store, string *oid_rand) {
3777 store = _store;
3778 return 0;
3779 }
3780
3781 int complete(size_t accounted_size, const string& etag,
3782 ceph::real_time *mtime, ceph::real_time set_mtime,
3783 map<string, bufferlist>& attrs, ceph::real_time delete_at,
3784 const char *if_match = NULL, const char *if_nomatch = NULL, const string *user_data = nullptr,
3785 rgw_zone_set *zones_trace = nullptr);
3786
3787 CephContext *ctx();
3788
3789 bool is_canceled() { return canceled; }
3790 }; /* RGWPutObjProcessor */
3791
3792 struct put_obj_aio_info {
3793 void *handle;
3794 rgw_raw_obj obj;
3795 uint64_t size;
3796 };
3797
3798 #define RGW_PUT_OBJ_MIN_WINDOW_SIZE_DEFAULT (16 * 1024 * 1024)
3799
3800 class RGWPutObjProcessor_Aio : public RGWPutObjProcessor
3801 {
3802 list<struct put_obj_aio_info> pending;
3803 uint64_t window_size{RGW_PUT_OBJ_MIN_WINDOW_SIZE_DEFAULT};
3804 uint64_t pending_size{0};
3805
3806 struct put_obj_aio_info pop_pending();
3807 int wait_pending_front();
3808 bool pending_has_completed();
3809
3810 rgw_raw_obj last_written_obj;
3811
3812 protected:
3813 uint64_t obj_len{0};
3814
3815 set<rgw_raw_obj> written_objs;
3816 rgw_obj head_obj;
3817
3818 void add_written_obj(const rgw_raw_obj& obj) {
3819 written_objs.insert(obj);
3820 }
3821
3822 int drain_pending();
3823 int handle_obj_data(rgw_raw_obj& obj, bufferlist& bl, off_t ofs, off_t abs_ofs, void **phandle, bool exclusive);
3824
3825 public:
3826 int prepare(RGWRados *store, string *oid_rand) override;
3827 int throttle_data(void *handle, const rgw_raw_obj& obj, uint64_t size, bool need_to_wait) override;
3828
3829 RGWPutObjProcessor_Aio(RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info) : RGWPutObjProcessor(obj_ctx, bucket_info) {}
3830 ~RGWPutObjProcessor_Aio() override;
3831 }; /* RGWPutObjProcessor_Aio */
3832
3833 class RGWPutObjProcessor_Atomic : public RGWPutObjProcessor_Aio
3834 {
3835 bufferlist first_chunk;
3836 uint64_t part_size;
3837 off_t cur_part_ofs;
3838 off_t next_part_ofs;
3839 int cur_part_id;
3840 off_t data_ofs;
3841
3842 bufferlist pending_data_bl;
3843 uint64_t max_chunk_size;
3844
3845 bool versioned_object;
3846 uint64_t olh_epoch;
3847 string version_id;
3848
3849 protected:
3850 rgw_bucket bucket;
3851 string obj_str;
3852
3853 string unique_tag;
3854
3855 rgw_raw_obj cur_obj;
3856 RGWObjManifest manifest;
3857 RGWObjManifest::generator manifest_gen;
3858
3859 int write_data(bufferlist& bl, off_t ofs, void **phandle, rgw_raw_obj *pobj, bool exclusive);
3860 int do_complete(size_t accounted_size, const string& etag,
3861 ceph::real_time *mtime, ceph::real_time set_mtime,
3862 map<string, bufferlist>& attrs, ceph::real_time delete_at,
3863 const char *if_match, const char *if_nomatch, const string *user_data, rgw_zone_set *zones_trace) override;
3864
3865 int prepare_next_part(off_t ofs);
3866 int complete_parts();
3867 int complete_writing_data();
3868
3869 int prepare_init(RGWRados *store, string *oid_rand);
3870
3871 public:
3872 ~RGWPutObjProcessor_Atomic() override {}
3873 RGWPutObjProcessor_Atomic(RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info,
3874 rgw_bucket& _b, const string& _o, uint64_t _p, const string& _t, bool versioned) :
3875 RGWPutObjProcessor_Aio(obj_ctx, bucket_info),
3876 part_size(_p),
3877 cur_part_ofs(0),
3878 next_part_ofs(_p),
3879 cur_part_id(0),
3880 data_ofs(0),
3881 max_chunk_size(0),
3882 versioned_object(versioned),
3883 olh_epoch(0),
3884 bucket(_b),
3885 obj_str(_o),
3886 unique_tag(_t) {}
3887 int prepare(RGWRados *store, string *oid_rand) override;
3888 virtual bool immutable_head() { return false; }
3889 int handle_data(bufferlist& bl, off_t ofs, void **phandle, rgw_raw_obj *pobj, bool *again) override;
3890
3891 void set_olh_epoch(uint64_t epoch) {
3892 olh_epoch = epoch;
3893 }
3894
3895 void set_version_id(const string& vid) {
3896 version_id = vid;
3897 }
3898 }; /* RGWPutObjProcessor_Atomic */
3899
/* Suffix appended to "<key>.<upload_id>" to form the multipart meta name. */
#define MP_META_SUFFIX ".meta"

/**
 * Derives the object names used by one multipart upload of a key.
 *
 * After init(key, upload_id, part_unique_str):
 *   - get_key()       -> key
 *   - get_upload_id() -> upload_id
 *   - get_meta()      -> "<key>.<upload_id>.meta"
 *   - get_part(n)     -> "<key>.<part_unique_str>.<n>"
 *
 * An empty key leaves the object cleared (all four strings empty).
 */
class RGWMPObj {
  string oid;
  string prefix;
  string meta;
  string upload_id;
public:
  RGWMPObj() {}
  RGWMPObj(const string& _oid, const string& _upload_id) {
    init(_oid, _upload_id, _upload_id);
  }

  /* Two-argument form: the upload id doubles as the part-name component. */
  void init(const string& _oid, const string& _upload_id) {
    init(_oid, _upload_id, _upload_id);
  }

  /*
   * Sets every derived name from the key, the upload id and the string
   * used in part names. An empty key clears the object instead.
   */
  void init(const string& _oid, const string& _upload_id, const string& part_unique_str) {
    if (_oid.empty()) {
      clear();
      return;
    }
    oid = _oid;
    upload_id = _upload_id;
    prefix = oid + ".";
    meta = prefix + upload_id + MP_META_SUFFIX;
    // meta is built first: it needs "<key>." before the part string is appended
    prefix.append(part_unique_str);
  }

  string& get_meta() { return meta; }
  const string& get_meta() const { return meta; }

  /* Name of part number num: "<prefix>.<num>". */
  string get_part(int num) const {
    string s = prefix;
    s.append(".");
    s.append(std::to_string(num));
    return s;
  }

  /* Name of the part identified by an arbitrary string: "<prefix>.<part>". */
  string get_part(const string& part) const {
    string s = prefix;
    s.append(".");
    s.append(part);
    return s;
  }

  string& get_upload_id() {
    return upload_id;
  }
  const string& get_upload_id() const {
    return upload_id;
  }

  string& get_key() {
    return oid;
  }
  const string& get_key() const {
    return oid;
  }

  /*
   * Parses a meta name of the form "<key>.<upload_id>.<suffix>" and
   * re-initialises this object from it. The key may itself contain dots:
   * only the last two dots are used as separators. The text after the
   * last dot is not checked.
   *
   * Returns false, leaving the object untouched, when the name has fewer
   * than two dots or when the key part would be empty. Returns true
   * otherwise.
   */
  bool from_meta(const string& meta) {
    const string::size_type end_pos = meta.rfind('.'); // start of the ".meta" suffix
    if (end_pos == string::npos || end_pos == 0)
      return false; // no dot, or nothing before the suffix
    const string::size_type mid_pos = meta.rfind('.', end_pos - 1); // <key>.<upload_id>
    if (mid_pos == string::npos || mid_pos == 0)
      return false; // no separator, or an empty key
    // Parse into locals so a failed parse never disturbs the members.
    const string key = meta.substr(0, mid_pos);
    const string id = meta.substr(mid_pos + 1, end_pos - mid_pos - 1);
    init(key, id, id);
    return true;
  }

  /* Resets all four strings to empty. */
  void clear() {
    oid = "";
    prefix = "";
    meta = "";
    upload_id = "";
  }
};
3965
3966 class RGWPutObjProcessor_Multipart : public RGWPutObjProcessor_Atomic
3967 {
3968 string part_num;
3969 RGWMPObj mp;
3970 req_state *s;
3971 string upload_id;
3972
3973 protected:
3974 int prepare(RGWRados *store, string *oid_rand);
3975 int do_complete(size_t accounted_size, const string& etag,
3976 ceph::real_time *mtime, ceph::real_time set_mtime,
3977 map<string, bufferlist>& attrs, ceph::real_time delete_at,
3978 const char *if_match, const char *if_nomatch, const string *user_data,
3979 rgw_zone_set *zones_trace) override;
3980 public:
3981 bool immutable_head() { return true; }
3982 RGWPutObjProcessor_Multipart(RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info, uint64_t _p, req_state *_s) :
3983 RGWPutObjProcessor_Atomic(obj_ctx, bucket_info, _s->bucket, _s->object.name, _p, _s->req_id, false), s(_s) {}
3984 void get_mp(RGWMPObj** _mp);
3985 }; /* RGWPutObjProcessor_Multipart */
3986 #endif