]> git.proxmox.com Git - ceph.git/blame - ceph/src/rgw/rgw_rados.h
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / src / rgw / rgw_rados.h
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3
4#ifndef CEPH_RGWRADOS_H
5#define CEPH_RGWRADOS_H
6
7#include <functional>
8
9#include "include/rados/librados.hpp"
10#include "include/Context.h"
11#include "common/RefCountedObj.h"
12#include "common/RWLock.h"
13#include "common/ceph_time.h"
14#include "common/lru_map.h"
15#include "rgw_common.h"
16#include "cls/rgw/cls_rgw_types.h"
17#include "cls/version/cls_version_types.h"
18#include "cls/log/cls_log_types.h"
19#include "cls/statelog/cls_statelog_types.h"
20#include "cls/timeindex/cls_timeindex_types.h"
21#include "rgw_log.h"
22#include "rgw_metadata.h"
23#include "rgw_meta_sync_status.h"
24#include "rgw_period_puller.h"
25#include "rgw_sync_module.h"
26
27class RGWWatcher;
28class SafeTimer;
29class ACLOwner;
30class RGWGC;
31class RGWMetaNotifier;
32class RGWDataNotifier;
33class RGWLC;
34class RGWObjectExpirer;
35class RGWMetaSyncProcessorThread;
36class RGWDataSyncProcessorThread;
37class RGWSyncLogTrimThread;
38class RGWRESTConn;
39struct RGWZoneGroup;
40struct RGWZoneParams;
41
42/* flags for put_obj_meta() */
43#define PUT_OBJ_CREATE 0x01
44#define PUT_OBJ_EXCL 0x02
45#define PUT_OBJ_CREATE_EXCL (PUT_OBJ_CREATE | PUT_OBJ_EXCL)
46
47#define RGW_OBJ_NS_MULTIPART "multipart"
48#define RGW_OBJ_NS_SHADOW "shadow"
49
50#define RGW_BUCKET_INSTANCE_MD_PREFIX ".bucket.meta."
51
52#define RGW_NO_SHARD -1
53
54#define MAX_BUCKET_INDEX_SHARDS_PRIME 7877
55
56static inline void prepend_bucket_marker(const rgw_bucket& bucket, const string& orig_oid, string& oid)
57{
58 if (bucket.marker.empty() || orig_oid.empty()) {
59 oid = orig_oid;
60 } else {
61 oid = bucket.marker;
62 oid.append("_");
63 oid.append(orig_oid);
64 }
65}
66
67static inline void get_obj_bucket_and_oid_loc(const rgw_obj& obj, string& oid, string& locator)
68{
69 const rgw_bucket& bucket = obj.bucket;
70 prepend_bucket_marker(bucket, obj.get_oid(), oid);
71 const string& loc = obj.key.get_loc();
72 if (!loc.empty()) {
73 prepend_bucket_marker(bucket, loc, locator);
74 } else {
75 locator.clear();
76 }
77}
78
79int rgw_init_ioctx(librados::Rados *rados, const rgw_pool& pool, librados::IoCtx& ioctx, bool create = false);
80
81int rgw_policy_from_attrset(CephContext *cct, map<string, bufferlist>& attrset, RGWAccessControlPolicy *policy);
82
83static inline bool rgw_raw_obj_to_obj(const rgw_bucket& bucket, const rgw_raw_obj& raw_obj, rgw_obj *obj)
84{
85 ssize_t pos = raw_obj.oid.find('_');
86 if (pos < 0) {
87 return false;
88 }
89
90 if (!rgw_obj_key::parse_raw_oid(raw_obj.oid.substr(pos + 1), &obj->key)) {
91 return false;
92 }
93 obj->bucket = bucket;
94
95 return true;
96}
97
98struct rgw_bucket_placement {
99 string placement_rule;
100 rgw_bucket bucket;
101
102 void dump(Formatter *f) const;
103};
104
105class rgw_obj_select {
106 string placement_rule;
107 rgw_obj obj;
108 rgw_raw_obj raw_obj;
109 bool is_raw;
110
111public:
112 rgw_obj_select() : is_raw(false) {}
113 rgw_obj_select(const rgw_obj& _obj) : obj(_obj), is_raw(false) {}
114 rgw_obj_select(const rgw_raw_obj& _raw_obj) : raw_obj(_raw_obj), is_raw(true) {}
115 rgw_obj_select(const rgw_obj_select& rhs) {
116 is_raw = rhs.is_raw;
117 if (is_raw) {
118 raw_obj = rhs.raw_obj;
119 } else {
120 obj = rhs.obj;
121 }
122 }
123
124 rgw_raw_obj get_raw_obj(const RGWZoneGroup& zonegroup, const RGWZoneParams& zone_params) const;
125 rgw_raw_obj get_raw_obj(RGWRados *store) const;
126
127 rgw_obj_select& operator=(const rgw_obj& rhs) {
128 obj = rhs;
129 is_raw = false;
130 return *this;
131 }
132
133 rgw_obj_select& operator=(const rgw_raw_obj& rhs) {
134 raw_obj = rhs;
135 is_raw = true;
136 return *this;
137 }
138
139 void set_placement_rule(const string& rule) {
140 placement_rule = rule;
141 }
142};
143
144struct compression_block {
145 uint64_t old_ofs;
146 uint64_t new_ofs;
147 uint64_t len;
148
149 void encode(bufferlist& bl) const {
150 ENCODE_START(1, 1, bl);
151 ::encode(old_ofs, bl);
152 ::encode(new_ofs, bl);
153 ::encode(len, bl);
154 ENCODE_FINISH(bl);
155 }
156
157 void decode(bufferlist::iterator& bl) {
158 DECODE_START(1, bl);
159 ::decode(old_ofs, bl);
160 ::decode(new_ofs, bl);
161 ::decode(len, bl);
162 DECODE_FINISH(bl);
163 }
164};
165WRITE_CLASS_ENCODER(compression_block)
166
167struct RGWCompressionInfo {
168 string compression_type;
169 uint64_t orig_size;
170 vector<compression_block> blocks;
171
172 RGWCompressionInfo() : compression_type("none"), orig_size(0) {}
173 RGWCompressionInfo(const RGWCompressionInfo& cs_info) : compression_type(cs_info.compression_type),
174 orig_size(cs_info.orig_size),
175 blocks(cs_info.blocks) {}
176
177 void encode(bufferlist& bl) const {
178 ENCODE_START(1, 1, bl);
179 ::encode(compression_type, bl);
180 ::encode(orig_size, bl);
181 ::encode(blocks, bl);
182 ENCODE_FINISH(bl);
183 }
184
185 void decode(bufferlist::iterator& bl) {
186 DECODE_START(1, bl);
187 ::decode(compression_type, bl);
188 ::decode(orig_size, bl);
189 ::decode(blocks, bl);
190 DECODE_FINISH(bl);
191 }
192};
193WRITE_CLASS_ENCODER(RGWCompressionInfo)
194
195int rgw_compression_info_from_attrset(map<string, bufferlist>& attrs, bool& need_decompress, RGWCompressionInfo& cs_info);
196
197struct RGWOLHInfo {
198 rgw_obj target;
199 bool removed;
200
201 RGWOLHInfo() : removed(false) {}
202
203 void encode(bufferlist& bl) const {
204 ENCODE_START(1, 1, bl);
205 ::encode(target, bl);
206 ::encode(removed, bl);
207 ENCODE_FINISH(bl);
208 }
209
210 void decode(bufferlist::iterator& bl) {
211 DECODE_START(1, bl);
212 ::decode(target, bl);
213 ::decode(removed, bl);
214 DECODE_FINISH(bl);
215 }
216 static void generate_test_instances(list<RGWOLHInfo*>& o);
217 void dump(Formatter *f) const;
218};
219WRITE_CLASS_ENCODER(RGWOLHInfo)
220
221struct RGWOLHPendingInfo {
222 ceph::real_time time;
223
224 RGWOLHPendingInfo() {}
225
226 void encode(bufferlist& bl) const {
227 ENCODE_START(1, 1, bl);
228 ::encode(time, bl);
229 ENCODE_FINISH(bl);
230 }
231
232 void decode(bufferlist::iterator& bl) {
233 DECODE_START(1, bl);
234 ::decode(time, bl);
235 DECODE_FINISH(bl);
236 }
237
238 void dump(Formatter *f) const;
239};
240WRITE_CLASS_ENCODER(RGWOLHPendingInfo)
241
242struct RGWUsageBatch {
243 map<ceph::real_time, rgw_usage_log_entry> m;
244
245 void insert(ceph::real_time& t, rgw_usage_log_entry& entry, bool *account) {
246 bool exists = m.find(t) != m.end();
247 *account = !exists;
248 m[t].aggregate(entry);
249 }
250};
251
/* Iteration state for reading usage: a read marker string and an index
 * that starts at 0. */
struct RGWUsageIter {
  std::string read_iter;
  uint32_t index;

  RGWUsageIter() : read_iter(), index(0) {}
};
258
259class RGWGetDataCB {
260protected:
261 uint64_t extra_data_len;
262public:
263 virtual int handle_data(bufferlist& bl, off_t bl_ofs, off_t bl_len) = 0;
264 RGWGetDataCB() : extra_data_len(0) {}
265 virtual ~RGWGetDataCB() {}
266 virtual void set_extra_data_len(uint64_t len) {
267 extra_data_len = len;
268 }
269 /**
270 * Flushes any cached data. Used by RGWGetObjFilter.
271 * Return logic same as handle_data.
272 */
273 virtual int flush() {
274 return 0;
275 }
276 /**
277 * Allows to extend fetch range of RGW object. Used by RGWGetObjFilter.
278 */
279 virtual int fixup_range(off_t& bl_ofs, off_t& bl_end) {
280 return 0;
281 }
282};
283
/*
 * Filter interface over (name, key) pairs. Both arguments are passed by
 * non-const reference, so an implementation is able to modify them.
 */
class RGWAccessListFilter {
public:
  virtual ~RGWAccessListFilter() {}

  /* decision hook; the meaning of the result is defined by the implementation */
  virtual bool filter(std::string& name, std::string& key) = 0;
};
289
290struct RGWCloneRangeInfo {
291 rgw_obj src;
292 off_t src_ofs;
293 off_t dst_ofs;
294 uint64_t len;
295};
296
297struct RGWObjManifestPart {
298 rgw_obj loc; /* the object where the data is located */
299 uint64_t loc_ofs; /* the offset at that object where the data is located */
300 uint64_t size; /* the part size */
301
302 RGWObjManifestPart() : loc_ofs(0), size(0) {}
303
304 void encode(bufferlist& bl) const {
305 ENCODE_START(2, 2, bl);
306 ::encode(loc, bl);
307 ::encode(loc_ofs, bl);
308 ::encode(size, bl);
309 ENCODE_FINISH(bl);
310 }
311
312 void decode(bufferlist::iterator& bl) {
313 DECODE_START_LEGACY_COMPAT_LEN_32(2, 2, 2, bl);
314 ::decode(loc, bl);
315 ::decode(loc_ofs, bl);
316 ::decode(size, bl);
317 DECODE_FINISH(bl);
318 }
319
320 void dump(Formatter *f) const;
321 static void generate_test_instances(list<RGWObjManifestPart*>& o);
322};
323WRITE_CLASS_ENCODER(RGWObjManifestPart)
324
/*
  The manifest defines a set of rules for structuring the object parts.
  There are a few terms to note:
  - head: the head part of the object, which is the part that contains
    the first chunk of data. An object might not have a head (as in the
    case of multipart-part objects).
  - stripe: the data portion of a single rgw object that resides on a
    single rados object.
  - part: a collection of stripes that make up a contiguous part of an
    object. A regular object will only have one part (although it might
    have many stripes), while a multipart object might have many parts.
    Each part has a fixed stripe size, although the last stripe of a part
    might be smaller than that. Consecutive parts may be merged if their
    stripe value is the same.
*/
340
341struct RGWObjManifestRule {
342 uint32_t start_part_num;
343 uint64_t start_ofs;
344 uint64_t part_size; /* each part size, 0 if there's no part size, meaning it's unlimited */
345 uint64_t stripe_max_size; /* underlying obj max size */
346 string override_prefix;
347
348 RGWObjManifestRule() : start_part_num(0), start_ofs(0), part_size(0), stripe_max_size(0) {}
349 RGWObjManifestRule(uint32_t _start_part_num, uint64_t _start_ofs, uint64_t _part_size, uint64_t _stripe_max_size) :
350 start_part_num(_start_part_num), start_ofs(_start_ofs), part_size(_part_size), stripe_max_size(_stripe_max_size) {}
351
352 void encode(bufferlist& bl) const {
353 ENCODE_START(2, 1, bl);
354 ::encode(start_part_num, bl);
355 ::encode(start_ofs, bl);
356 ::encode(part_size, bl);
357 ::encode(stripe_max_size, bl);
358 ::encode(override_prefix, bl);
359 ENCODE_FINISH(bl);
360 }
361
362 void decode(bufferlist::iterator& bl) {
363 DECODE_START(2, bl);
364 ::decode(start_part_num, bl);
365 ::decode(start_ofs, bl);
366 ::decode(part_size, bl);
367 ::decode(stripe_max_size, bl);
368 if (struct_v >= 2)
369 ::decode(override_prefix, bl);
370 DECODE_FINISH(bl);
371 }
372 void dump(Formatter *f) const;
373};
374WRITE_CLASS_ENCODER(RGWObjManifestRule)
375
376class RGWObjManifest {
377protected:
378 bool explicit_objs; /* old manifest? */
379 map<uint64_t, RGWObjManifestPart> objs;
380
381 uint64_t obj_size;
382
383 rgw_obj obj;
384 uint64_t head_size;
385 string head_placement_rule;
386
387 uint64_t max_head_size;
388 string prefix;
389 rgw_bucket_placement tail_placement; /* might be different than the original bucket,
390 as object might have been copied across pools */
391 map<uint64_t, RGWObjManifestRule> rules;
392
393 string tail_instance; /* tail object's instance */
394
395 void convert_to_explicit(const RGWZoneGroup& zonegroup, const RGWZoneParams& zone_params);
396 int append_explicit(RGWObjManifest& m, const RGWZoneGroup& zonegroup, const RGWZoneParams& zone_params);
397 void append_rules(RGWObjManifest& m, map<uint64_t, RGWObjManifestRule>::iterator& iter, string *override_prefix);
398
399 void update_iterators() {
400 begin_iter.seek(0);
401 end_iter.seek(obj_size);
402 }
403public:
404
405 RGWObjManifest() : explicit_objs(false), obj_size(0), head_size(0), max_head_size(0),
406 begin_iter(this), end_iter(this) {}
407 RGWObjManifest(const RGWObjManifest& rhs) {
408 *this = rhs;
409 }
410 RGWObjManifest& operator=(const RGWObjManifest& rhs) {
411 explicit_objs = rhs.explicit_objs;
412 objs = rhs.objs;
413 obj_size = rhs.obj_size;
414 obj = rhs.obj;
415 head_size = rhs.head_size;
416 max_head_size = rhs.max_head_size;
417 prefix = rhs.prefix;
418 tail_placement = rhs.tail_placement;
419 rules = rhs.rules;
420 tail_instance = rhs.tail_instance;
421
422 begin_iter.set_manifest(this);
423 end_iter.set_manifest(this);
424
425 begin_iter.seek(rhs.begin_iter.get_ofs());
426 end_iter.seek(rhs.end_iter.get_ofs());
427
428 return *this;
429 }
430
431 map<uint64_t, RGWObjManifestPart>& get_explicit_objs() {
432 return objs;
433 }
434
435
436 void set_explicit(uint64_t _size, map<uint64_t, RGWObjManifestPart>& _objs) {
437 explicit_objs = true;
438 obj_size = _size;
439 objs.swap(_objs);
440 }
441
442 void get_implicit_location(uint64_t cur_part_id, uint64_t cur_stripe, uint64_t ofs, string *override_prefix, rgw_obj_select *location);
443
444 void set_trivial_rule(uint64_t tail_ofs, uint64_t stripe_max_size) {
445 RGWObjManifestRule rule(0, tail_ofs, 0, stripe_max_size);
446 rules[0] = rule;
447 max_head_size = tail_ofs;
448 }
449
450 void set_multipart_part_rule(uint64_t stripe_max_size, uint64_t part_num) {
451 RGWObjManifestRule rule(0, 0, 0, stripe_max_size);
452 rule.start_part_num = part_num;
453 rules[0] = rule;
454 max_head_size = 0;
455 }
456
457 void encode(bufferlist& bl) const {
458 ENCODE_START(7, 6, bl);
459 ::encode(obj_size, bl);
460 ::encode(objs, bl);
461 ::encode(explicit_objs, bl);
462 ::encode(obj, bl);
463 ::encode(head_size, bl);
464 ::encode(max_head_size, bl);
465 ::encode(prefix, bl);
466 ::encode(rules, bl);
467 bool encode_tail_bucket = !(tail_placement.bucket == obj.bucket);
468 ::encode(encode_tail_bucket, bl);
469 if (encode_tail_bucket) {
470 ::encode(tail_placement.bucket, bl);
471 }
472 bool encode_tail_instance = (tail_instance != obj.key.instance);
473 ::encode(encode_tail_instance, bl);
474 if (encode_tail_instance) {
475 ::encode(tail_instance, bl);
476 }
477 ::encode(head_placement_rule, bl);
478 ::encode(tail_placement.placement_rule, bl);
479 ENCODE_FINISH(bl);
480 }
481
482 void decode(bufferlist::iterator& bl) {
483 DECODE_START_LEGACY_COMPAT_LEN_32(7, 2, 2, bl);
484 ::decode(obj_size, bl);
485 ::decode(objs, bl);
486 if (struct_v >= 3) {
487 ::decode(explicit_objs, bl);
488 ::decode(obj, bl);
489 ::decode(head_size, bl);
490 ::decode(max_head_size, bl);
491 ::decode(prefix, bl);
492 ::decode(rules, bl);
493 } else {
494 explicit_objs = true;
495 if (!objs.empty()) {
496 map<uint64_t, RGWObjManifestPart>::iterator iter = objs.begin();
497 obj = iter->second.loc;
498 head_size = iter->second.size;
499 max_head_size = head_size;
500 }
501 }
502
503 if (explicit_objs && head_size > 0 && !objs.empty()) {
504 /* patch up manifest due to issue 16435:
505 * the first object in the explicit objs list might not be the one we need to access, use the
506 * head object instead if set. This would happen if we had an old object that was created
507 * when the explicit objs manifest was around, and it got copied.
508 */
509 rgw_obj& obj_0 = objs[0].loc;
510 if (!obj_0.get_oid().empty() && obj_0.key.ns.empty()) {
511 objs[0].loc = obj;
512 objs[0].size = head_size;
513 }
514 }
515
516 if (struct_v >= 4) {
517 if (struct_v < 6) {
518 ::decode(tail_placement.bucket, bl);
519 } else {
520 bool need_to_decode;
521 ::decode(need_to_decode, bl);
522 if (need_to_decode) {
523 ::decode(tail_placement.bucket, bl);
524 } else {
525 tail_placement.bucket = obj.bucket;
526 }
527 }
528 }
529
530 if (struct_v >= 5) {
531 if (struct_v < 6) {
532 ::decode(tail_instance, bl);
533 } else {
534 bool need_to_decode;
535 ::decode(need_to_decode, bl);
536 if (need_to_decode) {
537 ::decode(tail_instance, bl);
538 } else {
539 tail_instance = obj.key.instance;
540 }
541 }
542 } else { // old object created before 'tail_instance' field added to manifest
543 tail_instance = obj.key.instance;
544 }
545
546 if (struct_v >= 7) {
547 ::decode(head_placement_rule, bl);
548 ::decode(tail_placement.placement_rule, bl);
549 }
550
551 update_iterators();
552 DECODE_FINISH(bl);
553 }
554
555 void dump(Formatter *f) const;
556 static void generate_test_instances(list<RGWObjManifest*>& o);
557
558 int append(RGWObjManifest& m, RGWZoneGroup& zonegroup, RGWZoneParams& zone_params);
559 int append(RGWObjManifest& m, RGWRados *store);
560
561 bool get_rule(uint64_t ofs, RGWObjManifestRule *rule);
562
563 bool empty() {
564 if (explicit_objs)
565 return objs.empty();
566 return rules.empty();
567 }
568
569 bool has_explicit_objs() {
570 return explicit_objs;
571 }
572
573 bool has_tail() {
574 if (explicit_objs) {
575 if (objs.size() == 1) {
576 map<uint64_t, RGWObjManifestPart>::iterator iter = objs.begin();
577 rgw_obj& o = iter->second.loc;
578 return !(obj == o);
579 }
580 return (objs.size() >= 2);
581 }
582 return (obj_size > head_size);
583 }
584
585 void set_head(const string& placement_rule, const rgw_obj& _o, uint64_t _s) {
586 head_placement_rule = placement_rule;
587 obj = _o;
588 head_size = _s;
589
590 if (explicit_objs && head_size > 0) {
591 objs[0].loc = obj;
592 objs[0].size = head_size;
593 }
594 }
595
596 const rgw_obj& get_obj() {
597 return obj;
598 }
599
600 void set_tail_placement(const string& placement_rule, const rgw_bucket& _b) {
601 tail_placement.placement_rule = placement_rule;
602 tail_placement.bucket = _b;
603 }
604
605 const rgw_bucket_placement& get_tail_placement() {
606 return tail_placement;
607 }
608
609 const string& get_head_placement_rule() {
610 return head_placement_rule;
611 }
612
613 void set_prefix(const string& _p) {
614 prefix = _p;
615 }
616
617 const string& get_prefix() {
618 return prefix;
619 }
620
621 void set_tail_instance(const string& _ti) {
622 tail_instance = _ti;
623 }
624
625 const string& get_tail_instance() {
626 return tail_instance;
627 }
628
629 void set_head_size(uint64_t _s) {
630 head_size = _s;
631 }
632
633 void set_obj_size(uint64_t s) {
634 obj_size = s;
635
636 update_iterators();
637 }
638
639 uint64_t get_obj_size() {
640 return obj_size;
641 }
642
643 uint64_t get_head_size() {
644 return head_size;
645 }
646
647 void set_max_head_size(uint64_t s) {
648 max_head_size = s;
649 }
650
651 uint64_t get_max_head_size() {
652 return max_head_size;
653 }
654
655 class obj_iterator {
656 RGWObjManifest *manifest;
657 uint64_t part_ofs; /* where current part starts */
658 uint64_t stripe_ofs; /* where current stripe starts */
659 uint64_t ofs; /* current position within the object */
660 uint64_t stripe_size; /* current part size */
661
662 int cur_part_id;
663 int cur_stripe;
664 string cur_override_prefix;
665
666 rgw_obj_select location;
667
668 map<uint64_t, RGWObjManifestRule>::iterator rule_iter;
669 map<uint64_t, RGWObjManifestRule>::iterator next_rule_iter;
670
671 map<uint64_t, RGWObjManifestPart>::iterator explicit_iter;
672
673 void init() {
674 part_ofs = 0;
675 stripe_ofs = 0;
676 ofs = 0;
677 stripe_size = 0;
678 cur_part_id = 0;
679 cur_stripe = 0;
680 }
681
682 void update_explicit_pos();
683
684
685 protected:
686
687 void set_manifest(RGWObjManifest *m) {
688 manifest = m;
689 }
690
691 public:
692 obj_iterator() : manifest(NULL) {
693 init();
694 }
695 explicit obj_iterator(RGWObjManifest *_m) : manifest(_m) {
696 init();
697 if (!manifest->empty()) {
698 seek(0);
699 }
700 }
701 obj_iterator(RGWObjManifest *_m, uint64_t _ofs) : manifest(_m) {
702 init();
703 if (!manifest->empty()) {
704 seek(_ofs);
705 }
706 }
707 void seek(uint64_t ofs);
708
709 void operator++();
710 bool operator==(const obj_iterator& rhs) {
711 return (ofs == rhs.ofs);
712 }
713 bool operator!=(const obj_iterator& rhs) {
714 return (ofs != rhs.ofs);
715 }
716 const rgw_obj_select& get_location() {
717 return location;
718 }
719
720 /* start of current stripe */
721 uint64_t get_stripe_ofs() {
722 if (manifest->explicit_objs) {
723 return explicit_iter->first;
724 }
725 return stripe_ofs;
726 }
727
728 /* current ofs relative to start of rgw object */
729 uint64_t get_ofs() const {
730 return ofs;
731 }
732
733 /* stripe number */
734 int get_cur_stripe() const {
735 return cur_stripe;
736 }
737
738 /* current stripe size */
739 uint64_t get_stripe_size() {
740 if (manifest->explicit_objs) {
741 return explicit_iter->second.size;
742 }
743 return stripe_size;
744 }
745
746 /* offset where data starts within current stripe */
747 uint64_t location_ofs() {
748 if (manifest->explicit_objs) {
749 return explicit_iter->second.loc_ofs;
750 }
751 return 0; /* all stripes start at zero offset */
752 }
753
754 void update_location();
755
756 friend class RGWObjManifest;
757 };
758
759 const obj_iterator& obj_begin();
760 const obj_iterator& obj_end();
761 obj_iterator obj_find(uint64_t ofs);
762
763 obj_iterator begin_iter;
764 obj_iterator end_iter;
765
766 /*
767 * simple object generator. Using a simple single rule manifest.
768 */
769 class generator {
770 RGWObjManifest *manifest;
771 uint64_t last_ofs;
772 uint64_t cur_part_ofs;
773 int cur_part_id;
774 int cur_stripe;
775 uint64_t cur_stripe_size;
776 string cur_oid;
777
778 string oid_prefix;
779
780 rgw_obj_select cur_obj;
781 rgw_pool pool;
782
783
784 RGWObjManifestRule rule;
785
786 public:
787 generator() : manifest(NULL), last_ofs(0), cur_part_ofs(0), cur_part_id(0),
788 cur_stripe(0), cur_stripe_size(0) {}
789 int create_begin(CephContext *cct, RGWObjManifest *manifest, const string& placement_rule, rgw_bucket& bucket, rgw_obj& obj);
790
791 int create_next(uint64_t ofs);
792
793 rgw_raw_obj get_cur_obj(RGWZoneGroup& zonegroup, RGWZoneParams& zone_params) { return cur_obj.get_raw_obj(zonegroup, zone_params); }
794 rgw_raw_obj get_cur_obj(RGWRados *store) { return cur_obj.get_raw_obj(store); }
795
796 /* total max size of current stripe (including head obj) */
797 uint64_t cur_stripe_max_size() {
798 return cur_stripe_size;
799 }
800 };
801};
802WRITE_CLASS_ENCODER(RGWObjManifest)
803
804struct RGWUploadPartInfo {
805 uint32_t num;
806 uint64_t size;
807 uint64_t accounted_size{0};
808 string etag;
809 ceph::real_time modified;
810 RGWObjManifest manifest;
811 RGWCompressionInfo cs_info;
812
813 RGWUploadPartInfo() : num(0), size(0) {}
814
815 void encode(bufferlist& bl) const {
816 ENCODE_START(4, 2, bl);
817 ::encode(num, bl);
818 ::encode(size, bl);
819 ::encode(etag, bl);
820 ::encode(modified, bl);
821 ::encode(manifest, bl);
822 ::encode(cs_info, bl);
823 ::encode(accounted_size, bl);
824 ENCODE_FINISH(bl);
825 }
826 void decode(bufferlist::iterator& bl) {
827 DECODE_START_LEGACY_COMPAT_LEN(4, 2, 2, bl);
828 ::decode(num, bl);
829 ::decode(size, bl);
830 ::decode(etag, bl);
831 ::decode(modified, bl);
832 if (struct_v >= 3)
833 ::decode(manifest, bl);
834 if (struct_v >= 4) {
835 ::decode(cs_info, bl);
836 ::decode(accounted_size, bl);
837 } else {
838 accounted_size = size;
839 }
840 DECODE_FINISH(bl);
841 }
842 void dump(Formatter *f) const;
843 static void generate_test_instances(list<RGWUploadPartInfo*>& o);
844};
845WRITE_CLASS_ENCODER(RGWUploadPartInfo)
846
847struct RGWObjState {
848 rgw_obj obj;
849 bool is_atomic;
850 bool has_attrs;
851 bool exists;
852 uint64_t size; //< size of raw object
853 uint64_t accounted_size{0}; //< size before compression, encryption
854 ceph::real_time mtime;
855 uint64_t epoch;
856 bufferlist obj_tag;
857 string write_tag;
858 bool fake_tag;
859 RGWObjManifest manifest;
860 bool has_manifest;
861 string shadow_obj;
862 bool has_data;
863 bufferlist data;
864 bool prefetch_data;
865 bool keep_tail;
866 bool is_olh;
867 bufferlist olh_tag;
868 uint64_t pg_ver;
869 uint32_t zone_short_id;
870
871 /* important! don't forget to update copy constructor */
872
873 RGWObjVersionTracker objv_tracker;
874
875 map<string, bufferlist> attrset;
876 RGWObjState() : is_atomic(false), has_attrs(0), exists(false),
877 size(0), epoch(0), fake_tag(false), has_manifest(false),
878 has_data(false), prefetch_data(false), keep_tail(false), is_olh(false),
879 pg_ver(0), zone_short_id(0) {}
880 RGWObjState(const RGWObjState& rhs) : obj (rhs.obj) {
881 is_atomic = rhs.is_atomic;
882 has_attrs = rhs.has_attrs;
883 exists = rhs.exists;
884 size = rhs.size;
885 accounted_size = rhs.accounted_size;
886 mtime = rhs.mtime;
887 epoch = rhs.epoch;
888 if (rhs.obj_tag.length()) {
889 obj_tag = rhs.obj_tag;
890 }
891 write_tag = rhs.write_tag;
892 fake_tag = rhs.fake_tag;
893 if (rhs.has_manifest) {
894 manifest = rhs.manifest;
895 }
896 has_manifest = rhs.has_manifest;
897 shadow_obj = rhs.shadow_obj;
898 has_data = rhs.has_data;
899 if (rhs.data.length()) {
900 data = rhs.data;
901 }
902 prefetch_data = rhs.prefetch_data;
903 keep_tail = rhs.keep_tail;
904 is_olh = rhs.is_olh;
905 objv_tracker = rhs.objv_tracker;
906 pg_ver = rhs.pg_ver;
907 }
908
909 bool get_attr(string name, bufferlist& dest) {
910 map<string, bufferlist>::iterator iter = attrset.find(name);
911 if (iter != attrset.end()) {
912 dest = iter->second;
913 return true;
914 }
915 return false;
916 }
917};
918
919struct RGWRawObjState {
920 rgw_raw_obj obj;
921 bool has_attrs{false};
922 bool exists{false};
923 uint64_t size{0};
924 ceph::real_time mtime;
925 uint64_t epoch;
926 bufferlist obj_tag;
927 bool has_data{false};
928 bufferlist data;
929 bool prefetch_data{false};
930 uint64_t pg_ver{0};
931
932 /* important! don't forget to update copy constructor */
933
934 RGWObjVersionTracker objv_tracker;
935
936 map<string, bufferlist> attrset;
937 RGWRawObjState() {}
938 RGWRawObjState(const RGWRawObjState& rhs) : obj (rhs.obj) {
939 has_attrs = rhs.has_attrs;
940 exists = rhs.exists;
941 size = rhs.size;
942 mtime = rhs.mtime;
943 epoch = rhs.epoch;
944 if (rhs.obj_tag.length()) {
945 obj_tag = rhs.obj_tag;
946 }
947 has_data = rhs.has_data;
948 if (rhs.data.length()) {
949 data = rhs.data;
950 }
951 prefetch_data = rhs.prefetch_data;
952 pg_ver = rhs.pg_ver;
953 objv_tracker = rhs.objv_tracker;
954 }
955};
956
957struct RGWPoolIterCtx {
958 librados::IoCtx io_ctx;
959 librados::NObjectIterator iter;
960};
961
962struct RGWListRawObjsCtx {
963 bool initialized;
964 RGWPoolIterCtx iter_ctx;
965
966 RGWListRawObjsCtx() : initialized(false) {}
967};
968
969struct RGWDefaultSystemMetaObjInfo {
970 string default_id;
971
972 void encode(bufferlist& bl) const {
973 ENCODE_START(1, 1, bl);
974 ::encode(default_id, bl);
975 ENCODE_FINISH(bl);
976 }
977
978 void decode(bufferlist::iterator& bl) {
979 DECODE_START(1, bl);
980 ::decode(default_id, bl);
981 DECODE_FINISH(bl);
982 }
983
984 void dump(Formatter *f) const;
985 void decode_json(JSONObj *obj);
986};
987WRITE_CLASS_ENCODER(RGWDefaultSystemMetaObjInfo)
988
989struct RGWNameToId {
990 string obj_id;
991
992 void encode(bufferlist& bl) const {
993 ENCODE_START(1, 1, bl);
994 ::encode(obj_id, bl);
995 ENCODE_FINISH(bl);
996 }
997
998 void decode(bufferlist::iterator& bl) {
999 DECODE_START(1, bl);
1000 ::decode(obj_id, bl);
1001 DECODE_FINISH(bl);
1002 }
1003
1004 void dump(Formatter *f) const;
1005 void decode_json(JSONObj *obj);
1006};
1007WRITE_CLASS_ENCODER(RGWNameToId)
1008
1009class RGWSystemMetaObj {
1010protected:
1011 string id;
1012 string name;
1013
1014 CephContext *cct;
1015 RGWRados *store;
1016
1017 int store_name(bool exclusive);
1018 int store_info(bool exclusive);
1019 int read_info(const string& obj_id, bool old_format = false);
1020 int read_id(const string& obj_name, string& obj_id);
1021 int read_default(RGWDefaultSystemMetaObjInfo& default_info,
1022 const string& oid);
1023 /* read and use default id */
1024 int use_default(bool old_format = false);
1025
1026public:
1027 RGWSystemMetaObj() : cct(NULL), store(NULL) {}
1028 RGWSystemMetaObj(const string& _name): name(_name), cct(NULL), store(NULL) {}
1029 RGWSystemMetaObj(const string& _id, const string& _name) : id(_id), name(_name), cct(NULL), store(NULL) {}
1030 RGWSystemMetaObj(CephContext *_cct, RGWRados *_store): cct(_cct), store(_store){}
1031 RGWSystemMetaObj(const string& _name, CephContext *_cct, RGWRados *_store): name(_name), cct(_cct), store(_store){}
1032 const string& get_name() const { return name; }
1033 const string& get_id() const { return id; }
1034
1035 void set_name(const string& _name) { name = _name;}
1036 void set_id(const string& _id) { id = _id;}
1037 void clear_id() { id.clear(); }
1038
1039 virtual ~RGWSystemMetaObj() {}
1040
1041 virtual void encode(bufferlist& bl) const {
1042 ENCODE_START(1, 1, bl);
1043 ::encode(id, bl);
1044 ::encode(name, bl);
1045 ENCODE_FINISH(bl);
1046 }
1047
1048 virtual void decode(bufferlist::iterator& bl) {
1049 DECODE_START(1, bl);
1050 ::decode(id, bl);
1051 ::decode(name, bl);
1052 DECODE_FINISH(bl);
1053 }
1054
1055 void reinit_instance(CephContext *_cct, RGWRados *_store) {
1056 cct = _cct;
1057 store = _store;
1058 }
1059 int init(CephContext *_cct, RGWRados *_store, bool setup_obj = true, bool old_format = false);
1060 virtual int read_default_id(string& default_id, bool old_format = false);
1061 virtual int set_as_default(bool exclusive = false);
1062 int delete_default();
1063 virtual int create(bool exclusive = true);
1064 int delete_obj(bool old_format = false);
1065 int rename(const string& new_name);
1066 int update() { return store_info(false);}
1067 int update_name() { return store_name(false);}
1068 int read();
1069 int write(bool exclusive);
1070
1071 virtual rgw_pool get_pool(CephContext *cct) = 0;
1072 virtual const string get_default_oid(bool old_format = false) = 0;
1073 virtual const string& get_names_oid_prefix() = 0;
1074 virtual const string& get_info_oid_prefix(bool old_format = false) = 0;
1075 virtual const string& get_predefined_name(CephContext *cct) = 0;
1076
1077 void dump(Formatter *f) const;
1078 void decode_json(JSONObj *obj);
1079};
1080WRITE_CLASS_ENCODER(RGWSystemMetaObj)
1081
1082struct RGWZonePlacementInfo {
1083 rgw_pool index_pool;
1084 rgw_pool data_pool;
1085 rgw_pool data_extra_pool; /* if not set we should use data_pool */
1086 RGWBucketIndexType index_type;
1087 std::string compression_type;
1088
1089 RGWZonePlacementInfo() : index_type(RGWBIType_Normal) {}
1090
1091 void encode(bufferlist& bl) const {
1092 ENCODE_START(6, 1, bl);
1093 ::encode(index_pool.to_str(), bl);
1094 ::encode(data_pool.to_str(), bl);
1095 ::encode(data_extra_pool.to_str(), bl);
1096 ::encode((uint32_t)index_type, bl);
1097 ::encode(compression_type, bl);
1098 ENCODE_FINISH(bl);
1099 }
1100
1101 void decode(bufferlist::iterator& bl) {
1102 DECODE_START(6, bl);
1103 string index_pool_str;
1104 string data_pool_str;
1105 ::decode(index_pool_str, bl);
1106 index_pool = rgw_pool(index_pool_str);
1107 ::decode(data_pool_str, bl);
1108 data_pool = rgw_pool(data_pool_str);
1109 if (struct_v >= 4) {
1110 string data_extra_pool_str;
1111 ::decode(data_extra_pool_str, bl);
1112 data_extra_pool = rgw_pool(data_extra_pool_str);
1113 }
1114 if (struct_v >= 5) {
1115 uint32_t it;
1116 ::decode(it, bl);
1117 index_type = (RGWBucketIndexType)it;
1118 }
1119 if (struct_v >= 6) {
1120 ::decode(compression_type, bl);
1121 }
1122 DECODE_FINISH(bl);
1123 }
1124 const rgw_pool& get_data_extra_pool() {
1125 if (data_extra_pool.empty()) {
1126 return data_pool;
1127 }
1128 return data_extra_pool;
1129 }
1130 void dump(Formatter *f) const;
1131 void decode_json(JSONObj *obj);
1132};
1133WRITE_CLASS_ENCODER(RGWZonePlacementInfo)
1134
1135struct RGWZoneParams : RGWSystemMetaObj {
1136 rgw_pool domain_root;
1137 rgw_pool metadata_heap;
1138 rgw_pool control_pool;
1139 rgw_pool gc_pool;
1140 rgw_pool lc_pool;
1141 rgw_pool log_pool;
1142 rgw_pool intent_log_pool;
1143 rgw_pool usage_log_pool;
1144
1145 rgw_pool user_keys_pool;
1146 rgw_pool user_email_pool;
1147 rgw_pool user_swift_pool;
1148 rgw_pool user_uid_pool;
1149 rgw_pool roles_pool;
1150
1151 RGWAccessKey system_key;
1152
1153 map<string, RGWZonePlacementInfo> placement_pools;
1154
1155 string realm_id;
1156
1157 map<string, string> tier_config;
1158
1159 RGWZoneParams() : RGWSystemMetaObj() {}
1160 RGWZoneParams(const string& name) : RGWSystemMetaObj(name){}
1161 RGWZoneParams(const string& id, const string& name) : RGWSystemMetaObj(id, name) {}
1162 RGWZoneParams(const string& id, const string& name, const string& _realm_id)
1163 : RGWSystemMetaObj(id, name), realm_id(_realm_id) {}
1164
1165 rgw_pool get_pool(CephContext *cct);
1166 const string get_default_oid(bool old_format = false) override;
1167 const string& get_names_oid_prefix() override;
1168 const string& get_info_oid_prefix(bool old_format = false) override;
1169 const string& get_predefined_name(CephContext *cct) override;
1170
1171 int init(CephContext *_cct, RGWRados *_store, bool setup_obj = true,
1172 bool old_format = false);
1173 using RGWSystemMetaObj::init;
1174 int read_default_id(string& default_id, bool old_format = false) override;
1175 int set_as_default(bool exclusive = false) override;
1176 int create_default(bool old_format = false);
1177 int create(bool exclusive = true) override;
1178 int fix_pool_names();
1179
1180 const string& get_compression_type(const string& placement_rule) const;
1181
1182 void encode(bufferlist& bl) const override {
1183 ENCODE_START(9, 1, bl);
1184 ::encode(domain_root, bl);
1185 ::encode(control_pool, bl);
1186 ::encode(gc_pool, bl);
1187 ::encode(log_pool, bl);
1188 ::encode(intent_log_pool, bl);
1189 ::encode(usage_log_pool, bl);
1190 ::encode(user_keys_pool, bl);
1191 ::encode(user_email_pool, bl);
1192 ::encode(user_swift_pool, bl);
1193 ::encode(user_uid_pool, bl);
1194 RGWSystemMetaObj::encode(bl);
1195 ::encode(system_key, bl);
1196 ::encode(placement_pools, bl);
1197 ::encode(metadata_heap, bl);
1198 ::encode(realm_id, bl);
1199 ::encode(lc_pool, bl);
1200 ::encode(tier_config, bl);
1201 ::encode(roles_pool, bl);
1202 ENCODE_FINISH(bl);
1203 }
1204
1205 void decode(bufferlist::iterator& bl) override {
1206 DECODE_START(9, bl);
1207 ::decode(domain_root, bl);
1208 ::decode(control_pool, bl);
1209 ::decode(gc_pool, bl);
1210 ::decode(log_pool, bl);
1211 ::decode(intent_log_pool, bl);
1212 ::decode(usage_log_pool, bl);
1213 ::decode(user_keys_pool, bl);
1214 ::decode(user_email_pool, bl);
1215 ::decode(user_swift_pool, bl);
1216 ::decode(user_uid_pool, bl);
1217 if (struct_v >= 6) {
1218 RGWSystemMetaObj::decode(bl);
1219 } else if (struct_v >= 2) {
1220 ::decode(name, bl);
1221 id = name;
1222 }
1223 if (struct_v >= 3)
1224 ::decode(system_key, bl);
1225 if (struct_v >= 4)
1226 ::decode(placement_pools, bl);
1227 if (struct_v >= 5)
1228 ::decode(metadata_heap, bl);
1229 if (struct_v >= 6) {
1230 ::decode(realm_id, bl);
1231 }
1232 if (struct_v >= 7) {
1233 ::decode(lc_pool, bl);
1234 } else {
1235 lc_pool.init(name + ".rgw.lc");
1236 }
1237 if (struct_v >= 8) {
1238 ::decode(tier_config, bl);
1239 }
1240 if (struct_v >= 9) {
1241 ::decode(roles_pool, bl);
1242 } else {
1243 roles_pool = name + ".rgw.roles";
1244 }
1245 DECODE_FINISH(bl);
1246 }
1247 void dump(Formatter *f) const;
1248 void decode_json(JSONObj *obj);
1249 static void generate_test_instances(list<RGWZoneParams*>& o);
1250
1251 bool find_placement(const rgw_data_placement_target& placement, string *placement_id) {
1252 for (const auto& pp : placement_pools) {
1253 const RGWZonePlacementInfo& info = pp.second;
1254 if (info.index_pool == placement.index_pool.to_str() &&
1255 info.data_pool == placement.data_pool.to_str() &&
1256 info.data_extra_pool == placement.data_extra_pool.to_str()) {
1257 *placement_id = pp.first;
1258 return true;
1259 }
1260 }
1261 return false;
1262 }
1263
1264 bool get_placement(const string& placement_id, RGWZonePlacementInfo *placement) const {
1265 auto iter = placement_pools.find(placement_id);
1266 if (iter == placement_pools.end()) {
1267 return false;
1268 }
1269 *placement = iter->second;
1270 return true;
1271 }
1272
1273 /*
1274 * return data pool of the head object
1275 */
1276 bool get_head_data_pool(const string& placement_id, const rgw_obj& obj, rgw_pool *pool) const {
1277 const rgw_data_placement_target& explicit_placement = obj.bucket.explicit_placement;
1278 if (!explicit_placement.data_pool.empty()) {
1279 if (!obj.in_extra_data) {
1280 *pool = explicit_placement.data_pool;
1281 } else {
1282 *pool = explicit_placement.get_data_extra_pool();
1283 }
1284 return true;
1285 }
1286 if (placement_id.empty()) {
1287 return false;
1288 }
1289 auto iter = placement_pools.find(placement_id);
1290 if (iter == placement_pools.end()) {
1291 return false;
1292 }
1293 if (!obj.in_extra_data) {
1294 *pool = iter->second.data_pool;
1295 } else {
1296 *pool = iter->second.data_extra_pool;
1297 }
1298 return true;
1299 }
1300};
1301WRITE_CLASS_ENCODER(RGWZoneParams)
1302
1303struct RGWZone {
1304 string id;
1305 string name;
1306 list<string> endpoints;
1307 bool log_meta;
1308 bool log_data;
1309 bool read_only;
1310 string tier_type;
1311
1312/**
1313 * Represents the number of shards for the bucket index object, a value of zero
1314 * indicates there is no sharding. By default (no sharding, the name of the object
1315 * is '.dir.{marker}', with sharding, the name is '.dir.{marker}.{sharding_id}',
1316 * sharding_id is zero-based value. It is not recommended to set a too large value
1317 * (e.g. thousand) as it increases the cost for bucket listing.
1318 */
1319 uint32_t bucket_index_max_shards;
1320
1321 bool sync_from_all;
1322 set<string> sync_from; /* list of zones to sync from */
1323
1324 RGWZone() : log_meta(false), log_data(false), read_only(false), bucket_index_max_shards(0),
1325 sync_from_all(true) {}
1326
1327 void encode(bufferlist& bl) const {
1328 ENCODE_START(6, 1, bl);
1329 ::encode(name, bl);
1330 ::encode(endpoints, bl);
1331 ::encode(log_meta, bl);
1332 ::encode(log_data, bl);
1333 ::encode(bucket_index_max_shards, bl);
1334 ::encode(id, bl);
1335 ::encode(read_only, bl);
1336 ::encode(tier_type, bl);
1337 ::encode(sync_from_all, bl);
1338 ::encode(sync_from, bl);
1339 ENCODE_FINISH(bl);
1340 }
1341
1342 void decode(bufferlist::iterator& bl) {
1343 DECODE_START(6, bl);
1344 ::decode(name, bl);
1345 if (struct_v < 4) {
1346 id = name;
1347 }
1348 ::decode(endpoints, bl);
1349 if (struct_v >= 2) {
1350 ::decode(log_meta, bl);
1351 ::decode(log_data, bl);
1352 }
1353 if (struct_v >= 3) {
1354 ::decode(bucket_index_max_shards, bl);
1355 }
1356 if (struct_v >= 4) {
1357 ::decode(id, bl);
1358 ::decode(read_only, bl);
1359 }
1360 if (struct_v >= 5) {
1361 ::decode(tier_type, bl);
1362 }
1363 if (struct_v >= 6) {
1364 ::decode(sync_from_all, bl);
1365 ::decode(sync_from, bl);
1366 }
1367 DECODE_FINISH(bl);
1368 }
1369 void dump(Formatter *f) const;
1370 void decode_json(JSONObj *obj);
1371 static void generate_test_instances(list<RGWZone*>& o);
1372
1373 bool is_read_only() { return read_only; }
1374
1375 bool syncs_from(const string& zone_id) {
1376 return (sync_from_all || sync_from.find(zone_id) != sync_from.end());
1377 }
1378};
1379WRITE_CLASS_ENCODER(RGWZone)
1380
1381struct RGWDefaultZoneGroupInfo {
1382 string default_zonegroup;
1383
1384 void encode(bufferlist& bl) const {
1385 ENCODE_START(1, 1, bl);
1386 ::encode(default_zonegroup, bl);
1387 ENCODE_FINISH(bl);
1388 }
1389
1390 void decode(bufferlist::iterator& bl) {
1391 DECODE_START(1, bl);
1392 ::decode(default_zonegroup, bl);
1393 DECODE_FINISH(bl);
1394 }
1395 void dump(Formatter *f) const;
1396 void decode_json(JSONObj *obj);
1397 //todo: implement ceph-dencoder
1398};
1399WRITE_CLASS_ENCODER(RGWDefaultZoneGroupInfo)
1400
1401struct RGWZoneGroupPlacementTarget {
1402 string name;
1403 set<string> tags;
1404
1405 bool user_permitted(list<string>& user_tags) {
1406 if (tags.empty()) {
1407 return true;
1408 }
1409 for (auto& rule : user_tags) {
1410 if (tags.find(rule) != tags.end()) {
1411 return true;
1412 }
1413 }
1414 return false;
1415 }
1416
1417 void encode(bufferlist& bl) const {
1418 ENCODE_START(1, 1, bl);
1419 ::encode(name, bl);
1420 ::encode(tags, bl);
1421 ENCODE_FINISH(bl);
1422 }
1423
1424 void decode(bufferlist::iterator& bl) {
1425 DECODE_START(1, bl);
1426 ::decode(name, bl);
1427 ::decode(tags, bl);
1428 DECODE_FINISH(bl);
1429 }
1430 void dump(Formatter *f) const;
1431 void decode_json(JSONObj *obj);
1432};
1433WRITE_CLASS_ENCODER(RGWZoneGroupPlacementTarget)
1434
1435
1436struct RGWZoneGroup : public RGWSystemMetaObj {
1437 string api_name;
1438 list<string> endpoints;
1439 bool is_master;
1440
1441 string master_zone;
1442 map<string, RGWZone> zones;
1443
1444 map<string, RGWZoneGroupPlacementTarget> placement_targets;
1445 string default_placement;
1446
1447 list<string> hostnames;
1448 list<string> hostnames_s3website;
1449 // TODO: Maybe convert hostnames to a map<string,list<string>> for
1450 // endpoint_type->hostnames
1451/*
145220:05 < _robbat21irssi> maybe I do someting like: if (hostname_map.empty()) { populate all map keys from hostnames; };
145320:05 < _robbat21irssi> but that's a later compatability migration planning bit
145420:06 < yehudasa> more like if (!hostnames.empty()) {
145520:06 < yehudasa> for (list<string>::iterator iter = hostnames.begin(); iter != hostnames.end(); ++iter) {
145620:06 < yehudasa> hostname_map["s3"].append(iter->second);
145720:07 < yehudasa> hostname_map["s3website"].append(iter->second);
145820:07 < yehudasa> s/append/push_back/g
145920:08 < _robbat21irssi> inner loop over APIs
146020:08 < yehudasa> yeah, probably
146120:08 < _robbat21irssi> s3, s3website, swift, swith_auth, swift_website
1462*/
1463 map<string, list<string> > api_hostname_map;
1464 map<string, list<string> > api_endpoints_map;
1465
1466 string realm_id;
1467
1468 RGWZoneGroup(): is_master(false){}
1469 RGWZoneGroup(const std::string &id, const std::string &name):RGWSystemMetaObj(id, name) {}
1470 RGWZoneGroup(const std::string &_name):RGWSystemMetaObj(_name) {}
1471 RGWZoneGroup(const std::string &_name, bool _is_master, CephContext *cct, RGWRados* store,
1472 const string& _realm_id, const list<string>& _endpoints)
1473 : RGWSystemMetaObj(_name, cct , store), endpoints(_endpoints), is_master(_is_master),
1474 realm_id(_realm_id) {}
1475
1476 bool is_master_zonegroup() const { return is_master;}
1477 void update_master(bool _is_master) {
1478 is_master = _is_master;
1479 post_process_params();
1480 }
1481 void post_process_params();
1482
1483 void encode(bufferlist& bl) const override {
1484 ENCODE_START(4, 1, bl);
1485 ::encode(name, bl);
1486 ::encode(api_name, bl);
1487 ::encode(is_master, bl);
1488 ::encode(endpoints, bl);
1489 ::encode(master_zone, bl);
1490 ::encode(zones, bl);
1491 ::encode(placement_targets, bl);
1492 ::encode(default_placement, bl);
1493 ::encode(hostnames, bl);
1494 ::encode(hostnames_s3website, bl);
1495 RGWSystemMetaObj::encode(bl);
1496 ::encode(realm_id, bl);
1497 ENCODE_FINISH(bl);
1498 }
1499
1500 void decode(bufferlist::iterator& bl) override {
1501 DECODE_START(4, bl);
1502 ::decode(name, bl);
1503 ::decode(api_name, bl);
1504 ::decode(is_master, bl);
1505 ::decode(endpoints, bl);
1506 ::decode(master_zone, bl);
1507 ::decode(zones, bl);
1508 ::decode(placement_targets, bl);
1509 ::decode(default_placement, bl);
1510 if (struct_v >= 2) {
1511 ::decode(hostnames, bl);
1512 }
1513 if (struct_v >= 3) {
1514 ::decode(hostnames_s3website, bl);
1515 }
1516 if (struct_v >= 4) {
1517 RGWSystemMetaObj::decode(bl);
1518 ::decode(realm_id, bl);
1519 } else {
1520 id = name;
1521 }
1522 DECODE_FINISH(bl);
1523 }
1524
1525 int read_default_id(string& default_id, bool old_format = false) override;
1526 int set_as_default(bool exclusive = false) override;
1527 int create_default(bool old_format = false);
1528 int equals(const string& other_zonegroup) const;
1529 int add_zone(const RGWZoneParams& zone_params, bool *is_master, bool *read_only,
1530 const list<string>& endpoints, const string *ptier_type,
1531 bool *psync_from_all, list<string>& sync_from, list<string>& sync_from_rm);
1532 int remove_zone(const std::string& zone_id);
1533 int rename_zone(const RGWZoneParams& zone_params);
1534 rgw_pool get_pool(CephContext *cct);
1535 const string get_default_oid(bool old_region_format = false) override;
1536 const string& get_info_oid_prefix(bool old_region_format = false) override;
1537 const string& get_names_oid_prefix() override;
1538 const string& get_predefined_name(CephContext *cct) override;
1539
1540 void dump(Formatter *f) const;
1541 void decode_json(JSONObj *obj);
1542 static void generate_test_instances(list<RGWZoneGroup*>& o);
1543};
1544WRITE_CLASS_ENCODER(RGWZoneGroup)
1545
1546struct RGWPeriodMap
1547{
1548 string id;
1549 map<string, RGWZoneGroup> zonegroups;
1550 map<string, RGWZoneGroup> zonegroups_by_api;
1551 map<string, uint32_t> short_zone_ids;
1552
1553 string master_zonegroup;
1554
1555 void encode(bufferlist& bl) const;
1556 void decode(bufferlist::iterator& bl);
1557
1558 int update(const RGWZoneGroup& zonegroup, CephContext *cct);
1559
1560 void dump(Formatter *f) const;
1561 void decode_json(JSONObj *obj);
1562
1563 void reset() {
1564 zonegroups.clear();
1565 zonegroups_by_api.clear();
1566 master_zonegroup.clear();
1567 }
1568
1569 uint32_t get_zone_short_id(const string& zone_id) const;
1570};
1571WRITE_CLASS_ENCODER(RGWPeriodMap)
1572
1573struct RGWPeriodConfig
1574{
1575 RGWQuotaInfo bucket_quota;
1576 RGWQuotaInfo user_quota;
1577
1578 void encode(bufferlist& bl) const {
1579 ENCODE_START(1, 1, bl);
1580 ::encode(bucket_quota, bl);
1581 ::encode(user_quota, bl);
1582 ENCODE_FINISH(bl);
1583 }
1584
1585 void decode(bufferlist::iterator& bl) {
1586 DECODE_START(1, bl);
1587 ::decode(bucket_quota, bl);
1588 ::decode(user_quota, bl);
1589 DECODE_FINISH(bl);
1590 }
1591
1592 void dump(Formatter *f) const;
1593 void decode_json(JSONObj *obj);
1594
1595 // the period config must be stored in a local object outside of the period,
1596 // so that it can be used in a default configuration where no realm/period
1597 // exists
1598 int read(RGWRados *store, const std::string& realm_id);
1599 int write(RGWRados *store, const std::string& realm_id);
1600
1601 static std::string get_oid(const std::string& realm_id);
1602 static rgw_pool get_pool(CephContext *cct);
1603};
1604WRITE_CLASS_ENCODER(RGWPeriodConfig)
1605
1606/* for backward comaptability */
1607struct RGWRegionMap {
1608
1609 map<string, RGWZoneGroup> regions;
1610
1611 string master_region;
1612
1613 RGWQuotaInfo bucket_quota;
1614 RGWQuotaInfo user_quota;
1615
1616 void encode(bufferlist& bl) const;
1617 void decode(bufferlist::iterator& bl);
1618
1619 void dump(Formatter *f) const;
1620 void decode_json(JSONObj *obj);
1621};
1622WRITE_CLASS_ENCODER(RGWRegionMap)
1623
1624struct RGWZoneGroupMap {
1625
1626 map<string, RGWZoneGroup> zonegroups;
1627 map<string, RGWZoneGroup> zonegroups_by_api;
1628
1629 string master_zonegroup;
1630
1631 RGWQuotaInfo bucket_quota;
1632 RGWQuotaInfo user_quota;
1633
1634 /* constract the map */
1635 int read(CephContext *cct, RGWRados *store);
1636
1637 void encode(bufferlist& bl) const;
1638 void decode(bufferlist::iterator& bl);
1639
1640 void dump(Formatter *f) const;
1641 void decode_json(JSONObj *obj);
1642};
1643WRITE_CLASS_ENCODER(RGWZoneGroupMap)
1644
1645class RGWRealm;
1646
1647struct objexp_hint_entry {
1648 string tenant;
1649 string bucket_name;
1650 string bucket_id;
1651 rgw_obj_key obj_key;
1652 ceph::real_time exp_time;
1653
1654 void encode(bufferlist& bl) const {
1655 ENCODE_START(2, 1, bl);
1656 ::encode(bucket_name, bl);
1657 ::encode(bucket_id, bl);
1658 ::encode(obj_key, bl);
1659 ::encode(exp_time, bl);
1660 ::encode(tenant, bl);
1661 ENCODE_FINISH(bl);
1662 }
1663
1664 void decode(bufferlist::iterator& bl) {
1665 // XXX Do we want DECODE_START_LEGACY_COMPAT_LEN(2, 1, 1, bl); ?
1666 DECODE_START(2, bl);
1667 ::decode(bucket_name, bl);
1668 ::decode(bucket_id, bl);
1669 ::decode(obj_key, bl);
1670 ::decode(exp_time, bl);
1671 if (struct_v >= 2) {
1672 ::decode(tenant, bl);
1673 } else {
1674 tenant.clear();
1675 }
1676 DECODE_FINISH(bl);
1677 }
1678};
1679WRITE_CLASS_ENCODER(objexp_hint_entry)
1680
1681class RGWPeriod;
1682
1683class RGWRealm : public RGWSystemMetaObj
1684{
1685 string current_period;
1686 epoch_t epoch{0}; //< realm epoch, incremented for each new period
1687
1688 int create_control(bool exclusive);
1689 int delete_control();
1690public:
1691 RGWRealm() {}
1692 RGWRealm(const string& _id, const string& _name = "") : RGWSystemMetaObj(_id, _name) {}
1693 RGWRealm(CephContext *_cct, RGWRados *_store): RGWSystemMetaObj(_cct, _store) {}
1694 RGWRealm(const string& _name, CephContext *_cct, RGWRados *_store): RGWSystemMetaObj(_name, _cct, _store){}
1695
1696 void encode(bufferlist& bl) const override {
1697 ENCODE_START(1, 1, bl);
1698 RGWSystemMetaObj::encode(bl);
1699 ::encode(current_period, bl);
1700 ::encode(epoch, bl);
1701 ENCODE_FINISH(bl);
1702 }
1703
1704 void decode(bufferlist::iterator& bl) override {
1705 DECODE_START(1, bl);
1706 RGWSystemMetaObj::decode(bl);
1707 ::decode(current_period, bl);
1708 ::decode(epoch, bl);
1709 DECODE_FINISH(bl);
1710 }
1711
1712 int create(bool exclusive = true) override;
1713 int delete_obj();
1714 rgw_pool get_pool(CephContext *cct);
1715 const string get_default_oid(bool old_format = false) override;
1716 const string& get_names_oid_prefix() override;
1717 const string& get_info_oid_prefix(bool old_format = false) override;
1718 const string& get_predefined_name(CephContext *cct) override;
1719
1720 using RGWSystemMetaObj::read_id; // expose as public for radosgw-admin
1721
1722 void dump(Formatter *f) const;
1723 void decode_json(JSONObj *obj);
1724
1725 const string& get_current_period() const {
1726 return current_period;
1727 }
1728 int set_current_period(RGWPeriod& period);
1729 void clear_current_period_and_epoch() {
1730 current_period.clear();
1731 epoch = 0;
1732 }
1733 epoch_t get_epoch() const { return epoch; }
1734
1735 string get_control_oid();
1736 /// send a notify on the realm control object
1737 int notify_zone(bufferlist& bl);
1738 /// notify the zone of a new period
1739 int notify_new_period(const RGWPeriod& period);
1740};
1741WRITE_CLASS_ENCODER(RGWRealm)
1742
1743struct RGWPeriodLatestEpochInfo {
1744 epoch_t epoch;
1745
1746 void encode(bufferlist& bl) const {
1747 ENCODE_START(1, 1, bl);
1748 ::encode(epoch, bl);
1749 ENCODE_FINISH(bl);
1750 }
1751
1752 void decode(bufferlist::iterator& bl) {
1753 DECODE_START(1, bl);
1754 ::decode(epoch, bl);
1755 DECODE_FINISH(bl);
1756 }
1757
1758 void dump(Formatter *f) const;
1759 void decode_json(JSONObj *obj);
1760};
1761WRITE_CLASS_ENCODER(RGWPeriodLatestEpochInfo)
1762
1763class RGWPeriod
1764{
1765 string id;
1766 epoch_t epoch;
1767 string predecessor_uuid;
1768 std::vector<std::string> sync_status;
1769 RGWPeriodMap period_map;
1770 RGWPeriodConfig period_config;
1771 string master_zonegroup;
1772 string master_zone;
1773
1774 string realm_id;
1775 string realm_name;
1776 epoch_t realm_epoch{1}; //< realm epoch when period was made current
1777
1778 CephContext *cct;
1779 RGWRados *store;
1780
1781 int read_info();
1782 int read_latest_epoch(RGWPeriodLatestEpochInfo& epoch_info);
1783 int use_latest_epoch();
1784 int use_current_period();
1785
1786 const string get_period_oid();
1787 const string get_period_oid_prefix();
1788
1789 // gather the metadata sync status for each shard; only for use on master zone
1790 int update_sync_status(const RGWPeriod &current_period,
1791 std::ostream& error_stream, bool force_if_stale);
1792
1793public:
1794 RGWPeriod() : epoch(0), cct(NULL), store(NULL) {}
1795
1796 RGWPeriod(const string& period_id, epoch_t _epoch = 0)
1797 : id(period_id), epoch(_epoch),
1798 cct(NULL), store(NULL) {}
1799
1800 const string& get_id() const { return id; }
1801 epoch_t get_epoch() const { return epoch; }
1802 epoch_t get_realm_epoch() const { return realm_epoch; }
1803 const string& get_predecessor() const { return predecessor_uuid; }
1804 const string& get_master_zone() const { return master_zone; }
1805 const string& get_master_zonegroup() const { return master_zonegroup; }
1806 const string& get_realm() const { return realm_id; }
1807 const RGWPeriodMap& get_map() const { return period_map; }
1808 RGWPeriodConfig& get_config() { return period_config; }
1809 const RGWPeriodConfig& get_config() const { return period_config; }
1810 const std::vector<std::string>& get_sync_status() const { return sync_status; }
1811 rgw_pool get_pool(CephContext *cct);
1812 const string& get_latest_epoch_oid();
1813 const string& get_info_oid_prefix();
1814
1815 void set_user_quota(RGWQuotaInfo& user_quota) {
1816 period_config.user_quota = user_quota;
1817 }
1818
1819 void set_bucket_quota(RGWQuotaInfo& bucket_quota) {
1820 period_config.bucket_quota = bucket_quota;
1821 }
1822
1823 void set_id(const string& id) {
1824 this->id = id;
1825 period_map.id = id;
1826 }
1827 void set_epoch(epoch_t epoch) { this->epoch = epoch; }
1828 void set_realm_epoch(epoch_t epoch) { realm_epoch = epoch; }
1829
1830 void set_predecessor(const string& predecessor)
1831 {
1832 predecessor_uuid = predecessor;
1833 }
1834
1835 void set_realm_id(const string& _realm_id) {
1836 realm_id = _realm_id;
1837 }
1838
1839 int reflect();
1840
1841 int get_zonegroup(RGWZoneGroup& zonegroup,
1842 const string& zonegroup_id);
1843
1844 bool is_single_zonegroup(CephContext *cct, RGWRados *store);
1845
1846 int get_latest_epoch(epoch_t& epoch);
1847 int set_latest_epoch(epoch_t epoch, bool exclusive = false);
1848
1849 int init(CephContext *_cct, RGWRados *_store, const string &period_realm_id, const string &period_realm_name = "",
1850 bool setup_obj = true);
1851 int init(CephContext *_cct, RGWRados *_store, bool setup_obj = true);
1852 int use_next_epoch();
1853
1854 int create(bool exclusive = true);
1855 int delete_obj();
1856 int store_info(bool exclusive);
1857 int add_zonegroup(const RGWZoneGroup& zonegroup);
1858
1859 void fork();
1860 int update();
1861
1862 // commit a staging period; only for use on master zone
1863 int commit(RGWRealm& realm, const RGWPeriod &current_period,
1864 std::ostream& error_stream, bool force_if_stale = false);
1865
1866 void encode(bufferlist& bl) const {
1867 ENCODE_START(1, 1, bl);
1868 ::encode(id, bl);
1869 ::encode(epoch, bl);
1870 ::encode(realm_epoch, bl);
1871 ::encode(predecessor_uuid, bl);
1872 ::encode(sync_status, bl);
1873 ::encode(period_map, bl);
1874 ::encode(master_zone, bl);
1875 ::encode(master_zonegroup, bl);
1876 ::encode(period_config, bl);
1877 ::encode(realm_id, bl);
1878 ::encode(realm_name, bl);
1879 ENCODE_FINISH(bl);
1880 }
1881
1882 void decode(bufferlist::iterator& bl) {
1883 DECODE_START(1, bl);
1884 ::decode(id, bl);
1885 ::decode(epoch, bl);
1886 ::decode(realm_epoch, bl);
1887 ::decode(predecessor_uuid, bl);
1888 ::decode(sync_status, bl);
1889 ::decode(period_map, bl);
1890 ::decode(master_zone, bl);
1891 ::decode(master_zonegroup, bl);
1892 ::decode(period_config, bl);
1893 ::decode(realm_id, bl);
1894 ::decode(realm_name, bl);
1895 DECODE_FINISH(bl);
1896 }
1897 void dump(Formatter *f) const;
1898 void decode_json(JSONObj *obj);
1899
1900 static string get_staging_id(const string& realm_id) {
1901 return realm_id + ":staging";
1902 }
1903};
1904WRITE_CLASS_ENCODER(RGWPeriod)
1905
1906class RGWDataChangesLog;
1907class RGWMetaSyncStatusManager;
1908class RGWDataSyncStatusManager;
1909class RGWReplicaLogger;
1910class RGWCoroutinesManagerRegistry;
1911
1912class RGWStateLog {
1913 RGWRados *store;
1914 int num_shards;
1915 string module_name;
1916
1917 void oid_str(int shard, string& oid);
1918 int get_shard_num(const string& object);
1919 string get_oid(const string& object);
1920 int open_ioctx(librados::IoCtx& ioctx);
1921
1922 struct list_state {
1923 int cur_shard;
1924 int max_shard;
1925 string marker;
1926 string client_id;
1927 string op_id;
1928 string object;
1929
1930 list_state() : cur_shard(0), max_shard(0) {}
1931 };
1932
1933protected:
1934 virtual bool dump_entry_internal(const cls_statelog_entry& entry, Formatter *f) {
1935 return false;
1936 }
1937
1938public:
1939 RGWStateLog(RGWRados *_store, int _num_shards, const string& _module_name) :
1940 store(_store), num_shards(_num_shards), module_name(_module_name) {}
1941 virtual ~RGWStateLog() {}
1942
1943 int store_entry(const string& client_id, const string& op_id, const string& object,
1944 uint32_t state, bufferlist *bl, uint32_t *check_state);
1945
1946 int remove_entry(const string& client_id, const string& op_id, const string& object);
1947
1948 void init_list_entries(const string& client_id, const string& op_id, const string& object,
1949 void **handle);
1950
1951 int list_entries(void *handle, int max_entries, list<cls_statelog_entry>& entries, bool *done);
1952
1953 void finish_list_entries(void *handle);
1954
1955 virtual void dump_entry(const cls_statelog_entry& entry, Formatter *f);
1956};
1957
1958/*
1959 * state transitions:
1960 *
1961 * unknown -> in-progress -> complete
1962 * -> error
1963 *
1964 * user can try setting the 'abort' state, and it can only succeed if state is
1965 * in-progress.
1966 *
1967 * state renewal cannot switch state (stays in the same state)
1968 *
1969 * rgw can switch from in-progress to complete
1970 * rgw can switch from in-progress to error
1971 *
1972 * rgw can switch from abort to cancelled
1973 *
1974 */
1975
1976class RGWOpState : public RGWStateLog {
1977protected:
1978 bool dump_entry_internal(const cls_statelog_entry& entry, Formatter *f) override;
1979public:
1980
1981 enum OpState {
1982 OPSTATE_UNKNOWN = 0,
1983 OPSTATE_IN_PROGRESS = 1,
1984 OPSTATE_COMPLETE = 2,
1985 OPSTATE_ERROR = 3,
1986 OPSTATE_ABORT = 4,
1987 OPSTATE_CANCELLED = 5,
1988 };
1989
1990 explicit RGWOpState(RGWRados *_store);
1991
1992 int state_from_str(const string& s, OpState *state);
1993 int set_state(const string& client_id, const string& op_id, const string& object, OpState state);
1994 int renew_state(const string& client_id, const string& op_id, const string& object, OpState state);
1995};
1996
1997class RGWOpStateSingleOp
1998{
1999 RGWOpState os;
2000 string client_id;
2001 string op_id;
2002 string object;
2003
2004 CephContext *cct;
2005
2006 RGWOpState::OpState cur_state;
2007 ceph::real_time last_update;
2008
2009public:
2010 RGWOpStateSingleOp(RGWRados *store, const string& cid, const string& oid, const string& obj);
2011
2012 int set_state(RGWOpState::OpState state);
2013 int renew_state();
2014};
2015
2016class RGWGetBucketStats_CB : public RefCountedObject {
2017protected:
2018 rgw_bucket bucket;
2019 map<RGWObjCategory, RGWStorageStats> *stats;
2020public:
2021 explicit RGWGetBucketStats_CB(rgw_bucket& _bucket) : bucket(_bucket), stats(NULL) {}
2022 ~RGWGetBucketStats_CB() override {}
2023 virtual void handle_response(int r) = 0;
2024 virtual void set_response(map<RGWObjCategory, RGWStorageStats> *_stats) {
2025 stats = _stats;
2026 }
2027};
2028
2029class RGWGetUserStats_CB : public RefCountedObject {
2030protected:
2031 rgw_user user;
2032 RGWStorageStats stats;
2033public:
2034 explicit RGWGetUserStats_CB(const rgw_user& _user) : user(_user) {}
2035 ~RGWGetUserStats_CB() override {}
2036 virtual void handle_response(int r) = 0;
2037 virtual void set_response(RGWStorageStats& _stats) {
2038 stats = _stats;
2039 }
2040};
2041
2042class RGWGetDirHeader_CB;
2043class RGWGetUserHeader_CB;
2044
2045struct rgw_rados_ref {
2046 rgw_pool pool;
2047 string oid;
2048 string key;
2049 librados::IoCtx ioctx;
2050};
2051
2052class RGWChainedCache {
2053public:
2054 virtual ~RGWChainedCache() {}
2055 virtual void chain_cb(const string& key, void *data) = 0;
2056 virtual void invalidate(const string& key) = 0;
2057 virtual void invalidate_all() = 0;
2058
2059 struct Entry {
2060 RGWChainedCache *cache;
2061 const string& key;
2062 void *data;
2063
2064 Entry(RGWChainedCache *_c, const string& _k, void *_d) : cache(_c), key(_k), data(_d) {}
2065 };
2066};
2067
2068template <class T, class S>
2069class RGWObjectCtxImpl {
2070 RGWRados *store;
2071 std::map<T, S> objs_state;
2072 RWLock lock;
2073
2074public:
2075 RGWObjectCtxImpl(RGWRados *_store) : store(_store), lock("RGWObjectCtxImpl") {}
2076
2077 S *get_state(const T& obj) {
2078 S *result;
2079 typename std::map<T, S>::iterator iter;
2080 lock.get_read();
2081 assert (!obj.empty());
2082 iter = objs_state.find(obj);
2083 if (iter != objs_state.end()) {
2084 result = &iter->second;
2085 lock.unlock();
2086 } else {
2087 lock.unlock();
2088 lock.get_write();
2089 result = &objs_state[obj];
2090 lock.unlock();
2091 }
2092 return result;
2093 }
2094
2095 void set_atomic(T& obj) {
2096 RWLock::WLocker wl(lock);
2097 assert (!obj.empty());
2098 objs_state[obj].is_atomic = true;
2099 }
2100 void set_prefetch_data(T& obj) {
2101 RWLock::WLocker wl(lock);
2102 assert (!obj.empty());
2103 objs_state[obj].prefetch_data = true;
2104 }
2105 void invalidate(T& obj) {
2106 RWLock::WLocker wl(lock);
2107 auto iter = objs_state.find(obj);
2108 if (iter == objs_state.end()) {
2109 return;
2110 }
2111 bool is_atomic = iter->second.is_atomic;
2112 bool prefetch_data = iter->second.prefetch_data;
2113
2114 objs_state.erase(iter);
2115
2116 if (is_atomic || prefetch_data) {
2117 auto& s = objs_state[obj];
2118 s.is_atomic = is_atomic;
2119 s.prefetch_data = prefetch_data;
2120 }
2121 }
2122};
2123
2124template<>
2125void RGWObjectCtxImpl<rgw_obj, RGWObjState>::invalidate(rgw_obj& obj);
2126
2127template<>
2128void RGWObjectCtxImpl<rgw_raw_obj, RGWRawObjState>::invalidate(rgw_raw_obj& obj);
2129
2130struct RGWObjectCtx {
2131 RGWRados *store;
2132 void *user_ctx;
2133
2134 RGWObjectCtxImpl<rgw_obj, RGWObjState> obj;
2135 RGWObjectCtxImpl<rgw_raw_obj, RGWRawObjState> raw;
2136
2137 explicit RGWObjectCtx(RGWRados *_store) : store(_store), user_ctx(NULL), obj(store), raw(store) { }
2138 RGWObjectCtx(RGWRados *_store, void *_user_ctx) : store(_store), user_ctx(_user_ctx), obj(store), raw(store) { }
2139};
2140
2141class Finisher;
2142class RGWAsyncRadosProcessor;
2143
2144template <class T>
2145class RGWChainedCacheImpl;
2146
2147struct bucket_info_entry {
2148 RGWBucketInfo info;
2149 real_time mtime;
2150 map<string, bufferlist> attrs;
2151};
2152
2153struct tombstone_entry {
2154 ceph::real_time mtime;
2155 uint32_t zone_short_id;
2156 uint64_t pg_ver;
2157
2158 tombstone_entry() = default;
2159 tombstone_entry(const RGWObjState& state)
2160 : mtime(state.mtime), zone_short_id(state.zone_short_id),
2161 pg_ver(state.pg_ver) {}
2162};
2163
2164class RGWRados
2165{
2166 friend class RGWGC;
2167 friend class RGWMetaNotifier;
2168 friend class RGWDataNotifier;
2169 friend class RGWLC;
2170 friend class RGWObjectExpirer;
2171 friend class RGWMetaSyncProcessorThread;
2172 friend class RGWDataSyncProcessorThread;
2173 friend class RGWStateLog;
2174 friend class RGWReplicaLogger;
2175
2176 /** Open the pool used as root for this gateway */
2177 int open_root_pool_ctx();
2178 int open_gc_pool_ctx();
2179 int open_lc_pool_ctx();
2180 int open_objexp_pool_ctx();
2181
2182 int open_pool_ctx(const rgw_pool& pool, librados::IoCtx& io_ctx);
2183 int open_bucket_index_ctx(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx);
2184 int open_bucket_index(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx, string& bucket_oid);
2185 int open_bucket_index_base(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
2186 string& bucket_oid_base);
2187 int open_bucket_index_shard(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
2188 const string& obj_key, string *bucket_obj, int *shard_id);
2189 int open_bucket_index_shard(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
2190 int shard_id, string *bucket_obj);
2191 int open_bucket_index(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
2192 map<int, string>& bucket_objs, int shard_id = -1, map<int, string> *bucket_instance_ids = NULL);
2193 template<typename T>
2194 int open_bucket_index(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
2195 map<int, string>& oids, map<int, T>& bucket_objs,
2196 int shard_id = -1, map<int, string> *bucket_instance_ids = NULL);
2197 void build_bucket_index_marker(const string& shard_id_str, const string& shard_marker,
2198 string *marker);
2199
2200 void get_bucket_instance_ids(const RGWBucketInfo& bucket_info, int shard_id, map<int, string> *result);
2201
2202 std::atomic<int64_t> max_req_id = { 0 };
2203 Mutex lock;
2204 Mutex watchers_lock;
2205 SafeTimer *timer;
2206
2207 RGWGC *gc;
2208 RGWLC *lc;
2209 RGWObjectExpirer *obj_expirer;
2210 bool use_gc_thread;
2211 bool use_lc_thread;
2212 bool quota_threads;
2213 bool run_sync_thread;
2214
2215 RGWAsyncRadosProcessor* async_rados;
2216
2217 RGWMetaNotifier *meta_notifier;
2218 RGWDataNotifier *data_notifier;
2219 RGWMetaSyncProcessorThread *meta_sync_processor_thread;
2220 map<string, RGWDataSyncProcessorThread *> data_sync_processor_threads;
2221
2222 RGWSyncLogTrimThread *sync_log_trimmer{nullptr};
2223
2224 Mutex meta_sync_thread_lock;
2225 Mutex data_sync_thread_lock;
2226
2227 int num_watchers;
2228 RGWWatcher **watchers;
2229 std::set<int> watchers_set;
2230 librados::IoCtx root_pool_ctx; // .rgw
2231 librados::IoCtx control_pool_ctx; // .rgw.control
2232 bool watch_initialized;
2233
2234 friend class RGWWatcher;
2235
2236 Mutex bucket_id_lock;
2237
2238 // This field represents the number of bucket index object shards
2239 uint32_t bucket_index_max_shards;
2240
2241 int get_obj_head_ioctx(const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::IoCtx *ioctx);
2242 int get_obj_head_ref(const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_rados_ref *ref);
2243 int get_system_obj_ref(const rgw_raw_obj& obj, rgw_rados_ref *ref, rgw_pool *pool = NULL);
2244 uint64_t max_bucket_id;
2245
2246 int get_olh_target_state(RGWObjectCtx& rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj,
2247 RGWObjState *olh_state, RGWObjState **target_state);
2248 int get_system_obj_state_impl(RGWObjectCtx *rctx, rgw_raw_obj& obj, RGWRawObjState **state, RGWObjVersionTracker *objv_tracker);
2249 int get_obj_state_impl(RGWObjectCtx *rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWObjState **state,
2250 bool follow_olh, bool assume_noent = false);
2251 int append_atomic_test(RGWObjectCtx *rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj,
2252 librados::ObjectOperation& op, RGWObjState **state);
2253
2254 int update_placement_map();
2255 int store_bucket_info(RGWBucketInfo& info, map<string, bufferlist> *pattrs, RGWObjVersionTracker *objv_tracker, bool exclusive);
2256
2257 void remove_rgw_head_obj(librados::ObjectWriteOperation& op);
2258 void cls_obj_check_prefix_exist(librados::ObjectOperation& op, const string& prefix, bool fail_if_exist);
2259 void cls_obj_check_mtime(librados::ObjectOperation& op, const real_time& mtime, bool high_precision_time, RGWCheckMTimeType type);
2260protected:
2261 CephContext *cct;
2262
2263 std::vector<librados::Rados> rados;
2264 uint32_t next_rados_handle;
2265 RWLock handle_lock;
2266 std::map<pthread_t, int> rados_map;
2267
2268 using RGWChainedCacheImpl_bucket_info_entry = RGWChainedCacheImpl<bucket_info_entry>;
2269 RGWChainedCacheImpl_bucket_info_entry *binfo_cache;
2270
2271 using tombstone_cache_t = lru_map<rgw_obj, tombstone_entry>;
2272 tombstone_cache_t *obj_tombstone_cache;
2273
2274 librados::IoCtx gc_pool_ctx; // .rgw.gc
2275 librados::IoCtx lc_pool_ctx; // .rgw.lc
2276 librados::IoCtx objexp_pool_ctx;
2277
2278 bool pools_initialized;
2279
2280 string zonegroup_id;
2281 string zone_name;
2282 string trans_id_suffix;
2283
2284 RGWQuotaHandler *quota_handler;
2285
2286 Finisher *finisher;
2287
2288 RGWCoroutinesManagerRegistry *cr_registry;
2289
2290 RGWSyncModulesManager *sync_modules_manager{nullptr};
2291 RGWSyncModuleInstanceRef sync_module;
2292 bool writeable_zone{false};
2293
2294 RGWZoneGroup zonegroup;
2295 RGWZone zone_public_config; /* external zone params, e.g., entrypoints, log flags, etc. */
2296 RGWZoneParams zone_params; /* internal zone params, e.g., rados pools */
2297 uint32_t zone_short_id;
2298
2299 RGWPeriod current_period;
2300public:
2301 RGWRados() : lock("rados_timer_lock"), watchers_lock("watchers_lock"), timer(NULL),
2302 gc(NULL), lc(NULL), obj_expirer(NULL), use_gc_thread(false), use_lc_thread(false), quota_threads(false),
2303 run_sync_thread(false), async_rados(nullptr), meta_notifier(NULL),
2304 data_notifier(NULL), meta_sync_processor_thread(NULL),
2305 meta_sync_thread_lock("meta_sync_thread_lock"), data_sync_thread_lock("data_sync_thread_lock"),
2306 num_watchers(0), watchers(NULL),
2307 watch_initialized(false),
2308 bucket_id_lock("rados_bucket_id"),
2309 bucket_index_max_shards(0),
2310 max_bucket_id(0), cct(NULL),
2311 next_rados_handle(0),
2312 handle_lock("rados_handle_lock"),
2313 binfo_cache(NULL), obj_tombstone_cache(nullptr),
2314 pools_initialized(false),
2315 quota_handler(NULL),
2316 finisher(NULL),
2317 cr_registry(NULL),
2318 zone_short_id(0),
2319 rest_master_conn(NULL),
2320 meta_mgr(NULL), data_log(NULL) {}
2321
2322 uint64_t get_new_req_id() {
2323 return ++max_req_id;
2324 }
2325
2326 librados::IoCtx* get_lc_pool_ctx() {
2327 return &lc_pool_ctx;
2328 }
2329 void set_context(CephContext *_cct) {
2330 cct = _cct;
2331 }
2332
2333 /**
2334 * AmazonS3 errors contain a HostId string, but is an opaque base64 blob; we
2335 * try to be more transparent. This has a wrapper so we can update it when zonegroup/zone are changed.
2336 */
2337 void init_host_id() {
2338 /* uint64_t needs 16, two '-' separators and a trailing null */
2339 const string& zone_name = get_zone().name;
2340 const string& zonegroup_name = zonegroup.get_name();
2341 char charbuf[16 + zone_name.size() + zonegroup_name.size() + 2 + 1];
2342 snprintf(charbuf, sizeof(charbuf), "%llx-%s-%s", (unsigned long long)instance_id(), zone_name.c_str(), zonegroup_name.c_str());
2343 string s(charbuf);
2344 host_id = s;
2345 }
2346
2347 string host_id;
2348
2349 RGWRealm realm;
2350
2351 RGWRESTConn *rest_master_conn;
2352 map<string, RGWRESTConn *> zone_conn_map;
2353 map<string, RGWRESTConn *> zone_data_sync_from_map;
2354 map<string, RGWRESTConn *> zone_data_notify_to_map;
2355 map<string, RGWRESTConn *> zonegroup_conn_map;
2356
2357 map<string, string> zone_id_by_name;
2358 map<string, RGWZone> zone_by_id;
2359
2360 RGWRESTConn *get_zone_conn_by_id(const string& id) {
2361 auto citer = zone_conn_map.find(id);
2362 if (citer == zone_conn_map.end()) {
2363 return NULL;
2364 }
2365
2366 return citer->second;
2367 }
2368
2369 RGWRESTConn *get_zone_conn_by_name(const string& name) {
2370 auto i = zone_id_by_name.find(name);
2371 if (i == zone_id_by_name.end()) {
2372 return NULL;
2373 }
2374
2375 return get_zone_conn_by_id(i->second);
2376 }
2377
2378 bool find_zone_id_by_name(const string& name, string *id) {
2379 auto i = zone_id_by_name.find(name);
2380 if (i == zone_id_by_name.end()) {
2381 return false;
2382 }
2383 *id = i->second;
2384 return true;
2385 }
2386
2387 int get_zonegroup(const string& id, RGWZoneGroup& zonegroup) {
2388 int ret = 0;
2389 if (id == get_zonegroup().get_id()) {
2390 zonegroup = get_zonegroup();
2391 } else if (!current_period.get_id().empty()) {
2392 ret = current_period.get_zonegroup(zonegroup, id);
2393 }
2394 return ret;
2395 }
2396
2397 RGWRealm& get_realm() {
2398 return realm;
2399 }
2400
2401 RGWZoneParams& get_zone_params() { return zone_params; }
2402 RGWZoneGroup& get_zonegroup() {
2403 return zonegroup;
2404 }
2405 RGWZone& get_zone() {
2406 return zone_public_config;
2407 }
2408
2409 bool zone_is_writeable() {
2410 return writeable_zone && !get_zone().is_read_only();
2411 }
2412
2413 uint32_t get_zone_short_id() const {
2414 return zone_short_id;
2415 }
2416
2417 bool zone_syncs_from(RGWZone& target_zone, RGWZone& source_zone);
2418
2419 const RGWQuotaInfo& get_bucket_quota() {
2420 return current_period.get_config().bucket_quota;
2421 }
2422
2423 const RGWQuotaInfo& get_user_quota() {
2424 return current_period.get_config().user_quota;
2425 }
2426
2427 const string& get_current_period_id() {
2428 return current_period.get_id();
2429 }
2430 // pulls missing periods for period_history
2431 std::unique_ptr<RGWPeriodPuller> period_puller;
2432 // maintains a connected history of periods
2433 std::unique_ptr<RGWPeriodHistory> period_history;
2434
2435 RGWAsyncRadosProcessor* get_async_rados() const { return async_rados; };
2436
2437 RGWMetadataManager *meta_mgr;
2438
2439 RGWDataChangesLog *data_log;
2440
2441 virtual ~RGWRados() = default;
2442
2443 tombstone_cache_t *get_tombstone_cache() {
2444 return obj_tombstone_cache;
2445 }
2446
2447 RGWSyncModulesManager *get_sync_modules_manager() {
2448 return sync_modules_manager;
2449 }
2450 const RGWSyncModuleInstanceRef& get_sync_module() {
2451 return sync_module;
2452 }
2453
2454 int get_required_alignment(const rgw_pool& pool, uint64_t *alignment);
2455 int get_max_chunk_size(const rgw_pool& pool, uint64_t *max_chunk_size);
2456 int get_max_chunk_size(const string& placement_rule, const rgw_obj& obj, uint64_t *max_chunk_size);
2457
2458 uint32_t get_max_bucket_shards() {
2459 return MAX_BUCKET_INDEX_SHARDS_PRIME;
2460 }
2461
2462 int get_raw_obj_ref(const rgw_raw_obj& obj, rgw_rados_ref *ref, rgw_pool *pool = NULL);
2463
2464 int list_raw_objects(const rgw_pool& pool, const string& prefix_filter, int max,
2465 RGWListRawObjsCtx& ctx, list<string>& oids,
2466 bool *is_truncated);
2467
2468 int list_raw_prefixed_objs(const rgw_pool& pool, const string& prefix, list<string>& result);
2469 int list_zonegroups(list<string>& zonegroups);
2470 int list_regions(list<string>& regions);
2471 int list_zones(list<string>& zones);
2472 int list_realms(list<string>& realms);
2473 int list_periods(list<string>& periods);
2474 int list_periods(const string& current_period, list<string>& periods);
2475 void tick();
2476
2477 CephContext *ctx() { return cct; }
2478 /** do all necessary setup of the storage device */
2479 int initialize(CephContext *_cct, bool _use_gc_thread, bool _use_lc_thread, bool _quota_threads, bool _run_sync_thread) {
2480 set_context(_cct);
2481 use_gc_thread = _use_gc_thread;
2482 use_lc_thread = _use_lc_thread;
2483 quota_threads = _quota_threads;
2484 run_sync_thread = _run_sync_thread;
2485 return initialize();
2486 }
2487 /** Initialize the RADOS instance and prepare to do other ops */
2488 virtual int init_rados();
2489 int init_zg_from_period(bool *initialized);
2490 int init_zg_from_local(bool *creating_defaults);
2491 int init_complete();
2492 int replace_region_with_zonegroup();
2493 int convert_regionmap();
2494 int initialize();
2495 void finalize();
2496
2497 void schedule_context(Context *c);
2498
2499 /** set up a bucket listing. handle is filled in. */
2500 int list_buckets_init(RGWAccessHandle *handle);
2501 /**
2502 * get the next bucket in the listing. obj is filled in,
2503 * handle is updated.
2504 */
2505 int list_buckets_next(rgw_bucket_dir_entry& obj, RGWAccessHandle *handle);
2506
2507 /// list logs
2508 int log_list_init(const string& prefix, RGWAccessHandle *handle);
2509 int log_list_next(RGWAccessHandle handle, string *name);
2510
2511 /// remove log
2512 int log_remove(const string& name);
2513
2514 /// show log
2515 int log_show_init(const string& name, RGWAccessHandle *handle);
2516 int log_show_next(RGWAccessHandle handle, rgw_log_entry *entry);
2517
2518 // log bandwidth info
2519 int log_usage(map<rgw_user_bucket, RGWUsageBatch>& usage_info);
2520 int read_usage(const rgw_user& user, uint64_t start_epoch, uint64_t end_epoch, uint32_t max_entries,
2521 bool *is_truncated, RGWUsageIter& read_iter, map<rgw_user_bucket, rgw_usage_log_entry>& usage);
2522 int trim_usage(rgw_user& user, uint64_t start_epoch, uint64_t end_epoch);
2523
2524 int create_pool(const rgw_pool& pool);
2525
2526 /**
2527 * create a bucket with name bucket and the given list of attrs
2528 * returns 0 on success, -ERR# otherwise.
2529 */
2530 int init_bucket_index(RGWBucketInfo& bucket_info, int num_shards);
2531 int select_bucket_placement(RGWUserInfo& user_info, const string& zonegroup_id, const string& rule,
2532 string *pselected_rule_name, RGWZonePlacementInfo *rule_info);
2533 int select_legacy_bucket_placement(RGWZonePlacementInfo *rule_info);
2534 int select_new_bucket_location(RGWUserInfo& user_info, const string& zonegroup_id, const string& rule,
2535 string *pselected_rule_name, RGWZonePlacementInfo *rule_info);
2536 int select_bucket_location_by_rule(const string& location_rule, RGWZonePlacementInfo *rule_info);
2537 void create_bucket_id(string *bucket_id);
2538
2539 bool get_obj_data_pool(const string& placement_rule, const rgw_obj& obj, rgw_pool *pool);
2540 bool obj_to_raw(const string& placement_rule, const rgw_obj& obj, rgw_raw_obj *raw_obj);
2541
2542 int create_bucket(RGWUserInfo& owner, rgw_bucket& bucket,
2543 const string& zonegroup_id,
2544 const string& placement_rule,
2545 const string& swift_ver_location,
2546 const RGWQuotaInfo * pquota_info,
2547 map<std::string,bufferlist>& attrs,
2548 RGWBucketInfo& bucket_info,
2549 obj_version *pobjv,
2550 obj_version *pep_objv,
2551 ceph::real_time creation_time,
2552 rgw_bucket *master_bucket,
2553 uint32_t *master_num_shards,
2554 bool exclusive = true);
2555 int add_bucket_placement(const rgw_pool& new_pool);
2556 int remove_bucket_placement(const rgw_pool& new_pool);
2557 int list_placement_set(set<rgw_pool>& names);
2558 int create_pools(vector<rgw_pool>& pools, vector<int>& retcodes);
2559
2560 RGWCoroutinesManagerRegistry *get_cr_registry() { return cr_registry; }
2561
2562 class SystemObject {
2563 RGWRados *store;
2564 RGWObjectCtx& ctx;
2565 rgw_raw_obj obj;
2566
2567 RGWObjState *state;
2568
2569 protected:
2570 int get_state(RGWRawObjState **pstate, RGWObjVersionTracker *objv_tracker);
2571
2572 public:
2573 SystemObject(RGWRados *_store, RGWObjectCtx& _ctx, rgw_raw_obj& _obj) : store(_store), ctx(_ctx), obj(_obj), state(NULL) {}
2574
2575 void invalidate_state();
2576
2577 RGWRados *get_store() { return store; }
2578 rgw_raw_obj& get_obj() { return obj; }
2579 RGWObjectCtx& get_ctx() { return ctx; }
2580
2581 struct Read {
2582 RGWRados::SystemObject *source;
2583
2584 struct GetObjState {
2585 rgw_rados_ref ref;
2586 bool has_ref{false};
2587 uint64_t last_ver{0};
2588
2589 GetObjState() {}
2590
2591 int get_ref(RGWRados *store, rgw_raw_obj& obj, rgw_rados_ref **pref);
2592 } state;
2593
2594 struct StatParams {
2595 ceph::real_time *lastmod;
2596 uint64_t *obj_size;
2597 map<string, bufferlist> *attrs;
2598 struct rgw_err *perr;
2599
2600 StatParams() : lastmod(NULL), obj_size(NULL), attrs(NULL), perr(NULL) {}
2601 } stat_params;
2602
2603 struct ReadParams {
2604 rgw_cache_entry_info *cache_info;
2605 map<string, bufferlist> *attrs;
2606
2607 ReadParams() : attrs(NULL) {}
2608 } read_params;
2609
2610 explicit Read(RGWRados::SystemObject *_source) : source(_source) {}
2611
2612 int stat(RGWObjVersionTracker *objv_tracker);
2613 int read(int64_t ofs, int64_t end, bufferlist& bl, RGWObjVersionTracker *objv_tracker);
2614 int get_attr(const char *name, bufferlist& dest);
2615 };
2616 };
2617
2618 struct BucketShard {
2619 RGWRados *store;
2620 rgw_bucket bucket;
2621 int shard_id;
2622 librados::IoCtx index_ctx;
2623 string bucket_obj;
2624
2625 explicit BucketShard(RGWRados *_store) : store(_store), shard_id(-1) {}
2626 int init(const rgw_bucket& _bucket, const rgw_obj& obj);
2627 int init(const rgw_bucket& _bucket, int sid);
2628 };
2629
2630 class Object {
2631 RGWRados *store;
2632 RGWBucketInfo bucket_info;
2633 RGWObjectCtx& ctx;
2634 rgw_obj obj;
2635
2636 BucketShard bs;
2637
2638 RGWObjState *state;
2639
2640 bool versioning_disabled;
2641
2642 bool bs_initialized;
2643
2644 protected:
2645 int get_state(RGWObjState **pstate, bool follow_olh, bool assume_noent = false);
2646 void invalidate_state();
2647
2648 int prepare_atomic_modification(librados::ObjectWriteOperation& op, bool reset_obj, const string *ptag,
2649 const char *ifmatch, const char *ifnomatch, bool removal_op);
2650 int complete_atomic_modification();
2651
2652 public:
2653 Object(RGWRados *_store, const RGWBucketInfo& _bucket_info, RGWObjectCtx& _ctx, const rgw_obj& _obj) : store(_store), bucket_info(_bucket_info),
2654 ctx(_ctx), obj(_obj), bs(store),
2655 state(NULL), versioning_disabled(false),
2656 bs_initialized(false) {}
2657
2658 RGWRados *get_store() { return store; }
2659 rgw_obj& get_obj() { return obj; }
2660 RGWObjectCtx& get_ctx() { return ctx; }
2661 RGWBucketInfo& get_bucket_info() { return bucket_info; }
2662 int get_manifest(RGWObjManifest **pmanifest);
2663
2664 int get_bucket_shard(BucketShard **pbs) {
2665 if (!bs_initialized) {
2666 int r = bs.init(bucket_info.bucket, obj);
2667 if (r < 0) {
2668 return r;
2669 }
2670 bs_initialized = true;
2671 }
2672 *pbs = &bs;
2673 return 0;
2674 }
2675
2676 void set_versioning_disabled(bool status) {
2677 versioning_disabled = status;
2678 }
2679
2680 bool versioning_enabled() {
2681 return (!versioning_disabled && bucket_info.versioning_enabled());
2682 }
2683
2684 struct Read {
2685 RGWRados::Object *source;
2686
2687 struct GetObjState {
2688 librados::IoCtx io_ctx;
2689 rgw_obj obj;
2690 rgw_raw_obj head_obj;
2691 } state;
2692
2693 struct ConditionParams {
2694 const ceph::real_time *mod_ptr;
2695 const ceph::real_time *unmod_ptr;
2696 bool high_precision_time;
2697 uint32_t mod_zone_id;
2698 uint64_t mod_pg_ver;
2699 const char *if_match;
2700 const char *if_nomatch;
2701
2702 ConditionParams() :
2703 mod_ptr(NULL), unmod_ptr(NULL), high_precision_time(false), mod_zone_id(0), mod_pg_ver(0),
2704 if_match(NULL), if_nomatch(NULL) {}
2705 } conds;
2706
2707 struct Params {
2708 ceph::real_time *lastmod;
2709 uint64_t *obj_size;
2710 map<string, bufferlist> *attrs;
2711 struct rgw_err *perr;
2712
2713 Params() : lastmod(NULL), obj_size(NULL), attrs(NULL), perr(NULL) {}
2714 } params;
2715
2716 explicit Read(RGWRados::Object *_source) : source(_source) {}
2717
2718 int prepare();
2719 static int range_to_ofs(uint64_t obj_size, int64_t &ofs, int64_t &end);
2720 int read(int64_t ofs, int64_t end, bufferlist& bl);
2721 int iterate(int64_t ofs, int64_t end, RGWGetDataCB *cb);
2722 int get_attr(const char *name, bufferlist& dest);
2723 };
2724
2725 struct Write {
2726 RGWRados::Object *target;
2727
2728 struct MetaParams {
2729 ceph::real_time *mtime;
2730 map<std::string, bufferlist>* rmattrs;
2731 const bufferlist *data;
2732 RGWObjManifest *manifest;
2733 const string *ptag;
2734 list<rgw_obj_index_key> *remove_objs;
2735 ceph::real_time set_mtime;
2736 rgw_user owner;
2737 RGWObjCategory category;
2738 int flags;
2739 const char *if_match;
2740 const char *if_nomatch;
2741 uint64_t olh_epoch;
2742 ceph::real_time delete_at;
2743 bool canceled;
2744 const string *user_data;
2745
2746 MetaParams() : mtime(NULL), rmattrs(NULL), data(NULL), manifest(NULL), ptag(NULL),
2747 remove_objs(NULL), category(RGW_OBJ_CATEGORY_MAIN), flags(0),
2748 if_match(NULL), if_nomatch(NULL), olh_epoch(0), canceled(false), user_data(nullptr) {}
2749 } meta;
2750
2751 explicit Write(RGWRados::Object *_target) : target(_target) {}
2752
2753 int _do_write_meta(uint64_t size, uint64_t accounted_size,
2754 map<std::string, bufferlist>& attrs,
2755 bool assume_noent,
2756 void *index_op);
2757 int write_meta(uint64_t size, uint64_t accounted_size,
2758 map<std::string, bufferlist>& attrs);
2759 int write_data(const char *data, uint64_t ofs, uint64_t len, bool exclusive);
2760 };
2761
2762 struct Delete {
2763 RGWRados::Object *target;
2764
2765 struct DeleteParams {
2766 rgw_user bucket_owner;
2767 int versioning_status;
2768 ACLOwner obj_owner; /* needed for creation of deletion marker */
2769 uint64_t olh_epoch;
2770 string marker_version_id;
2771 uint32_t bilog_flags;
2772 list<rgw_obj_index_key> *remove_objs;
2773 ceph::real_time expiration_time;
2774 ceph::real_time unmod_since;
2775 ceph::real_time mtime; /* for setting delete marker mtime */
2776 bool high_precision_time;
2777
2778 DeleteParams() : versioning_status(0), olh_epoch(0), bilog_flags(0), remove_objs(NULL), high_precision_time(false) {}
2779 } params;
2780
2781 struct DeleteResult {
2782 bool delete_marker;
2783 string version_id;
2784
2785 DeleteResult() : delete_marker(false) {}
2786 } result;
2787
2788 explicit Delete(RGWRados::Object *_target) : target(_target) {}
2789
2790 int delete_obj();
2791 };
2792
2793 struct Stat {
2794 RGWRados::Object *source;
2795
2796 struct Result {
2797 rgw_obj obj;
2798 RGWObjManifest manifest;
2799 bool has_manifest;
2800 uint64_t size;
2801 struct timespec mtime;
2802 map<string, bufferlist> attrs;
2803
2804 Result() : has_manifest(false), size(0) {}
2805 } result;
2806
2807 struct State {
2808 librados::IoCtx io_ctx;
2809 librados::AioCompletion *completion;
2810 int ret;
2811
2812 State() : completion(NULL), ret(0) {}
2813 } state;
2814
2815
2816 explicit Stat(RGWRados::Object *_source) : source(_source) {}
2817
2818 int stat_async();
2819 int wait();
2820 int stat();
2821 private:
2822 int finish();
2823 };
2824 };
2825
2826 class Bucket {
2827 RGWRados *store;
2828 RGWBucketInfo bucket_info;
2829 rgw_bucket& bucket;
2830 int shard_id;
2831
2832 public:
2833 Bucket(RGWRados *_store, const RGWBucketInfo& _bucket_info) : store(_store), bucket_info(_bucket_info), bucket(bucket_info.bucket),
2834 shard_id(RGW_NO_SHARD) {}
2835 RGWRados *get_store() { return store; }
2836 rgw_bucket& get_bucket() { return bucket; }
2837 RGWBucketInfo& get_bucket_info() { return bucket_info; }
2838
2839 int get_shard_id() { return shard_id; }
2840 void set_shard_id(int id) {
2841 shard_id = id;
2842 }
2843
2844 class UpdateIndex {
2845 RGWRados::Bucket *target;
2846 string optag;
2847 rgw_obj obj;
2848 uint16_t bilog_flags{0};
2849 BucketShard bs;
2850 bool bs_initialized{false};
2851 bool blind;
2852 bool prepared{false};
2853 public:
2854
2855 UpdateIndex(RGWRados::Bucket *_target, const rgw_obj& _obj) : target(_target), obj(_obj),
2856 bs(target->get_store()) {
2857 blind = (target->get_bucket_info().index_type == RGWBIType_Indexless);
2858 }
2859
2860 int get_bucket_shard(BucketShard **pbs) {
2861 if (!bs_initialized) {
2862 int r = bs.init(target->get_bucket(), obj);
2863 if (r < 0) {
2864 return r;
2865 }
2866 bs_initialized = true;
2867 }
2868 *pbs = &bs;
2869 return 0;
2870 }
2871
2872 void set_bilog_flags(uint16_t flags) {
2873 bilog_flags = flags;
2874 }
2875
2876 int prepare(RGWModifyOp, const string *write_tag);
2877 int complete(int64_t poolid, uint64_t epoch, uint64_t size,
2878 uint64_t accounted_size, ceph::real_time& ut,
2879 const string& etag, const string& content_type,
2880 bufferlist *acl_bl, RGWObjCategory category,
2881 list<rgw_obj_index_key> *remove_objs, const string *user_data = nullptr);
2882 int complete_del(int64_t poolid, uint64_t epoch,
2883 ceph::real_time& removed_mtime, /* mtime of removed object */
2884 list<rgw_obj_index_key> *remove_objs);
2885 int cancel();
2886
2887 const string *get_optag() { return &optag; }
2888
2889 bool is_prepared() { return prepared; }
2890 };
2891
2892 struct List {
2893 RGWRados::Bucket *target;
2894 rgw_obj_key next_marker;
2895
2896 struct Params {
2897 string prefix;
2898 string delim;
2899 rgw_obj_key marker;
2900 rgw_obj_key end_marker;
2901 string ns;
2902 bool enforce_ns;
2903 RGWAccessListFilter *filter;
2904 bool list_versions;
2905
2906 Params() : enforce_ns(true), filter(NULL), list_versions(false) {}
2907 } params;
2908
2909 public:
2910 explicit List(RGWRados::Bucket *_target) : target(_target) {}
2911
2912 int list_objects(int max, vector<rgw_bucket_dir_entry> *result, map<string, bool> *common_prefixes, bool *is_truncated);
2913 rgw_obj_key& get_next_marker() {
2914 return next_marker;
2915 }
2916 };
2917 };
2918
2919 /** Write/overwrite an object to the bucket storage. */
2920 virtual int put_system_obj_impl(rgw_raw_obj& obj, uint64_t size, ceph::real_time *mtime,
2921 map<std::string, bufferlist>& attrs, int flags,
2922 bufferlist& data,
2923 RGWObjVersionTracker *objv_tracker,
2924 ceph::real_time set_mtime /* 0 for don't set */);
2925
2926 virtual int put_system_obj_data(void *ctx, rgw_raw_obj& obj, bufferlist& bl,
2927 off_t ofs, bool exclusive,
2928 RGWObjVersionTracker *objv_tracker = nullptr);
2929 int aio_put_obj_data(void *ctx, rgw_raw_obj& obj, bufferlist& bl,
2930 off_t ofs, bool exclusive, void **handle);
2931
2932 int put_system_obj(void *ctx, rgw_raw_obj& obj, const char *data, size_t len, bool exclusive,
2933 ceph::real_time *mtime, map<std::string, bufferlist>& attrs, RGWObjVersionTracker *objv_tracker,
2934 ceph::real_time set_mtime) {
2935 bufferlist bl;
2936 bl.append(data, len);
2937 int flags = PUT_OBJ_CREATE;
2938 if (exclusive)
2939 flags |= PUT_OBJ_EXCL;
2940
2941 return put_system_obj_impl(obj, len, mtime, attrs, flags, bl, objv_tracker, set_mtime);
2942 }
2943 int aio_wait(void *handle);
2944 bool aio_completed(void *handle);
2945
2946 int on_last_entry_in_listing(RGWBucketInfo& bucket_info,
2947 const std::string& obj_prefix,
2948 const std::string& obj_delim,
2949 std::function<int(const rgw_bucket_dir_entry&)> handler);
2950
2951 bool swift_versioning_enabled(const RGWBucketInfo& bucket_info) const {
2952 return bucket_info.has_swift_versioning() &&
2953 bucket_info.swift_ver_location.size();
2954 }
2955
2956 int swift_versioning_copy(RGWObjectCtx& obj_ctx, /* in/out */
2957 const rgw_user& user, /* in */
2958 RGWBucketInfo& bucket_info, /* in */
2959 rgw_obj& obj); /* in */
2960 int swift_versioning_restore(RGWObjectCtx& obj_ctx, /* in/out */
2961 const rgw_user& user, /* in */
2962 RGWBucketInfo& bucket_info, /* in */
2963 rgw_obj& obj, /* in */
2964 bool& restored); /* out */
2965 int copy_obj_to_remote_dest(RGWObjState *astate,
2966 map<string, bufferlist>& src_attrs,
2967 RGWRados::Object::Read& read_op,
2968 const rgw_user& user_id,
2969 rgw_obj& dest_obj,
2970 ceph::real_time *mtime);
2971
/** How the attrs argument of a copy/fetch is combined with the source object's attributes. */
enum AttrsMod {
  ATTRSMOD_NONE    = 0,  /* source attributes are copied unmodified; attrs is ignored */
  ATTRSMOD_REPLACE = 1,  /* new object gets attrs; source attributes are not copied */
  ATTRSMOD_MERGE   = 2   /* conflicting source keys are overwritten by values in attrs */
};
2977
2978 int rewrite_obj(RGWBucketInfo& dest_bucket_info, rgw_obj& obj);
2979
2980 int stat_remote_obj(RGWObjectCtx& obj_ctx,
2981 const rgw_user& user_id,
2982 const string& client_id,
2983 req_info *info,
2984 const string& source_zone,
2985 rgw_obj& src_obj,
2986 RGWBucketInfo& src_bucket_info,
2987 real_time *src_mtime,
2988 uint64_t *psize,
2989 const real_time *mod_ptr,
2990 const real_time *unmod_ptr,
2991 bool high_precision_time,
2992 const char *if_match,
2993 const char *if_nomatch,
2994 map<string, bufferlist> *pattrs,
2995 string *version_id,
2996 string *ptag,
2997 string *petag);
2998
2999 int fetch_remote_obj(RGWObjectCtx& obj_ctx,
3000 const rgw_user& user_id,
3001 const string& client_id,
3002 const string& op_id,
3003 bool record_op_state,
3004 req_info *info,
3005 const string& source_zone,
3006 rgw_obj& dest_obj,
3007 rgw_obj& src_obj,
3008 RGWBucketInfo& dest_bucket_info,
3009 RGWBucketInfo& src_bucket_info,
3010 ceph::real_time *src_mtime,
3011 ceph::real_time *mtime,
3012 const ceph::real_time *mod_ptr,
3013 const ceph::real_time *unmod_ptr,
3014 bool high_precision_time,
3015 const char *if_match,
3016 const char *if_nomatch,
3017 AttrsMod attrs_mod,
3018 bool copy_if_newer,
3019 map<string, bufferlist>& attrs,
3020 RGWObjCategory category,
3021 uint64_t olh_epoch,
3022 ceph::real_time delete_at,
3023 string *version_id,
3024 string *ptag,
3025 ceph::buffer::list *petag,
3026 struct rgw_err *err,
3027 void (*progress_cb)(off_t, void *),
3028 void *progress_data);
3029 /**
3030 * Copy an object.
3031 * dest_obj: the object to copy into
3032 * src_obj: the object to copy from
3033 * attrs: usage depends on attrs_mod parameter
3034 * attrs_mod: the modification mode of the attrs, may have the following values:
3035 * ATTRSMOD_NONE - the attributes of the source object will be
3036 * copied without modifications, attrs parameter is ignored;
3037 * ATTRSMOD_REPLACE - new object will have the attributes provided by attrs
3038 * parameter, source object attributes are not copied;
3039 * ATTRSMOD_MERGE - any conflicting meta keys on the source object's attributes
3040 * are overwritten by values contained in attrs parameter.
3041 * err: stores any errors resulting from the get of the original object
3042 * Returns: 0 on success, -ERR# otherwise.
3043 */
3044 int copy_obj(RGWObjectCtx& obj_ctx,
3045 const rgw_user& user_id,
3046 const string& client_id,
3047 const string& op_id,
3048 req_info *info,
3049 const string& source_zone,
3050 rgw_obj& dest_obj,
3051 rgw_obj& src_obj,
3052 RGWBucketInfo& dest_bucket_info,
3053 RGWBucketInfo& src_bucket_info,
3054 ceph::real_time *src_mtime,
3055 ceph::real_time *mtime,
3056 const ceph::real_time *mod_ptr,
3057 const ceph::real_time *unmod_ptr,
3058 bool high_precision_time,
3059 const char *if_match,
3060 const char *if_nomatch,
3061 AttrsMod attrs_mod,
3062 bool copy_if_newer,
3063 map<std::string, bufferlist>& attrs,
3064 RGWObjCategory category,
3065 uint64_t olh_epoch,
3066 ceph::real_time delete_at,
3067 string *version_id,
3068 string *ptag,
3069 ceph::buffer::list *petag,
3070 struct rgw_err *err,
3071 void (*progress_cb)(off_t, void *),
3072 void *progress_data);
3073
3074 int copy_obj_data(RGWObjectCtx& obj_ctx,
3075 RGWBucketInfo& dest_bucket_info,
3076 RGWRados::Object::Read& read_op, off_t end,
3077 rgw_obj& dest_obj,
3078 rgw_obj& src_obj,
3079 uint64_t max_chunk_size,
3080 ceph::real_time *mtime,
3081 ceph::real_time set_mtime,
3082 map<string, bufferlist>& attrs,
3083 RGWObjCategory category,
3084 uint64_t olh_epoch,
3085 ceph::real_time delete_at,
3086 string *version_id,
3087 string *ptag,
3088 ceph::buffer::list *petag,
3089 struct rgw_err *err);
3090
3091 int check_bucket_empty(RGWBucketInfo& bucket_info);
3092
3093 /**
3094 * Delete a bucket.
3095 * bucket: the name of the bucket to delete
3096 * Returns 0 on success, -ERR# otherwise.
3097 */
3098 int delete_bucket(RGWBucketInfo& bucket_info, RGWObjVersionTracker& objv_tracker, bool check_empty = true);
3099
3100 bool is_meta_master();
3101
3102 /**
3103 * Check to see if the bucket metadata is synced
3104 */
3105 bool is_syncing_bucket_meta(const rgw_bucket& bucket);
3106 void wakeup_meta_sync_shards(set<int>& shard_ids);
3107 void wakeup_data_sync_shards(const string& source_zone, map<int, set<string> >& shard_ids);
3108
3109 RGWMetaSyncStatusManager* get_meta_sync_manager();
3110 RGWDataSyncStatusManager* get_data_sync_manager(const std::string& source_zone);
3111
3112 int set_bucket_owner(rgw_bucket& bucket, ACLOwner& owner);
3113 int set_buckets_enabled(std::vector<rgw_bucket>& buckets, bool enabled);
3114 int bucket_suspended(rgw_bucket& bucket, bool *suspended);
3115
3116 /** Delete an object.*/
3117 int delete_obj(RGWObjectCtx& obj_ctx,
3118 const RGWBucketInfo& bucket_owner,
3119 const rgw_obj& src_obj,
3120 int versioning_status,
3121 uint16_t bilog_flags = 0,
3122 const ceph::real_time& expiration_time = ceph::real_time());
3123
3124 /** Delete a raw object.*/
3125 int delete_raw_obj(const rgw_raw_obj& obj);
3126
3127 /* Delete a system object */
3128 virtual int delete_system_obj(rgw_raw_obj& src_obj, RGWObjVersionTracker *objv_tracker = NULL);
3129
3130 /** Remove an object from the bucket index */
3131 int delete_obj_index(const rgw_obj& obj);
3132
/**
 * Get a single attribute of a system object.
 * obj: the raw object to read the attribute from
 * name: name of the attr to retrieve
 * dest: bufferlist to store the result in
 * Returns: 0 on success, -ERR# otherwise.
 */
3141 virtual int system_obj_get_attr(rgw_raw_obj& obj, const char *name, bufferlist& dest);
3142
3143 int system_obj_set_attr(void *ctx, rgw_raw_obj& obj, const char *name, bufferlist& bl,
3144 RGWObjVersionTracker *objv_tracker);
3145 virtual int system_obj_set_attrs(void *ctx, rgw_raw_obj& obj,
3146 map<string, bufferlist>& attrs,
3147 map<string, bufferlist>* rmattrs,
3148 RGWObjVersionTracker *objv_tracker);
3149
/**
 * Set an attr on an object.
 * ctx: opaque context pointer supplied by the caller
 * bucket_info: info of the bucket holding the object
 * obj: the object to set the attr on
 * name: the attr to set
 * bl: the contents of the attr
 * Returns: 0 on success, -ERR# otherwise.
 */
3158 int set_attr(void *ctx, const RGWBucketInfo& bucket_info, rgw_obj& obj, const char *name, bufferlist& bl);
3159
3160 int set_attrs(void *ctx, const RGWBucketInfo& bucket_info, rgw_obj& obj,
3161 map<string, bufferlist>& attrs,
3162 map<string, bufferlist>* rmattrs);
3163
3164 int get_system_obj_state(RGWObjectCtx *rctx, rgw_raw_obj& obj, RGWRawObjState **state, RGWObjVersionTracker *objv_tracker);
3165 int get_obj_state(RGWObjectCtx *rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWObjState **state,
3166 bool follow_olh, bool assume_noent = false);
3167 int get_obj_state(RGWObjectCtx *rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWObjState **state) {
3168 return get_obj_state(rctx, bucket_info, obj, state, true);
3169 }
3170
3171 virtual int stat_system_obj(RGWObjectCtx& obj_ctx,
3172 RGWRados::SystemObject::Read::GetObjState& state,
3173 rgw_raw_obj& obj,
3174 map<string, bufferlist> *attrs,
3175 ceph::real_time *lastmod,
3176 uint64_t *obj_size,
3177 RGWObjVersionTracker *objv_tracker);
3178
3179 virtual int get_system_obj(RGWObjectCtx& obj_ctx, RGWRados::SystemObject::Read::GetObjState& read_state,
3180 RGWObjVersionTracker *objv_tracker, rgw_raw_obj& obj,
3181 bufferlist& bl, off_t ofs, off_t end,
3182 map<string, bufferlist> *attrs,
3183 rgw_cache_entry_info *cache_info);
3184
3185 virtual void register_chained_cache(RGWChainedCache *cache) {}
3186 virtual bool chain_cache_entry(list<rgw_cache_entry_info *>& cache_info_entries, RGWChainedCache::Entry *chained_entry) { return false; }
3187
3188 int iterate_obj(RGWObjectCtx& ctx,
3189 const RGWBucketInfo& bucket_info, const rgw_obj& obj,
3190 off_t ofs, off_t end,
3191 uint64_t max_chunk_size,
3192 int (*iterate_obj_cb)(const RGWBucketInfo& bucket_info, const rgw_obj& obj, const rgw_raw_obj&, off_t, off_t, off_t, bool, RGWObjState *, void *),
3193 void *arg);
3194
3195 int flush_read_list(struct get_obj_data *d);
3196
3197 int get_obj_iterate_cb(RGWObjectCtx *ctx, RGWObjState *astate,
3198 const RGWBucketInfo& bucket_info, const rgw_obj& obj,
3199 const rgw_raw_obj& read_obj,
3200 off_t obj_ofs, off_t read_ofs, off_t len,
3201 bool is_head_obj, void *arg);
3202
3203 void get_obj_aio_completion_cb(librados::completion_t cb, void *arg);
3204
3205 /**
3206 * a simple object read without keeping state
3207 */
3208
3209 virtual int raw_obj_stat(rgw_raw_obj& obj, uint64_t *psize, ceph::real_time *pmtime, uint64_t *epoch,
3210 map<string, bufferlist> *attrs, bufferlist *first_chunk,
3211 RGWObjVersionTracker *objv_tracker);
3212
3213 int obj_operate(const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::ObjectWriteOperation *op);
3214 int obj_operate(const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::ObjectReadOperation *op);
3215
3216 void bucket_index_guard_olh_op(RGWObjState& olh_state, librados::ObjectOperation& op);
3217 int olh_init_modification(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& olh_obj, string *op_tag);
3218 int olh_init_modification_impl(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& olh_obj, string *op_tag);
3219 int bucket_index_link_olh(const RGWBucketInfo& bucket_info, RGWObjState& olh_state,
3220 const rgw_obj& obj_instance, bool delete_marker,
3221 const string& op_tag, struct rgw_bucket_dir_entry_meta *meta,
3222 uint64_t olh_epoch,
3223 ceph::real_time unmod_since, bool high_precision_time);
3224 int bucket_index_unlink_instance(const RGWBucketInfo& bucket_info, const rgw_obj& obj_instance, const string& op_tag, const string& olh_tag, uint64_t olh_epoch);
3225 int bucket_index_read_olh_log(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& obj_instance, uint64_t ver_marker,
3226 map<uint64_t, vector<rgw_bucket_olh_log_entry> > *log, bool *is_truncated);
3227 int bucket_index_trim_olh_log(const RGWBucketInfo& bucket_info, RGWObjState& obj_state, const rgw_obj& obj_instance, uint64_t ver);
3228 int bucket_index_clear_olh(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& obj_instance);
3229 int apply_olh_log(RGWObjectCtx& ctx, RGWObjState& obj_state, const RGWBucketInfo& bucket_info, const rgw_obj& obj,
3230 bufferlist& obj_tag, map<uint64_t, vector<rgw_bucket_olh_log_entry> >& log,
3231 uint64_t *plast_ver);
3232 int update_olh(RGWObjectCtx& obj_ctx, RGWObjState *state, const RGWBucketInfo& bucket_info, const rgw_obj& obj);
3233 int set_olh(RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info, const rgw_obj& target_obj, bool delete_marker, rgw_bucket_dir_entry_meta *meta,
3234 uint64_t olh_epoch, ceph::real_time unmod_since, bool high_precision_time);
3235 int unlink_obj_instance(RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info, const rgw_obj& target_obj,
3236 uint64_t olh_epoch);
3237
3238 void check_pending_olh_entries(map<string, bufferlist>& pending_entries, map<string, bufferlist> *rm_pending_entries);
3239 int remove_olh_pending_entries(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& olh_obj, map<string, bufferlist>& pending_attrs);
3240 int follow_olh(const RGWBucketInfo& bucket_info, RGWObjectCtx& ctx, RGWObjState *state, const rgw_obj& olh_obj, rgw_obj *target);
3241 int get_olh(const RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWOLHInfo *olh);
3242
3243 void gen_rand_obj_instance_name(rgw_obj *target);
3244
3245 int omap_get_vals(rgw_raw_obj& obj, bufferlist& header, const std::string& marker, uint64_t count, std::map<string, bufferlist>& m);
3246 int omap_get_all(rgw_raw_obj& obj, bufferlist& header, std::map<string, bufferlist>& m);
3247 int omap_set(rgw_raw_obj& obj, const std::string& key, bufferlist& bl);
3248 int omap_set(rgw_raw_obj& obj, map<std::string, bufferlist>& m);
3249 int omap_del(rgw_raw_obj& obj, const std::string& key);
3250 int update_containers_stats(map<string, RGWBucketEnt>& m);
3251 int append_async(rgw_raw_obj& obj, size_t size, bufferlist& bl);
3252
3253 int watch(const string& oid, uint64_t *watch_handle, librados::WatchCtx2 *ctx);
3254 int unwatch(uint64_t watch_handle);
3255 void add_watcher(int i);
3256 void remove_watcher(int i);
3257 virtual bool need_watch_notify() { return false; }
3258 int init_watch();
3259 void finalize_watch();
3260 int distribute(const string& key, bufferlist& bl);
3261 virtual int watch_cb(uint64_t notify_id,
3262 uint64_t cookie,
3263 uint64_t notifier_id,
3264 bufferlist& bl) { return 0; }
3265 void pick_control_oid(const string& key, string& notify_oid);
3266
3267 virtual void set_cache_enabled(bool state) {}
3268
3269 void set_atomic(void *ctx, rgw_obj& obj) {
3270 RGWObjectCtx *rctx = static_cast<RGWObjectCtx *>(ctx);
3271 rctx->obj.set_atomic(obj);
3272 }
3273 void set_prefetch_data(void *ctx, rgw_obj& obj) {
3274 RGWObjectCtx *rctx = static_cast<RGWObjectCtx *>(ctx);
3275 rctx->obj.set_prefetch_data(obj);
3276 }
3277 void set_prefetch_data(void *ctx, rgw_raw_obj& obj) {
3278 RGWObjectCtx *rctx = static_cast<RGWObjectCtx *>(ctx);
3279 rctx->raw.set_prefetch_data(obj);
3280 }
3281
3282 int decode_policy(bufferlist& bl, ACLOwner *owner);
3283 int get_bucket_stats(RGWBucketInfo& bucket_info, int shard_id, string *bucket_ver, string *master_ver,
3284 map<RGWObjCategory, RGWStorageStats>& stats, string *max_marker);
3285 int get_bucket_stats_async(RGWBucketInfo& bucket_info, int shard_id, RGWGetBucketStats_CB *cb);
3286 int get_user_stats(const rgw_user& user, RGWStorageStats& stats);
3287 int get_user_stats_async(const rgw_user& user, RGWGetUserStats_CB *cb);
3288 void get_bucket_instance_obj(const rgw_bucket& bucket, rgw_raw_obj& obj);
3289 void get_bucket_meta_oid(const rgw_bucket& bucket, string& oid);
3290
3291 int put_bucket_entrypoint_info(const string& tenant_name, const string& bucket_name, RGWBucketEntryPoint& entry_point,
3292 bool exclusive, RGWObjVersionTracker& objv_tracker, ceph::real_time mtime,
3293 map<string, bufferlist> *pattrs);
3294 int put_bucket_instance_info(RGWBucketInfo& info, bool exclusive, ceph::real_time mtime, map<string, bufferlist> *pattrs);
3295 int get_bucket_entrypoint_info(RGWObjectCtx& obj_ctx, const string& tenant_name, const string& bucket_name,
3296 RGWBucketEntryPoint& entry_point, RGWObjVersionTracker *objv_tracker,
3297 ceph::real_time *pmtime, map<string, bufferlist> *pattrs, rgw_cache_entry_info *cache_info = NULL);
3298 int get_bucket_instance_info(RGWObjectCtx& obj_ctx, const string& meta_key, RGWBucketInfo& info, ceph::real_time *pmtime, map<string, bufferlist> *pattrs);
3299 int get_bucket_instance_info(RGWObjectCtx& obj_ctx, const rgw_bucket& bucket, RGWBucketInfo& info, ceph::real_time *pmtime, map<string, bufferlist> *pattrs);
3300 int get_bucket_instance_from_oid(RGWObjectCtx& obj_ctx, string& oid, RGWBucketInfo& info, ceph::real_time *pmtime, map<string, bufferlist> *pattrs,
3301 rgw_cache_entry_info *cache_info = NULL);
3302
3303 int convert_old_bucket_info(RGWObjectCtx& obj_ctx, const string& tenant_name, const string& bucket_name);
3304 static void make_bucket_entry_name(const string& tenant_name, const string& bucket_name, string& bucket_entry);
3305 int get_bucket_info(RGWObjectCtx& obj_ctx,
3306 const string& tenant_name, const string& bucket_name,
3307 RGWBucketInfo& info,
3308 ceph::real_time *pmtime, map<string, bufferlist> *pattrs = NULL);
3309 int put_linked_bucket_info(RGWBucketInfo& info, bool exclusive, ceph::real_time mtime, obj_version *pep_objv,
3310 map<string, bufferlist> *pattrs, bool create_entry_point);
3311
3312 int cls_rgw_init_index(librados::IoCtx& io_ctx, librados::ObjectWriteOperation& op, string& oid);
3313 int cls_obj_prepare_op(BucketShard& bs, RGWModifyOp op, string& tag, rgw_obj& obj, uint16_t bilog_flags);
3314 int cls_obj_complete_op(BucketShard& bs, RGWModifyOp op, string& tag, int64_t pool, uint64_t epoch,
3315 rgw_bucket_dir_entry& ent, RGWObjCategory category, list<rgw_obj_index_key> *remove_objs, uint16_t bilog_flags);
3316 int cls_obj_complete_add(BucketShard& bs, string& tag, int64_t pool, uint64_t epoch, rgw_bucket_dir_entry& ent,
3317 RGWObjCategory category, list<rgw_obj_index_key> *remove_objs, uint16_t bilog_flags);
3318 int cls_obj_complete_del(BucketShard& bs, string& tag, int64_t pool, uint64_t epoch, rgw_obj& obj,
3319 ceph::real_time& removed_mtime, list<rgw_obj_index_key> *remove_objs, uint16_t bilog_flags);
3320 int cls_obj_complete_cancel(BucketShard& bs, string& tag, rgw_obj& obj, uint16_t bilog_flags);
3321 int cls_obj_set_bucket_tag_timeout(RGWBucketInfo& bucket_info, uint64_t timeout);
3322 int cls_bucket_list(RGWBucketInfo& bucket_info, int shard_id, rgw_obj_index_key& start, const string& prefix,
3323 uint32_t num_entries, bool list_versions, map<string, rgw_bucket_dir_entry>& m,
3324 bool *is_truncated, rgw_obj_index_key *last_entry,
3325 bool (*force_check_filter)(const string& name) = NULL);
3326 int cls_bucket_head(const RGWBucketInfo& bucket_info, int shard_id, map<string, struct rgw_bucket_dir_header>& headers, map<int, string> *bucket_instance_ids = NULL);
3327 int cls_bucket_head_async(const RGWBucketInfo& bucket_info, int shard_id, RGWGetDirHeader_CB *ctx, int *num_aio);
3328 int list_bi_log_entries(RGWBucketInfo& bucket_info, int shard_id, string& marker, uint32_t max, std::list<rgw_bi_log_entry>& result, bool *truncated);
3329 int trim_bi_log_entries(RGWBucketInfo& bucket_info, int shard_id, string& marker, string& end_marker);
3330 int get_bi_log_status(RGWBucketInfo& bucket_info, int shard_id, map<int, string>& max_marker);
3331
3332 int bi_get_instance(const RGWBucketInfo& bucket_info, rgw_obj& obj, rgw_bucket_dir_entry *dirent);
3333 int bi_get(rgw_bucket& bucket, rgw_obj& obj, BIIndexType index_type, rgw_cls_bi_entry *entry);
3334 void bi_put(librados::ObjectWriteOperation& op, BucketShard& bs, rgw_cls_bi_entry& entry);
3335 int bi_put(BucketShard& bs, rgw_cls_bi_entry& entry);
3336 int bi_put(rgw_bucket& bucket, rgw_obj& obj, rgw_cls_bi_entry& entry);
3337 int bi_list(rgw_bucket& bucket, int shard_id, const string& filter_obj, const string& marker, uint32_t max, list<rgw_cls_bi_entry> *entries, bool *is_truncated);
3338 int bi_list(BucketShard& bs, const string& filter_obj, const string& marker, uint32_t max, list<rgw_cls_bi_entry> *entries, bool *is_truncated);
3339 int bi_list(rgw_bucket& bucket, const string& obj_name, const string& marker, uint32_t max,
3340 list<rgw_cls_bi_entry> *entries, bool *is_truncated);
3341 int bi_remove(BucketShard& bs);
3342
3343 int cls_obj_usage_log_add(const string& oid, rgw_usage_log_info& info);
3344 int cls_obj_usage_log_read(string& oid, string& user, uint64_t start_epoch, uint64_t end_epoch, uint32_t max_entries,
3345 string& read_iter, map<rgw_user_bucket, rgw_usage_log_entry>& usage, bool *is_truncated);
3346 int cls_obj_usage_log_trim(string& oid, string& user, uint64_t start_epoch, uint64_t end_epoch);
3347
3348 int key_to_shard_id(const string& key, int max_shards);
3349 void shard_name(const string& prefix, unsigned max_shards, const string& key, string& name, int *shard_id);
3350 void shard_name(const string& prefix, unsigned max_shards, const string& section, const string& key, string& name);
3351 void shard_name(const string& prefix, unsigned shard_id, string& name);
3352 int get_target_shard_id(const RGWBucketInfo& bucket_info, const string& obj_key, int *shard_id);
3353 void time_log_prepare_entry(cls_log_entry& entry, const ceph::real_time& ut, const string& section, const string& key, bufferlist& bl);
3354 int time_log_add_init(librados::IoCtx& io_ctx);
3355 int time_log_add(const string& oid, list<cls_log_entry>& entries,
3356 librados::AioCompletion *completion, bool monotonic_inc = true);
3357 int time_log_add(const string& oid, const ceph::real_time& ut, const string& section, const string& key, bufferlist& bl);
3358 int time_log_list(const string& oid, const ceph::real_time& start_time, const ceph::real_time& end_time,
3359 int max_entries, list<cls_log_entry>& entries,
3360 const string& marker, string *out_marker, bool *truncated);
3361 int time_log_info(const string& oid, cls_log_header *header);
3362 int time_log_info_async(librados::IoCtx& io_ctx, const string& oid, cls_log_header *header, librados::AioCompletion *completion);
3363 int time_log_trim(const string& oid, const ceph::real_time& start_time, const ceph::real_time& end_time,
3364 const string& from_marker, const string& to_marker,
3365 librados::AioCompletion *completion = nullptr);
3366
3367 string objexp_hint_get_shardname(int shard_num);
3368 int objexp_key_shard(const rgw_obj_index_key& key);
3369 void objexp_get_shard(int shard_num,
3370 string& shard); /* out */
3371 int objexp_hint_add(const ceph::real_time& delete_at,
3372 const string& tenant_name,
3373 const string& bucket_name,
3374 const string& bucket_id,
3375 const rgw_obj_index_key& obj_key);
3376 int objexp_hint_list(const string& oid,
3377 const ceph::real_time& start_time,
3378 const ceph::real_time& end_time,
3379 const int max_entries,
3380 const string& marker,
3381 list<cls_timeindex_entry>& entries, /* out */
3382 string *out_marker, /* out */
3383 bool *truncated); /* out */
3384 int objexp_hint_parse(cls_timeindex_entry &ti_entry,
3385 objexp_hint_entry& hint_entry); /* out */
3386 int objexp_hint_trim(const string& oid,
3387 const ceph::real_time& start_time,
3388 const ceph::real_time& end_time,
3389 const string& from_marker = std::string(),
3390 const string& to_marker = std::string());
3391
3392 int lock_exclusive(rgw_pool& pool, const string& oid, ceph::timespan& duration, string& zone_id, string& owner_id);
3393 int unlock(rgw_pool& pool, const string& oid, string& zone_id, string& owner_id);
3394
3395 void update_gc_chain(rgw_obj& head_obj, RGWObjManifest& manifest, cls_rgw_obj_chain *chain);
3396 int send_chain_to_gc(cls_rgw_obj_chain& chain, const string& tag, bool sync);
3397 int gc_operate(string& oid, librados::ObjectWriteOperation *op);
3398 int gc_aio_operate(string& oid, librados::ObjectWriteOperation *op);
3399 int gc_operate(string& oid, librados::ObjectReadOperation *op, bufferlist *pbl);
3400
3401 int list_gc_objs(int *index, string& marker, uint32_t max, bool expired_only, std::list<cls_rgw_gc_obj_info>& result, bool *truncated);
3402 int process_gc();
3403 int process_expire_objects();
3404 int defer_gc(void *ctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj);
3405
3406 int process_lc();
3407 int list_lc_progress(const string& marker, uint32_t max_entries, map<string, int> *progress_map);
3408
3409 int bucket_check_index(RGWBucketInfo& bucket_info,
3410 map<RGWObjCategory, RGWStorageStats> *existing_stats,
3411 map<RGWObjCategory, RGWStorageStats> *calculated_stats);
3412 int bucket_rebuild_index(RGWBucketInfo& bucket_info);
3413 int remove_objs_from_index(RGWBucketInfo& bucket_info, list<rgw_obj_index_key>& oid_list);
3414 int move_rados_obj(librados::IoCtx& src_ioctx,
3415 const string& src_oid, const string& src_locator,
3416 librados::IoCtx& dst_ioctx,
3417 const string& dst_oid, const string& dst_locator);
3418 int fix_head_obj_locator(const RGWBucketInfo& bucket_info, bool copy_obj, bool remove_bad, rgw_obj_key& key);
3419 int fix_tail_obj_locator(const RGWBucketInfo& bucket_info, rgw_obj_key& key, bool fix, bool *need_fix);
3420
3421 int cls_user_get_header(const string& user_id, cls_user_header *header);
3422 int cls_user_get_header_async(const string& user_id, RGWGetUserHeader_CB *ctx);
3423 int cls_user_sync_bucket_stats(rgw_raw_obj& user_obj, const RGWBucketInfo& bucket_info);
3424 int cls_user_list_buckets(rgw_raw_obj& obj,
3425 const string& in_marker,
3426 const string& end_marker,
3427 int max_entries,
3428 list<cls_user_bucket_entry>& entries,
3429 string *out_marker,
3430 bool *truncated);
3431 int cls_user_add_bucket(rgw_raw_obj& obj, const cls_user_bucket_entry& entry);
3432 int cls_user_update_buckets(rgw_raw_obj& obj, list<cls_user_bucket_entry>& entries, bool add);
3433 int cls_user_complete_stats_sync(rgw_raw_obj& obj);
3434 int complete_sync_user_stats(const rgw_user& user_id);
3435 int cls_user_add_bucket(rgw_raw_obj& obj, list<cls_user_bucket_entry>& entries);
3436 int cls_user_remove_bucket(rgw_raw_obj& obj, const cls_user_bucket& bucket);
3437
3438 int check_quota(const rgw_user& bucket_owner, rgw_bucket& bucket,
3439 RGWQuotaInfo& user_quota, RGWQuotaInfo& bucket_quota, uint64_t obj_size);
3440
3441 uint64_t instance_id();
3442 const string& zone_id() {
3443 return get_zone_params().get_id();
3444 }
3445 string unique_id(uint64_t unique_num) {
3446 char buf[32];
3447 snprintf(buf, sizeof(buf), ".%llu.%llu", (unsigned long long)instance_id(), (unsigned long long)unique_num);
3448 string s = get_zone_params().get_id() + buf;
3449 return s;
3450 }
3451
3452 void init_unique_trans_id_deps() {
3453 char buf[16 + 2 + 1]; /* uint64_t needs 16, 2 hyphens add further 2 */
3454
3455 snprintf(buf, sizeof(buf), "-%llx-", (unsigned long long)instance_id());
3456 url_encode(string(buf) + get_zone_params().get_name(), trans_id_suffix);
3457 }
3458
3459 /* In order to preserve compability with Swift API, transaction ID
3460 * should contain at least 32 characters satisfying following spec:
3461 * - first 21 chars must be in range [0-9a-f]. Swift uses this
3462 * space for storing fragment of UUID obtained through a call to
3463 * uuid4() function of Python's uuid module;
3464 * - char no. 22 must be a hyphen;
3465 * - at least 10 next characters constitute hex-formatted timestamp
3466 * padded with zeroes if necessary. All bytes must be in [0-9a-f]
3467 * range;
3468 * - last, optional part of transaction ID is any url-encoded string
3469 * without restriction on length. */
3470 string unique_trans_id(const uint64_t unique_num) {
3471 char buf[41]; /* 2 + 21 + 1 + 16 (timestamp can consume up to 16) + 1 */
3472 time_t timestamp = time(NULL);
3473
3474 snprintf(buf, sizeof(buf), "tx%021llx-%010llx",
3475 (unsigned long long)unique_num,
3476 (unsigned long long)timestamp);
3477
3478 return string(buf) + trans_id_suffix;
3479 }
3480
3481 void get_log_pool(rgw_pool& pool) {
3482 pool = get_zone_params().log_pool;
3483 }
3484
3485 bool need_to_log_data() {
3486 return get_zone().log_data;
3487 }
3488
3489 bool need_to_log_metadata() {
3490 return get_zone().log_meta;
3491 }
3492
3493 librados::Rados* get_rados_handle();
3494
3495 int delete_raw_obj_aio(const rgw_raw_obj& obj, list<librados::AioCompletion *>& handles);
3496 int delete_obj_aio(const rgw_obj& obj, RGWBucketInfo& info, RGWObjState *astate,
3497 list<librados::AioCompletion *>& handles, bool keep_index_consistent);
3498 private:
3499 /**
3500 * This is a helper method, it generates a list of bucket index objects with the given
3501 * bucket base oid and number of shards.
3502 *
3503 * bucket_oid_base [in] - base name of the bucket index object;
3504 * num_shards [in] - number of bucket index object shards.
3505 * bucket_objs [out] - filled by this method, a list of bucket index objects.
3506 */
3507 void get_bucket_index_objects(const string& bucket_oid_base, uint32_t num_shards,
3508 map<int, string>& bucket_objs, int shard_id = -1);
3509
3510 /**
3511 * Get the bucket index object with the given base bucket index object and object key,
3512 * and the number of bucket index shards.
3513 *
3514 * bucket_oid_base [in] - bucket object base name.
3515 * obj_key [in] - object key.
3516 * num_shards [in] - number of bucket index shards.
3517 * hash_type [in] - type of hash to find the shard ID.
3518 * bucket_obj [out] - the bucket index object for the given object.
3519 *
3520 * Return 0 on success, a failure code otherwise.
3521 */
3522 int get_bucket_index_object(const string& bucket_oid_base, const string& obj_key,
3523 uint32_t num_shards, RGWBucketInfo::BIShardsHashType hash_type, string *bucket_obj, int *shard);
3524
3525 void get_bucket_index_object(const string& bucket_oid_base, uint32_t num_shards,
3526 int shard_id, string *bucket_obj);
3527
3528 /**
3529 * Check the actual on-disk state of the object specified
3530 * by list_state, and fill in the time and size of object.
3531 * Then append any changes to suggested_updates for
3532 * the rgw class' dir_suggest_changes function.
3533 *
3534 * Note that this can maul list_state; don't use it afterwards. Also
3535 * it expects object to already be filled in from list_state; it only
3536 * sets the size and mtime.
3537 *
3538 * Returns 0 on success, -ENOENT if the object doesn't exist on disk,
3539 * and -errno on other failures. (-ENOENT is not a failure, and it
3540 * will encode that info as a suggested update.)
3541 */
3542 int check_disk_state(librados::IoCtx io_ctx,
3543 const RGWBucketInfo& bucket_info,
3544 rgw_bucket_dir_entry& list_state,
3545 rgw_bucket_dir_entry& object,
3546 bufferlist& suggested_updates);
3547
3548 /**
3549 * Init pool iteration
3550 * bucket: pool name in a bucket object
3551 * ctx: context object to use for the iteration
3552 * Returns: 0 on success, -ERR# otherwise.
3553 */
3554 int pool_iterate_begin(const rgw_pool& pool, RGWPoolIterCtx& ctx);
3555 /**
3556 * Iterate over pool return object names, use optional filter
3557 * ctx: iteration context, initialized with pool_iterate_begin()
3558 * num: max number of objects to return
3559 * objs: a vector that the results will append into
3560 * is_truncated: if not NULL, will hold true iff iteration is complete
3561 * filter: if not NULL, will be used to filter returned objects
3562 * Returns: 0 on success, -ERR# otherwise.
3563 */
3564 int pool_iterate(RGWPoolIterCtx& ctx, uint32_t num, vector<rgw_bucket_dir_entry>& objs,
3565 bool *is_truncated, RGWAccessListFilter *filter);
3566
3567 uint64_t next_bucket_id();
3568};
3569
3570class RGWStoreManager {
3571public:
3572 RGWStoreManager() {}
3573 static RGWRados *get_storage(CephContext *cct, bool use_gc_thread, bool use_lc_thread, bool quota_threads, bool run_sync_thread) {
3574 RGWRados *store = init_storage_provider(cct, use_gc_thread, use_lc_thread, quota_threads, run_sync_thread);
3575 return store;
3576 }
3577 static RGWRados *get_raw_storage(CephContext *cct) {
3578 RGWRados *store = init_raw_storage_provider(cct);
3579 return store;
3580 }
3581 static RGWRados *init_storage_provider(CephContext *cct, bool use_gc_thread, bool use_lc_thread, bool quota_threads, bool run_sync_thread);
3582 static RGWRados *init_raw_storage_provider(CephContext *cct);
3583 static void close_storage(RGWRados *store);
3584
3585};
3586
3587template <class T>
3588class RGWChainedCacheImpl : public RGWChainedCache {
3589 RWLock lock;
3590
3591 map<string, T> entries;
3592
3593public:
3594 RGWChainedCacheImpl() : lock("RGWChainedCacheImpl::lock") {}
3595
3596 void init(RGWRados *store) {
3597 store->register_chained_cache(this);
3598 }
3599
3600 bool find(const string& key, T *entry) {
3601 RWLock::RLocker rl(lock);
3602 typename map<string, T>::iterator iter = entries.find(key);
3603 if (iter == entries.end()) {
3604 return false;
3605 }
3606
3607 *entry = iter->second;
3608 return true;
3609 }
3610
3611 bool put(RGWRados *store, const string& key, T *entry, list<rgw_cache_entry_info *>& cache_info_entries) {
3612 Entry chain_entry(this, key, entry);
3613
3614 /* we need the store cache to call us under its lock to maintain lock ordering */
3615 return store->chain_cache_entry(cache_info_entries, &chain_entry);
3616 }
3617
3618 void chain_cb(const string& key, void *data) override {
3619 T *entry = static_cast<T *>(data);
3620 RWLock::WLocker wl(lock);
3621 entries[key] = *entry;
3622 }
3623
3624 void invalidate(const string& key) override {
3625 RWLock::WLocker wl(lock);
3626 entries.erase(key);
3627 }
3628
3629 void invalidate_all() override {
3630 RWLock::WLocker wl(lock);
3631 entries.clear();
3632 }
3633}; /* RGWChainedCacheImpl */
3634
3635/**
3636 * Base of PUT operation.
3637 * Allow to create chained data transformers like compresors and encryptors.
3638 */
3639class RGWPutObjDataProcessor
3640{
3641public:
3642 RGWPutObjDataProcessor(){}
3643 virtual ~RGWPutObjDataProcessor(){}
3644 virtual int handle_data(bufferlist& bl, off_t ofs, void **phandle, rgw_raw_obj *pobj, bool *again) = 0;
3645 virtual int throttle_data(void *handle, const rgw_raw_obj& obj, uint64_t size, bool need_to_wait) = 0;
3646}; /* RGWPutObjDataProcessor */
3647
3648
3649class RGWPutObjProcessor : public RGWPutObjDataProcessor
3650{
3651protected:
3652 RGWRados *store;
3653 RGWObjectCtx& obj_ctx;
3654 bool is_complete;
3655 RGWBucketInfo bucket_info;
3656 bool canceled;
3657
3658 virtual int do_complete(size_t accounted_size, const string& etag,
3659 ceph::real_time *mtime, ceph::real_time set_mtime,
3660 map<string, bufferlist>& attrs, ceph::real_time delete_at,
3661 const char *if_match, const char *if_nomatch, const string *user_data) = 0;
3662
3663public:
3664 RGWPutObjProcessor(RGWObjectCtx& _obj_ctx, RGWBucketInfo& _bi) : store(NULL),
3665 obj_ctx(_obj_ctx),
3666 is_complete(false),
3667 bucket_info(_bi),
3668 canceled(false) {}
3669 ~RGWPutObjProcessor() override {}
3670 virtual int prepare(RGWRados *_store, string *oid_rand) {
3671 store = _store;
3672 return 0;
3673 }
3674
3675 int complete(size_t accounted_size, const string& etag,
3676 ceph::real_time *mtime, ceph::real_time set_mtime,
3677 map<string, bufferlist>& attrs, ceph::real_time delete_at,
3678 const char *if_match = NULL, const char *if_nomatch = NULL, const string *user_data = nullptr);
3679
3680 CephContext *ctx();
3681
3682 bool is_canceled() { return canceled; }
3683}; /* RGWPutObjProcessor */
3684
3685struct put_obj_aio_info {
3686 void *handle;
3687 rgw_raw_obj obj;
3688 uint64_t size;
3689};
3690
3691#define RGW_PUT_OBJ_MIN_WINDOW_SIZE_DEFAULT (16 * 1024 * 1024)
3692
3693class RGWPutObjProcessor_Aio : public RGWPutObjProcessor
3694{
3695 list<struct put_obj_aio_info> pending;
3696 uint64_t window_size{RGW_PUT_OBJ_MIN_WINDOW_SIZE_DEFAULT};
3697 uint64_t pending_size{0};
3698
3699 struct put_obj_aio_info pop_pending();
3700 int wait_pending_front();
3701 bool pending_has_completed();
3702
3703 rgw_raw_obj last_written_obj;
3704
3705protected:
3706 uint64_t obj_len{0};
3707
3708 set<rgw_raw_obj> written_objs;
3709 rgw_obj head_obj;
3710
3711 void add_written_obj(const rgw_raw_obj& obj) {
3712 written_objs.insert(obj);
3713 }
3714
3715 int drain_pending();
3716 int handle_obj_data(rgw_raw_obj& obj, bufferlist& bl, off_t ofs, off_t abs_ofs, void **phandle, bool exclusive);
3717
3718public:
3719 int prepare(RGWRados *store, string *oid_rand) override;
3720 int throttle_data(void *handle, const rgw_raw_obj& obj, uint64_t size, bool need_to_wait) override;
3721
3722 RGWPutObjProcessor_Aio(RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info) : RGWPutObjProcessor(obj_ctx, bucket_info) {}
3723 ~RGWPutObjProcessor_Aio() override;
3724}; /* RGWPutObjProcessor_Aio */
3725
3726class RGWPutObjProcessor_Atomic : public RGWPutObjProcessor_Aio
3727{
3728 bufferlist first_chunk;
3729 uint64_t part_size;
3730 off_t cur_part_ofs;
3731 off_t next_part_ofs;
3732 int cur_part_id;
3733 off_t data_ofs;
3734
3735 bufferlist pending_data_bl;
3736 uint64_t max_chunk_size;
3737
3738 bool versioned_object;
3739 uint64_t olh_epoch;
3740 string version_id;
3741
3742protected:
3743 rgw_bucket bucket;
3744 string obj_str;
3745
3746 string unique_tag;
3747
3748 rgw_raw_obj cur_obj;
3749 RGWObjManifest manifest;
3750 RGWObjManifest::generator manifest_gen;
3751
3752 int write_data(bufferlist& bl, off_t ofs, void **phandle, rgw_raw_obj *pobj, bool exclusive);
3753 int do_complete(size_t accounted_size, const string& etag,
3754 ceph::real_time *mtime, ceph::real_time set_mtime,
3755 map<string, bufferlist>& attrs, ceph::real_time delete_at,
3756 const char *if_match, const char *if_nomatch, const string *user_data) override;
3757
3758 int prepare_next_part(off_t ofs);
3759 int complete_parts();
3760 int complete_writing_data();
3761
3762 int prepare_init(RGWRados *store, string *oid_rand);
3763
3764public:
3765 ~RGWPutObjProcessor_Atomic() override {}
3766 RGWPutObjProcessor_Atomic(RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info,
3767 rgw_bucket& _b, const string& _o, uint64_t _p, const string& _t, bool versioned) :
3768 RGWPutObjProcessor_Aio(obj_ctx, bucket_info),
3769 part_size(_p),
3770 cur_part_ofs(0),
3771 next_part_ofs(_p),
3772 cur_part_id(0),
3773 data_ofs(0),
3774 max_chunk_size(0),
3775 versioned_object(versioned),
3776 olh_epoch(0),
3777 bucket(_b),
3778 obj_str(_o),
3779 unique_tag(_t) {}
3780 int prepare(RGWRados *store, string *oid_rand) override;
3781 virtual bool immutable_head() { return false; }
3782 int handle_data(bufferlist& bl, off_t ofs, void **phandle, rgw_raw_obj *pobj, bool *again) override;
3783
3784 void set_olh_epoch(uint64_t epoch) {
3785 olh_epoch = epoch;
3786 }
3787
3788 void set_version_id(const string& vid) {
3789 version_id = vid;
3790 }
3791}; /* RGWPutObjProcessor_Atomic */
3792
#define MP_META_SUFFIX ".meta"

/**
 * Derives the object names that belong to one multipart upload.
 *
 * For an object key <oid> and an upload id <upload_id>:
 *  - meta   is "<oid>.<upload_id>" followed by MP_META_SUFFIX;
 *  - prefix is "<oid>.<part_unique_str>", and a part name is
 *    "<prefix>.<part>".
 * Initialising with an empty key leaves every field empty.
 */
class RGWMPObj {
  std::string oid;
  std::string prefix;
  std::string meta;
  std::string upload_id;
public:
  RGWMPObj() {}
  RGWMPObj(const std::string& _oid, const std::string& _upload_id) {
    init(_oid, _upload_id, _upload_id);
  }

  /* Same as the three-argument init() with part_unique_str = _upload_id. */
  void init(const std::string& _oid, const std::string& _upload_id) {
    init(_oid, _upload_id, _upload_id);
  }

  /* (Re)computes all names; an empty _oid clears the object instead. */
  void init(const std::string& _oid, const std::string& _upload_id, const std::string& part_unique_str) {
    if (_oid.empty()) {
      clear();
      return;
    }
    oid = _oid;
    upload_id = _upload_id;
    prefix = oid + ".";
    meta = prefix + upload_id + MP_META_SUFFIX;
    prefix.append(part_unique_str);
  }

  std::string& get_meta() { return meta; }

  /* Name of part number num: "<prefix>.<num>". */
  std::string get_part(int num) {
    char buf[16];
    snprintf(buf, sizeof(buf), ".%d", num);
    std::string s = prefix;
    s.append(buf);
    return s;
  }

  /* Name of the part identified by the given string: "<prefix>.<part>". */
  std::string get_part(const std::string& part) {
    std::string s = prefix;
    s.append(".");
    s.append(part);
    return s;
  }

  std::string& get_upload_id() {
    return upload_id;
  }
  std::string& get_key() {
    return oid;
  }

  /**
   * Parse a meta object name of the form "<key>.<upload_id>.<suffix>" and
   * re-initialise this object from it.
   *
   * The last '.' separates the suffix and the one before it separates key
   * from upload id; the suffix text itself is not checked.
   *
   * Returns true on success. Returns false, leaving the object untouched,
   * when fewer than two dots are present or when the key part would be
   * empty (e.g. ".meta" or ".id.meta"); init() would otherwise clear the
   * object for such input, so reporting success would be wrong.
   */
  bool from_meta(const std::string& meta_oid) {
    const std::string::size_type end_pos = meta_oid.rfind('.'); // start of ".meta"
    if (end_pos == std::string::npos || end_pos == 0)
      return false; // end_pos == 0: no room for "<key>.<upload_id>"
    const std::string::size_type mid_pos = meta_oid.rfind('.', end_pos - 1); // <key>.<upload_id>
    if (mid_pos == std::string::npos || mid_pos == 0)
      return false; // mid_pos == 0: empty key
    // Copy into locals first: meta_oid may alias one of our own members.
    const std::string key = meta_oid.substr(0, mid_pos);
    const std::string id = meta_oid.substr(mid_pos + 1, end_pos - mid_pos - 1);
    init(key, id, id);
    return true;
  }

  void clear() {
    oid.clear();
    prefix.clear();
    meta.clear();
    upload_id.clear();
  }
};
3858
3859class RGWPutObjProcessor_Multipart : public RGWPutObjProcessor_Atomic
3860{
3861 string part_num;
3862 RGWMPObj mp;
3863 req_state *s;
3864 string upload_id;
3865
3866protected:
3867 int prepare(RGWRados *store, string *oid_rand);
3868 int do_complete(size_t accounted_size, const string& etag,
3869 ceph::real_time *mtime, ceph::real_time set_mtime,
3870 map<string, bufferlist>& attrs, ceph::real_time delete_at,
3871 const char *if_match, const char *if_nomatch, const string *user_data) override;
3872public:
3873 bool immutable_head() { return true; }
3874 RGWPutObjProcessor_Multipart(RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info, uint64_t _p, req_state *_s) :
3875 RGWPutObjProcessor_Atomic(obj_ctx, bucket_info, _s->bucket, _s->object.name, _p, _s->req_id, false), s(_s) {}
3876 void get_mp(RGWMPObj** _mp);
3877}; /* RGWPutObjProcessor_Multipart */
3878#endif