]> git.proxmox.com Git - ceph.git/blob - ceph/src/rgw/rgw_rados.h
import ceph nautilus 14.2.2
[ceph.git] / ceph / src / rgw / rgw_rados.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #ifndef CEPH_RGWRADOS_H
5 #define CEPH_RGWRADOS_H
6
7 #include <functional>
8
9 #include "include/rados/librados.hpp"
10 #include "include/Context.h"
11 #include "common/admin_socket.h"
12 #include "common/RefCountedObj.h"
13 #include "common/RWLock.h"
14 #include "common/ceph_time.h"
15 #include "common/lru_map.h"
16 #include "common/ceph_json.h"
17 #include "rgw_common.h"
18 #include "cls/rgw/cls_rgw_types.h"
19 #include "cls/version/cls_version_types.h"
20 #include "cls/log/cls_log_types.h"
21 #include "cls/timeindex/cls_timeindex_types.h"
22 #include "cls/otp/cls_otp_types.h"
23 #include "rgw_log.h"
24 #include "rgw_metadata.h"
25 #include "rgw_meta_sync_status.h"
26 #include "rgw_period_puller.h"
27 #include "rgw_sync_module.h"
28 #include "rgw_sync_log_trim.h"
29 #include "rgw_service.h"
30
31 #include "services/svc_rados.h"
32 #include "services/svc_zone.h"
33
/*
 * Forward declarations. Only the names are introduced here; the full
 * definitions live elsewhere.
 */

// workers, notifiers and processing threads
class RGWGC;
class RGWLC;
class RGWObjectExpirer;
class RGWMetaNotifier;
class RGWDataNotifier;
class RGWMetaSyncProcessorThread;
class RGWDataSyncProcessorThread;
class RGWSyncLogTrimThread;
class RGWSyncTraceManager;
class RGWReshard;
class RGWReshardWait;

// miscellaneous helpers
class RGWWatcher;
class SafeTimer;
class ACLOwner;
class RGWSysObjectCtx;

// zone configuration
struct RGWZoneGroup;
struct RGWZoneParams;
52
/* flags for put_obj_meta() */
#define PUT_OBJ_CREATE      0x01
#define PUT_OBJ_EXCL        0x02
#define PUT_OBJ_CREATE_EXCL (PUT_OBJ_CREATE | PUT_OBJ_EXCL)

/* object namespace names */
#define RGW_OBJ_NS_MULTIPART "multipart"
#define RGW_OBJ_NS_SHADOW    "shadow"

#define RGW_BUCKET_INSTANCE_MD_PREFIX ".bucket.meta."

/* sentinel shard id; parenthesized so the expansion stays a single operand
 * regardless of the surrounding expression */
#define RGW_NO_SHARD (-1)
64
/* primes used to fold a hash value before reducing it to a shard number */
#define RGW_SHARDS_PRIME_0 7877
#define RGW_SHARDS_PRIME_1 65521

extern const std::string MP_META_SUFFIX;

/*
 * Reduce a hash value to a shard number: the hash is first taken modulo a
 * prime (the smaller one when max_shards does not exceed it, the larger one
 * otherwise) and then modulo max_shards.
 *
 * only called by rgw_shard_id and rgw_bucket_shard_index
 */
static inline int rgw_shards_mod(unsigned hval, int max_shards)
{
  const unsigned prime = (max_shards <= RGW_SHARDS_PRIME_0)
                           ? RGW_SHARDS_PRIME_0
                           : RGW_SHARDS_PRIME_1;
  return hval % prime % max_shards;
}
78
79 // used for logging and tagging
80 static inline int rgw_shard_id(const string& key, int max_shards)
81 {
82 return rgw_shards_mod(ceph_str_hash_linux(key.c_str(), key.size()),
83 max_shards);
84 }
85
86 // used for bucket indices
87 static inline uint32_t rgw_bucket_shard_index(const std::string& key,
88 int num_shards) {
89 uint32_t sid = ceph_str_hash_linux(key.c_str(), key.size());
90 uint32_t sid2 = sid ^ ((sid & 0xFF) << 24);
91 return rgw_shards_mod(sid2, num_shards);
92 }
93
94 static inline int rgw_shards_max()
95 {
96 return RGW_SHARDS_PRIME_1;
97 }
98
99 static inline void prepend_bucket_marker(const rgw_bucket& bucket, const string& orig_oid, string& oid)
100 {
101 if (bucket.marker.empty() || orig_oid.empty()) {
102 oid = orig_oid;
103 } else {
104 oid = bucket.marker;
105 oid.append("_");
106 oid.append(orig_oid);
107 }
108 }
109
110 static inline void get_obj_bucket_and_oid_loc(const rgw_obj& obj, string& oid, string& locator)
111 {
112 const rgw_bucket& bucket = obj.bucket;
113 prepend_bucket_marker(bucket, obj.get_oid(), oid);
114 const string& loc = obj.key.get_loc();
115 if (!loc.empty()) {
116 prepend_bucket_marker(bucket, loc, locator);
117 } else {
118 locator.clear();
119 }
120 }
121
122 int rgw_init_ioctx(librados::Rados *rados, const rgw_pool& pool, librados::IoCtx& ioctx, bool create = false);
123
124 int rgw_policy_from_attrset(CephContext *cct, map<string, bufferlist>& attrset, RGWAccessControlPolicy *policy);
125
126 static inline bool rgw_raw_obj_to_obj(const rgw_bucket& bucket, const rgw_raw_obj& raw_obj, rgw_obj *obj)
127 {
128 ssize_t pos = raw_obj.oid.find('_');
129 if (pos < 0) {
130 return false;
131 }
132
133 if (!rgw_obj_key::parse_raw_oid(raw_obj.oid.substr(pos + 1), &obj->key)) {
134 return false;
135 }
136 obj->bucket = bucket;
137
138 return true;
139 }
140
141
142 struct rgw_bucket_placement {
143 rgw_placement_rule placement_rule;
144 rgw_bucket bucket;
145
146 void dump(Formatter *f) const;
147 };
148
149 class rgw_obj_select {
150 rgw_placement_rule placement_rule;
151 rgw_obj obj;
152 rgw_raw_obj raw_obj;
153 bool is_raw;
154
155 public:
156 rgw_obj_select() : is_raw(false) {}
157 explicit rgw_obj_select(const rgw_obj& _obj) : obj(_obj), is_raw(false) {}
158 explicit rgw_obj_select(const rgw_raw_obj& _raw_obj) : raw_obj(_raw_obj), is_raw(true) {}
159 rgw_obj_select(const rgw_obj_select& rhs) {
160 placement_rule = rhs.placement_rule;
161 is_raw = rhs.is_raw;
162 if (is_raw) {
163 raw_obj = rhs.raw_obj;
164 } else {
165 obj = rhs.obj;
166 }
167 }
168
169 rgw_raw_obj get_raw_obj(const RGWZoneGroup& zonegroup, const RGWZoneParams& zone_params) const;
170 rgw_raw_obj get_raw_obj(RGWRados *store) const;
171
172 rgw_obj_select& operator=(const rgw_obj& rhs) {
173 obj = rhs;
174 is_raw = false;
175 return *this;
176 }
177
178 rgw_obj_select& operator=(const rgw_raw_obj& rhs) {
179 raw_obj = rhs;
180 is_raw = true;
181 return *this;
182 }
183
184 void set_placement_rule(const rgw_placement_rule& rule) {
185 placement_rule = rule;
186 }
187 void dump(Formatter *f) const;
188 };
189
190 struct compression_block {
191 uint64_t old_ofs;
192 uint64_t new_ofs;
193 uint64_t len;
194
195 void encode(bufferlist& bl) const {
196 ENCODE_START(1, 1, bl);
197 encode(old_ofs, bl);
198 encode(new_ofs, bl);
199 encode(len, bl);
200 ENCODE_FINISH(bl);
201 }
202
203 void decode(bufferlist::const_iterator& bl) {
204 DECODE_START(1, bl);
205 decode(old_ofs, bl);
206 decode(new_ofs, bl);
207 decode(len, bl);
208 DECODE_FINISH(bl);
209 }
210 void dump(Formatter *f) const;
211 };
212 WRITE_CLASS_ENCODER(compression_block)
213
214 struct RGWCompressionInfo {
215 string compression_type;
216 uint64_t orig_size;
217 vector<compression_block> blocks;
218
219 RGWCompressionInfo() : compression_type("none"), orig_size(0) {}
220 RGWCompressionInfo(const RGWCompressionInfo& cs_info) : compression_type(cs_info.compression_type),
221 orig_size(cs_info.orig_size),
222 blocks(cs_info.blocks) {}
223
224 void encode(bufferlist& bl) const {
225 ENCODE_START(1, 1, bl);
226 encode(compression_type, bl);
227 encode(orig_size, bl);
228 encode(blocks, bl);
229 ENCODE_FINISH(bl);
230 }
231
232 void decode(bufferlist::const_iterator& bl) {
233 DECODE_START(1, bl);
234 decode(compression_type, bl);
235 decode(orig_size, bl);
236 decode(blocks, bl);
237 DECODE_FINISH(bl);
238 }
239 void dump(Formatter *f) const;
240 };
241 WRITE_CLASS_ENCODER(RGWCompressionInfo)
242
243 int rgw_compression_info_from_attrset(map<string, bufferlist>& attrs, bool& need_decompress, RGWCompressionInfo& cs_info);
244
245 struct RGWOLHInfo {
246 rgw_obj target;
247 bool removed;
248
249 RGWOLHInfo() : removed(false) {}
250
251 void encode(bufferlist& bl) const {
252 ENCODE_START(1, 1, bl);
253 encode(target, bl);
254 encode(removed, bl);
255 ENCODE_FINISH(bl);
256 }
257
258 void decode(bufferlist::const_iterator& bl) {
259 DECODE_START(1, bl);
260 decode(target, bl);
261 decode(removed, bl);
262 DECODE_FINISH(bl);
263 }
264 static void generate_test_instances(list<RGWOLHInfo*>& o);
265 void dump(Formatter *f) const;
266 };
267 WRITE_CLASS_ENCODER(RGWOLHInfo)
268
269 struct RGWOLHPendingInfo {
270 ceph::real_time time;
271
272 RGWOLHPendingInfo() {}
273
274 void encode(bufferlist& bl) const {
275 ENCODE_START(1, 1, bl);
276 encode(time, bl);
277 ENCODE_FINISH(bl);
278 }
279
280 void decode(bufferlist::const_iterator& bl) {
281 DECODE_START(1, bl);
282 decode(time, bl);
283 DECODE_FINISH(bl);
284 }
285
286 void dump(Formatter *f) const;
287 };
288 WRITE_CLASS_ENCODER(RGWOLHPendingInfo)
289
290 struct RGWUsageBatch {
291 map<ceph::real_time, rgw_usage_log_entry> m;
292
293 void insert(ceph::real_time& t, rgw_usage_log_entry& entry, bool *account) {
294 bool exists = m.find(t) != m.end();
295 *account = !exists;
296 m[t].aggregate(entry);
297 }
298 };
299
300 struct RGWUsageIter {
301 string read_iter;
302 uint32_t index;
303
304 RGWUsageIter() : index(0) {}
305 };
306
307 class RGWGetDataCB {
308 public:
309 virtual int handle_data(bufferlist& bl, off_t bl_ofs, off_t bl_len) = 0;
310 RGWGetDataCB() {}
311 virtual ~RGWGetDataCB() {}
312 };
313
314 struct RGWCloneRangeInfo {
315 rgw_obj src;
316 off_t src_ofs;
317 off_t dst_ofs;
318 uint64_t len;
319 };
320
321 struct RGWObjManifestPart {
322 rgw_obj loc; /* the object where the data is located */
323 uint64_t loc_ofs; /* the offset at that object where the data is located */
324 uint64_t size; /* the part size */
325
326 RGWObjManifestPart() : loc_ofs(0), size(0) {}
327
328 void encode(bufferlist& bl) const {
329 ENCODE_START(2, 2, bl);
330 encode(loc, bl);
331 encode(loc_ofs, bl);
332 encode(size, bl);
333 ENCODE_FINISH(bl);
334 }
335
336 void decode(bufferlist::const_iterator& bl) {
337 DECODE_START_LEGACY_COMPAT_LEN_32(2, 2, 2, bl);
338 decode(loc, bl);
339 decode(loc_ofs, bl);
340 decode(size, bl);
341 DECODE_FINISH(bl);
342 }
343
344 void dump(Formatter *f) const;
345 static void generate_test_instances(list<RGWObjManifestPart*>& o);
346 };
347 WRITE_CLASS_ENCODER(RGWObjManifestPart)
348
349 /*
350 The manifest defines a set of rules for structuring the object parts.
351 There are a few terms to note:
352 - head: the head part of the object, which is the part that contains
353 the first chunk of data. An object might not have a head (as in the
354 case of multipart-part objects).
355 - stripe: data portion of a single rgw object that resides on a single
356 rados object.
357 - part: a collection of stripes that make a contiguous part of an
358 object. A regular object will only have one part (although might have
359 many stripes), a multipart object might have many parts. Each part
360 has a fixed stripe size, although the last stripe of a part might
361 be smaller than that. Consecutive parts may be merged if their stripe
362 value is the same.
363 */
364
365 struct RGWObjManifestRule {
366 uint32_t start_part_num;
367 uint64_t start_ofs;
368 uint64_t part_size; /* each part size, 0 if there's no part size, meaning it's unlimited */
369 uint64_t stripe_max_size; /* underlying obj max size */
370 string override_prefix;
371
372 RGWObjManifestRule() : start_part_num(0), start_ofs(0), part_size(0), stripe_max_size(0) {}
373 RGWObjManifestRule(uint32_t _start_part_num, uint64_t _start_ofs, uint64_t _part_size, uint64_t _stripe_max_size) :
374 start_part_num(_start_part_num), start_ofs(_start_ofs), part_size(_part_size), stripe_max_size(_stripe_max_size) {}
375
376 void encode(bufferlist& bl) const {
377 ENCODE_START(2, 1, bl);
378 encode(start_part_num, bl);
379 encode(start_ofs, bl);
380 encode(part_size, bl);
381 encode(stripe_max_size, bl);
382 encode(override_prefix, bl);
383 ENCODE_FINISH(bl);
384 }
385
386 void decode(bufferlist::const_iterator& bl) {
387 DECODE_START(2, bl);
388 decode(start_part_num, bl);
389 decode(start_ofs, bl);
390 decode(part_size, bl);
391 decode(stripe_max_size, bl);
392 if (struct_v >= 2)
393 decode(override_prefix, bl);
394 DECODE_FINISH(bl);
395 }
396 void dump(Formatter *f) const;
397 };
398 WRITE_CLASS_ENCODER(RGWObjManifestRule)
399
400 class RGWObjManifest {
401 protected:
402 bool explicit_objs; /* old manifest? */
403 map<uint64_t, RGWObjManifestPart> objs;
404
405 uint64_t obj_size;
406
407 rgw_obj obj;
408 uint64_t head_size;
409 rgw_placement_rule head_placement_rule;
410
411 uint64_t max_head_size;
412 string prefix;
413 rgw_bucket_placement tail_placement; /* might be different than the original bucket,
414 as object might have been copied across pools */
415 map<uint64_t, RGWObjManifestRule> rules;
416
417 string tail_instance; /* tail object's instance */
418
419 void convert_to_explicit(const RGWZoneGroup& zonegroup, const RGWZoneParams& zone_params);
420 int append_explicit(RGWObjManifest& m, const RGWZoneGroup& zonegroup, const RGWZoneParams& zone_params);
421 void append_rules(RGWObjManifest& m, map<uint64_t, RGWObjManifestRule>::iterator& iter, string *override_prefix);
422
423 void update_iterators() {
424 begin_iter.seek(0);
425 end_iter.seek(obj_size);
426 }
427 public:
428
429 RGWObjManifest() : explicit_objs(false), obj_size(0), head_size(0), max_head_size(0),
430 begin_iter(this), end_iter(this) {}
431 RGWObjManifest(const RGWObjManifest& rhs) {
432 *this = rhs;
433 }
434 RGWObjManifest& operator=(const RGWObjManifest& rhs) {
435 explicit_objs = rhs.explicit_objs;
436 objs = rhs.objs;
437 obj_size = rhs.obj_size;
438 obj = rhs.obj;
439 head_size = rhs.head_size;
440 max_head_size = rhs.max_head_size;
441 prefix = rhs.prefix;
442 tail_placement = rhs.tail_placement;
443 rules = rhs.rules;
444 tail_instance = rhs.tail_instance;
445
446 begin_iter.set_manifest(this);
447 end_iter.set_manifest(this);
448
449 begin_iter.seek(rhs.begin_iter.get_ofs());
450 end_iter.seek(rhs.end_iter.get_ofs());
451
452 return *this;
453 }
454
455 map<uint64_t, RGWObjManifestPart>& get_explicit_objs() {
456 return objs;
457 }
458
459
460 void set_explicit(uint64_t _size, map<uint64_t, RGWObjManifestPart>& _objs) {
461 explicit_objs = true;
462 obj_size = _size;
463 objs.swap(_objs);
464 }
465
466 void get_implicit_location(uint64_t cur_part_id, uint64_t cur_stripe, uint64_t ofs, string *override_prefix, rgw_obj_select *location);
467
468 void set_trivial_rule(uint64_t tail_ofs, uint64_t stripe_max_size) {
469 RGWObjManifestRule rule(0, tail_ofs, 0, stripe_max_size);
470 rules[0] = rule;
471 max_head_size = tail_ofs;
472 }
473
474 void set_multipart_part_rule(uint64_t stripe_max_size, uint64_t part_num) {
475 RGWObjManifestRule rule(0, 0, 0, stripe_max_size);
476 rule.start_part_num = part_num;
477 rules[0] = rule;
478 max_head_size = 0;
479 }
480
481 void encode(bufferlist& bl) const {
482 ENCODE_START(7, 6, bl);
483 encode(obj_size, bl);
484 encode(objs, bl);
485 encode(explicit_objs, bl);
486 encode(obj, bl);
487 encode(head_size, bl);
488 encode(max_head_size, bl);
489 encode(prefix, bl);
490 encode(rules, bl);
491 bool encode_tail_bucket = !(tail_placement.bucket == obj.bucket);
492 encode(encode_tail_bucket, bl);
493 if (encode_tail_bucket) {
494 encode(tail_placement.bucket, bl);
495 }
496 bool encode_tail_instance = (tail_instance != obj.key.instance);
497 encode(encode_tail_instance, bl);
498 if (encode_tail_instance) {
499 encode(tail_instance, bl);
500 }
501 encode(head_placement_rule, bl);
502 encode(tail_placement.placement_rule, bl);
503 ENCODE_FINISH(bl);
504 }
505
506 void decode(bufferlist::const_iterator& bl) {
507 DECODE_START_LEGACY_COMPAT_LEN_32(7, 2, 2, bl);
508 decode(obj_size, bl);
509 decode(objs, bl);
510 if (struct_v >= 3) {
511 decode(explicit_objs, bl);
512 decode(obj, bl);
513 decode(head_size, bl);
514 decode(max_head_size, bl);
515 decode(prefix, bl);
516 decode(rules, bl);
517 } else {
518 explicit_objs = true;
519 if (!objs.empty()) {
520 map<uint64_t, RGWObjManifestPart>::iterator iter = objs.begin();
521 obj = iter->second.loc;
522 head_size = iter->second.size;
523 max_head_size = head_size;
524 }
525 }
526
527 if (explicit_objs && head_size > 0 && !objs.empty()) {
528 /* patch up manifest due to issue 16435:
529 * the first object in the explicit objs list might not be the one we need to access, use the
530 * head object instead if set. This would happen if we had an old object that was created
531 * when the explicit objs manifest was around, and it got copied.
532 */
533 rgw_obj& obj_0 = objs[0].loc;
534 if (!obj_0.get_oid().empty() && obj_0.key.ns.empty()) {
535 objs[0].loc = obj;
536 objs[0].size = head_size;
537 }
538 }
539
540 if (struct_v >= 4) {
541 if (struct_v < 6) {
542 decode(tail_placement.bucket, bl);
543 } else {
544 bool need_to_decode;
545 decode(need_to_decode, bl);
546 if (need_to_decode) {
547 decode(tail_placement.bucket, bl);
548 } else {
549 tail_placement.bucket = obj.bucket;
550 }
551 }
552 }
553
554 if (struct_v >= 5) {
555 if (struct_v < 6) {
556 decode(tail_instance, bl);
557 } else {
558 bool need_to_decode;
559 decode(need_to_decode, bl);
560 if (need_to_decode) {
561 decode(tail_instance, bl);
562 } else {
563 tail_instance = obj.key.instance;
564 }
565 }
566 } else { // old object created before 'tail_instance' field added to manifest
567 tail_instance = obj.key.instance;
568 }
569
570 if (struct_v >= 7) {
571 decode(head_placement_rule, bl);
572 decode(tail_placement.placement_rule, bl);
573 }
574
575 update_iterators();
576 DECODE_FINISH(bl);
577 }
578
579 void dump(Formatter *f) const;
580 static void generate_test_instances(list<RGWObjManifest*>& o);
581
582 int append(RGWObjManifest& m, const RGWZoneGroup& zonegroup,
583 const RGWZoneParams& zone_params);
584 int append(RGWObjManifest& m, RGWSI_Zone *zone_svc);
585
586 bool get_rule(uint64_t ofs, RGWObjManifestRule *rule);
587
588 bool empty() {
589 if (explicit_objs)
590 return objs.empty();
591 return rules.empty();
592 }
593
594 bool has_explicit_objs() {
595 return explicit_objs;
596 }
597
598 bool has_tail() {
599 if (explicit_objs) {
600 if (objs.size() == 1) {
601 map<uint64_t, RGWObjManifestPart>::iterator iter = objs.begin();
602 rgw_obj& o = iter->second.loc;
603 return !(obj == o);
604 }
605 return (objs.size() >= 2);
606 }
607 return (obj_size > head_size);
608 }
609
610 void set_head(const rgw_placement_rule& placement_rule, const rgw_obj& _o, uint64_t _s) {
611 head_placement_rule = placement_rule;
612 obj = _o;
613 head_size = _s;
614
615 if (explicit_objs && head_size > 0) {
616 objs[0].loc = obj;
617 objs[0].size = head_size;
618 }
619 }
620
621 const rgw_obj& get_obj() {
622 return obj;
623 }
624
625 void set_tail_placement(const rgw_placement_rule& placement_rule, const rgw_bucket& _b) {
626 tail_placement.placement_rule = placement_rule;
627 tail_placement.bucket = _b;
628 }
629
630 const rgw_bucket_placement& get_tail_placement() {
631 return tail_placement;
632 }
633
634 const rgw_placement_rule& get_head_placement_rule() {
635 return head_placement_rule;
636 }
637
638 void set_prefix(const string& _p) {
639 prefix = _p;
640 }
641
642 const string& get_prefix() {
643 return prefix;
644 }
645
646 void set_tail_instance(const string& _ti) {
647 tail_instance = _ti;
648 }
649
650 const string& get_tail_instance() {
651 return tail_instance;
652 }
653
654 void set_head_size(uint64_t _s) {
655 head_size = _s;
656 }
657
658 void set_obj_size(uint64_t s) {
659 obj_size = s;
660
661 update_iterators();
662 }
663
664 uint64_t get_obj_size() {
665 return obj_size;
666 }
667
668 uint64_t get_head_size() {
669 return head_size;
670 }
671
672 uint64_t get_max_head_size() {
673 return max_head_size;
674 }
675
676 class obj_iterator {
677 RGWObjManifest *manifest;
678 uint64_t part_ofs; /* where current part starts */
679 uint64_t stripe_ofs; /* where current stripe starts */
680 uint64_t ofs; /* current position within the object */
681 uint64_t stripe_size; /* current part size */
682
683 int cur_part_id;
684 int cur_stripe;
685 string cur_override_prefix;
686
687 rgw_obj_select location;
688
689 map<uint64_t, RGWObjManifestRule>::iterator rule_iter;
690 map<uint64_t, RGWObjManifestRule>::iterator next_rule_iter;
691
692 map<uint64_t, RGWObjManifestPart>::iterator explicit_iter;
693
694 void init() {
695 part_ofs = 0;
696 stripe_ofs = 0;
697 ofs = 0;
698 stripe_size = 0;
699 cur_part_id = 0;
700 cur_stripe = 0;
701 }
702
703 void update_explicit_pos();
704
705
706 protected:
707
708 void set_manifest(RGWObjManifest *m) {
709 manifest = m;
710 }
711
712 public:
713 obj_iterator() : manifest(NULL) {
714 init();
715 }
716 explicit obj_iterator(RGWObjManifest *_m) : manifest(_m) {
717 init();
718 if (!manifest->empty()) {
719 seek(0);
720 }
721 }
722 obj_iterator(RGWObjManifest *_m, uint64_t _ofs) : manifest(_m) {
723 init();
724 if (!manifest->empty()) {
725 seek(_ofs);
726 }
727 }
728 void seek(uint64_t ofs);
729
730 void operator++();
731 bool operator==(const obj_iterator& rhs) {
732 return (ofs == rhs.ofs);
733 }
734 bool operator!=(const obj_iterator& rhs) {
735 return (ofs != rhs.ofs);
736 }
737 const rgw_obj_select& get_location() {
738 return location;
739 }
740
741 /* start of current stripe */
742 uint64_t get_stripe_ofs() {
743 if (manifest->explicit_objs) {
744 return explicit_iter->first;
745 }
746 return stripe_ofs;
747 }
748
749 /* current ofs relative to start of rgw object */
750 uint64_t get_ofs() const {
751 return ofs;
752 }
753
754 /* stripe number */
755 int get_cur_stripe() const {
756 return cur_stripe;
757 }
758
759 /* current stripe size */
760 uint64_t get_stripe_size() {
761 if (manifest->explicit_objs) {
762 return explicit_iter->second.size;
763 }
764 return stripe_size;
765 }
766
767 /* offset where data starts within current stripe */
768 uint64_t location_ofs() {
769 if (manifest->explicit_objs) {
770 return explicit_iter->second.loc_ofs;
771 }
772 return 0; /* all stripes start at zero offset */
773 }
774
775 void update_location();
776
777 friend class RGWObjManifest;
778 void dump(Formatter *f) const;
779 };
780
781 const obj_iterator& obj_begin();
782 const obj_iterator& obj_end();
783 obj_iterator obj_find(uint64_t ofs);
784
785 obj_iterator begin_iter;
786 obj_iterator end_iter;
787
788 /*
789 * simple object generator. Using a simple single rule manifest.
790 */
791 class generator {
792 RGWObjManifest *manifest;
793 uint64_t last_ofs;
794 uint64_t cur_part_ofs;
795 int cur_part_id;
796 int cur_stripe;
797 uint64_t cur_stripe_size;
798 string cur_oid;
799
800 string oid_prefix;
801
802 rgw_obj_select cur_obj;
803
804 RGWObjManifestRule rule;
805
806 public:
807 generator() : manifest(NULL), last_ofs(0), cur_part_ofs(0), cur_part_id(0),
808 cur_stripe(0), cur_stripe_size(0) {}
809 int create_begin(CephContext *cct, RGWObjManifest *manifest,
810 const rgw_placement_rule& head_placement_rule,
811 const rgw_placement_rule *tail_placement_rule,
812 const rgw_bucket& bucket,
813 const rgw_obj& obj);
814
815 int create_next(uint64_t ofs);
816
817 rgw_raw_obj get_cur_obj(RGWZoneGroup& zonegroup, RGWZoneParams& zone_params) { return cur_obj.get_raw_obj(zonegroup, zone_params); }
818 rgw_raw_obj get_cur_obj(RGWRados *store) const { return cur_obj.get_raw_obj(store); }
819
820 /* total max size of current stripe (including head obj) */
821 uint64_t cur_stripe_max_size() const {
822 return cur_stripe_size;
823 }
824 };
825 };
826 WRITE_CLASS_ENCODER(RGWObjManifest)
827
828 struct RGWUploadPartInfo {
829 uint32_t num;
830 uint64_t size;
831 uint64_t accounted_size{0};
832 string etag;
833 ceph::real_time modified;
834 RGWObjManifest manifest;
835 RGWCompressionInfo cs_info;
836
837 RGWUploadPartInfo() : num(0), size(0) {}
838
839 void encode(bufferlist& bl) const {
840 ENCODE_START(4, 2, bl);
841 encode(num, bl);
842 encode(size, bl);
843 encode(etag, bl);
844 encode(modified, bl);
845 encode(manifest, bl);
846 encode(cs_info, bl);
847 encode(accounted_size, bl);
848 ENCODE_FINISH(bl);
849 }
850 void decode(bufferlist::const_iterator& bl) {
851 DECODE_START_LEGACY_COMPAT_LEN(4, 2, 2, bl);
852 decode(num, bl);
853 decode(size, bl);
854 decode(etag, bl);
855 decode(modified, bl);
856 if (struct_v >= 3)
857 decode(manifest, bl);
858 if (struct_v >= 4) {
859 decode(cs_info, bl);
860 decode(accounted_size, bl);
861 } else {
862 accounted_size = size;
863 }
864 DECODE_FINISH(bl);
865 }
866 void dump(Formatter *f) const;
867 static void generate_test_instances(list<RGWUploadPartInfo*>& o);
868 };
869 WRITE_CLASS_ENCODER(RGWUploadPartInfo)
870
871 struct RGWObjState {
872 rgw_obj obj;
873 bool is_atomic;
874 bool has_attrs;
875 bool exists;
876 uint64_t size; //< size of raw object
877 uint64_t accounted_size{0}; //< size before compression, encryption
878 ceph::real_time mtime;
879 uint64_t epoch;
880 bufferlist obj_tag;
881 bufferlist tail_tag;
882 string write_tag;
883 bool fake_tag;
884 RGWObjManifest manifest;
885 bool has_manifest;
886 string shadow_obj;
887 bool has_data;
888 bufferlist data;
889 bool prefetch_data;
890 bool keep_tail;
891 bool is_olh;
892 bufferlist olh_tag;
893 uint64_t pg_ver;
894 uint32_t zone_short_id;
895
896 /* important! don't forget to update copy constructor */
897
898 RGWObjVersionTracker objv_tracker;
899
900 map<string, bufferlist> attrset;
901 RGWObjState() : is_atomic(false), has_attrs(0), exists(false),
902 size(0), epoch(0), fake_tag(false), has_manifest(false),
903 has_data(false), prefetch_data(false), keep_tail(false), is_olh(false),
904 pg_ver(0), zone_short_id(0) {}
905 RGWObjState(const RGWObjState& rhs) : obj (rhs.obj) {
906 is_atomic = rhs.is_atomic;
907 has_attrs = rhs.has_attrs;
908 exists = rhs.exists;
909 size = rhs.size;
910 accounted_size = rhs.accounted_size;
911 mtime = rhs.mtime;
912 epoch = rhs.epoch;
913 if (rhs.obj_tag.length()) {
914 obj_tag = rhs.obj_tag;
915 }
916 if (rhs.tail_tag.length()) {
917 tail_tag = rhs.tail_tag;
918 }
919 write_tag = rhs.write_tag;
920 fake_tag = rhs.fake_tag;
921 if (rhs.has_manifest) {
922 manifest = rhs.manifest;
923 }
924 has_manifest = rhs.has_manifest;
925 shadow_obj = rhs.shadow_obj;
926 has_data = rhs.has_data;
927 if (rhs.data.length()) {
928 data = rhs.data;
929 }
930 prefetch_data = rhs.prefetch_data;
931 keep_tail = rhs.keep_tail;
932 is_olh = rhs.is_olh;
933 objv_tracker = rhs.objv_tracker;
934 pg_ver = rhs.pg_ver;
935 }
936
937 bool get_attr(string name, bufferlist& dest) {
938 map<string, bufferlist>::iterator iter = attrset.find(name);
939 if (iter != attrset.end()) {
940 dest = iter->second;
941 return true;
942 }
943 return false;
944 }
945 };
946
947 struct RGWRawObjState {
948 rgw_raw_obj obj;
949 bool has_attrs{false};
950 bool exists{false};
951 uint64_t size{0};
952 ceph::real_time mtime;
953 uint64_t epoch{0};
954 bufferlist obj_tag;
955 bool has_data{false};
956 bufferlist data;
957 bool prefetch_data{false};
958 uint64_t pg_ver{0};
959
960 /* important! don't forget to update copy constructor */
961
962 RGWObjVersionTracker objv_tracker;
963
964 map<string, bufferlist> attrset;
965 RGWRawObjState() {}
966 RGWRawObjState(const RGWRawObjState& rhs) : obj (rhs.obj) {
967 has_attrs = rhs.has_attrs;
968 exists = rhs.exists;
969 size = rhs.size;
970 mtime = rhs.mtime;
971 epoch = rhs.epoch;
972 if (rhs.obj_tag.length()) {
973 obj_tag = rhs.obj_tag;
974 }
975 has_data = rhs.has_data;
976 if (rhs.data.length()) {
977 data = rhs.data;
978 }
979 prefetch_data = rhs.prefetch_data;
980 pg_ver = rhs.pg_ver;
981 objv_tracker = rhs.objv_tracker;
982 }
983 };
984
985 struct RGWPoolIterCtx {
986 librados::IoCtx io_ctx;
987 librados::NObjectIterator iter;
988 };
989
990 struct RGWListRawObjsCtx {
991 bool initialized;
992 RGWPoolIterCtx iter_ctx;
993
994 RGWListRawObjsCtx() : initialized(false) {}
995 };
996
997 struct objexp_hint_entry {
998 string tenant;
999 string bucket_name;
1000 string bucket_id;
1001 rgw_obj_key obj_key;
1002 ceph::real_time exp_time;
1003
1004 void encode(bufferlist& bl) const {
1005 ENCODE_START(2, 1, bl);
1006 encode(bucket_name, bl);
1007 encode(bucket_id, bl);
1008 encode(obj_key, bl);
1009 encode(exp_time, bl);
1010 encode(tenant, bl);
1011 ENCODE_FINISH(bl);
1012 }
1013
1014 void decode(bufferlist::const_iterator& bl) {
1015 // XXX Do we want DECODE_START_LEGACY_COMPAT_LEN(2, 1, 1, bl); ?
1016 DECODE_START(2, bl);
1017 decode(bucket_name, bl);
1018 decode(bucket_id, bl);
1019 decode(obj_key, bl);
1020 decode(exp_time, bl);
1021 if (struct_v >= 2) {
1022 decode(tenant, bl);
1023 } else {
1024 tenant.clear();
1025 }
1026 DECODE_FINISH(bl);
1027 }
1028 };
1029 WRITE_CLASS_ENCODER(objexp_hint_entry)
1030
1031 class RGWDataChangesLog;
1032 class RGWMetaSyncStatusManager;
1033 class RGWDataSyncStatusManager;
1034 class RGWCoroutinesManagerRegistry;
1035
1036 class RGWGetBucketStats_CB : public RefCountedObject {
1037 protected:
1038 rgw_bucket bucket;
1039 map<RGWObjCategory, RGWStorageStats> *stats;
1040 public:
1041 explicit RGWGetBucketStats_CB(const rgw_bucket& _bucket) : bucket(_bucket), stats(NULL) {}
1042 ~RGWGetBucketStats_CB() override {}
1043 virtual void handle_response(int r) = 0;
1044 virtual void set_response(map<RGWObjCategory, RGWStorageStats> *_stats) {
1045 stats = _stats;
1046 }
1047 };
1048
1049 class RGWGetUserStats_CB : public RefCountedObject {
1050 protected:
1051 rgw_user user;
1052 RGWStorageStats stats;
1053 public:
1054 explicit RGWGetUserStats_CB(const rgw_user& _user) : user(_user) {}
1055 ~RGWGetUserStats_CB() override {}
1056 virtual void handle_response(int r) = 0;
1057 virtual void set_response(RGWStorageStats& _stats) {
1058 stats = _stats;
1059 }
1060 };
1061
1062 class RGWGetDirHeader_CB;
1063 class RGWGetUserHeader_CB;
1064
1065 class RGWObjectCtx {
1066 RGWRados *store;
1067 RWLock lock{"RGWObjectCtx"};
1068 void *s{nullptr};
1069
1070 std::map<rgw_obj, RGWObjState> objs_state;
1071 public:
1072 explicit RGWObjectCtx(RGWRados *_store) : store(_store) {}
1073 explicit RGWObjectCtx(RGWRados *_store, void *_s) : store(_store), s(_s) {}
1074
1075 void *get_private() {
1076 return s;
1077 }
1078
1079 RGWRados *get_store() {
1080 return store;
1081 }
1082
1083 RGWObjState *get_state(const rgw_obj& obj) {
1084 RGWObjState *result;
1085 typename std::map<rgw_obj, RGWObjState>::iterator iter;
1086 lock.get_read();
1087 assert (!obj.empty());
1088 iter = objs_state.find(obj);
1089 if (iter != objs_state.end()) {
1090 result = &iter->second;
1091 lock.unlock();
1092 } else {
1093 lock.unlock();
1094 lock.get_write();
1095 result = &objs_state[obj];
1096 lock.unlock();
1097 }
1098 return result;
1099 }
1100
1101 void set_atomic(rgw_obj& obj) {
1102 RWLock::WLocker wl(lock);
1103 assert (!obj.empty());
1104 objs_state[obj].is_atomic = true;
1105 }
1106 void set_prefetch_data(const rgw_obj& obj) {
1107 RWLock::WLocker wl(lock);
1108 assert (!obj.empty());
1109 objs_state[obj].prefetch_data = true;
1110 }
1111
1112 void invalidate(const rgw_obj& obj) {
1113 RWLock::WLocker wl(lock);
1114 auto iter = objs_state.find(obj);
1115 if (iter == objs_state.end()) {
1116 return;
1117 }
1118 bool is_atomic = iter->second.is_atomic;
1119 bool prefetch_data = iter->second.prefetch_data;
1120
1121 objs_state.erase(iter);
1122
1123 if (is_atomic || prefetch_data) {
1124 auto& state = objs_state[obj];
1125 state.is_atomic = is_atomic;
1126 state.prefetch_data = prefetch_data;
1127 }
1128 }
1129 };
1130
1131 class RGWAsyncRadosProcessor;
1132
1133 template <class T>
1134 class RGWChainedCacheImpl;
1135
1136 struct bucket_info_entry {
1137 RGWBucketInfo info;
1138 real_time mtime;
1139 map<string, bufferlist> attrs;
1140 };
1141
1142 struct tombstone_entry {
1143 ceph::real_time mtime;
1144 uint32_t zone_short_id;
1145 uint64_t pg_ver;
1146
1147 tombstone_entry() = default;
1148 explicit tombstone_entry(const RGWObjState& state)
1149 : mtime(state.mtime), zone_short_id(state.zone_short_id),
1150 pg_ver(state.pg_ver) {}
1151 };
1152
1153 class RGWIndexCompletionManager;
1154
1155 class RGWRados : public AdminSocketHook
1156 {
1157 friend class RGWGC;
1158 friend class RGWMetaNotifier;
1159 friend class RGWDataNotifier;
1160 friend class RGWLC;
1161 friend class RGWObjectExpirer;
1162 friend class RGWMetaSyncProcessorThread;
1163 friend class RGWDataSyncProcessorThread;
1164 friend class RGWReshard;
1165 friend class RGWBucketReshard;
1166 friend class RGWBucketReshardLock;
1167 friend class BucketIndexLockGuard;
1168 friend class RGWCompleteMultipart;
1169
// Compile-time table of the four "cache ..." admin commands. Each row holds
// three strings: the command name, the command followed by its argument
// descriptors, and a one-line help text.
// NOTE(review): how the rows are registered/dispatched is not visible in this
// part of the header -- see the AdminSocketHook implementation.
1170 static constexpr const char* admin_commands[4][3] = {
1171 { "cache list",
1172 "cache list name=filter,type=CephString,req=false",
1173 "cache list [filter_str]: list object cache, possibly matching substrings" },
1174 { "cache inspect",
1175 "cache inspect name=target,type=CephString,req=true",
1176 "cache inspect target: print cache element" },
1177 { "cache erase",
1178 "cache erase name=target,type=CephString,req=true",
1179 "cache erase target: erase element from cache" },
1180 { "cache zap",
1181 "cache zap",
1182 "cache zap: erase all elements from cache" }
1183 };
1184
1185 /** Open the pool used as root for this gateway */
1186 int open_root_pool_ctx();
// Same pattern for the other fixed pools; the matching IoCtx members are
// gc_pool_ctx, lc_pool_ctx, objexp_pool_ctx and reshard_pool_ctx.
1187 int open_gc_pool_ctx();
1188 int open_lc_pool_ctx();
1189 int open_objexp_pool_ctx();
1190 int open_reshard_pool_ctx();
1191
// Generic variant: the caller supplies both the pool and the IoCtx to fill.
1192 int open_pool_ctx(const rgw_pool& pool, librados::IoCtx& io_ctx);
// Bucket index helpers. All take the bucket's RGWBucketInfo and an IoCtx to
// set up; the overloads differ in what they report back:
//  - a single index object name (bucket_oid / bucket_oid_base),
//  - the index object for one shard, selected either by object key
//    (shard id returned through *shard_id) or by an explicit shard id,
//  - a map of shard id -> index object name (shard_id defaults to -1;
//    NOTE(review): presumably "all shards" -- confirm in the .cc).
1193 int open_bucket_index_ctx(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx);
1194 int open_bucket_index(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx, string& bucket_oid);
1195 int open_bucket_index_base(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
1196 string& bucket_oid_base);
1197 int open_bucket_index_shard(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
1198 const string& obj_key, string *bucket_obj, int *shard_id);
1199 int open_bucket_index_shard(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
1200 int shard_id, string *bucket_obj);
1201 int open_bucket_index(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
1202 map<int, string>& bucket_objs, int shard_id = -1, map<int, string> *bucket_instance_ids = NULL);
// Templated variant: additionally fills a caller-typed per-shard map
// (map<int, T>) next to the shard id -> oid map.
1203 template<typename T>
1204 int open_bucket_index(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
1205 map<int, string>& oids, map<int, T>& bucket_objs,
1206 int shard_id = -1, map<int, string> *bucket_instance_ids = NULL);
// Builds *marker from a shard id string and a per-shard marker string.
1207 void build_bucket_index_marker(const string& shard_id_str, const string& shard_marker,
1208 string *marker);
1209
// Fills *result with a shard id -> string map for the given bucket.
1210 void get_bucket_instance_ids(const RGWBucketInfo& bucket_info, int shard_id, map<int, string> *result);
1211
// Counter behind get_new_req_id(), which pre-increments it atomically.
1212 std::atomic<int64_t> max_req_id = { 0 };
// Constructed with the name "rados_timer_lock".
// NOTE(review): presumably guards `timer` -- confirm in the .cc.
1213 Mutex lock;
1214 SafeTimer *timer;
1215
// Raw pointers to background subsystems; all start out null in the constructor.
1216 RGWGC *gc;
1217 RGWLC *lc;
1218 RGWObjectExpirer *obj_expirer;
// Switches written by the set_run_*() builder methods; all default to false.
1219 bool use_gc_thread;
1220 bool use_lc_thread;
1221 bool quota_threads;
1222 bool run_sync_thread;
1223 bool run_reshard_thread;
1224
// Exposed read-only through get_async_rados().
1225 RGWAsyncRadosProcessor* async_rados;
1226
1227 RGWMetaNotifier *meta_notifier;
1228 RGWDataNotifier *data_notifier;
1229 RGWMetaSyncProcessorThread *meta_sync_processor_thread;
// Exposed through get_sync_tracer().
1230 RGWSyncTraceManager *sync_tracer = nullptr;
// NOTE(review): string key is presumably the source zone (compare
// get_data_sync_manager(source_zone)) -- confirm.
1231 map<string, RGWDataSyncProcessorThread *> data_sync_processor_threads;
1232
1233 boost::optional<rgw::BucketTrimManager> bucket_trim;
1234 RGWSyncLogTrimThread *sync_log_trimmer{nullptr};
1235
// Constructed with the names "meta_sync_thread_lock" / "data_sync_thread_lock".
1236 Mutex meta_sync_thread_lock;
1237 Mutex data_sync_thread_lock;
1238
1239 librados::IoCtx root_pool_ctx; // .rgw
1240
1241 double inject_notify_timeout_probability = 0;
1242 unsigned max_notify_retries = 0;
1243
1244 friend class RGWWatcher;
1245
// Constructed with the name "rados_bucket_id".
1246 Mutex bucket_id_lock;
1247
1248 // This field represents the number of bucket index object shards
1249 uint32_t bucket_index_max_shards;
1250
// Private helpers that resolve an object to the IoCtx / rgw_rados_ref used to
// reach it; results are returned through the trailing pointer argument.
1251 int get_obj_head_ioctx(const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::IoCtx *ioctx);
1252 int get_obj_head_ref(const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_rados_ref *ref);
1253 int get_system_obj_ref(const rgw_raw_obj& obj, rgw_rados_ref *ref);
// Data member (not a method); initialised to 0 in the constructor.
1254 uint64_t max_bucket_id;
1255
1256 int get_olh_target_state(RGWObjectCtx& rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj,
1257 RGWObjState *olh_state, RGWObjState **target_state);
// Same parameter list as the public get_obj_state(); NOTE(review): presumably
// its implementation -- confirm in the .cc.
1258 int get_obj_state_impl(RGWObjectCtx *rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWObjState **state,
1259 bool follow_olh, bool assume_noent = false);
// Two overloads: one starts from an object (and reports its state through
// **state), the other from an already available RGWObjState.
1260 int append_atomic_test(RGWObjectCtx *rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj,
1261 librados::ObjectOperation& op, RGWObjState **state);
1262 int append_atomic_test(const RGWObjState* astate, librados::ObjectOperation& op);
1263
1264 int update_placement_map();
1265 int store_bucket_info(RGWBucketInfo& info, map<string, bufferlist> *pattrs, RGWObjVersionTracker *objv_tracker, bool exclusive);
1266
// The helpers below take a librados operation by reference and return void,
// i.e. they add to `op` rather than execute anything themselves.
1267 void remove_rgw_head_obj(librados::ObjectWriteOperation& op);
1268 void cls_obj_check_prefix_exist(librados::ObjectOperation& op, const string& prefix, bool fail_if_exist);
1269 void cls_obj_check_mtime(librados::ObjectOperation& op, const real_time& mtime, bool high_precision_time, RGWCheckMTimeType type);
1270 protected:
// Set through set_context() / initialize(CephContext*); returned by ctx().
1271 CephContext *cct;
1272
// A vector of librados handles plus bookkeeping: next_rados_handle starts at
// 0, handle_lock is constructed as "rados_handle_lock", and rados_map is keyed
// by pthread_t. NOTE(review): presumably a per-thread handle assignment --
// confirm in the .cc.
1273 std::vector<librados::Rados> rados;
1274 uint32_t next_rados_handle;
1275 RWLock handle_lock;
1276 std::map<pthread_t, int> rados_map;
1277
// Cache of bucket_info_entry values; null until set up elsewhere.
1278 using RGWChainedCacheImpl_bucket_info_entry = RGWChainedCacheImpl<bucket_info_entry>;
1279 RGWChainedCacheImpl_bucket_info_entry *binfo_cache;
1280
// LRU map rgw_obj -> tombstone_entry; exposed through get_tombstone_cache().
1281 using tombstone_cache_t = lru_map<rgw_obj, tombstone_entry>;
1282 tombstone_cache_t *obj_tombstone_cache;
1283
1284 librados::IoCtx gc_pool_ctx; // .rgw.gc
1285 librados::IoCtx lc_pool_ctx; // .rgw.lc
1286 librados::IoCtx objexp_pool_ctx;
1287 librados::IoCtx reshard_pool_ctx;
1288
// Initialised to false in the constructor.
1289 bool pools_initialized;
1290
1291 RGWQuotaHandler *quota_handler;
1292
// Exposed through get_cr_registry().
1293 RGWCoroutinesManagerRegistry *cr_registry;
1294
// Exposed through get_sync_module().
1295 RGWSyncModuleInstanceRef sync_module;
1296 bool writeable_zone{false};
1297
1298 RGWIndexCompletionManager *index_completion_manager{nullptr};
1299
// Written by set_use_cache().
1300 bool use_cache{false};
1301 public:
1302 RGWRados(): lock("rados_timer_lock"), timer(NULL),
1303 gc(NULL), lc(NULL), obj_expirer(NULL), use_gc_thread(false), use_lc_thread(false), quota_threads(false),
1304 run_sync_thread(false), run_reshard_thread(false), async_rados(nullptr), meta_notifier(NULL),
1305 data_notifier(NULL), meta_sync_processor_thread(NULL),
1306 meta_sync_thread_lock("meta_sync_thread_lock"), data_sync_thread_lock("data_sync_thread_lock"),
1307 bucket_id_lock("rados_bucket_id"),
1308 bucket_index_max_shards(0),
1309 max_bucket_id(0), cct(NULL),
1310 next_rados_handle(0),
1311 handle_lock("rados_handle_lock"),
1312 binfo_cache(NULL), obj_tombstone_cache(nullptr),
1313 pools_initialized(false),
1314 quota_handler(NULL),
1315 cr_registry(NULL),
1316 meta_mgr(NULL), data_log(NULL), reshard(NULL) {}
1317
1318 RGWRados& set_use_cache(bool status) {
1319 use_cache = status;
1320 return *this;
1321 }
1322
1323 RGWLC *get_lc() {
1324 return lc;
1325 }
1326
1327 RGWRados& set_run_gc_thread(bool _use_gc_thread) {
1328 use_gc_thread = _use_gc_thread;
1329 return *this;
1330 }
1331
1332 RGWRados& set_run_lc_thread(bool _use_lc_thread) {
1333 use_lc_thread = _use_lc_thread;
1334 return *this;
1335 }
1336
1337 RGWRados& set_run_quota_threads(bool _run_quota_threads) {
1338 quota_threads = _run_quota_threads;
1339 return *this;
1340 }
1341
1342 RGWRados& set_run_sync_thread(bool _run_sync_thread) {
1343 run_sync_thread = _run_sync_thread;
1344 return *this;
1345 }
1346
1347 RGWRados& set_run_reshard_thread(bool _run_reshard_thread) {
1348 run_reshard_thread = _run_reshard_thread;
1349 return *this;
1350 }
1351
1352 uint64_t get_new_req_id() {
1353 return ++max_req_id;
1354 }
1355
1356 librados::IoCtx* get_lc_pool_ctx() {
1357 return &lc_pool_ctx;
1358 }
1359 void set_context(CephContext *_cct) {
1360 cct = _cct;
1361 }
1362
// Public member holding the RGWServices instance used by this store.
1363 RGWServices svc;
1364
1365 /**
1366 * AmazonS3 errors contain a HostId string, but it is an opaque base64 blob; we
1367 * try to be more transparent. This has a wrapper so we can update it when zonegroup/zone are changed.
1368 */
1369 string host_id;
1370
1371 // pulls missing periods for period_history
1372 std::unique_ptr<RGWPeriodPuller> period_puller;
1373 // maintains a connected history of periods
1374 std::unique_ptr<RGWPeriodHistory> period_history;
1375
1376 RGWAsyncRadosProcessor* get_async_rados() const { return async_rados; };
1377
// Public raw pointers; all three are initialised to null by the constructor.
1378 RGWMetadataManager *meta_mgr;
1379
1380 RGWDataChangesLog *data_log;
1381
1382 RGWReshard *reshard;
1383 std::shared_ptr<RGWReshardWait> reshard_wait;
1384
// Virtual and defaulted: the destructor itself releases nothing explicitly.
// NOTE(review): teardown presumably happens in finalize() -- confirm.
1385 virtual ~RGWRados() = default;
1386
1387 tombstone_cache_t *get_tombstone_cache() {
1388 return obj_tombstone_cache;
1389 }
1390 const RGWSyncModuleInstanceRef& get_sync_module() {
1391 return sync_module;
1392 }
1393 RGWSyncTraceManager *get_sync_tracer() {
1394 return sync_tracer;
1395 }
1396
// Alignment / chunk-size queries. Results are written through the pointer
// arguments; the int-returning ones report a status code.
1397 int get_required_alignment(const rgw_pool& pool, uint64_t *alignment);
1398 void get_max_aligned_size(uint64_t size, uint64_t alignment, uint64_t *max_size);
// palignment is optional (defaults to nullptr) in both overloads. The second
// overload starts from a placement rule and object instead of a pool.
1399 int get_max_chunk_size(const rgw_pool& pool, uint64_t *max_chunk_size, uint64_t *palignment = nullptr);
1400 int get_max_chunk_size(const rgw_placement_rule& placement_rule, const rgw_obj& obj, uint64_t *max_chunk_size, uint64_t *palignment = nullptr);
1401
1402 uint32_t get_max_bucket_shards() {
1403 return rgw_shards_max();
1404 }
1405
1406
1407 int get_raw_obj_ref(const rgw_raw_obj& obj, rgw_rados_ref *ref);
1408
// Raw object listing. An RGWListRawObjsCtx carries the listing state between
// calls; matching names are appended to `oids` and *is_truncated reports
// whether more remain. list_raw_objects() takes both the pool and the ctx.
// NOTE(review): presumably it runs _init on first use and then _next --
// confirm in the .cc.
1409 int list_raw_objects_init(const rgw_pool& pool, const string& marker, RGWListRawObjsCtx *ctx);
1410 int list_raw_objects_next(const string& prefix_filter, int max,
1411 RGWListRawObjsCtx& ctx, list<string>& oids,
1412 bool *is_truncated);
1413 int list_raw_objects(const rgw_pool& pool, const string& prefix_filter, int max,
1414 RGWListRawObjsCtx& ctx, list<string>& oids,
1415 bool *is_truncated);
// Returns the listing position of `ctx` as a string.
1416 string list_raw_objs_get_cursor(RGWListRawObjsCtx& ctx);
1417
1418 CephContext *ctx() { return cct; }
1419 /** do all necessary setup of the storage device */
1420 int initialize(CephContext *_cct) {
1421 set_context(_cct);
1422 return initialize();
1423 }
1424 /** Initialize the RADOS instance and prepare to do other ops */
// NOTE(review): the order in which initialize() runs the init_* steps is not
// visible in this header -- see the .cc.
1425 int init_svc(bool raw);
1426 int init_rados();
1427 int init_complete();
// Parameterless overload; initialize(CephContext*) above forwards to it.
1428 int initialize();
1429 void finalize();
1430
// Service-map registration: daemon type plus key/value metadata, and a later
// status update whose map is taken by rvalue reference (moved-from by the call).
1431 int register_to_service_map(const string& daemon_type, const map<string, string>& meta);
1432 int update_service_map(std::map<std::string, std::string>&& status);
1433
1434 /// list logs
// init yields an RGWAccessHandle; next() reports one log name per call via *name.
1435 int log_list_init(const string& prefix, RGWAccessHandle *handle);
1436 int log_list_next(RGWAccessHandle handle, string *name);
1437
1438 /// remove log
1439 int log_remove(const string& name);
1440
1441 /// show log
// init yields an RGWAccessHandle for the named log; next() fills one rgw_log_entry.
1442 int log_show_init(const string& name, RGWAccessHandle *handle);
1443 int log_show_next(RGWAccessHandle handle, rgw_log_entry *entry);
1444
1445 // log bandwidth info
1446 int log_usage(map<rgw_user_bucket, RGWUsageBatch>& usage_info);
// Reads usage for user/bucket within [start_epoch, end_epoch]; at most
// max_entries per call, with *is_truncated and read_iter supporting
// continuation. NOTE(review): inclusive/exclusive bounds are not visible
// here -- confirm.
1447 int read_usage(const rgw_user& user, const string& bucket_name, uint64_t start_epoch, uint64_t end_epoch,
1448 uint32_t max_entries, bool *is_truncated, RGWUsageIter& read_iter, map<rgw_user_bucket,
1449 rgw_usage_log_entry>& usage);
1450 int trim_usage(const rgw_user& user, const string& bucket_name, uint64_t start_epoch, uint64_t end_epoch);
1451 int clear_usage();
1452
1453 int create_pool(const rgw_pool& pool);
1454
// Bucket index lifecycle for a bucket with `num_shards` index shards.
1455 int init_bucket_index(RGWBucketInfo& bucket_info, int num_shards);
1456 int clean_bucket_index(RGWBucketInfo& bucket_info, int num_shards);
// Writes a newly generated bucket id into *bucket_id.
1457 void create_bucket_id(string *bucket_id);
1458
// Both return bool rather than an error code and report through the last
// pointer argument. NOTE(review): presumably false means "could not resolve
// placement" -- confirm.
1459 bool get_obj_data_pool(const rgw_placement_rule& placement_rule, const rgw_obj& obj, rgw_pool *pool);
1460 bool obj_to_raw(const rgw_placement_rule& placement_rule, const rgw_obj& obj, rgw_raw_obj *raw_obj);
1461
// Creates a bucket. `bucket` and `bucket_info` are non-const references, so
// the call may update them; pobjv / pep_objv / master_bucket /
// master_num_shards are pointer arguments (NOTE(review): which of them may be
// null is not visible here). `exclusive` defaults to true.
1462 int create_bucket(const RGWUserInfo& owner, rgw_bucket& bucket,
1463 const string& zonegroup_id,
1464 const rgw_placement_rule& placement_rule,
1465 const string& swift_ver_location,
1466 const RGWQuotaInfo * pquota_info,
1467 map<std::string,bufferlist>& attrs,
1468 RGWBucketInfo& bucket_info,
1469 obj_version *pobjv,
1470 obj_version *pep_objv,
1471 ceph::real_time creation_time,
1472 rgw_bucket *master_bucket,
1473 uint32_t *master_num_shards,
1474 bool exclusive = true);
1475
1476 RGWCoroutinesManagerRegistry *get_cr_registry() { return cr_registry; }
1477
1478 struct BucketShard {
1479 RGWRados *store;
1480 rgw_bucket bucket;
1481 int shard_id;
1482 librados::IoCtx index_ctx;
1483 string bucket_obj;
1484
1485 explicit BucketShard(RGWRados *_store) : store(_store), shard_id(-1) {}
1486 int init(const rgw_bucket& _bucket, const rgw_obj& obj, RGWBucketInfo* out);
1487 int init(const rgw_bucket& _bucket, int sid, RGWBucketInfo* out);
1488 int init(const RGWBucketInfo& bucket_info, const rgw_obj& obj);
1489 int init(const RGWBucketInfo& bucket_info, int sid);
1490 };
1491
1492 class Object {
1493 RGWRados *store;
1494 RGWBucketInfo bucket_info;
1495 RGWObjectCtx& ctx;
1496 rgw_obj obj;
1497
1498 BucketShard bs;
1499
1500 RGWObjState *state;
1501
1502 bool versioning_disabled;
1503
1504 bool bs_initialized;
1505
1506 protected:
1507 int get_state(RGWObjState **pstate, bool follow_olh, bool assume_noent = false);
1508 void invalidate_state();
1509
1510 int prepare_atomic_modification(librados::ObjectWriteOperation& op, bool reset_obj, const string *ptag,
1511 const char *ifmatch, const char *ifnomatch, bool removal_op, bool modify_tail);
1512 int complete_atomic_modification();
1513
1514 public:
1515 Object(RGWRados *_store, const RGWBucketInfo& _bucket_info, RGWObjectCtx& _ctx, const rgw_obj& _obj) : store(_store), bucket_info(_bucket_info),
1516 ctx(_ctx), obj(_obj), bs(store),
1517 state(NULL), versioning_disabled(false),
1518 bs_initialized(false) {}
1519
1520 RGWRados *get_store() { return store; }
1521 rgw_obj& get_obj() { return obj; }
1522 RGWObjectCtx& get_ctx() { return ctx; }
1523 RGWBucketInfo& get_bucket_info() { return bucket_info; }
1524 int get_manifest(RGWObjManifest **pmanifest);
1525
1526 int get_bucket_shard(BucketShard **pbs) {
1527 if (!bs_initialized) {
1528 int r =
1529 bs.init(bucket_info.bucket, obj, nullptr /* no RGWBucketInfo */);
1530 if (r < 0) {
1531 return r;
1532 }
1533 bs_initialized = true;
1534 }
1535 *pbs = &bs;
1536 return 0;
1537 }
1538
1539 void set_versioning_disabled(bool status) {
1540 versioning_disabled = status;
1541 }
1542
1543 bool versioning_enabled() {
1544 return (!versioning_disabled && bucket_info.versioning_enabled());
1545 }
1546
1547 struct Read {
1548 RGWRados::Object *source;
1549
1550 struct GetObjState {
1551 map<rgw_pool, librados::IoCtx> io_ctxs;
1552 rgw_pool cur_pool;
1553 librados::IoCtx *cur_ioctx{nullptr};
1554 rgw_obj obj;
1555 rgw_raw_obj head_obj;
1556 } state;
1557
1558 struct ConditionParams {
1559 const ceph::real_time *mod_ptr;
1560 const ceph::real_time *unmod_ptr;
1561 bool high_precision_time;
1562 uint32_t mod_zone_id;
1563 uint64_t mod_pg_ver;
1564 const char *if_match;
1565 const char *if_nomatch;
1566
1567 ConditionParams() :
1568 mod_ptr(NULL), unmod_ptr(NULL), high_precision_time(false), mod_zone_id(0), mod_pg_ver(0),
1569 if_match(NULL), if_nomatch(NULL) {}
1570 } conds;
1571
1572 struct Params {
1573 ceph::real_time *lastmod;
1574 uint64_t *obj_size;
1575 map<string, bufferlist> *attrs;
1576
1577 Params() : lastmod(NULL), obj_size(NULL), attrs(NULL) {}
1578 } params;
1579
1580 explicit Read(RGWRados::Object *_source) : source(_source) {}
1581
1582 int prepare();
1583 static int range_to_ofs(uint64_t obj_size, int64_t &ofs, int64_t &end);
1584 int read(int64_t ofs, int64_t end, bufferlist& bl);
1585 int iterate(int64_t ofs, int64_t end, RGWGetDataCB *cb);
1586 int get_attr(const char *name, bufferlist& dest);
1587 };
1588
1589 struct Write {
1590 RGWRados::Object *target;
1591
1592 struct MetaParams {
1593 ceph::real_time *mtime;
1594 map<std::string, bufferlist>* rmattrs;
1595 const bufferlist *data;
1596 RGWObjManifest *manifest;
1597 const string *ptag;
1598 list<rgw_obj_index_key> *remove_objs;
1599 ceph::real_time set_mtime;
1600 rgw_user owner;
1601 RGWObjCategory category;
1602 int flags;
1603 const char *if_match;
1604 const char *if_nomatch;
1605 std::optional<uint64_t> olh_epoch;
1606 ceph::real_time delete_at;
1607 bool canceled;
1608 const string *user_data;
1609 rgw_zone_set *zones_trace;
1610 bool modify_tail;
1611 bool completeMultipart;
1612 bool appendable;
1613
1614 MetaParams() : mtime(NULL), rmattrs(NULL), data(NULL), manifest(NULL), ptag(NULL),
1615 remove_objs(NULL), category(RGWObjCategory::Main), flags(0),
1616 if_match(NULL), if_nomatch(NULL), canceled(false), user_data(nullptr), zones_trace(nullptr),
1617 modify_tail(false), completeMultipart(false), appendable(false) {}
1618 } meta;
1619
1620 explicit Write(RGWRados::Object *_target) : target(_target) {}
1621
1622 int _do_write_meta(uint64_t size, uint64_t accounted_size,
1623 map<std::string, bufferlist>& attrs,
1624 bool modify_tail, bool assume_noent,
1625 void *index_op);
1626 int write_meta(uint64_t size, uint64_t accounted_size,
1627 map<std::string, bufferlist>& attrs);
1628 int write_data(const char *data, uint64_t ofs, uint64_t len, bool exclusive);
1629 const req_state* get_req_state() {
1630 return (req_state *)target->get_ctx().get_private();
1631 }
1632 };
1633
1634 struct Delete {
1635 RGWRados::Object *target;
1636
1637 struct DeleteParams {
1638 rgw_user bucket_owner;
1639 int versioning_status;
1640 ACLOwner obj_owner; /* needed for creation of deletion marker */
1641 uint64_t olh_epoch;
1642 string marker_version_id;
1643 uint32_t bilog_flags;
1644 list<rgw_obj_index_key> *remove_objs;
1645 ceph::real_time expiration_time;
1646 ceph::real_time unmod_since;
1647 ceph::real_time mtime; /* for setting delete marker mtime */
1648 bool high_precision_time;
1649 rgw_zone_set *zones_trace;
1650
1651 DeleteParams() : versioning_status(0), olh_epoch(0), bilog_flags(0), remove_objs(NULL), high_precision_time(false), zones_trace(nullptr) {}
1652 } params;
1653
1654 struct DeleteResult {
1655 bool delete_marker;
1656 string version_id;
1657
1658 DeleteResult() : delete_marker(false) {}
1659 } result;
1660
1661 explicit Delete(RGWRados::Object *_target) : target(_target) {}
1662
1663 int delete_obj();
1664 };
1665
1666 struct Stat {
1667 RGWRados::Object *source;
1668
1669 struct Result {
1670 rgw_obj obj;
1671 RGWObjManifest manifest;
1672 bool has_manifest;
1673 uint64_t size;
1674 struct timespec mtime;
1675 map<string, bufferlist> attrs;
1676
1677 Result() : has_manifest(false), size(0) {}
1678 } result;
1679
1680 struct State {
1681 librados::IoCtx io_ctx;
1682 librados::AioCompletion *completion;
1683 int ret;
1684
1685 State() : completion(NULL), ret(0) {}
1686 } state;
1687
1688
1689 explicit Stat(RGWRados::Object *_source) : source(_source) {}
1690
1691 int stat_async();
1692 int wait();
1693 int stat();
1694 private:
1695 int finish();
1696 };
1697 };
1698
1699 class Bucket {
1700 RGWRados *store;
1701 RGWBucketInfo bucket_info;
1702 rgw_bucket& bucket;
1703 int shard_id;
1704
1705 public:
1706 Bucket(RGWRados *_store, const RGWBucketInfo& _bucket_info) : store(_store), bucket_info(_bucket_info), bucket(bucket_info.bucket),
1707 shard_id(RGW_NO_SHARD) {}
1708 RGWRados *get_store() { return store; }
1709 rgw_bucket& get_bucket() { return bucket; }
1710 RGWBucketInfo& get_bucket_info() { return bucket_info; }
1711
1712 int update_bucket_id(const string& new_bucket_id);
1713
1714 int get_shard_id() { return shard_id; }
1715 void set_shard_id(int id) {
1716 shard_id = id;
1717 }
1718
1719 class UpdateIndex {
1720 RGWRados::Bucket *target;
1721 string optag;
1722 rgw_obj obj;
1723 uint16_t bilog_flags{0};
1724 BucketShard bs;
1725 bool bs_initialized{false};
1726 bool blind;
1727 bool prepared{false};
1728 rgw_zone_set *zones_trace{nullptr};
1729
1730 int init_bs() {
1731 int r =
1732 bs.init(target->get_bucket(), obj, nullptr /* no RGWBucketInfo */);
1733 if (r < 0) {
1734 return r;
1735 }
1736 bs_initialized = true;
1737 return 0;
1738 }
1739
1740 void invalidate_bs() {
1741 bs_initialized = false;
1742 }
1743
1744 int guard_reshard(BucketShard **pbs, std::function<int(BucketShard *)> call);
1745 public:
1746
1747 UpdateIndex(RGWRados::Bucket *_target, const rgw_obj& _obj) : target(_target), obj(_obj),
1748 bs(target->get_store()) {
1749 blind = (target->get_bucket_info().index_type == RGWBIType_Indexless);
1750 }
1751
1752 int get_bucket_shard(BucketShard **pbs) {
1753 if (!bs_initialized) {
1754 int r = init_bs();
1755 if (r < 0) {
1756 return r;
1757 }
1758 }
1759 *pbs = &bs;
1760 return 0;
1761 }
1762
1763 void set_bilog_flags(uint16_t flags) {
1764 bilog_flags = flags;
1765 }
1766
1767 void set_zones_trace(rgw_zone_set *_zones_trace) {
1768 zones_trace = _zones_trace;
1769 }
1770
1771 int prepare(RGWModifyOp, const string *write_tag);
1772 int complete(int64_t poolid, uint64_t epoch, uint64_t size,
1773 uint64_t accounted_size, ceph::real_time& ut,
1774 const string& etag, const string& content_type,
1775 const string& storage_class,
1776 bufferlist *acl_bl, RGWObjCategory category,
1777 list<rgw_obj_index_key> *remove_objs, const string *user_data = nullptr, bool appendable = false);
1778 int complete_del(int64_t poolid, uint64_t epoch,
1779 ceph::real_time& removed_mtime, /* mtime of removed object */
1780 list<rgw_obj_index_key> *remove_objs);
1781 int cancel();
1782
1783 const string *get_optag() { return &optag; }
1784
1785 bool is_prepared() { return prepared; }
1786 }; // class UpdateIndex
1787
1788 class List {
1789 protected:
1790
1791 RGWRados::Bucket *target;
1792 rgw_obj_key next_marker;
1793
1794 int list_objects_ordered(int64_t max,
1795 vector<rgw_bucket_dir_entry> *result,
1796 map<string, bool> *common_prefixes,
1797 bool *is_truncated);
1798 int list_objects_unordered(int64_t max,
1799 vector<rgw_bucket_dir_entry> *result,
1800 map<string, bool> *common_prefixes,
1801 bool *is_truncated);
1802
1803 public:
1804
1805 struct Params {
1806 string prefix;
1807 string delim;
1808 rgw_obj_key marker;
1809 rgw_obj_key end_marker;
1810 string ns;
1811 bool enforce_ns;
1812 RGWAccessListFilter *filter;
1813 bool list_versions;
1814 bool allow_unordered;
1815
1816 Params() :
1817 enforce_ns(true),
1818 filter(NULL),
1819 list_versions(false),
1820 allow_unordered(false)
1821 {}
1822 } params;
1823
1824 explicit List(RGWRados::Bucket *_target) : target(_target) {}
1825
1826 int list_objects(int64_t max,
1827 vector<rgw_bucket_dir_entry> *result,
1828 map<string, bool> *common_prefixes,
1829 bool *is_truncated) {
1830 if (params.allow_unordered) {
1831 return list_objects_unordered(max, result, common_prefixes,
1832 is_truncated);
1833 } else {
1834 return list_objects_ordered(max, result, common_prefixes,
1835 is_truncated);
1836 }
1837 }
1838 rgw_obj_key& get_next_marker() {
1839 return next_marker;
1840 }
1841 }; // class List
1842 }; // class Bucket
1843
// Invokes `handler` with a bucket directory entry selected from a listing
// filtered by obj_prefix / obj_delim.
// NOTE(review): going by the name, the entry passed is the last one in the
// listing; what happens for an empty listing is not visible here -- confirm.
1844 int on_last_entry_in_listing(RGWBucketInfo& bucket_info,
1845 const std::string& obj_prefix,
1846 const std::string& obj_delim,
1847 std::function<int(const rgw_bucket_dir_entry&)> handler);
1848
1849 bool swift_versioning_enabled(const RGWBucketInfo& bucket_info) const {
1850 return bucket_info.has_swift_versioning() &&
1851 bucket_info.swift_ver_location.size();
1852 }
1853
// Swift object versioning helpers; the in / in-out / out role of each argument
// is annotated inline by the original author.
1854 int swift_versioning_copy(RGWObjectCtx& obj_ctx, /* in/out */
1855 const rgw_user& user, /* in */
1856 RGWBucketInfo& bucket_info, /* in */
1857 rgw_obj& obj); /* in */
// `restored` is an output flag set by the call.
1858 int swift_versioning_restore(RGWSysObjectCtx& sysobj_ctx,
1859 RGWObjectCtx& obj_ctx, /* in/out */
1860 const rgw_user& user, /* in */
1861 RGWBucketInfo& bucket_info, /* in */
1862 rgw_obj& obj, /* in */
1863 bool& restored); /* out */
// Takes an already prepared Read op on the source plus the source's state and
// attrs. NOTE(review): *mtime is presumably an output -- confirm.
1864 int copy_obj_to_remote_dest(RGWObjState *astate,
1865 map<string, bufferlist>& src_attrs,
1866 RGWRados::Object::Read& read_op,
1867 const rgw_user& user_id,
1868 rgw_obj& dest_obj,
1869 ceph::real_time *mtime);
1870
// How copy_obj() / fetch_remote_obj() combine the caller's `attrs` with the
// source object's attributes (semantics as described in the copy_obj() docs).
1871 enum AttrsMod {
1872 ATTRSMOD_NONE = 0, // copy source attributes unmodified; `attrs` is ignored
1873 ATTRSMOD_REPLACE = 1, // use `attrs` only; source attributes are not copied
1874 ATTRSMOD_MERGE = 2 // copy source attributes, `attrs` wins on conflicting keys
1875 };
1876
1877 int rewrite_obj(RGWBucketInfo& dest_bucket_info, const rgw_obj& obj);
1878
// Stat an object that lives in `source_zone`. mod_ptr / unmod_ptr / if_match /
// if_nomatch are the usual conditional-request inputs; the trailing pointer
// arguments (src_mtime, psize, pattrs, pheaders, version_id, ptag, petag)
// receive results. NOTE(review): which of them may be null is not visible
// here -- confirm.
1879 int stat_remote_obj(RGWObjectCtx& obj_ctx,
1880 const rgw_user& user_id,
1881 req_info *info,
1882 const string& source_zone,
1883 rgw_obj& src_obj,
1884 RGWBucketInfo& src_bucket_info,
1885 real_time *src_mtime,
1886 uint64_t *psize,
1887 const real_time *mod_ptr,
1888 const real_time *unmod_ptr,
1889 bool high_precision_time,
1890 const char *if_match,
1891 const char *if_nomatch,
1892 map<string, bufferlist> *pattrs,
1893 map<string, string> *pheaders,
1894 string *version_id,
1895 string *ptag,
1896 string *petag);
1897
1898 int fetch_remote_obj(RGWObjectCtx& obj_ctx,
1899 const rgw_user& user_id,
1900 req_info *info,
1901 const string& source_zone,
1902 const rgw_obj& dest_obj,
1903 const rgw_obj& src_obj,
1904 RGWBucketInfo& dest_bucket_info,
1905 RGWBucketInfo& src_bucket_info,
1906 std::optional<rgw_placement_rule> dest_placement,
1907 ceph::real_time *src_mtime,
1908 ceph::real_time *mtime,
1909 const ceph::real_time *mod_ptr,
1910 const ceph::real_time *unmod_ptr,
1911 bool high_precision_time,
1912 const char *if_match,
1913 const char *if_nomatch,
1914 AttrsMod attrs_mod,
1915 bool copy_if_newer,
1916 map<string, bufferlist>& attrs,
1917 RGWObjCategory category,
1918 std::optional<uint64_t> olh_epoch,
1919 ceph::real_time delete_at,
1920 string *ptag,
1921 string *petag,
1922 void (*progress_cb)(off_t, void *),
1923 void *progress_data,
1924 rgw_zone_set *zones_trace= nullptr,
1925 std::optional<uint64_t>* bytes_transferred = 0);
1926 /**
1927 * Copy an object.
1928 * dest_obj: the object to copy into
1929 * src_obj: the object to copy from
1930 * attrs: usage depends on attrs_mod parameter
1931 * attrs_mod: the modification mode of the attrs, may have the following values:
1932 * ATTRSMOD_NONE - the attributes of the source object will be
1933 * copied without modifications, attrs parameter is ignored;
1934 * ATTRSMOD_REPLACE - new object will have the attributes provided by attrs
1935 * parameter, source object attributes are not copied;
1936 * ATTRSMOD_MERGE - any conflicting meta keys on the source object's attributes
1937 * are overwritten by values contained in attrs parameter.
 * progress_cb / progress_data: optional progress callback and the opaque
 *   pointer handed back to it.
 * NOTE(review): the remaining parameters mirror fetch_remote_obj(); their
 *   exact semantics are not documented in this header.
1938 * Returns: 0 on success, -ERR# otherwise.
1939 */
1940 int copy_obj(RGWObjectCtx& obj_ctx,
1941 const rgw_user& user_id,
1942 req_info *info,
1943 const string& source_zone,
1944 rgw_obj& dest_obj,
1945 rgw_obj& src_obj,
1946 RGWBucketInfo& dest_bucket_info,
1947 RGWBucketInfo& src_bucket_info,
1948 const rgw_placement_rule& dest_placement,
1949 ceph::real_time *src_mtime,
1950 ceph::real_time *mtime,
1951 const ceph::real_time *mod_ptr,
1952 const ceph::real_time *unmod_ptr,
1953 bool high_precision_time,
1954 const char *if_match,
1955 const char *if_nomatch,
1956 AttrsMod attrs_mod,
1957 bool copy_if_newer,
1958 map<std::string, bufferlist>& attrs,
1959 RGWObjCategory category,
1960 uint64_t olh_epoch,
1961 ceph::real_time delete_at,
1962 string *version_id,
1963 string *ptag,
1964 string *petag,
1965 void (*progress_cb)(off_t, void *),
1966 void *progress_data);
1967
// Copies object data read through an already prepared Read op into dest_obj.
// NOTE(review): `end` is presumably the last offset to copy -- confirm.
1968 int copy_obj_data(RGWObjectCtx& obj_ctx,
1969 RGWBucketInfo& dest_bucket_info,
1970 const rgw_placement_rule& dest_placement,
1971 RGWRados::Object::Read& read_op, off_t end,
1972 const rgw_obj& dest_obj,
1973 ceph::real_time *mtime,
1974 ceph::real_time set_mtime,
1975 map<string, bufferlist>& attrs,
1976 uint64_t olh_epoch,
1977 ceph::real_time delete_at,
1978 string *petag);
1979
// Moves `obj` to the placement described by placement_rule.
// NOTE(review): inferred from the name and parameters -- confirm.
1980 int transition_obj(RGWObjectCtx& obj_ctx,
1981 RGWBucketInfo& bucket_info,
1982 rgw_obj& obj,
1983 const rgw_placement_rule& placement_rule,
1984 const real_time& mtime,
1985 uint64_t olh_epoch);
1986
1987 int check_bucket_empty(RGWBucketInfo& bucket_info);
1988
1989 /**
1990 * Delete a bucket.
1991 * bucket_info: info of the bucket to delete
 * objv_tracker: object version tracker passed along with the removal
 * check_empty: defaults to true (NOTE(review): presumably "refuse to delete a
 *   non-empty bucket" -- confirm against check_bucket_empty())
1992 * Returns 0 on success, -ERR# otherwise.
1993 */
1994 int delete_bucket(RGWBucketInfo& bucket_info, RGWObjVersionTracker& objv_tracker, bool check_empty = true);
1995
// Wake-up hooks for the sync threads, given the affected shard ids (for data
// sync: per source zone, shard id -> set of strings).
1996 void wakeup_meta_sync_shards(set<int>& shard_ids);
1997 void wakeup_data_sync_shards(const string& source_zone, map<int, set<string> >& shard_ids);
1998
1999 RGWMetaSyncStatusManager* get_meta_sync_manager();
2000 RGWDataSyncStatusManager* get_data_sync_manager(const std::string& source_zone);
2001
2002 int set_bucket_owner(rgw_bucket& bucket, ACLOwner& owner);
2003 int set_buckets_enabled(std::vector<rgw_bucket>& buckets, bool enabled);
// Reports the bucket's suspended state through *suspended.
2004 int bucket_suspended(rgw_bucket& bucket, bool *suspended);
2005
2006 /** Delete an object.*/
// NOTE(review): despite its name, `bucket_owner` is the bucket's
// RGWBucketInfo, not an owner id.
// bilog_flags, expiration_time and zones_trace are optional.
2007 int delete_obj(RGWObjectCtx& obj_ctx,
2008 const RGWBucketInfo& bucket_owner,
2009 const rgw_obj& src_obj,
2010 int versioning_status,
2011 uint16_t bilog_flags = 0,
2012 const ceph::real_time& expiration_time = ceph::real_time(),
2013 rgw_zone_set *zones_trace = nullptr);
2014
2015 int delete_raw_obj(const rgw_raw_obj& obj);
2016
2017 /** Remove an object from the bucket index */
2018 int delete_obj_index(const rgw_obj& obj);
2019
2020 /**
2021 * Set an attr on an object.
2022 * ctx: opaque context pointer (an RGWObjectCtx elsewhere in this class, cf. set_atomic())
 * bucket_info: info of the bucket holding the object
2023 * obj: the object to set the attr on
2024 * name: the attr to set
2025 * bl: the contents of the attr
2026 * Returns: 0 on success, -ERR# otherwise.
2027 */
2028 int set_attr(void *ctx, const RGWBucketInfo& bucket_info, rgw_obj& obj, const char *name, bufferlist& bl);
2029
// Multi-attribute variant: `attrs` are the attributes to set.
// NOTE(review): rmattrs is presumably an optional set of attributes to
// remove -- confirm.
2030 int set_attrs(void *ctx, const RGWBucketInfo& bucket_info, rgw_obj& obj,
2031 map<string, bufferlist>& attrs,
2032 map<string, bufferlist>* rmattrs);
2033
// Looks up the RGWObjState for `obj`; the result is returned through **state.
// follow_olh must be given explicitly here; the inline overload that follows
// passes true for it.
2034 int get_obj_state(RGWObjectCtx *rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWObjState **state,
2035 bool follow_olh, bool assume_noent = false);
2036 int get_obj_state(RGWObjectCtx *rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWObjState **state) {
2037 return get_obj_state(rctx, bucket_info, obj, state, true);
2038 }
2039
// Callback type used by iterate_obj(). Its parameter list matches
// get_obj_iterate_cb() below: (read_obj, obj_ofs, read_ofs, len, is_head_obj,
// astate, arg).
2040 using iterate_obj_cb = int (*)(const rgw_raw_obj&, off_t, off_t,
2041 off_t, bool, RGWObjState*, void*);
2042
// Drives `cb` (with the opaque `arg`) over the byte range given by ofs and
// end of `obj`. NOTE(review): whether `end` is inclusive, and how
// max_chunk_size bounds each call, is not visible here -- confirm.
2043 int iterate_obj(RGWObjectCtx& ctx, const RGWBucketInfo& bucket_info,
2044 const rgw_obj& obj, off_t ofs, off_t end,
2045 uint64_t max_chunk_size, iterate_obj_cb cb, void *arg);
2046
2047 int flush_read_list(struct get_obj_data *d);
2048
2049 int get_obj_iterate_cb(const rgw_raw_obj& read_obj, off_t obj_ofs,
2050 off_t read_ofs, off_t len, bool is_head_obj,
2051 RGWObjState *astate, void *arg);
2052
2053 void get_obj_aio_completion_cb(librados::completion_t cb, void *arg);
2054
2055 /**
2056 * a simple object read without keeping state
2057 */
2058
// Stats a raw object; every pointer argument is an output slot.
// NOTE(review): which of them may be null is not visible here.
2059 int raw_obj_stat(rgw_raw_obj& obj, uint64_t *psize, ceph::real_time *pmtime, uint64_t *epoch,
2060 map<string, bufferlist> *attrs, bufferlist *first_chunk,
2061 RGWObjVersionTracker *objv_tracker);
2062
// Run a prepared librados write / read operation against `obj`.
2063 int obj_operate(const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::ObjectWriteOperation *op);
2064 int obj_operate(const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::ObjectReadOperation *op);
2065
// Runs `call` against a bucket shard.
// NOTE(review): presumably retries while the bucket is being resharded
// (see block_while_resharding) -- confirm.
2066 int guard_reshard(BucketShard *bs,
2067 const rgw_obj& obj_instance,
2068 const RGWBucketInfo& bucket_info,
2069 std::function<int(BucketShard *)> call);
// Reports the post-reshard bucket id through *new_bucket_id.
2070 int block_while_resharding(RGWRados::BucketShard *bs,
2071 string *new_bucket_id,
2072 const RGWBucketInfo& bucket_info,
2073 optional_yield y);
2074
// OLH (the "olh" used for versioned objects throughout this header) helpers.
// The bucket_index_* functions take the bucket info plus an object instance
// and operate on the bucket index; the remaining ones work on the OLH object
// state itself. zones_trace is optional (nullptr) wherever it appears.
2075 void bucket_index_guard_olh_op(RGWObjState& olh_state, librados::ObjectOperation& op);
// Writes the tag of the started modification to *op_tag.
2076 int olh_init_modification(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& olh_obj, string *op_tag);
2077 int olh_init_modification_impl(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& olh_obj, string *op_tag);
2078 int bucket_index_link_olh(const RGWBucketInfo& bucket_info, RGWObjState& olh_state,
2079 const rgw_obj& obj_instance, bool delete_marker,
2080 const string& op_tag, struct rgw_bucket_dir_entry_meta *meta,
2081 uint64_t olh_epoch,
2082 ceph::real_time unmod_since, bool high_precision_time,
2083 rgw_zone_set *zones_trace = nullptr,
2084 bool log_data_change = false);
2085 int bucket_index_unlink_instance(const RGWBucketInfo& bucket_info, const rgw_obj& obj_instance, const string& op_tag, const string& olh_tag, uint64_t olh_epoch, rgw_zone_set *zones_trace = nullptr);
// Reads OLH log entries starting from ver_marker into *log (keyed by a
// uint64_t); *is_truncated reports whether more entries remain.
2086 int bucket_index_read_olh_log(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& obj_instance, uint64_t ver_marker,
2087 map<uint64_t, vector<rgw_bucket_olh_log_entry> > *log, bool *is_truncated);
2088 int bucket_index_trim_olh_log(const RGWBucketInfo& bucket_info, RGWObjState& obj_state, const rgw_obj& obj_instance, uint64_t ver);
2089 int bucket_index_clear_olh(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& obj_instance);
// Applies a previously read log; *plast_ver receives a version number.
// NOTE(review): presumably the last version applied -- confirm.
2090 int apply_olh_log(RGWObjectCtx& ctx, RGWObjState& obj_state, const RGWBucketInfo& bucket_info, const rgw_obj& obj,
2091 bufferlist& obj_tag, map<uint64_t, vector<rgw_bucket_olh_log_entry> >& log,
2092 uint64_t *plast_ver, rgw_zone_set *zones_trace = nullptr);
2093 int update_olh(RGWObjectCtx& obj_ctx, RGWObjState *state, const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_zone_set *zones_trace = nullptr);
2094 int set_olh(RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info, const rgw_obj& target_obj, bool delete_marker, rgw_bucket_dir_entry_meta *meta,
2095 uint64_t olh_epoch, ceph::real_time unmod_since, bool high_precision_time,
2096 rgw_zone_set *zones_trace = nullptr, bool log_data_change = false);
2097 int repair_olh(RGWObjState* state, const RGWBucketInfo& bucket_info,
2098 const rgw_obj& obj);
2099 int unlink_obj_instance(RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info, const rgw_obj& target_obj,
2100 uint64_t olh_epoch, rgw_zone_set *zones_trace = nullptr);
2101
// Splits pending entries: entries to remove are reported through
// *rm_pending_entries. NOTE(review): the selection rule is not visible here.
2102 void check_pending_olh_entries(map<string, bufferlist>& pending_entries, map<string, bufferlist> *rm_pending_entries);
2103 int remove_olh_pending_entries(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& olh_obj, map<string, bufferlist>& pending_attrs);
// Resolves the OLH object to the object it currently points at (*target).
2104 int follow_olh(const RGWBucketInfo& bucket_info, RGWObjectCtx& ctx, RGWObjState *state, const rgw_obj& olh_obj, rgw_obj *target);
2105 int get_olh(const RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWOLHInfo *olh);
2106
2107 void gen_rand_obj_instance_name(rgw_obj_key *target_key);
2108 void gen_rand_obj_instance_name(rgw_obj *target);
2109
2110 int update_containers_stats(map<string, RGWBucketEnt>& m);
2111 int append_async(rgw_raw_obj& obj, size_t size, bufferlist& bl);
2112
2113 public:
2114 void set_atomic(void *ctx, rgw_obj& obj) {
2115 RGWObjectCtx *rctx = static_cast<RGWObjectCtx *>(ctx);
2116 rctx->set_atomic(obj);
2117 }
2118 void set_prefetch_data(void *ctx, const rgw_obj& obj) {
2119 RGWObjectCtx *rctx = static_cast<RGWObjectCtx *>(ctx);
2120 rctx->set_prefetch_data(obj);
2121 }
2122 int decode_policy(bufferlist& bl, ACLOwner *owner);
2123 int get_bucket_stats(RGWBucketInfo& bucket_info, int shard_id, string *bucket_ver, string *master_ver,
2124 map<RGWObjCategory, RGWStorageStats>& stats, string *max_marker, bool* syncstopped = NULL);
2125 int get_bucket_stats_async(RGWBucketInfo& bucket_info, int shard_id, RGWGetBucketStats_CB *cb);
2126 int get_user_stats(const rgw_user& user, RGWStorageStats& stats);
2127 int get_user_stats_async(const rgw_user& user, RGWGetUserStats_CB *cb);
2128 void get_bucket_instance_obj(const rgw_bucket& bucket, rgw_raw_obj& obj);
2129 void get_bucket_meta_oid(const rgw_bucket& bucket, string& oid);
2130
2131 int put_bucket_entrypoint_info(const string& tenant_name, const string& bucket_name, RGWBucketEntryPoint& entry_point,
2132 bool exclusive, RGWObjVersionTracker& objv_tracker, ceph::real_time mtime,
2133 map<string, bufferlist> *pattrs);
2134 int put_bucket_instance_info(RGWBucketInfo& info, bool exclusive, ceph::real_time mtime, map<string, bufferlist> *pattrs);
2135 int get_bucket_entrypoint_info(RGWSysObjectCtx& obj_ctx, const string& tenant_name, const string& bucket_name,
2136 RGWBucketEntryPoint& entry_point, RGWObjVersionTracker *objv_tracker,
2137 ceph::real_time *pmtime, map<string, bufferlist> *pattrs, rgw_cache_entry_info *cache_info = NULL,
2138 boost::optional<obj_version> refresh_version = boost::none);
2139 int get_bucket_instance_info(RGWSysObjectCtx& obj_ctx, const string& meta_key, RGWBucketInfo& info, ceph::real_time *pmtime, map<string, bufferlist> *pattrs);
2140 int get_bucket_instance_info(RGWSysObjectCtx& obj_ctx, const rgw_bucket& bucket, RGWBucketInfo& info, ceph::real_time *pmtime, map<string, bufferlist> *pattrs);
2141 int get_bucket_instance_from_oid(RGWSysObjectCtx& obj_ctx, const string& oid, RGWBucketInfo& info, ceph::real_time *pmtime, map<string, bufferlist> *pattrs,
2142 rgw_cache_entry_info *cache_info = NULL,
2143 boost::optional<obj_version> refresh_version = boost::none);
2144
2145 int convert_old_bucket_info(RGWSysObjectCtx& obj_ctx, const string& tenant_name, const string& bucket_name);
2146 static void make_bucket_entry_name(const string& tenant_name, const string& bucket_name, string& bucket_entry);
2147
2148
2149 private:
2150 int _get_bucket_info(RGWSysObjectCtx& obj_ctx, const string& tenant,
2151 const string& bucket_name, RGWBucketInfo& info,
2152 real_time *pmtime,
2153 map<string, bufferlist> *pattrs,
2154 boost::optional<obj_version> refresh_version);
2155 public:
2156
2157 bool call(std::string_view command, const cmdmap_t& cmdmap,
2158 std::string_view format,
2159 bufferlist& out) override final;
2160
2161 protected:
2162 // `call_list` must iterate over all cache entries and call
2163 // `cache_list_dump_helper` with the supplied Formatter on any that
2164 // include `filter` as a substring.
2165 //
2166 void call_list(const std::optional<std::string>& filter,
2167 Formatter* format);
2168 // `call_inspect` must look up the requested target and, if found,
2169 // dump it to the supplied Formatter and return true. If not found,
2170 // it must return false.
2171 //
2172 bool call_inspect(const std::string& target, Formatter* format);
2173
2174 // `call_erase` must erase the requested target and return true. If
2175 // the requested target does not exist, it should return false.
2176 bool call_erase(const std::string& target);
2177
2178 // `call_zap` must erase the cache.
2179 void call_zap();
2180 public:
2181
2182 int get_bucket_info(RGWSysObjectCtx& obj_ctx,
2183 const string& tenant_name, const string& bucket_name,
2184 RGWBucketInfo& info,
2185 ceph::real_time *pmtime, map<string, bufferlist> *pattrs = NULL);
2186
2187 // Returns 0 on successful refresh. Returns error code if there was
2188 // an error or the version stored on the OSD is the same as that
2189 // presented in the BucketInfo structure.
2190 //
2191 int try_refresh_bucket_info(RGWBucketInfo& info,
2192 ceph::real_time *pmtime,
2193 map<string, bufferlist> *pattrs = nullptr);
2194
2195 int put_linked_bucket_info(RGWBucketInfo& info, bool exclusive, ceph::real_time mtime, obj_version *pep_objv,
2196 map<string, bufferlist> *pattrs, bool create_entry_point);
2197
2198 int cls_obj_prepare_op(BucketShard& bs, RGWModifyOp op, string& tag, rgw_obj& obj, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
2199 int cls_obj_complete_op(BucketShard& bs, const rgw_obj& obj, RGWModifyOp op, string& tag, int64_t pool, uint64_t epoch,
2200 rgw_bucket_dir_entry& ent, RGWObjCategory category, list<rgw_obj_index_key> *remove_objs, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
2201 int cls_obj_complete_add(BucketShard& bs, const rgw_obj& obj, string& tag, int64_t pool, uint64_t epoch, rgw_bucket_dir_entry& ent,
2202 RGWObjCategory category, list<rgw_obj_index_key> *remove_objs, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
2203 int cls_obj_complete_del(BucketShard& bs, string& tag, int64_t pool, uint64_t epoch, rgw_obj& obj,
2204 ceph::real_time& removed_mtime, list<rgw_obj_index_key> *remove_objs, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
2205 int cls_obj_complete_cancel(BucketShard& bs, string& tag, rgw_obj& obj, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
2206 int cls_obj_set_bucket_tag_timeout(RGWBucketInfo& bucket_info, uint64_t timeout);
2207 int cls_bucket_list_ordered(RGWBucketInfo& bucket_info, int shard_id,
2208 const rgw_obj_index_key& start,
2209 const string& prefix,
2210 uint32_t num_entries, bool list_versions,
2211 map<string, rgw_bucket_dir_entry>& m,
2212 bool *is_truncated,
2213 rgw_obj_index_key *last_entry,
2214 bool (*force_check_filter)(const string& name) = nullptr);
2215 int cls_bucket_list_unordered(RGWBucketInfo& bucket_info, int shard_id,
2216 const rgw_obj_index_key& start,
2217 const string& prefix,
2218 uint32_t num_entries, bool list_versions,
2219 vector<rgw_bucket_dir_entry>& ent_list,
2220 bool *is_truncated, rgw_obj_index_key *last_entry,
2221 bool (*force_check_filter)(const string& name) = nullptr);
2222 int cls_bucket_head(const RGWBucketInfo& bucket_info, int shard_id, vector<rgw_bucket_dir_header>& headers, map<int, string> *bucket_instance_ids = NULL);
2223 int cls_bucket_head_async(const RGWBucketInfo& bucket_info, int shard_id, RGWGetDirHeader_CB *ctx, int *num_aio);
2224 int list_bi_log_entries(RGWBucketInfo& bucket_info, int shard_id, string& marker, uint32_t max, std::list<rgw_bi_log_entry>& result, bool *truncated);
2225 int trim_bi_log_entries(RGWBucketInfo& bucket_info, int shard_id, string& marker, string& end_marker);
2226 int resync_bi_log_entries(RGWBucketInfo& bucket_info, int shard_id);
2227 int stop_bi_log_entries(RGWBucketInfo& bucket_info, int shard_id);
2228 int get_bi_log_status(RGWBucketInfo& bucket_info, int shard_id, map<int, string>& max_marker);
2229
2230 int bi_get_instance(const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_bucket_dir_entry *dirent);
2231 int bi_get_olh(const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_bucket_olh_entry *olh);
2232 int bi_get(const RGWBucketInfo& bucket_info, const rgw_obj& obj, BIIndexType index_type, rgw_cls_bi_entry *entry);
2233 void bi_put(librados::ObjectWriteOperation& op, BucketShard& bs, rgw_cls_bi_entry& entry);
2234 int bi_put(BucketShard& bs, rgw_cls_bi_entry& entry);
2235 int bi_put(rgw_bucket& bucket, rgw_obj& obj, rgw_cls_bi_entry& entry);
2236 int bi_list(rgw_bucket& bucket, int shard_id, const string& filter_obj, const string& marker, uint32_t max, list<rgw_cls_bi_entry> *entries, bool *is_truncated);
2237 int bi_list(BucketShard& bs, const string& filter_obj, const string& marker, uint32_t max, list<rgw_cls_bi_entry> *entries, bool *is_truncated);
2238 int bi_list(rgw_bucket& bucket, const string& obj_name, const string& marker, uint32_t max,
2239 list<rgw_cls_bi_entry> *entries, bool *is_truncated);
2240 int bi_remove(BucketShard& bs);
2241
2242 int cls_obj_usage_log_add(const string& oid, rgw_usage_log_info& info);
2243 int cls_obj_usage_log_read(const string& oid, const string& user, const string& bucket, uint64_t start_epoch,
2244 uint64_t end_epoch, uint32_t max_entries, string& read_iter, map<rgw_user_bucket,
2245 rgw_usage_log_entry>& usage, bool *is_truncated);
2246 int cls_obj_usage_log_trim(const string& oid, const string& user, const string& bucket, uint64_t start_epoch,
2247 uint64_t end_epoch);
2248 int cls_obj_usage_log_clear(string& oid);
2249
2250 int key_to_shard_id(const string& key, int max_shards);
2251 void shard_name(const string& prefix, unsigned max_shards, const string& key, string& name, int *shard_id);
2252 void shard_name(const string& prefix, unsigned max_shards, const string& section, const string& key, string& name);
2253 void shard_name(const string& prefix, unsigned shard_id, string& name);
2254 int get_target_shard_id(const RGWBucketInfo& bucket_info, const string& obj_key, int *shard_id);
2255 void time_log_prepare_entry(cls_log_entry& entry, const ceph::real_time& ut, const string& section, const string& key, bufferlist& bl);
2256 int time_log_add_init(librados::IoCtx& io_ctx);
2257 int time_log_add(const string& oid, list<cls_log_entry>& entries,
2258 librados::AioCompletion *completion, bool monotonic_inc = true);
2259 int time_log_add(const string& oid, const ceph::real_time& ut, const string& section, const string& key, bufferlist& bl);
2260 int time_log_list(const string& oid, const ceph::real_time& start_time, const ceph::real_time& end_time,
2261 int max_entries, list<cls_log_entry>& entries,
2262 const string& marker, string *out_marker, bool *truncated);
2263 int time_log_info(const string& oid, cls_log_header *header);
2264 int time_log_info_async(librados::IoCtx& io_ctx, const string& oid, cls_log_header *header, librados::AioCompletion *completion);
2265 int time_log_trim(const string& oid, const ceph::real_time& start_time, const ceph::real_time& end_time,
2266 const string& from_marker, const string& to_marker,
2267 librados::AioCompletion *completion = nullptr);
2268
2269 string objexp_hint_get_shardname(int shard_num);
2270 int objexp_key_shard(const rgw_obj_index_key& key);
2271 void objexp_get_shard(int shard_num,
2272 string& shard); /* out */
2273 int objexp_hint_add(const ceph::real_time& delete_at,
2274 const string& tenant_name,
2275 const string& bucket_name,
2276 const string& bucket_id,
2277 const rgw_obj_index_key& obj_key);
2278 int objexp_hint_list(const string& oid,
2279 const ceph::real_time& start_time,
2280 const ceph::real_time& end_time,
2281 const int max_entries,
2282 const string& marker,
2283 list<cls_timeindex_entry>& entries, /* out */
2284 string *out_marker, /* out */
2285 bool *truncated); /* out */
2286 int objexp_hint_parse(cls_timeindex_entry &ti_entry,
2287 objexp_hint_entry& hint_entry); /* out */
2288 int objexp_hint_trim(const string& oid,
2289 const ceph::real_time& start_time,
2290 const ceph::real_time& end_time,
2291 const string& from_marker = std::string(),
2292 const string& to_marker = std::string());
2293
2294 int lock_exclusive(const rgw_pool& pool, const string& oid, ceph::timespan& duration, string& zone_id, string& owner_id);
2295 int unlock(const rgw_pool& pool, const string& oid, string& zone_id, string& owner_id);
2296
2297 void update_gc_chain(rgw_obj& head_obj, RGWObjManifest& manifest, cls_rgw_obj_chain *chain);
2298 int send_chain_to_gc(cls_rgw_obj_chain& chain, const string& tag, bool sync);
2299 int gc_operate(string& oid, librados::ObjectWriteOperation *op);
2300 int gc_aio_operate(string& oid, librados::ObjectWriteOperation *op, librados::AioCompletion **pc = nullptr);
2301 int gc_operate(string& oid, librados::ObjectReadOperation *op, bufferlist *pbl);
2302
2303 int list_gc_objs(int *index, string& marker, uint32_t max, bool expired_only, std::list<cls_rgw_gc_obj_info>& result, bool *truncated);
2304 int process_gc(bool expired_only);
2305 bool process_expire_objects();
2306 int defer_gc(void *ctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj);
2307
2308 int process_lc();
2309 int list_lc_progress(const string& marker, uint32_t max_entries, map<string, int> *progress_map);
2310
2311 int bucket_check_index(RGWBucketInfo& bucket_info,
2312 map<RGWObjCategory, RGWStorageStats> *existing_stats,
2313 map<RGWObjCategory, RGWStorageStats> *calculated_stats);
2314 int bucket_rebuild_index(RGWBucketInfo& bucket_info);
2315 int bucket_set_reshard(const RGWBucketInfo& bucket_info, const cls_rgw_bucket_instance_entry& entry);
2316 int remove_objs_from_index(RGWBucketInfo& bucket_info, list<rgw_obj_index_key>& oid_list);
2317 int move_rados_obj(librados::IoCtx& src_ioctx,
2318 const string& src_oid, const string& src_locator,
2319 librados::IoCtx& dst_ioctx,
2320 const string& dst_oid, const string& dst_locator);
2321 int fix_head_obj_locator(const RGWBucketInfo& bucket_info, bool copy_obj, bool remove_bad, rgw_obj_key& key);
2322 int fix_tail_obj_locator(const RGWBucketInfo& bucket_info, rgw_obj_key& key, bool fix, bool *need_fix);
2323
2324 int cls_user_get_header(const string& user_id, cls_user_header *header);
2325 int cls_user_reset_stats(const string& user_id);
2326 int cls_user_get_header_async(const string& user_id, RGWGetUserHeader_CB *ctx);
2327 int cls_user_sync_bucket_stats(rgw_raw_obj& user_obj, const RGWBucketInfo& bucket_info);
2328 int cls_user_list_buckets(rgw_raw_obj& obj,
2329 const string& in_marker,
2330 const string& end_marker,
2331 int max_entries,
2332 list<cls_user_bucket_entry>& entries,
2333 string *out_marker,
2334 bool *truncated);
2335 int cls_user_add_bucket(rgw_raw_obj& obj, const cls_user_bucket_entry& entry);
2336 int cls_user_update_buckets(rgw_raw_obj& obj, list<cls_user_bucket_entry>& entries, bool add);
2337 int cls_user_complete_stats_sync(rgw_raw_obj& obj);
2338 int complete_sync_user_stats(const rgw_user& user_id);
2339 int cls_user_remove_bucket(rgw_raw_obj& obj, const cls_user_bucket& bucket);
2340 int cls_user_get_bucket_stats(const rgw_bucket& bucket, cls_user_bucket_entry& entry);
2341
2342 int check_quota(const rgw_user& bucket_owner, rgw_bucket& bucket,
2343 RGWQuotaInfo& user_quota, RGWQuotaInfo& bucket_quota, uint64_t obj_size, bool check_size_only = false);
2344
2345 int check_bucket_shards(const RGWBucketInfo& bucket_info, const rgw_bucket& bucket,
2346 RGWQuotaInfo& bucket_quota);
2347
2348 int add_bucket_to_reshard(const RGWBucketInfo& bucket_info, uint32_t new_num_shards);
2349
2350 uint64_t instance_id();
2351
2352 librados::Rados* get_rados_handle();
2353
2354 int delete_raw_obj_aio(const rgw_raw_obj& obj, list<librados::AioCompletion *>& handles);
2355 int delete_obj_aio(const rgw_obj& obj, RGWBucketInfo& info, RGWObjState *astate,
2356 list<librados::AioCompletion *>& handles, bool keep_index_consistent);
2357
2358 /* mfa/totp stuff */
2359 private:
2360 void prepare_mfa_write(librados::ObjectWriteOperation *op,
2361 RGWObjVersionTracker *objv_tracker,
2362 const ceph::real_time& mtime);
2363 public:
2364 string get_mfa_oid(const rgw_user& user);
2365 int get_mfa_ref(const rgw_user& user, rgw_rados_ref *ref);
2366 int check_mfa(const rgw_user& user, const string& otp_id, const string& pin);
2367 int create_mfa(const rgw_user& user, const rados::cls::otp::otp_info_t& config,
2368 RGWObjVersionTracker *objv_tracker, const ceph::real_time& mtime);
2369 int remove_mfa(const rgw_user& user, const string& id,
2370 RGWObjVersionTracker *objv_tracker, const ceph::real_time& mtime);
2371 int get_mfa(const rgw_user& user, const string& id, rados::cls::otp::otp_info_t *result);
2372 int list_mfa(const rgw_user& user, list<rados::cls::otp::otp_info_t> *result);
2373 int otp_get_current_time(const rgw_user& user, ceph::real_time *result);
2374
2375 /* mfa interfaces used by metadata engine */
2376 int set_mfa(const string& oid, const list<rados::cls::otp::otp_info_t>& entries, bool reset_obj,
2377 RGWObjVersionTracker *objv_tracker, const ceph::real_time& mtime);
2378 int list_mfa(const string& oid, list<rados::cls::otp::otp_info_t> *result,
2379 RGWObjVersionTracker *objv_tracker, ceph::real_time *pmtime);
2380 private:
2381 /**
2382 * Helper method that generates the list of bucket index objects for the given
2383 * bucket base oid and number of shards.
2384 *
2385 * bucket_oid_base [in] - base name of the bucket index object;
2386 * num_shards [in] - number of bucket index object shards.
2387 * bucket_objs [out] - filled by this method, a list of bucket index objects.
2388 */
2389 void get_bucket_index_objects(const string& bucket_oid_base, uint32_t num_shards,
2390 map<int, string>& bucket_objs, int shard_id = -1);
2391
2392 /**
2393 * Get the bucket index object with the given base bucket index object and object key,
2394 * and the number of bucket index shards.
2395 *
2396 * bucket_oid_base [in] - bucket object base name.
2397 * obj_key [in] - object key.
2398 * num_shards [in] - number of bucket index shards.
2399 * hash_type [in] - type of hash to find the shard ID.
2400 * bucket_obj [out] - the bucket index object for the given object.
2401 *
2402 * Return 0 on success, a failure code otherwise.
2403 */
2404 int get_bucket_index_object(const string& bucket_oid_base, const string& obj_key,
2405 uint32_t num_shards, RGWBucketInfo::BIShardsHashType hash_type, string *bucket_obj, int *shard);
2406
2407 void get_bucket_index_object(const string& bucket_oid_base, uint32_t num_shards,
2408 int shard_id, string *bucket_obj);
2409
2410 /**
2411 * Check the actual on-disk state of the object specified
2412 * by list_state, and fill in the time and size of object.
2413 * Then append any changes to suggested_updates for
2414 * the rgw class' dir_suggest_changes function.
2415 *
2416 * Note that this can maul list_state; don't use it afterwards. Also
2417 * it expects object to already be filled in from list_state; it only
2418 * sets the size and mtime.
2419 *
2420 * Returns 0 on success, -ENOENT if the object doesn't exist on disk,
2421 * and -errno on other failures. (-ENOENT is not a failure, and it
2422 * will encode that info as a suggested update.)
2423 */
2424 int check_disk_state(librados::IoCtx io_ctx,
2425 const RGWBucketInfo& bucket_info,
2426 rgw_bucket_dir_entry& list_state,
2427 rgw_bucket_dir_entry& object,
2428 bufferlist& suggested_updates);
2429
2430 /**
2431 * Init pool iteration
2432 * pool: pool to use for the ctx initialization
2433 * ctx: context object to use for the iteration
2434 * Returns: 0 on success, -ERR# otherwise.
2435 */
2436 int pool_iterate_begin(const rgw_pool& pool, RGWPoolIterCtx& ctx);
2437
2438 /**
2439 * Init pool iteration, starting from a given cursor position
2440 * pool: pool to use
2441 * cursor: position to start iteration
2442 * ctx: context object to use for the iteration
2443 * Returns: 0 on success, -ERR# otherwise.
2444 */
2445 int pool_iterate_begin(const rgw_pool& pool, const string& cursor, RGWPoolIterCtx& ctx);
2446
2447 /**
2448 * Get pool iteration position
2449 * ctx: context object to use for the iteration
2450 * Returns: string representation of position
2451 */
2452 string pool_iterate_get_cursor(RGWPoolIterCtx& ctx);
2453
2454 /**
2455 * Iterate over a pool and return object names, optionally applying a filter
2456 * ctx: iteration context, initialized with pool_iterate_begin()
2457 * num: max number of objects to return
2458 * objs: a vector that the results will be appended to
2459 * is_truncated: if not NULL, will hold true iff iteration is complete (NOTE(review): the name suggests true means more objects remain - confirm against the implementation)
2460 * filter: if not NULL, will be used to filter returned objects
2461 * Returns: 0 on success, -ERR# otherwise.
2462 */
2463 int pool_iterate(RGWPoolIterCtx& ctx, uint32_t num, vector<rgw_bucket_dir_entry>& objs,
2464 bool *is_truncated, RGWAccessListFilter *filter);
2465
2466 uint64_t next_bucket_id();
2467 };
2468
2469 class RGWStoreManager {
2470 public:
2471 RGWStoreManager() {}
2472 static RGWRados *get_storage(CephContext *cct, bool use_gc_thread, bool use_lc_thread, bool quota_threads,
2473 bool run_sync_thread, bool run_reshard_thread, bool use_cache = true) {
2474 RGWRados *store = init_storage_provider(cct, use_gc_thread, use_lc_thread, quota_threads, run_sync_thread,
2475 run_reshard_thread, use_cache);
2476 return store;
2477 }
2478 static RGWRados *get_raw_storage(CephContext *cct) {
2479 RGWRados *store = init_raw_storage_provider(cct);
2480 return store;
2481 }
2482 static RGWRados *init_storage_provider(CephContext *cct, bool use_gc_thread, bool use_lc_thread, bool quota_threads, bool run_sync_thread, bool run_reshard_thread, bool use_metadata_cache);
2483 static RGWRados *init_raw_storage_provider(CephContext *cct);
2484 static void close_storage(RGWRados *store);
2485
2486 };
2487
2488 class RGWMPObj {
2489 string oid;
2490 string prefix;
2491 string meta;
2492 string upload_id;
2493 public:
2494 RGWMPObj() {}
2495 RGWMPObj(const string& _oid, const string& _upload_id) {
2496 init(_oid, _upload_id, _upload_id);
2497 }
2498 void init(const string& _oid, const string& _upload_id) {
2499 init(_oid, _upload_id, _upload_id);
2500 }
2501 void init(const string& _oid, const string& _upload_id, const string& part_unique_str) {
2502 if (_oid.empty()) {
2503 clear();
2504 return;
2505 }
2506 oid = _oid;
2507 upload_id = _upload_id;
2508 prefix = oid + ".";
2509 meta = prefix + upload_id + MP_META_SUFFIX;
2510 prefix.append(part_unique_str);
2511 }
2512 const string& get_meta() const { return meta; }
2513 string get_part(int num) const {
2514 char buf[16];
2515 snprintf(buf, 16, ".%d", num);
2516 string s = prefix;
2517 s.append(buf);
2518 return s;
2519 }
2520 string get_part(const string& part) const {
2521 string s = prefix;
2522 s.append(".");
2523 s.append(part);
2524 return s;
2525 }
2526 const string& get_upload_id() const {
2527 return upload_id;
2528 }
2529 const string& get_key() const {
2530 return oid;
2531 }
2532 bool from_meta(string& meta) {
2533 int end_pos = meta.rfind('.'); // search for ".meta"
2534 if (end_pos < 0)
2535 return false;
2536 int mid_pos = meta.rfind('.', end_pos - 1); // <key>.<upload_id>
2537 if (mid_pos < 0)
2538 return false;
2539 oid = meta.substr(0, mid_pos);
2540 upload_id = meta.substr(mid_pos + 1, end_pos - mid_pos - 1);
2541 init(oid, upload_id, upload_id);
2542 return true;
2543 }
2544 void clear() {
2545 oid = "";
2546 prefix = "";
2547 meta = "";
2548 upload_id = "";
2549 }
2550 }; // class RGWMPObj
2551
2552
2553 class RGWRadosThread {
2554 class Worker : public Thread {
2555 CephContext *cct;
2556 RGWRadosThread *processor;
2557 Mutex lock;
2558 Cond cond;
2559
2560 void wait() {
2561 Mutex::Locker l(lock);
2562 cond.Wait(lock);
2563 };
2564
2565 void wait_interval(const utime_t& wait_time) {
2566 Mutex::Locker l(lock);
2567 cond.WaitInterval(lock, wait_time);
2568 }
2569
2570 public:
2571 Worker(CephContext *_cct, RGWRadosThread *_p) : cct(_cct), processor(_p), lock("RGWRadosThread::Worker") {}
2572 void *entry() override;
2573 void signal() {
2574 Mutex::Locker l(lock);
2575 cond.Signal();
2576 }
2577 };
2578
2579 Worker *worker;
2580
2581 protected:
2582 CephContext *cct;
2583 RGWRados *store;
2584
2585 std::atomic<bool> down_flag = { false };
2586
2587 string thread_name;
2588
2589 virtual uint64_t interval_msec() = 0;
2590 virtual void stop_process() {}
2591 public:
2592 RGWRadosThread(RGWRados *_store, const string& thread_name = "radosgw")
2593 : worker(NULL), cct(_store->ctx()), store(_store), thread_name(thread_name) {}
2594 virtual ~RGWRadosThread() {
2595 stop();
2596 }
2597
2598 virtual int init() { return 0; }
2599 virtual int process() = 0;
2600
2601 bool going_down() { return down_flag; }
2602
2603 void start();
2604 void stop();
2605
2606 void signal() {
2607 if (worker) {
2608 worker->signal();
2609 }
2610 }
2611 };
2612
2613 #endif