]> git.proxmox.com Git - ceph.git/blame - ceph/src/rgw/rgw_rados.h
update download target update for octopus release
[ceph.git] / ceph / src / rgw / rgw_rados.h
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3
4#ifndef CEPH_RGWRADOS_H
5#define CEPH_RGWRADOS_H
6
7#include <functional>
8
9#include "include/rados/librados.hpp"
10#include "include/Context.h"
3a9019d9 11#include "common/admin_socket.h"
7c673cae
FG
12#include "common/RefCountedObj.h"
13#include "common/RWLock.h"
14#include "common/ceph_time.h"
15#include "common/lru_map.h"
11fdf7f2 16#include "common/ceph_json.h"
7c673cae
FG
17#include "rgw_common.h"
18#include "cls/rgw/cls_rgw_types.h"
19#include "cls/version/cls_version_types.h"
20#include "cls/log/cls_log_types.h"
7c673cae 21#include "cls/timeindex/cls_timeindex_types.h"
11fdf7f2 22#include "cls/otp/cls_otp_types.h"
7c673cae
FG
23#include "rgw_log.h"
24#include "rgw_metadata.h"
25#include "rgw_meta_sync_status.h"
26#include "rgw_period_puller.h"
27#include "rgw_sync_module.h"
b32b8144 28#include "rgw_sync_log_trim.h"
11fdf7f2
TL
29#include "rgw_service.h"
30
31#include "services/svc_rados.h"
32#include "services/svc_zone.h"
7c673cae
FG
33
34class RGWWatcher;
35class SafeTimer;
36class ACLOwner;
37class RGWGC;
38class RGWMetaNotifier;
39class RGWDataNotifier;
40class RGWLC;
41class RGWObjectExpirer;
42class RGWMetaSyncProcessorThread;
43class RGWDataSyncProcessorThread;
44class RGWSyncLogTrimThread;
11fdf7f2 45class RGWSyncTraceManager;
7c673cae
FG
46struct RGWZoneGroup;
47struct RGWZoneParams;
31f18b77
FG
48class RGWReshard;
49class RGWReshardWait;
7c673cae 50
11fdf7f2
TL
51class RGWSysObjectCtx;
52
7c673cae
FG
53/* flags for put_obj_meta() */
54#define PUT_OBJ_CREATE 0x01
55#define PUT_OBJ_EXCL 0x02
56#define PUT_OBJ_CREATE_EXCL (PUT_OBJ_CREATE | PUT_OBJ_EXCL)
57
58#define RGW_OBJ_NS_MULTIPART "multipart"
59#define RGW_OBJ_NS_SHADOW "shadow"
60
61#define RGW_BUCKET_INSTANCE_MD_PREFIX ".bucket.meta."
62
63#define RGW_NO_SHARD -1
64
31f18b77
FG
65#define RGW_SHARDS_PRIME_0 7877
66#define RGW_SHARDS_PRIME_1 65521
67
11fdf7f2
TL
68extern const std::string MP_META_SUFFIX;
69
1adf2230 70// only called by rgw_shard_id and rgw_bucket_shard_index
31f18b77
FG
71static inline int rgw_shards_mod(unsigned hval, int max_shards)
72{
73 if (max_shards <= RGW_SHARDS_PRIME_0) {
74 return hval % RGW_SHARDS_PRIME_0 % max_shards;
75 }
76 return hval % RGW_SHARDS_PRIME_1 % max_shards;
77}
78
1adf2230
AA
79// used for logging and tagging
80static inline int rgw_shard_id(const string& key, int max_shards)
31f18b77 81{
1adf2230
AA
82 return rgw_shards_mod(ceph_str_hash_linux(key.c_str(), key.size()),
83 max_shards);
84}
85
86// used for bucket indices
87static inline uint32_t rgw_bucket_shard_index(const std::string& key,
88 int num_shards) {
89 uint32_t sid = ceph_str_hash_linux(key.c_str(), key.size());
90 uint32_t sid2 = sid ^ ((sid & 0xFF) << 24);
91 return rgw_shards_mod(sid2, num_shards);
31f18b77
FG
92}
93
94static inline int rgw_shards_max()
95{
96 return RGW_SHARDS_PRIME_1;
97}
7c673cae
FG
98
99static inline void prepend_bucket_marker(const rgw_bucket& bucket, const string& orig_oid, string& oid)
100{
101 if (bucket.marker.empty() || orig_oid.empty()) {
102 oid = orig_oid;
103 } else {
104 oid = bucket.marker;
105 oid.append("_");
106 oid.append(orig_oid);
107 }
108}
109
110static inline void get_obj_bucket_and_oid_loc(const rgw_obj& obj, string& oid, string& locator)
111{
112 const rgw_bucket& bucket = obj.bucket;
113 prepend_bucket_marker(bucket, obj.get_oid(), oid);
114 const string& loc = obj.key.get_loc();
115 if (!loc.empty()) {
116 prepend_bucket_marker(bucket, loc, locator);
117 } else {
118 locator.clear();
119 }
120}
121
7c673cae
FG
122int rgw_policy_from_attrset(CephContext *cct, map<string, bufferlist>& attrset, RGWAccessControlPolicy *policy);
123
124static inline bool rgw_raw_obj_to_obj(const rgw_bucket& bucket, const rgw_raw_obj& raw_obj, rgw_obj *obj)
125{
126 ssize_t pos = raw_obj.oid.find('_');
127 if (pos < 0) {
128 return false;
129 }
130
131 if (!rgw_obj_key::parse_raw_oid(raw_obj.oid.substr(pos + 1), &obj->key)) {
132 return false;
133 }
134 obj->bucket = bucket;
135
136 return true;
137}
138
11fdf7f2 139
7c673cae 140struct rgw_bucket_placement {
11fdf7f2 141 rgw_placement_rule placement_rule;
7c673cae
FG
142 rgw_bucket bucket;
143
144 void dump(Formatter *f) const;
145};
146
147class rgw_obj_select {
11fdf7f2 148 rgw_placement_rule placement_rule;
7c673cae
FG
149 rgw_obj obj;
150 rgw_raw_obj raw_obj;
151 bool is_raw;
152
153public:
154 rgw_obj_select() : is_raw(false) {}
11fdf7f2
TL
155 explicit rgw_obj_select(const rgw_obj& _obj) : obj(_obj), is_raw(false) {}
156 explicit rgw_obj_select(const rgw_raw_obj& _raw_obj) : raw_obj(_raw_obj), is_raw(true) {}
7c673cae 157 rgw_obj_select(const rgw_obj_select& rhs) {
c07f9fc5 158 placement_rule = rhs.placement_rule;
7c673cae
FG
159 is_raw = rhs.is_raw;
160 if (is_raw) {
161 raw_obj = rhs.raw_obj;
162 } else {
163 obj = rhs.obj;
164 }
165 }
166
167 rgw_raw_obj get_raw_obj(const RGWZoneGroup& zonegroup, const RGWZoneParams& zone_params) const;
168 rgw_raw_obj get_raw_obj(RGWRados *store) const;
169
170 rgw_obj_select& operator=(const rgw_obj& rhs) {
171 obj = rhs;
172 is_raw = false;
173 return *this;
174 }
175
176 rgw_obj_select& operator=(const rgw_raw_obj& rhs) {
177 raw_obj = rhs;
178 is_raw = true;
179 return *this;
180 }
181
11fdf7f2 182 void set_placement_rule(const rgw_placement_rule& rule) {
7c673cae
FG
183 placement_rule = rule;
184 }
11fdf7f2 185 void dump(Formatter *f) const;
7c673cae
FG
186};
187
188struct compression_block {
189 uint64_t old_ofs;
190 uint64_t new_ofs;
191 uint64_t len;
192
193 void encode(bufferlist& bl) const {
194 ENCODE_START(1, 1, bl);
11fdf7f2
TL
195 encode(old_ofs, bl);
196 encode(new_ofs, bl);
197 encode(len, bl);
7c673cae
FG
198 ENCODE_FINISH(bl);
199 }
200
11fdf7f2 201 void decode(bufferlist::const_iterator& bl) {
7c673cae 202 DECODE_START(1, bl);
11fdf7f2
TL
203 decode(old_ofs, bl);
204 decode(new_ofs, bl);
205 decode(len, bl);
7c673cae
FG
206 DECODE_FINISH(bl);
207 }
11fdf7f2 208 void dump(Formatter *f) const;
7c673cae
FG
209};
210WRITE_CLASS_ENCODER(compression_block)
211
212struct RGWCompressionInfo {
213 string compression_type;
214 uint64_t orig_size;
215 vector<compression_block> blocks;
216
217 RGWCompressionInfo() : compression_type("none"), orig_size(0) {}
218 RGWCompressionInfo(const RGWCompressionInfo& cs_info) : compression_type(cs_info.compression_type),
219 orig_size(cs_info.orig_size),
220 blocks(cs_info.blocks) {}
221
222 void encode(bufferlist& bl) const {
223 ENCODE_START(1, 1, bl);
11fdf7f2
TL
224 encode(compression_type, bl);
225 encode(orig_size, bl);
226 encode(blocks, bl);
7c673cae
FG
227 ENCODE_FINISH(bl);
228 }
229
11fdf7f2 230 void decode(bufferlist::const_iterator& bl) {
7c673cae 231 DECODE_START(1, bl);
11fdf7f2
TL
232 decode(compression_type, bl);
233 decode(orig_size, bl);
234 decode(blocks, bl);
7c673cae 235 DECODE_FINISH(bl);
11fdf7f2
TL
236 }
237 void dump(Formatter *f) const;
7c673cae
FG
238};
239WRITE_CLASS_ENCODER(RGWCompressionInfo)
240
241int rgw_compression_info_from_attrset(map<string, bufferlist>& attrs, bool& need_decompress, RGWCompressionInfo& cs_info);
242
243struct RGWOLHInfo {
244 rgw_obj target;
245 bool removed;
246
247 RGWOLHInfo() : removed(false) {}
248
249 void encode(bufferlist& bl) const {
250 ENCODE_START(1, 1, bl);
11fdf7f2
TL
251 encode(target, bl);
252 encode(removed, bl);
7c673cae
FG
253 ENCODE_FINISH(bl);
254 }
255
11fdf7f2 256 void decode(bufferlist::const_iterator& bl) {
7c673cae 257 DECODE_START(1, bl);
11fdf7f2
TL
258 decode(target, bl);
259 decode(removed, bl);
7c673cae
FG
260 DECODE_FINISH(bl);
261 }
262 static void generate_test_instances(list<RGWOLHInfo*>& o);
263 void dump(Formatter *f) const;
264};
265WRITE_CLASS_ENCODER(RGWOLHInfo)
266
267struct RGWOLHPendingInfo {
268 ceph::real_time time;
269
270 RGWOLHPendingInfo() {}
271
272 void encode(bufferlist& bl) const {
273 ENCODE_START(1, 1, bl);
11fdf7f2 274 encode(time, bl);
7c673cae
FG
275 ENCODE_FINISH(bl);
276 }
277
11fdf7f2 278 void decode(bufferlist::const_iterator& bl) {
7c673cae 279 DECODE_START(1, bl);
11fdf7f2 280 decode(time, bl);
7c673cae
FG
281 DECODE_FINISH(bl);
282 }
283
284 void dump(Formatter *f) const;
285};
286WRITE_CLASS_ENCODER(RGWOLHPendingInfo)
287
288struct RGWUsageBatch {
289 map<ceph::real_time, rgw_usage_log_entry> m;
290
291 void insert(ceph::real_time& t, rgw_usage_log_entry& entry, bool *account) {
292 bool exists = m.find(t) != m.end();
293 *account = !exists;
294 m[t].aggregate(entry);
295 }
296};
297
/* Cursor state for reading usage: an iterator string plus an index that starts at 0. */
struct RGWUsageIter {
  std::string read_iter;
  uint32_t index{0};

  RGWUsageIter() {}
};
304
305class RGWGetDataCB {
7c673cae
FG
306public:
307 virtual int handle_data(bufferlist& bl, off_t bl_ofs, off_t bl_len) = 0;
11fdf7f2 308 RGWGetDataCB() {}
7c673cae 309 virtual ~RGWGetDataCB() {}
7c673cae
FG
310};
311
312struct RGWCloneRangeInfo {
313 rgw_obj src;
314 off_t src_ofs;
315 off_t dst_ofs;
316 uint64_t len;
317};
318
319struct RGWObjManifestPart {
320 rgw_obj loc; /* the object where the data is located */
321 uint64_t loc_ofs; /* the offset at that object where the data is located */
322 uint64_t size; /* the part size */
323
324 RGWObjManifestPart() : loc_ofs(0), size(0) {}
325
326 void encode(bufferlist& bl) const {
327 ENCODE_START(2, 2, bl);
11fdf7f2
TL
328 encode(loc, bl);
329 encode(loc_ofs, bl);
330 encode(size, bl);
7c673cae
FG
331 ENCODE_FINISH(bl);
332 }
333
11fdf7f2 334 void decode(bufferlist::const_iterator& bl) {
7c673cae 335 DECODE_START_LEGACY_COMPAT_LEN_32(2, 2, 2, bl);
11fdf7f2
TL
336 decode(loc, bl);
337 decode(loc_ofs, bl);
338 decode(size, bl);
7c673cae
FG
339 DECODE_FINISH(bl);
340 }
341
342 void dump(Formatter *f) const;
343 static void generate_test_instances(list<RGWObjManifestPart*>& o);
344};
345WRITE_CLASS_ENCODER(RGWObjManifestPart)
346
347/*
348 The manifest defines a set of rules for structuring the object parts.
349 There are a few terms to note:
350 - head: the head part of the object, which is the part that contains
351 the first chunk of data. An object might not have a head (as in the
352 case of multipart-part objects).
353 - stripe: data portion of a single rgw object that resides on a single
354 rados object.
355 - part: a collection of stripes that make a contiguous part of an
356 object. A regular object will only have one part (although might have
357 many stripes), a multipart object might have many parts. Each part
358 has a fixed stripe size, although the last stripe of a part might
359 be smaller than that. Consecutive parts may be merged if their stripe
360 value is the same.
361*/
362
363struct RGWObjManifestRule {
364 uint32_t start_part_num;
365 uint64_t start_ofs;
366 uint64_t part_size; /* each part size, 0 if there's no part size, meaning it's unlimited */
367 uint64_t stripe_max_size; /* underlying obj max size */
368 string override_prefix;
369
370 RGWObjManifestRule() : start_part_num(0), start_ofs(0), part_size(0), stripe_max_size(0) {}
371 RGWObjManifestRule(uint32_t _start_part_num, uint64_t _start_ofs, uint64_t _part_size, uint64_t _stripe_max_size) :
372 start_part_num(_start_part_num), start_ofs(_start_ofs), part_size(_part_size), stripe_max_size(_stripe_max_size) {}
373
374 void encode(bufferlist& bl) const {
375 ENCODE_START(2, 1, bl);
11fdf7f2
TL
376 encode(start_part_num, bl);
377 encode(start_ofs, bl);
378 encode(part_size, bl);
379 encode(stripe_max_size, bl);
380 encode(override_prefix, bl);
7c673cae
FG
381 ENCODE_FINISH(bl);
382 }
383
11fdf7f2 384 void decode(bufferlist::const_iterator& bl) {
7c673cae 385 DECODE_START(2, bl);
11fdf7f2
TL
386 decode(start_part_num, bl);
387 decode(start_ofs, bl);
388 decode(part_size, bl);
389 decode(stripe_max_size, bl);
7c673cae 390 if (struct_v >= 2)
11fdf7f2 391 decode(override_prefix, bl);
7c673cae
FG
392 DECODE_FINISH(bl);
393 }
394 void dump(Formatter *f) const;
395};
396WRITE_CLASS_ENCODER(RGWObjManifestRule)
397
398class RGWObjManifest {
399protected:
400 bool explicit_objs; /* old manifest? */
401 map<uint64_t, RGWObjManifestPart> objs;
402
403 uint64_t obj_size;
404
405 rgw_obj obj;
406 uint64_t head_size;
11fdf7f2 407 rgw_placement_rule head_placement_rule;
7c673cae
FG
408
409 uint64_t max_head_size;
410 string prefix;
411 rgw_bucket_placement tail_placement; /* might be different than the original bucket,
412 as object might have been copied across pools */
413 map<uint64_t, RGWObjManifestRule> rules;
414
415 string tail_instance; /* tail object's instance */
416
417 void convert_to_explicit(const RGWZoneGroup& zonegroup, const RGWZoneParams& zone_params);
418 int append_explicit(RGWObjManifest& m, const RGWZoneGroup& zonegroup, const RGWZoneParams& zone_params);
419 void append_rules(RGWObjManifest& m, map<uint64_t, RGWObjManifestRule>::iterator& iter, string *override_prefix);
420
421 void update_iterators() {
422 begin_iter.seek(0);
423 end_iter.seek(obj_size);
424 }
425public:
426
427 RGWObjManifest() : explicit_objs(false), obj_size(0), head_size(0), max_head_size(0),
428 begin_iter(this), end_iter(this) {}
429 RGWObjManifest(const RGWObjManifest& rhs) {
430 *this = rhs;
431 }
432 RGWObjManifest& operator=(const RGWObjManifest& rhs) {
433 explicit_objs = rhs.explicit_objs;
434 objs = rhs.objs;
435 obj_size = rhs.obj_size;
436 obj = rhs.obj;
437 head_size = rhs.head_size;
438 max_head_size = rhs.max_head_size;
439 prefix = rhs.prefix;
440 tail_placement = rhs.tail_placement;
441 rules = rhs.rules;
442 tail_instance = rhs.tail_instance;
443
444 begin_iter.set_manifest(this);
445 end_iter.set_manifest(this);
446
447 begin_iter.seek(rhs.begin_iter.get_ofs());
448 end_iter.seek(rhs.end_iter.get_ofs());
449
450 return *this;
451 }
452
453 map<uint64_t, RGWObjManifestPart>& get_explicit_objs() {
454 return objs;
455 }
456
457
458 void set_explicit(uint64_t _size, map<uint64_t, RGWObjManifestPart>& _objs) {
459 explicit_objs = true;
460 obj_size = _size;
461 objs.swap(_objs);
462 }
463
464 void get_implicit_location(uint64_t cur_part_id, uint64_t cur_stripe, uint64_t ofs, string *override_prefix, rgw_obj_select *location);
465
466 void set_trivial_rule(uint64_t tail_ofs, uint64_t stripe_max_size) {
467 RGWObjManifestRule rule(0, tail_ofs, 0, stripe_max_size);
468 rules[0] = rule;
469 max_head_size = tail_ofs;
470 }
471
472 void set_multipart_part_rule(uint64_t stripe_max_size, uint64_t part_num) {
473 RGWObjManifestRule rule(0, 0, 0, stripe_max_size);
474 rule.start_part_num = part_num;
475 rules[0] = rule;
476 max_head_size = 0;
477 }
478
479 void encode(bufferlist& bl) const {
480 ENCODE_START(7, 6, bl);
11fdf7f2
TL
481 encode(obj_size, bl);
482 encode(objs, bl);
483 encode(explicit_objs, bl);
484 encode(obj, bl);
485 encode(head_size, bl);
486 encode(max_head_size, bl);
487 encode(prefix, bl);
488 encode(rules, bl);
7c673cae 489 bool encode_tail_bucket = !(tail_placement.bucket == obj.bucket);
11fdf7f2 490 encode(encode_tail_bucket, bl);
7c673cae 491 if (encode_tail_bucket) {
11fdf7f2 492 encode(tail_placement.bucket, bl);
7c673cae
FG
493 }
494 bool encode_tail_instance = (tail_instance != obj.key.instance);
11fdf7f2 495 encode(encode_tail_instance, bl);
7c673cae 496 if (encode_tail_instance) {
11fdf7f2 497 encode(tail_instance, bl);
7c673cae 498 }
11fdf7f2
TL
499 encode(head_placement_rule, bl);
500 encode(tail_placement.placement_rule, bl);
7c673cae
FG
501 ENCODE_FINISH(bl);
502 }
503
11fdf7f2 504 void decode(bufferlist::const_iterator& bl) {
7c673cae 505 DECODE_START_LEGACY_COMPAT_LEN_32(7, 2, 2, bl);
11fdf7f2
TL
506 decode(obj_size, bl);
507 decode(objs, bl);
7c673cae 508 if (struct_v >= 3) {
11fdf7f2
TL
509 decode(explicit_objs, bl);
510 decode(obj, bl);
511 decode(head_size, bl);
512 decode(max_head_size, bl);
513 decode(prefix, bl);
514 decode(rules, bl);
7c673cae
FG
515 } else {
516 explicit_objs = true;
517 if (!objs.empty()) {
518 map<uint64_t, RGWObjManifestPart>::iterator iter = objs.begin();
519 obj = iter->second.loc;
520 head_size = iter->second.size;
521 max_head_size = head_size;
522 }
523 }
524
525 if (explicit_objs && head_size > 0 && !objs.empty()) {
526 /* patch up manifest due to issue 16435:
527 * the first object in the explicit objs list might not be the one we need to access, use the
528 * head object instead if set. This would happen if we had an old object that was created
529 * when the explicit objs manifest was around, and it got copied.
530 */
531 rgw_obj& obj_0 = objs[0].loc;
532 if (!obj_0.get_oid().empty() && obj_0.key.ns.empty()) {
533 objs[0].loc = obj;
534 objs[0].size = head_size;
535 }
536 }
537
538 if (struct_v >= 4) {
539 if (struct_v < 6) {
11fdf7f2 540 decode(tail_placement.bucket, bl);
7c673cae
FG
541 } else {
542 bool need_to_decode;
11fdf7f2 543 decode(need_to_decode, bl);
7c673cae 544 if (need_to_decode) {
11fdf7f2 545 decode(tail_placement.bucket, bl);
7c673cae
FG
546 } else {
547 tail_placement.bucket = obj.bucket;
548 }
549 }
550 }
551
552 if (struct_v >= 5) {
553 if (struct_v < 6) {
11fdf7f2 554 decode(tail_instance, bl);
7c673cae
FG
555 } else {
556 bool need_to_decode;
11fdf7f2 557 decode(need_to_decode, bl);
7c673cae 558 if (need_to_decode) {
11fdf7f2 559 decode(tail_instance, bl);
7c673cae
FG
560 } else {
561 tail_instance = obj.key.instance;
562 }
563 }
564 } else { // old object created before 'tail_instance' field added to manifest
565 tail_instance = obj.key.instance;
566 }
567
568 if (struct_v >= 7) {
11fdf7f2
TL
569 decode(head_placement_rule, bl);
570 decode(tail_placement.placement_rule, bl);
7c673cae
FG
571 }
572
573 update_iterators();
574 DECODE_FINISH(bl);
575 }
576
577 void dump(Formatter *f) const;
578 static void generate_test_instances(list<RGWObjManifest*>& o);
579
11fdf7f2
TL
580 int append(RGWObjManifest& m, const RGWZoneGroup& zonegroup,
581 const RGWZoneParams& zone_params);
582 int append(RGWObjManifest& m, RGWSI_Zone *zone_svc);
7c673cae
FG
583
584 bool get_rule(uint64_t ofs, RGWObjManifestRule *rule);
585
586 bool empty() {
587 if (explicit_objs)
588 return objs.empty();
589 return rules.empty();
590 }
591
592 bool has_explicit_objs() {
593 return explicit_objs;
594 }
595
596 bool has_tail() {
597 if (explicit_objs) {
598 if (objs.size() == 1) {
599 map<uint64_t, RGWObjManifestPart>::iterator iter = objs.begin();
600 rgw_obj& o = iter->second.loc;
601 return !(obj == o);
602 }
603 return (objs.size() >= 2);
604 }
605 return (obj_size > head_size);
606 }
607
11fdf7f2 608 void set_head(const rgw_placement_rule& placement_rule, const rgw_obj& _o, uint64_t _s) {
7c673cae
FG
609 head_placement_rule = placement_rule;
610 obj = _o;
611 head_size = _s;
612
613 if (explicit_objs && head_size > 0) {
614 objs[0].loc = obj;
615 objs[0].size = head_size;
616 }
617 }
618
619 const rgw_obj& get_obj() {
620 return obj;
621 }
622
11fdf7f2 623 void set_tail_placement(const rgw_placement_rule& placement_rule, const rgw_bucket& _b) {
7c673cae
FG
624 tail_placement.placement_rule = placement_rule;
625 tail_placement.bucket = _b;
626 }
627
628 const rgw_bucket_placement& get_tail_placement() {
629 return tail_placement;
630 }
631
11fdf7f2 632 const rgw_placement_rule& get_head_placement_rule() {
7c673cae
FG
633 return head_placement_rule;
634 }
635
636 void set_prefix(const string& _p) {
637 prefix = _p;
638 }
639
640 const string& get_prefix() {
641 return prefix;
642 }
643
644 void set_tail_instance(const string& _ti) {
645 tail_instance = _ti;
646 }
647
648 const string& get_tail_instance() {
649 return tail_instance;
650 }
651
652 void set_head_size(uint64_t _s) {
653 head_size = _s;
654 }
655
656 void set_obj_size(uint64_t s) {
657 obj_size = s;
658
659 update_iterators();
660 }
661
662 uint64_t get_obj_size() {
663 return obj_size;
664 }
665
666 uint64_t get_head_size() {
667 return head_size;
668 }
669
7c673cae
FG
670 uint64_t get_max_head_size() {
671 return max_head_size;
672 }
673
674 class obj_iterator {
675 RGWObjManifest *manifest;
676 uint64_t part_ofs; /* where current part starts */
677 uint64_t stripe_ofs; /* where current stripe starts */
678 uint64_t ofs; /* current position within the object */
679 uint64_t stripe_size; /* current part size */
680
681 int cur_part_id;
682 int cur_stripe;
683 string cur_override_prefix;
684
685 rgw_obj_select location;
686
687 map<uint64_t, RGWObjManifestRule>::iterator rule_iter;
688 map<uint64_t, RGWObjManifestRule>::iterator next_rule_iter;
689
690 map<uint64_t, RGWObjManifestPart>::iterator explicit_iter;
691
692 void init() {
693 part_ofs = 0;
694 stripe_ofs = 0;
695 ofs = 0;
696 stripe_size = 0;
697 cur_part_id = 0;
698 cur_stripe = 0;
699 }
700
701 void update_explicit_pos();
702
703
704 protected:
705
706 void set_manifest(RGWObjManifest *m) {
707 manifest = m;
708 }
709
710 public:
711 obj_iterator() : manifest(NULL) {
712 init();
713 }
714 explicit obj_iterator(RGWObjManifest *_m) : manifest(_m) {
715 init();
716 if (!manifest->empty()) {
717 seek(0);
718 }
719 }
720 obj_iterator(RGWObjManifest *_m, uint64_t _ofs) : manifest(_m) {
721 init();
722 if (!manifest->empty()) {
723 seek(_ofs);
724 }
725 }
726 void seek(uint64_t ofs);
727
728 void operator++();
729 bool operator==(const obj_iterator& rhs) {
730 return (ofs == rhs.ofs);
731 }
732 bool operator!=(const obj_iterator& rhs) {
733 return (ofs != rhs.ofs);
734 }
735 const rgw_obj_select& get_location() {
736 return location;
737 }
738
739 /* start of current stripe */
740 uint64_t get_stripe_ofs() {
741 if (manifest->explicit_objs) {
742 return explicit_iter->first;
743 }
744 return stripe_ofs;
745 }
746
747 /* current ofs relative to start of rgw object */
748 uint64_t get_ofs() const {
749 return ofs;
750 }
751
752 /* stripe number */
753 int get_cur_stripe() const {
754 return cur_stripe;
755 }
756
757 /* current stripe size */
758 uint64_t get_stripe_size() {
759 if (manifest->explicit_objs) {
760 return explicit_iter->second.size;
761 }
762 return stripe_size;
763 }
764
765 /* offset where data starts within current stripe */
766 uint64_t location_ofs() {
767 if (manifest->explicit_objs) {
768 return explicit_iter->second.loc_ofs;
769 }
770 return 0; /* all stripes start at zero offset */
771 }
772
773 void update_location();
774
775 friend class RGWObjManifest;
11fdf7f2 776 void dump(Formatter *f) const;
7c673cae
FG
777 };
778
779 const obj_iterator& obj_begin();
780 const obj_iterator& obj_end();
781 obj_iterator obj_find(uint64_t ofs);
782
783 obj_iterator begin_iter;
784 obj_iterator end_iter;
785
786 /*
787 * simple object generator. Using a simple single rule manifest.
788 */
789 class generator {
790 RGWObjManifest *manifest;
791 uint64_t last_ofs;
792 uint64_t cur_part_ofs;
793 int cur_part_id;
794 int cur_stripe;
795 uint64_t cur_stripe_size;
796 string cur_oid;
797
798 string oid_prefix;
799
800 rgw_obj_select cur_obj;
7c673cae
FG
801
802 RGWObjManifestRule rule;
803
804 public:
805 generator() : manifest(NULL), last_ofs(0), cur_part_ofs(0), cur_part_id(0),
806 cur_stripe(0), cur_stripe_size(0) {}
11fdf7f2
TL
807 int create_begin(CephContext *cct, RGWObjManifest *manifest,
808 const rgw_placement_rule& head_placement_rule,
809 const rgw_placement_rule *tail_placement_rule,
810 const rgw_bucket& bucket,
811 const rgw_obj& obj);
7c673cae
FG
812
813 int create_next(uint64_t ofs);
814
815 rgw_raw_obj get_cur_obj(RGWZoneGroup& zonegroup, RGWZoneParams& zone_params) { return cur_obj.get_raw_obj(zonegroup, zone_params); }
11fdf7f2 816 rgw_raw_obj get_cur_obj(RGWRados *store) const { return cur_obj.get_raw_obj(store); }
7c673cae
FG
817
818 /* total max size of current stripe (including head obj) */
11fdf7f2 819 uint64_t cur_stripe_max_size() const {
7c673cae
FG
820 return cur_stripe_size;
821 }
822 };
823};
824WRITE_CLASS_ENCODER(RGWObjManifest)
825
826struct RGWUploadPartInfo {
827 uint32_t num;
828 uint64_t size;
829 uint64_t accounted_size{0};
830 string etag;
831 ceph::real_time modified;
832 RGWObjManifest manifest;
833 RGWCompressionInfo cs_info;
834
835 RGWUploadPartInfo() : num(0), size(0) {}
836
837 void encode(bufferlist& bl) const {
838 ENCODE_START(4, 2, bl);
11fdf7f2
TL
839 encode(num, bl);
840 encode(size, bl);
841 encode(etag, bl);
842 encode(modified, bl);
843 encode(manifest, bl);
844 encode(cs_info, bl);
845 encode(accounted_size, bl);
7c673cae
FG
846 ENCODE_FINISH(bl);
847 }
11fdf7f2 848 void decode(bufferlist::const_iterator& bl) {
7c673cae 849 DECODE_START_LEGACY_COMPAT_LEN(4, 2, 2, bl);
11fdf7f2
TL
850 decode(num, bl);
851 decode(size, bl);
852 decode(etag, bl);
853 decode(modified, bl);
7c673cae 854 if (struct_v >= 3)
11fdf7f2 855 decode(manifest, bl);
7c673cae 856 if (struct_v >= 4) {
11fdf7f2
TL
857 decode(cs_info, bl);
858 decode(accounted_size, bl);
7c673cae
FG
859 } else {
860 accounted_size = size;
861 }
862 DECODE_FINISH(bl);
863 }
864 void dump(Formatter *f) const;
865 static void generate_test_instances(list<RGWUploadPartInfo*>& o);
866};
867WRITE_CLASS_ENCODER(RGWUploadPartInfo)
868
869struct RGWObjState {
870 rgw_obj obj;
871 bool is_atomic;
872 bool has_attrs;
873 bool exists;
874 uint64_t size; //< size of raw object
875 uint64_t accounted_size{0}; //< size before compression, encryption
876 ceph::real_time mtime;
877 uint64_t epoch;
878 bufferlist obj_tag;
181888fb 879 bufferlist tail_tag;
7c673cae
FG
880 string write_tag;
881 bool fake_tag;
882 RGWObjManifest manifest;
883 bool has_manifest;
884 string shadow_obj;
885 bool has_data;
886 bufferlist data;
887 bool prefetch_data;
888 bool keep_tail;
889 bool is_olh;
890 bufferlist olh_tag;
891 uint64_t pg_ver;
892 uint32_t zone_short_id;
893
894 /* important! don't forget to update copy constructor */
895
896 RGWObjVersionTracker objv_tracker;
897
898 map<string, bufferlist> attrset;
899 RGWObjState() : is_atomic(false), has_attrs(0), exists(false),
900 size(0), epoch(0), fake_tag(false), has_manifest(false),
901 has_data(false), prefetch_data(false), keep_tail(false), is_olh(false),
902 pg_ver(0), zone_short_id(0) {}
903 RGWObjState(const RGWObjState& rhs) : obj (rhs.obj) {
904 is_atomic = rhs.is_atomic;
905 has_attrs = rhs.has_attrs;
906 exists = rhs.exists;
907 size = rhs.size;
908 accounted_size = rhs.accounted_size;
909 mtime = rhs.mtime;
910 epoch = rhs.epoch;
911 if (rhs.obj_tag.length()) {
912 obj_tag = rhs.obj_tag;
913 }
181888fb
FG
914 if (rhs.tail_tag.length()) {
915 tail_tag = rhs.tail_tag;
916 }
7c673cae
FG
917 write_tag = rhs.write_tag;
918 fake_tag = rhs.fake_tag;
919 if (rhs.has_manifest) {
920 manifest = rhs.manifest;
921 }
922 has_manifest = rhs.has_manifest;
923 shadow_obj = rhs.shadow_obj;
924 has_data = rhs.has_data;
925 if (rhs.data.length()) {
926 data = rhs.data;
927 }
928 prefetch_data = rhs.prefetch_data;
929 keep_tail = rhs.keep_tail;
930 is_olh = rhs.is_olh;
931 objv_tracker = rhs.objv_tracker;
932 pg_ver = rhs.pg_ver;
933 }
934
935 bool get_attr(string name, bufferlist& dest) {
936 map<string, bufferlist>::iterator iter = attrset.find(name);
937 if (iter != attrset.end()) {
938 dest = iter->second;
939 return true;
940 }
941 return false;
942 }
943};
944
945struct RGWRawObjState {
946 rgw_raw_obj obj;
947 bool has_attrs{false};
948 bool exists{false};
949 uint64_t size{0};
950 ceph::real_time mtime;
11fdf7f2 951 uint64_t epoch{0};
7c673cae
FG
952 bufferlist obj_tag;
953 bool has_data{false};
954 bufferlist data;
955 bool prefetch_data{false};
956 uint64_t pg_ver{0};
957
958 /* important! don't forget to update copy constructor */
959
960 RGWObjVersionTracker objv_tracker;
961
962 map<string, bufferlist> attrset;
963 RGWRawObjState() {}
964 RGWRawObjState(const RGWRawObjState& rhs) : obj (rhs.obj) {
965 has_attrs = rhs.has_attrs;
966 exists = rhs.exists;
967 size = rhs.size;
968 mtime = rhs.mtime;
969 epoch = rhs.epoch;
970 if (rhs.obj_tag.length()) {
971 obj_tag = rhs.obj_tag;
972 }
973 has_data = rhs.has_data;
974 if (rhs.data.length()) {
975 data = rhs.data;
976 }
977 prefetch_data = rhs.prefetch_data;
978 pg_ver = rhs.pg_ver;
979 objv_tracker = rhs.objv_tracker;
980 }
981};
982
983struct RGWPoolIterCtx {
984 librados::IoCtx io_ctx;
985 librados::NObjectIterator iter;
986};
987
988struct RGWListRawObjsCtx {
989 bool initialized;
990 RGWPoolIterCtx iter_ctx;
991
992 RGWListRawObjsCtx() : initialized(false) {}
993};
994
7c673cae
FG
995struct objexp_hint_entry {
996 string tenant;
997 string bucket_name;
998 string bucket_id;
999 rgw_obj_key obj_key;
1000 ceph::real_time exp_time;
1001
1002 void encode(bufferlist& bl) const {
1003 ENCODE_START(2, 1, bl);
11fdf7f2
TL
1004 encode(bucket_name, bl);
1005 encode(bucket_id, bl);
1006 encode(obj_key, bl);
1007 encode(exp_time, bl);
1008 encode(tenant, bl);
7c673cae
FG
1009 ENCODE_FINISH(bl);
1010 }
1011
11fdf7f2 1012 void decode(bufferlist::const_iterator& bl) {
7c673cae
FG
1013 // XXX Do we want DECODE_START_LEGACY_COMPAT_LEN(2, 1, 1, bl); ?
1014 DECODE_START(2, bl);
11fdf7f2
TL
1015 decode(bucket_name, bl);
1016 decode(bucket_id, bl);
1017 decode(obj_key, bl);
1018 decode(exp_time, bl);
7c673cae 1019 if (struct_v >= 2) {
11fdf7f2 1020 decode(tenant, bl);
7c673cae
FG
1021 } else {
1022 tenant.clear();
1023 }
1024 DECODE_FINISH(bl);
1025 }
1026};
1027WRITE_CLASS_ENCODER(objexp_hint_entry)
1028
7c673cae
FG
1029class RGWDataChangesLog;
1030class RGWMetaSyncStatusManager;
1031class RGWDataSyncStatusManager;
7c673cae 1032class RGWCoroutinesManagerRegistry;
7c673cae
FG
1033
1034class RGWGetBucketStats_CB : public RefCountedObject {
1035protected:
1036 rgw_bucket bucket;
1037 map<RGWObjCategory, RGWStorageStats> *stats;
1038public:
224ce89b 1039 explicit RGWGetBucketStats_CB(const rgw_bucket& _bucket) : bucket(_bucket), stats(NULL) {}
7c673cae
FG
1040 ~RGWGetBucketStats_CB() override {}
1041 virtual void handle_response(int r) = 0;
1042 virtual void set_response(map<RGWObjCategory, RGWStorageStats> *_stats) {
1043 stats = _stats;
1044 }
1045};
1046
1047class RGWGetUserStats_CB : public RefCountedObject {
1048protected:
1049 rgw_user user;
1050 RGWStorageStats stats;
1051public:
1052 explicit RGWGetUserStats_CB(const rgw_user& _user) : user(_user) {}
1053 ~RGWGetUserStats_CB() override {}
1054 virtual void handle_response(int r) = 0;
1055 virtual void set_response(RGWStorageStats& _stats) {
1056 stats = _stats;
1057 }
1058};
1059
1060class RGWGetDirHeader_CB;
1061class RGWGetUserHeader_CB;
1062
11fdf7f2
TL
1063class RGWObjectCtx {
1064 RGWRados *store;
1065 RWLock lock{"RGWObjectCtx"};
1066 void *s{nullptr};
7c673cae 1067
11fdf7f2 1068 std::map<rgw_obj, RGWObjState> objs_state;
7c673cae 1069public:
11fdf7f2
TL
1070 explicit RGWObjectCtx(RGWRados *_store) : store(_store) {}
1071 explicit RGWObjectCtx(RGWRados *_store, void *_s) : store(_store), s(_s) {}
7c673cae 1072
11fdf7f2
TL
1073 void *get_private() {
1074 return s;
1075 }
7c673cae 1076
11fdf7f2
TL
1077 RGWRados *get_store() {
1078 return store;
1079 }
7c673cae 1080
11fdf7f2
TL
1081 RGWObjState *get_state(const rgw_obj& obj) {
1082 RGWObjState *result;
1083 typename std::map<rgw_obj, RGWObjState>::iterator iter;
7c673cae
FG
1084 lock.get_read();
1085 assert (!obj.empty());
1086 iter = objs_state.find(obj);
1087 if (iter != objs_state.end()) {
1088 result = &iter->second;
1089 lock.unlock();
1090 } else {
1091 lock.unlock();
1092 lock.get_write();
1093 result = &objs_state[obj];
1094 lock.unlock();
1095 }
1096 return result;
1097 }
1098
11fdf7f2 1099 void set_atomic(rgw_obj& obj) {
7c673cae
FG
1100 RWLock::WLocker wl(lock);
1101 assert (!obj.empty());
1102 objs_state[obj].is_atomic = true;
1103 }
11fdf7f2 1104 void set_prefetch_data(const rgw_obj& obj) {
7c673cae
FG
1105 RWLock::WLocker wl(lock);
1106 assert (!obj.empty());
1107 objs_state[obj].prefetch_data = true;
1108 }
11fdf7f2
TL
1109
1110 void invalidate(const rgw_obj& obj) {
7c673cae
FG
1111 RWLock::WLocker wl(lock);
1112 auto iter = objs_state.find(obj);
1113 if (iter == objs_state.end()) {
1114 return;
1115 }
1116 bool is_atomic = iter->second.is_atomic;
1117 bool prefetch_data = iter->second.prefetch_data;
1118
1119 objs_state.erase(iter);
1120
1121 if (is_atomic || prefetch_data) {
11fdf7f2
TL
1122 auto& state = objs_state[obj];
1123 state.is_atomic = is_atomic;
1124 state.prefetch_data = prefetch_data;
7c673cae
FG
1125 }
1126 }
1127};
1128
7c673cae
FG
1129class RGWAsyncRadosProcessor;
1130
1131template <class T>
1132class RGWChainedCacheImpl;
1133
1134struct bucket_info_entry {
1135 RGWBucketInfo info;
1136 real_time mtime;
1137 map<string, bufferlist> attrs;
1138};
1139
1140struct tombstone_entry {
1141 ceph::real_time mtime;
1142 uint32_t zone_short_id;
1143 uint64_t pg_ver;
1144
1145 tombstone_entry() = default;
11fdf7f2 1146 explicit tombstone_entry(const RGWObjState& state)
7c673cae
FG
1147 : mtime(state.mtime), zone_short_id(state.zone_short_id),
1148 pg_ver(state.pg_ver) {}
1149};
1150
31f18b77
FG
1151class RGWIndexCompletionManager;
1152
3a9019d9 1153class RGWRados : public AdminSocketHook
7c673cae
FG
1154{
1155 friend class RGWGC;
1156 friend class RGWMetaNotifier;
1157 friend class RGWDataNotifier;
1158 friend class RGWLC;
1159 friend class RGWObjectExpirer;
1160 friend class RGWMetaSyncProcessorThread;
1161 friend class RGWDataSyncProcessorThread;
31f18b77
FG
1162 friend class RGWReshard;
1163 friend class RGWBucketReshard;
f64942e4 1164 friend class RGWBucketReshardLock;
31f18b77 1165 friend class BucketIndexLockGuard;
d2e6a577 1166 friend class RGWCompleteMultipart;
7c673cae 1167
11fdf7f2
TL
1168 static constexpr const char* admin_commands[4][3] = {
1169 { "cache list",
1170 "cache list name=filter,type=CephString,req=false",
1171 "cache list [filter_str]: list object cache, possibly matching substrings" },
1172 { "cache inspect",
1173 "cache inspect name=target,type=CephString,req=true",
1174 "cache inspect target: print cache element" },
1175 { "cache erase",
1176 "cache erase name=target,type=CephString,req=true",
1177 "cache erase target: erase element from cache" },
1178 { "cache zap",
1179 "cache zap",
1180 "cache zap: erase all elements from cache" }
1181 };
3a9019d9 1182
7c673cae
FG
1183 /** Open the pool used as root for this gateway */
1184 int open_root_pool_ctx();
1185 int open_gc_pool_ctx();
1186 int open_lc_pool_ctx();
1187 int open_objexp_pool_ctx();
31f18b77 1188 int open_reshard_pool_ctx();
7c673cae 1189
494da23a
TL
1190 int open_pool_ctx(const rgw_pool& pool, librados::IoCtx& io_ctx,
1191 bool mostly_omap);
7c673cae
FG
1192 int open_bucket_index_ctx(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx);
1193 int open_bucket_index(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx, string& bucket_oid);
1194 int open_bucket_index_base(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
1195 string& bucket_oid_base);
1196 int open_bucket_index_shard(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
1197 const string& obj_key, string *bucket_obj, int *shard_id);
1198 int open_bucket_index_shard(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
1199 int shard_id, string *bucket_obj);
1200 int open_bucket_index(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
1201 map<int, string>& bucket_objs, int shard_id = -1, map<int, string> *bucket_instance_ids = NULL);
1202 template<typename T>
1203 int open_bucket_index(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
1204 map<int, string>& oids, map<int, T>& bucket_objs,
1205 int shard_id = -1, map<int, string> *bucket_instance_ids = NULL);
1206 void build_bucket_index_marker(const string& shard_id_str, const string& shard_marker,
1207 string *marker);
1208
1209 void get_bucket_instance_ids(const RGWBucketInfo& bucket_info, int shard_id, map<int, string> *result);
1210
1211 std::atomic<int64_t> max_req_id = { 0 };
1212 Mutex lock;
7c673cae
FG
1213 SafeTimer *timer;
1214
1215 RGWGC *gc;
1216 RGWLC *lc;
1217 RGWObjectExpirer *obj_expirer;
1218 bool use_gc_thread;
1219 bool use_lc_thread;
1220 bool quota_threads;
1221 bool run_sync_thread;
31f18b77 1222 bool run_reshard_thread;
7c673cae
FG
1223
1224 RGWAsyncRadosProcessor* async_rados;
1225
1226 RGWMetaNotifier *meta_notifier;
1227 RGWDataNotifier *data_notifier;
1228 RGWMetaSyncProcessorThread *meta_sync_processor_thread;
11fdf7f2 1229 RGWSyncTraceManager *sync_tracer = nullptr;
7c673cae
FG
1230 map<string, RGWDataSyncProcessorThread *> data_sync_processor_threads;
1231
b32b8144 1232 boost::optional<rgw::BucketTrimManager> bucket_trim;
7c673cae
FG
1233 RGWSyncLogTrimThread *sync_log_trimmer{nullptr};
1234
1235 Mutex meta_sync_thread_lock;
1236 Mutex data_sync_thread_lock;
1237
7c673cae 1238 librados::IoCtx root_pool_ctx; // .rgw
11fdf7f2
TL
1239
1240 double inject_notify_timeout_probability = 0;
1241 unsigned max_notify_retries = 0;
7c673cae
FG
1242
1243 friend class RGWWatcher;
1244
1245 Mutex bucket_id_lock;
1246
1247 // This field represents the number of bucket index object shards
1248 uint32_t bucket_index_max_shards;
1249
1250 int get_obj_head_ioctx(const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::IoCtx *ioctx);
1251 int get_obj_head_ref(const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_rados_ref *ref);
224ce89b 1252 int get_system_obj_ref(const rgw_raw_obj& obj, rgw_rados_ref *ref);
7c673cae
FG
1253 uint64_t max_bucket_id;
1254
1255 int get_olh_target_state(RGWObjectCtx& rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj,
1256 RGWObjState *olh_state, RGWObjState **target_state);
7c673cae
FG
1257 int get_obj_state_impl(RGWObjectCtx *rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWObjState **state,
1258 bool follow_olh, bool assume_noent = false);
1259 int append_atomic_test(RGWObjectCtx *rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj,
1260 librados::ObjectOperation& op, RGWObjState **state);
11fdf7f2 1261 int append_atomic_test(const RGWObjState* astate, librados::ObjectOperation& op);
7c673cae
FG
1262
1263 int update_placement_map();
1264 int store_bucket_info(RGWBucketInfo& info, map<string, bufferlist> *pattrs, RGWObjVersionTracker *objv_tracker, bool exclusive);
1265
1266 void remove_rgw_head_obj(librados::ObjectWriteOperation& op);
1267 void cls_obj_check_prefix_exist(librados::ObjectOperation& op, const string& prefix, bool fail_if_exist);
1268 void cls_obj_check_mtime(librados::ObjectOperation& op, const real_time& mtime, bool high_precision_time, RGWCheckMTimeType type);
1269protected:
1270 CephContext *cct;
1271
494da23a 1272 librados::Rados rados;
7c673cae
FG
1273
1274 using RGWChainedCacheImpl_bucket_info_entry = RGWChainedCacheImpl<bucket_info_entry>;
1275 RGWChainedCacheImpl_bucket_info_entry *binfo_cache;
1276
1277 using tombstone_cache_t = lru_map<rgw_obj, tombstone_entry>;
1278 tombstone_cache_t *obj_tombstone_cache;
1279
1280 librados::IoCtx gc_pool_ctx; // .rgw.gc
1281 librados::IoCtx lc_pool_ctx; // .rgw.lc
1282 librados::IoCtx objexp_pool_ctx;
31f18b77 1283 librados::IoCtx reshard_pool_ctx;
7c673cae 1284
11fdf7f2 1285 bool pools_initialized;
7c673cae 1286
11fdf7f2 1287 RGWQuotaHandler *quota_handler;
7c673cae 1288
11fdf7f2 1289 RGWCoroutinesManagerRegistry *cr_registry;
7c673cae 1290
11fdf7f2
TL
1291 RGWSyncModuleInstanceRef sync_module;
1292 bool writeable_zone{false};
7c673cae 1293
11fdf7f2 1294 RGWIndexCompletionManager *index_completion_manager{nullptr};
7c673cae 1295
11fdf7f2
TL
1296 bool use_cache{false};
1297public:
1298 RGWRados(): lock("rados_timer_lock"), timer(NULL),
1299 gc(NULL), lc(NULL), obj_expirer(NULL), use_gc_thread(false), use_lc_thread(false), quota_threads(false),
1300 run_sync_thread(false), run_reshard_thread(false), async_rados(nullptr), meta_notifier(NULL),
1301 data_notifier(NULL), meta_sync_processor_thread(NULL),
1302 meta_sync_thread_lock("meta_sync_thread_lock"), data_sync_thread_lock("data_sync_thread_lock"),
1303 bucket_id_lock("rados_bucket_id"),
1304 bucket_index_max_shards(0),
1305 max_bucket_id(0), cct(NULL),
11fdf7f2
TL
1306 binfo_cache(NULL), obj_tombstone_cache(nullptr),
1307 pools_initialized(false),
1308 quota_handler(NULL),
1309 cr_registry(NULL),
1310 meta_mgr(NULL), data_log(NULL), reshard(NULL) {}
7c673cae 1311
11fdf7f2
TL
1312 RGWRados& set_use_cache(bool status) {
1313 use_cache = status;
1314 return *this;
7c673cae
FG
1315 }
1316
11fdf7f2
TL
1317 RGWLC *get_lc() {
1318 return lc;
7c673cae
FG
1319 }
1320
11fdf7f2
TL
1321 RGWRados& set_run_gc_thread(bool _use_gc_thread) {
1322 use_gc_thread = _use_gc_thread;
1323 return *this;
7c673cae
FG
1324 }
1325
11fdf7f2
TL
1326 RGWRados& set_run_lc_thread(bool _use_lc_thread) {
1327 use_lc_thread = _use_lc_thread;
1328 return *this;
7c673cae
FG
1329 }
1330
11fdf7f2
TL
1331 RGWRados& set_run_quota_threads(bool _run_quota_threads) {
1332 quota_threads = _run_quota_threads;
1333 return *this;
7c673cae
FG
1334 }
1335
11fdf7f2
TL
1336 RGWRados& set_run_sync_thread(bool _run_sync_thread) {
1337 run_sync_thread = _run_sync_thread;
1338 return *this;
7c673cae
FG
1339 }
1340
11fdf7f2
TL
1341 RGWRados& set_run_reshard_thread(bool _run_reshard_thread) {
1342 run_reshard_thread = _run_reshard_thread;
1343 return *this;
7c673cae
FG
1344 }
1345
11fdf7f2
TL
1346 uint64_t get_new_req_id() {
1347 return ++max_req_id;
7c673cae
FG
1348 }
1349
11fdf7f2
TL
1350 librados::IoCtx* get_lc_pool_ctx() {
1351 return &lc_pool_ctx;
7c673cae 1352 }
11fdf7f2
TL
1353 void set_context(CephContext *_cct) {
1354 cct = _cct;
7c673cae 1355 }
31f18b77 1356
11fdf7f2
TL
1357 RGWServices svc;
1358
/**
 * AmazonS3 errors contain a HostId string, but it is an opaque base64 blob; we
 * try to be more transparent. This has a wrapper so we can update it when zonegroup/zone are changed.
 */
1363 string host_id;
31f18b77 1364
7c673cae
FG
1365 // pulls missing periods for period_history
1366 std::unique_ptr<RGWPeriodPuller> period_puller;
1367 // maintains a connected history of periods
1368 std::unique_ptr<RGWPeriodHistory> period_history;
1369
1370 RGWAsyncRadosProcessor* get_async_rados() const { return async_rados; };
1371
1372 RGWMetadataManager *meta_mgr;
1373
1374 RGWDataChangesLog *data_log;
1375
31f18b77
FG
1376 RGWReshard *reshard;
1377 std::shared_ptr<RGWReshardWait> reshard_wait;
1378
7c673cae
FG
1379 virtual ~RGWRados() = default;
1380
1381 tombstone_cache_t *get_tombstone_cache() {
1382 return obj_tombstone_cache;
1383 }
7c673cae
FG
1384 const RGWSyncModuleInstanceRef& get_sync_module() {
1385 return sync_module;
1386 }
11fdf7f2
TL
1387 RGWSyncTraceManager *get_sync_tracer() {
1388 return sync_tracer;
1389 }
7c673cae
FG
1390
1391 int get_required_alignment(const rgw_pool& pool, uint64_t *alignment);
11fdf7f2
TL
1392 void get_max_aligned_size(uint64_t size, uint64_t alignment, uint64_t *max_size);
1393 int get_max_chunk_size(const rgw_pool& pool, uint64_t *max_chunk_size, uint64_t *palignment = nullptr);
1394 int get_max_chunk_size(const rgw_placement_rule& placement_rule, const rgw_obj& obj, uint64_t *max_chunk_size, uint64_t *palignment = nullptr);
7c673cae
FG
1395
1396 uint32_t get_max_bucket_shards() {
31f18b77 1397 return rgw_shards_max();
7c673cae
FG
1398 }
1399
181888fb 1400
224ce89b 1401 int get_raw_obj_ref(const rgw_raw_obj& obj, rgw_rados_ref *ref);
7c673cae 1402
181888fb
FG
1403 int list_raw_objects_init(const rgw_pool& pool, const string& marker, RGWListRawObjsCtx *ctx);
1404 int list_raw_objects_next(const string& prefix_filter, int max,
1405 RGWListRawObjsCtx& ctx, list<string>& oids,
1406 bool *is_truncated);
7c673cae
FG
1407 int list_raw_objects(const rgw_pool& pool, const string& prefix_filter, int max,
1408 RGWListRawObjsCtx& ctx, list<string>& oids,
1409 bool *is_truncated);
181888fb 1410 string list_raw_objs_get_cursor(RGWListRawObjsCtx& ctx);
7c673cae 1411
7c673cae
FG
1412 CephContext *ctx() { return cct; }
1413 /** do all necessary setup of the storage device */
11fdf7f2 1414 int initialize(CephContext *_cct) {
7c673cae 1415 set_context(_cct);
7c673cae
FG
1416 return initialize();
1417 }
1418 /** Initialize the RADOS instance and prepare to do other ops */
11fdf7f2
TL
1419 int init_svc(bool raw);
1420 int init_rados();
7c673cae 1421 int init_complete();
7c673cae
FG
1422 int initialize();
1423 void finalize();
1424
224ce89b 1425 int register_to_service_map(const string& daemon_type, const map<string, string>& meta);
11fdf7f2 1426 int update_service_map(std::map<std::string, std::string>&& status);
7c673cae
FG
1427
1428 /// list logs
1429 int log_list_init(const string& prefix, RGWAccessHandle *handle);
1430 int log_list_next(RGWAccessHandle handle, string *name);
1431
1432 /// remove log
1433 int log_remove(const string& name);
1434
1435 /// show log
1436 int log_show_init(const string& name, RGWAccessHandle *handle);
1437 int log_show_next(RGWAccessHandle handle, rgw_log_entry *entry);
1438
1439 // log bandwidth info
1440 int log_usage(map<rgw_user_bucket, RGWUsageBatch>& usage_info);
11fdf7f2
TL
1441 int read_usage(const rgw_user& user, const string& bucket_name, uint64_t start_epoch, uint64_t end_epoch,
1442 uint32_t max_entries, bool *is_truncated, RGWUsageIter& read_iter, map<rgw_user_bucket,
1443 rgw_usage_log_entry>& usage);
1444 int trim_usage(const rgw_user& user, const string& bucket_name, uint64_t start_epoch, uint64_t end_epoch);
1445 int clear_usage();
7c673cae
FG
1446
1447 int create_pool(const rgw_pool& pool);
1448
7c673cae 1449 int init_bucket_index(RGWBucketInfo& bucket_info, int num_shards);
f64942e4 1450 int clean_bucket_index(RGWBucketInfo& bucket_info, int num_shards);
7c673cae
FG
1451 void create_bucket_id(string *bucket_id);
1452
11fdf7f2
TL
1453 bool get_obj_data_pool(const rgw_placement_rule& placement_rule, const rgw_obj& obj, rgw_pool *pool);
1454 bool obj_to_raw(const rgw_placement_rule& placement_rule, const rgw_obj& obj, rgw_raw_obj *raw_obj);
7c673cae 1455
11fdf7f2 1456 int create_bucket(const RGWUserInfo& owner, rgw_bucket& bucket,
7c673cae 1457 const string& zonegroup_id,
11fdf7f2 1458 const rgw_placement_rule& placement_rule,
7c673cae
FG
1459 const string& swift_ver_location,
1460 const RGWQuotaInfo * pquota_info,
1461 map<std::string,bufferlist>& attrs,
1462 RGWBucketInfo& bucket_info,
1463 obj_version *pobjv,
1464 obj_version *pep_objv,
1465 ceph::real_time creation_time,
1466 rgw_bucket *master_bucket,
1467 uint32_t *master_num_shards,
1468 bool exclusive = true);
7c673cae
FG
1469
1470 RGWCoroutinesManagerRegistry *get_cr_registry() { return cr_registry; }
1471
7c673cae
FG
1472 struct BucketShard {
1473 RGWRados *store;
1474 rgw_bucket bucket;
1475 int shard_id;
1476 librados::IoCtx index_ctx;
1477 string bucket_obj;
1478
1479 explicit BucketShard(RGWRados *_store) : store(_store), shard_id(-1) {}
f64942e4
AA
1480 int init(const rgw_bucket& _bucket, const rgw_obj& obj, RGWBucketInfo* out);
1481 int init(const rgw_bucket& _bucket, int sid, RGWBucketInfo* out);
a8e16298 1482 int init(const RGWBucketInfo& bucket_info, const rgw_obj& obj);
b32b8144 1483 int init(const RGWBucketInfo& bucket_info, int sid);
7c673cae
FG
1484 };
1485
1486 class Object {
1487 RGWRados *store;
1488 RGWBucketInfo bucket_info;
1489 RGWObjectCtx& ctx;
1490 rgw_obj obj;
1491
1492 BucketShard bs;
1493
1494 RGWObjState *state;
1495
1496 bool versioning_disabled;
1497
1498 bool bs_initialized;
1499
1500 protected:
1501 int get_state(RGWObjState **pstate, bool follow_olh, bool assume_noent = false);
1502 void invalidate_state();
1503
1504 int prepare_atomic_modification(librados::ObjectWriteOperation& op, bool reset_obj, const string *ptag,
181888fb 1505 const char *ifmatch, const char *ifnomatch, bool removal_op, bool modify_tail);
7c673cae
FG
1506 int complete_atomic_modification();
1507
1508 public:
1509 Object(RGWRados *_store, const RGWBucketInfo& _bucket_info, RGWObjectCtx& _ctx, const rgw_obj& _obj) : store(_store), bucket_info(_bucket_info),
1510 ctx(_ctx), obj(_obj), bs(store),
1511 state(NULL), versioning_disabled(false),
1512 bs_initialized(false) {}
1513
1514 RGWRados *get_store() { return store; }
1515 rgw_obj& get_obj() { return obj; }
1516 RGWObjectCtx& get_ctx() { return ctx; }
1517 RGWBucketInfo& get_bucket_info() { return bucket_info; }
1518 int get_manifest(RGWObjManifest **pmanifest);
1519
1520 int get_bucket_shard(BucketShard **pbs) {
1521 if (!bs_initialized) {
f64942e4
AA
1522 int r =
1523 bs.init(bucket_info.bucket, obj, nullptr /* no RGWBucketInfo */);
7c673cae
FG
1524 if (r < 0) {
1525 return r;
1526 }
1527 bs_initialized = true;
1528 }
1529 *pbs = &bs;
1530 return 0;
1531 }
1532
1533 void set_versioning_disabled(bool status) {
1534 versioning_disabled = status;
1535 }
1536
1537 bool versioning_enabled() {
1538 return (!versioning_disabled && bucket_info.versioning_enabled());
1539 }
1540
1541 struct Read {
1542 RGWRados::Object *source;
1543
1544 struct GetObjState {
11fdf7f2
TL
1545 map<rgw_pool, librados::IoCtx> io_ctxs;
1546 rgw_pool cur_pool;
1547 librados::IoCtx *cur_ioctx{nullptr};
7c673cae
FG
1548 rgw_obj obj;
1549 rgw_raw_obj head_obj;
1550 } state;
1551
1552 struct ConditionParams {
1553 const ceph::real_time *mod_ptr;
1554 const ceph::real_time *unmod_ptr;
1555 bool high_precision_time;
1556 uint32_t mod_zone_id;
1557 uint64_t mod_pg_ver;
1558 const char *if_match;
1559 const char *if_nomatch;
1560
1561 ConditionParams() :
1562 mod_ptr(NULL), unmod_ptr(NULL), high_precision_time(false), mod_zone_id(0), mod_pg_ver(0),
1563 if_match(NULL), if_nomatch(NULL) {}
1564 } conds;
1565
1566 struct Params {
1567 ceph::real_time *lastmod;
1568 uint64_t *obj_size;
1569 map<string, bufferlist> *attrs;
eafe8130 1570 rgw_obj *target_obj;
7c673cae 1571
eafe8130
TL
1572 Params() : lastmod(nullptr), obj_size(nullptr), attrs(nullptr),
1573 target_obj(nullptr) {}
7c673cae
FG
1574 } params;
1575
1576 explicit Read(RGWRados::Object *_source) : source(_source) {}
1577
1578 int prepare();
1579 static int range_to_ofs(uint64_t obj_size, int64_t &ofs, int64_t &end);
1580 int read(int64_t ofs, int64_t end, bufferlist& bl);
1581 int iterate(int64_t ofs, int64_t end, RGWGetDataCB *cb);
1582 int get_attr(const char *name, bufferlist& dest);
1583 };
1584
1585 struct Write {
1586 RGWRados::Object *target;
1587
1588 struct MetaParams {
1589 ceph::real_time *mtime;
1590 map<std::string, bufferlist>* rmattrs;
1591 const bufferlist *data;
1592 RGWObjManifest *manifest;
1593 const string *ptag;
1594 list<rgw_obj_index_key> *remove_objs;
1595 ceph::real_time set_mtime;
1596 rgw_user owner;
1597 RGWObjCategory category;
1598 int flags;
1599 const char *if_match;
1600 const char *if_nomatch;
11fdf7f2 1601 std::optional<uint64_t> olh_epoch;
7c673cae
FG
1602 ceph::real_time delete_at;
1603 bool canceled;
1604 const string *user_data;
31f18b77 1605 rgw_zone_set *zones_trace;
181888fb 1606 bool modify_tail;
3efd9988 1607 bool completeMultipart;
11fdf7f2 1608 bool appendable;
7c673cae
FG
1609
1610 MetaParams() : mtime(NULL), rmattrs(NULL), data(NULL), manifest(NULL), ptag(NULL),
11fdf7f2 1611 remove_objs(NULL), category(RGWObjCategory::Main), flags(0),
91327a77 1612 if_match(NULL), if_nomatch(NULL), canceled(false), user_data(nullptr), zones_trace(nullptr),
11fdf7f2 1613 modify_tail(false), completeMultipart(false), appendable(false) {}
7c673cae
FG
1614 } meta;
1615
1616 explicit Write(RGWRados::Object *_target) : target(_target) {}
1617
1618 int _do_write_meta(uint64_t size, uint64_t accounted_size,
1619 map<std::string, bufferlist>& attrs,
181888fb 1620 bool modify_tail, bool assume_noent,
7c673cae
FG
1621 void *index_op);
1622 int write_meta(uint64_t size, uint64_t accounted_size,
1623 map<std::string, bufferlist>& attrs);
1624 int write_data(const char *data, uint64_t ofs, uint64_t len, bool exclusive);
11fdf7f2
TL
1625 const req_state* get_req_state() {
1626 return (req_state *)target->get_ctx().get_private();
1627 }
7c673cae
FG
1628 };
1629
1630 struct Delete {
1631 RGWRados::Object *target;
1632
1633 struct DeleteParams {
1634 rgw_user bucket_owner;
1635 int versioning_status;
1636 ACLOwner obj_owner; /* needed for creation of deletion marker */
1637 uint64_t olh_epoch;
1638 string marker_version_id;
1639 uint32_t bilog_flags;
1640 list<rgw_obj_index_key> *remove_objs;
1641 ceph::real_time expiration_time;
1642 ceph::real_time unmod_since;
1643 ceph::real_time mtime; /* for setting delete marker mtime */
1644 bool high_precision_time;
31f18b77 1645 rgw_zone_set *zones_trace;
7c673cae 1646
31f18b77 1647 DeleteParams() : versioning_status(0), olh_epoch(0), bilog_flags(0), remove_objs(NULL), high_precision_time(false), zones_trace(nullptr) {}
7c673cae
FG
1648 } params;
1649
1650 struct DeleteResult {
1651 bool delete_marker;
1652 string version_id;
1653
1654 DeleteResult() : delete_marker(false) {}
1655 } result;
1656
1657 explicit Delete(RGWRados::Object *_target) : target(_target) {}
1658
1659 int delete_obj();
1660 };
1661
1662 struct Stat {
1663 RGWRados::Object *source;
1664
1665 struct Result {
1666 rgw_obj obj;
1667 RGWObjManifest manifest;
1668 bool has_manifest;
1669 uint64_t size;
1670 struct timespec mtime;
1671 map<string, bufferlist> attrs;
1672
1673 Result() : has_manifest(false), size(0) {}
1674 } result;
1675
1676 struct State {
1677 librados::IoCtx io_ctx;
1678 librados::AioCompletion *completion;
1679 int ret;
1680
1681 State() : completion(NULL), ret(0) {}
1682 } state;
1683
1684
1685 explicit Stat(RGWRados::Object *_source) : source(_source) {}
1686
1687 int stat_async();
1688 int wait();
1689 int stat();
1690 private:
1691 int finish();
1692 };
1693 };
1694
1695 class Bucket {
1696 RGWRados *store;
1697 RGWBucketInfo bucket_info;
1698 rgw_bucket& bucket;
1699 int shard_id;
1700
1701 public:
1702 Bucket(RGWRados *_store, const RGWBucketInfo& _bucket_info) : store(_store), bucket_info(_bucket_info), bucket(bucket_info.bucket),
1703 shard_id(RGW_NO_SHARD) {}
1704 RGWRados *get_store() { return store; }
1705 rgw_bucket& get_bucket() { return bucket; }
1706 RGWBucketInfo& get_bucket_info() { return bucket_info; }
1707
31f18b77
FG
1708 int update_bucket_id(const string& new_bucket_id);
1709
7c673cae
FG
1710 int get_shard_id() { return shard_id; }
1711 void set_shard_id(int id) {
1712 shard_id = id;
1713 }
1714
1715 class UpdateIndex {
1716 RGWRados::Bucket *target;
1717 string optag;
1718 rgw_obj obj;
1719 uint16_t bilog_flags{0};
1720 BucketShard bs;
1721 bool bs_initialized{false};
1722 bool blind;
1723 bool prepared{false};
31f18b77
FG
1724 rgw_zone_set *zones_trace{nullptr};
1725
1726 int init_bs() {
f64942e4
AA
1727 int r =
1728 bs.init(target->get_bucket(), obj, nullptr /* no RGWBucketInfo */);
31f18b77
FG
1729 if (r < 0) {
1730 return r;
1731 }
1732 bs_initialized = true;
1733 return 0;
1734 }
1735
1736 void invalidate_bs() {
1737 bs_initialized = false;
1738 }
1739
1740 int guard_reshard(BucketShard **pbs, std::function<int(BucketShard *)> call);
7c673cae
FG
1741 public:
1742
1743 UpdateIndex(RGWRados::Bucket *_target, const rgw_obj& _obj) : target(_target), obj(_obj),
1744 bs(target->get_store()) {
1745 blind = (target->get_bucket_info().index_type == RGWBIType_Indexless);
1746 }
1747
1748 int get_bucket_shard(BucketShard **pbs) {
1749 if (!bs_initialized) {
31f18b77 1750 int r = init_bs();
7c673cae
FG
1751 if (r < 0) {
1752 return r;
1753 }
7c673cae
FG
1754 }
1755 *pbs = &bs;
1756 return 0;
1757 }
1758
1759 void set_bilog_flags(uint16_t flags) {
1760 bilog_flags = flags;
1761 }
31f18b77
FG
1762
1763 void set_zones_trace(rgw_zone_set *_zones_trace) {
1764 zones_trace = _zones_trace;
1765 }
7c673cae
FG
1766
1767 int prepare(RGWModifyOp, const string *write_tag);
1768 int complete(int64_t poolid, uint64_t epoch, uint64_t size,
1769 uint64_t accounted_size, ceph::real_time& ut,
1770 const string& etag, const string& content_type,
11fdf7f2 1771 const string& storage_class,
7c673cae 1772 bufferlist *acl_bl, RGWObjCategory category,
11fdf7f2 1773 list<rgw_obj_index_key> *remove_objs, const string *user_data = nullptr, bool appendable = false);
7c673cae
FG
1774 int complete_del(int64_t poolid, uint64_t epoch,
1775 ceph::real_time& removed_mtime, /* mtime of removed object */
1776 list<rgw_obj_index_key> *remove_objs);
1777 int cancel();
1778
1779 const string *get_optag() { return &optag; }
1780
1781 bool is_prepared() { return prepared; }
1adf2230
AA
1782 }; // class UpdateIndex
1783
1784 class List {
1785 protected:
eafe8130
TL
1786 // absolute maximum number of objects that
1787 // list_objects_(un)ordered can return
1788 static constexpr int64_t bucket_list_objects_absolute_max = 25000;
7c673cae 1789
7c673cae
FG
1790 RGWRados::Bucket *target;
1791 rgw_obj_key next_marker;
1792
1adf2230
AA
1793 int list_objects_ordered(int64_t max,
1794 vector<rgw_bucket_dir_entry> *result,
1795 map<string, bool> *common_prefixes,
1796 bool *is_truncated);
1797 int list_objects_unordered(int64_t max,
1798 vector<rgw_bucket_dir_entry> *result,
1799 map<string, bool> *common_prefixes,
1800 bool *is_truncated);
1801
1802 public:
1803
7c673cae
FG
1804 struct Params {
1805 string prefix;
1806 string delim;
1807 rgw_obj_key marker;
1808 rgw_obj_key end_marker;
1809 string ns;
1810 bool enforce_ns;
1811 RGWAccessListFilter *filter;
1812 bool list_versions;
1adf2230
AA
1813 bool allow_unordered;
1814
1815 Params() :
1816 enforce_ns(true),
1817 filter(NULL),
1818 list_versions(false),
1819 allow_unordered(false)
1820 {}
7c673cae
FG
1821 } params;
1822
7c673cae
FG
1823 explicit List(RGWRados::Bucket *_target) : target(_target) {}
1824
1adf2230
AA
1825 int list_objects(int64_t max,
1826 vector<rgw_bucket_dir_entry> *result,
1827 map<string, bool> *common_prefixes,
1828 bool *is_truncated) {
1829 if (params.allow_unordered) {
1830 return list_objects_unordered(max, result, common_prefixes,
1831 is_truncated);
1832 } else {
1833 return list_objects_ordered(max, result, common_prefixes,
1834 is_truncated);
1835 }
1836 }
7c673cae
FG
1837 rgw_obj_key& get_next_marker() {
1838 return next_marker;
1839 }
1adf2230
AA
1840 }; // class List
1841 }; // class Bucket
7c673cae 1842
7c673cae
FG
1843 int on_last_entry_in_listing(RGWBucketInfo& bucket_info,
1844 const std::string& obj_prefix,
1845 const std::string& obj_delim,
1846 std::function<int(const rgw_bucket_dir_entry&)> handler);
1847
1848 bool swift_versioning_enabled(const RGWBucketInfo& bucket_info) const {
1849 return bucket_info.has_swift_versioning() &&
1850 bucket_info.swift_ver_location.size();
1851 }
1852
1853 int swift_versioning_copy(RGWObjectCtx& obj_ctx, /* in/out */
1854 const rgw_user& user, /* in */
1855 RGWBucketInfo& bucket_info, /* in */
1856 rgw_obj& obj); /* in */
11fdf7f2
TL
1857 int swift_versioning_restore(RGWSysObjectCtx& sysobj_ctx,
1858 RGWObjectCtx& obj_ctx, /* in/out */
7c673cae
FG
1859 const rgw_user& user, /* in */
1860 RGWBucketInfo& bucket_info, /* in */
1861 rgw_obj& obj, /* in */
1862 bool& restored); /* out */
1863 int copy_obj_to_remote_dest(RGWObjState *astate,
1864 map<string, bufferlist>& src_attrs,
1865 RGWRados::Object::Read& read_op,
1866 const rgw_user& user_id,
1867 rgw_obj& dest_obj,
1868 ceph::real_time *mtime);
1869
1870 enum AttrsMod {
1871 ATTRSMOD_NONE = 0,
1872 ATTRSMOD_REPLACE = 1,
1873 ATTRSMOD_MERGE = 2
1874 };
1875
11fdf7f2 1876 int rewrite_obj(RGWBucketInfo& dest_bucket_info, const rgw_obj& obj);
7c673cae
FG
1877
1878 int stat_remote_obj(RGWObjectCtx& obj_ctx,
1879 const rgw_user& user_id,
7c673cae
FG
1880 req_info *info,
1881 const string& source_zone,
1882 rgw_obj& src_obj,
1883 RGWBucketInfo& src_bucket_info,
1884 real_time *src_mtime,
1885 uint64_t *psize,
1886 const real_time *mod_ptr,
1887 const real_time *unmod_ptr,
1888 bool high_precision_time,
1889 const char *if_match,
1890 const char *if_nomatch,
1891 map<string, bufferlist> *pattrs,
11fdf7f2 1892 map<string, string> *pheaders,
7c673cae
FG
1893 string *version_id,
1894 string *ptag,
1895 string *petag);
1896
1897 int fetch_remote_obj(RGWObjectCtx& obj_ctx,
1898 const rgw_user& user_id,
7c673cae
FG
1899 req_info *info,
1900 const string& source_zone,
11fdf7f2
TL
1901 const rgw_obj& dest_obj,
1902 const rgw_obj& src_obj,
7c673cae
FG
1903 RGWBucketInfo& dest_bucket_info,
1904 RGWBucketInfo& src_bucket_info,
11fdf7f2 1905 std::optional<rgw_placement_rule> dest_placement,
7c673cae
FG
1906 ceph::real_time *src_mtime,
1907 ceph::real_time *mtime,
1908 const ceph::real_time *mod_ptr,
1909 const ceph::real_time *unmod_ptr,
1910 bool high_precision_time,
1911 const char *if_match,
1912 const char *if_nomatch,
1913 AttrsMod attrs_mod,
1914 bool copy_if_newer,
1915 map<string, bufferlist>& attrs,
1916 RGWObjCategory category,
11fdf7f2 1917 std::optional<uint64_t> olh_epoch,
7c673cae 1918 ceph::real_time delete_at,
7c673cae 1919 string *ptag,
11fdf7f2 1920 string *petag,
7c673cae 1921 void (*progress_cb)(off_t, void *),
31f18b77 1922 void *progress_data,
81eedcae
TL
1923 rgw_zone_set *zones_trace= nullptr,
1924 std::optional<uint64_t>* bytes_transferred = 0);
7c673cae
FG
1925 /**
1926 * Copy an object.
1927 * dest_obj: the object to copy into
1928 * src_obj: the object to copy from
1929 * attrs: usage depends on attrs_mod parameter
1930 * attrs_mod: the modification mode of the attrs, may have the following values:
1931 * ATTRSMOD_NONE - the attributes of the source object will be
1932 * copied without modifications, attrs parameter is ignored;
1933 * ATTRSMOD_REPLACE - new object will have the attributes provided by attrs
1934 * parameter, source object attributes are not copied;
1935 * ATTRSMOD_MERGE - any conflicting meta keys on the source object's attributes
1936 * are overwritten by values contained in attrs parameter.
7c673cae
FG
1937 * Returns: 0 on success, -ERR# otherwise.
1938 */
1939 int copy_obj(RGWObjectCtx& obj_ctx,
1940 const rgw_user& user_id,
7c673cae
FG
1941 req_info *info,
1942 const string& source_zone,
1943 rgw_obj& dest_obj,
1944 rgw_obj& src_obj,
1945 RGWBucketInfo& dest_bucket_info,
1946 RGWBucketInfo& src_bucket_info,
11fdf7f2 1947 const rgw_placement_rule& dest_placement,
7c673cae
FG
1948 ceph::real_time *src_mtime,
1949 ceph::real_time *mtime,
1950 const ceph::real_time *mod_ptr,
1951 const ceph::real_time *unmod_ptr,
1952 bool high_precision_time,
1953 const char *if_match,
1954 const char *if_nomatch,
1955 AttrsMod attrs_mod,
1956 bool copy_if_newer,
1957 map<std::string, bufferlist>& attrs,
1958 RGWObjCategory category,
1959 uint64_t olh_epoch,
1960 ceph::real_time delete_at,
1961 string *version_id,
1962 string *ptag,
11fdf7f2 1963 string *petag,
7c673cae
FG
1964 void (*progress_cb)(off_t, void *),
1965 void *progress_data);
1966
1967 int copy_obj_data(RGWObjectCtx& obj_ctx,
1968 RGWBucketInfo& dest_bucket_info,
11fdf7f2 1969 const rgw_placement_rule& dest_placement,
7c673cae 1970 RGWRados::Object::Read& read_op, off_t end,
11fdf7f2 1971 const rgw_obj& dest_obj,
7c673cae
FG
1972 ceph::real_time *mtime,
1973 ceph::real_time set_mtime,
1974 map<string, bufferlist>& attrs,
7c673cae
FG
1975 uint64_t olh_epoch,
1976 ceph::real_time delete_at,
11fdf7f2 1977 string *petag);
7c673cae 1978
11fdf7f2
TL
1979 int transition_obj(RGWObjectCtx& obj_ctx,
1980 RGWBucketInfo& bucket_info,
1981 rgw_obj& obj,
1982 const rgw_placement_rule& placement_rule,
1983 const real_time& mtime,
1984 uint64_t olh_epoch);
1985
7c673cae
FG
1986 int check_bucket_empty(RGWBucketInfo& bucket_info);
1987
/**
 * Delete a bucket.
 * bucket_info: info of the bucket to delete
 * Returns 0 on success, -ERR# otherwise.
 */
1993 int delete_bucket(RGWBucketInfo& bucket_info, RGWObjVersionTracker& objv_tracker, bool check_empty = true);
1994
7c673cae
FG
1995 void wakeup_meta_sync_shards(set<int>& shard_ids);
1996 void wakeup_data_sync_shards(const string& source_zone, map<int, set<string> >& shard_ids);
1997
1998 RGWMetaSyncStatusManager* get_meta_sync_manager();
1999 RGWDataSyncStatusManager* get_data_sync_manager(const std::string& source_zone);
2000
2001 int set_bucket_owner(rgw_bucket& bucket, ACLOwner& owner);
2002 int set_buckets_enabled(std::vector<rgw_bucket>& buckets, bool enabled);
2003 int bucket_suspended(rgw_bucket& bucket, bool *suspended);
2004
2005 /** Delete an object.*/
2006 int delete_obj(RGWObjectCtx& obj_ctx,
2007 const RGWBucketInfo& bucket_owner,
2008 const rgw_obj& src_obj,
2009 int versioning_status,
2010 uint16_t bilog_flags = 0,
31f18b77
FG
2011 const ceph::real_time& expiration_time = ceph::real_time(),
2012 rgw_zone_set *zones_trace = nullptr);
7c673cae 2013
7c673cae
FG
2014 int delete_raw_obj(const rgw_raw_obj& obj);
2015
7c673cae 2016 /** Remove an object from the bucket index */
494da23a 2017 int delete_obj_index(const rgw_obj& obj, ceph::real_time mtime);
7c673cae 2018
7c673cae
FG
2019 /**
2020 * Set an attr on an object.
2021 * bucket: name of the bucket holding the object
2022 * obj: name of the object to set the attr on
2023 * name: the attr to set
2024 * bl: the contents of the attr
2025 * Returns: 0 on success, -ERR# otherwise.
2026 */
2027 int set_attr(void *ctx, const RGWBucketInfo& bucket_info, rgw_obj& obj, const char *name, bufferlist& bl);
2028
2029 int set_attrs(void *ctx, const RGWBucketInfo& bucket_info, rgw_obj& obj,
2030 map<string, bufferlist>& attrs,
2031 map<string, bufferlist>* rmattrs);
2032
7c673cae
FG
2033 int get_obj_state(RGWObjectCtx *rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWObjState **state,
2034 bool follow_olh, bool assume_noent = false);
2035 int get_obj_state(RGWObjectCtx *rctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWObjState **state) {
2036 return get_obj_state(rctx, bucket_info, obj, state, true);
2037 }
2038
11fdf7f2
TL
2039 using iterate_obj_cb = int (*)(const rgw_raw_obj&, off_t, off_t,
2040 off_t, bool, RGWObjState*, void*);
2041
2042 int iterate_obj(RGWObjectCtx& ctx, const RGWBucketInfo& bucket_info,
2043 const rgw_obj& obj, off_t ofs, off_t end,
2044 uint64_t max_chunk_size, iterate_obj_cb cb, void *arg);
7c673cae
FG
2045
2046 int flush_read_list(struct get_obj_data *d);
2047
11fdf7f2
TL
2048 int get_obj_iterate_cb(const rgw_raw_obj& read_obj, off_t obj_ofs,
2049 off_t read_ofs, off_t len, bool is_head_obj,
2050 RGWObjState *astate, void *arg);
7c673cae
FG
2051
2052 void get_obj_aio_completion_cb(librados::completion_t cb, void *arg);
2053
2054 /**
2055 * a simple object read without keeping state
2056 */
2057
11fdf7f2
TL
2058 int raw_obj_stat(rgw_raw_obj& obj, uint64_t *psize, ceph::real_time *pmtime, uint64_t *epoch,
2059 map<string, bufferlist> *attrs, bufferlist *first_chunk,
2060 RGWObjVersionTracker *objv_tracker);
7c673cae
FG
2061
2062 int obj_operate(const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::ObjectWriteOperation *op);
2063 int obj_operate(const RGWBucketInfo& bucket_info, const rgw_obj& obj, librados::ObjectReadOperation *op);
2064
f64942e4
AA
2065 int guard_reshard(BucketShard *bs,
2066 const rgw_obj& obj_instance,
2067 const RGWBucketInfo& bucket_info,
2068 std::function<int(BucketShard *)> call);
2069 int block_while_resharding(RGWRados::BucketShard *bs,
2070 string *new_bucket_id,
11fdf7f2
TL
2071 const RGWBucketInfo& bucket_info,
2072 optional_yield y);
31f18b77 2073
7c673cae
FG
2074 void bucket_index_guard_olh_op(RGWObjState& olh_state, librados::ObjectOperation& op);
2075 int olh_init_modification(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& olh_obj, string *op_tag);
2076 int olh_init_modification_impl(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& olh_obj, string *op_tag);
2077 int bucket_index_link_olh(const RGWBucketInfo& bucket_info, RGWObjState& olh_state,
2078 const rgw_obj& obj_instance, bool delete_marker,
2079 const string& op_tag, struct rgw_bucket_dir_entry_meta *meta,
2080 uint64_t olh_epoch,
91327a77
AA
2081 ceph::real_time unmod_since, bool high_precision_time,
2082 rgw_zone_set *zones_trace = nullptr,
2083 bool log_data_change = false);
31f18b77 2084 int bucket_index_unlink_instance(const RGWBucketInfo& bucket_info, const rgw_obj& obj_instance, const string& op_tag, const string& olh_tag, uint64_t olh_epoch, rgw_zone_set *zones_trace = nullptr);
7c673cae
FG
2085 int bucket_index_read_olh_log(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& obj_instance, uint64_t ver_marker,
2086 map<uint64_t, vector<rgw_bucket_olh_log_entry> > *log, bool *is_truncated);
2087 int bucket_index_trim_olh_log(const RGWBucketInfo& bucket_info, RGWObjState& obj_state, const rgw_obj& obj_instance, uint64_t ver);
2088 int bucket_index_clear_olh(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& obj_instance);
2089 int apply_olh_log(RGWObjectCtx& ctx, RGWObjState& obj_state, const RGWBucketInfo& bucket_info, const rgw_obj& obj,
2090 bufferlist& obj_tag, map<uint64_t, vector<rgw_bucket_olh_log_entry> >& log,
31f18b77
FG
2091 uint64_t *plast_ver, rgw_zone_set *zones_trace = nullptr);
2092 int update_olh(RGWObjectCtx& obj_ctx, RGWObjState *state, const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_zone_set *zones_trace = nullptr);
7c673cae 2093 int set_olh(RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info, const rgw_obj& target_obj, bool delete_marker, rgw_bucket_dir_entry_meta *meta,
91327a77
AA
2094 uint64_t olh_epoch, ceph::real_time unmod_since, bool high_precision_time,
2095 rgw_zone_set *zones_trace = nullptr, bool log_data_change = false);
a8e16298
TL
2096 int repair_olh(RGWObjState* state, const RGWBucketInfo& bucket_info,
2097 const rgw_obj& obj);
7c673cae 2098 int unlink_obj_instance(RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info, const rgw_obj& target_obj,
31f18b77 2099 uint64_t olh_epoch, rgw_zone_set *zones_trace = nullptr);
7c673cae
FG
2100
2101 void check_pending_olh_entries(map<string, bufferlist>& pending_entries, map<string, bufferlist> *rm_pending_entries);
2102 int remove_olh_pending_entries(const RGWBucketInfo& bucket_info, RGWObjState& state, const rgw_obj& olh_obj, map<string, bufferlist>& pending_attrs);
2103 int follow_olh(const RGWBucketInfo& bucket_info, RGWObjectCtx& ctx, RGWObjState *state, const rgw_obj& olh_obj, rgw_obj *target);
2104 int get_olh(const RGWBucketInfo& bucket_info, const rgw_obj& obj, RGWOLHInfo *olh);
2105
11fdf7f2 2106 void gen_rand_obj_instance_name(rgw_obj_key *target_key);
7c673cae
FG
2107 void gen_rand_obj_instance_name(rgw_obj *target);
2108
7c673cae
FG
2109 int update_containers_stats(map<string, RGWBucketEnt>& m);
2110 int append_async(rgw_raw_obj& obj, size_t size, bufferlist& bl);
2111
11fdf7f2 2112public:
7c673cae
FG
2113 void set_atomic(void *ctx, rgw_obj& obj) {
2114 RGWObjectCtx *rctx = static_cast<RGWObjectCtx *>(ctx);
11fdf7f2 2115 rctx->set_atomic(obj);
7c673cae 2116 }
11fdf7f2 2117 void set_prefetch_data(void *ctx, const rgw_obj& obj) {
7c673cae 2118 RGWObjectCtx *rctx = static_cast<RGWObjectCtx *>(ctx);
11fdf7f2 2119 rctx->set_prefetch_data(obj);
7c673cae 2120 }
7c673cae
FG
2121 int decode_policy(bufferlist& bl, ACLOwner *owner);
2122 int get_bucket_stats(RGWBucketInfo& bucket_info, int shard_id, string *bucket_ver, string *master_ver,
c07f9fc5 2123 map<RGWObjCategory, RGWStorageStats>& stats, string *max_marker, bool* syncstopped = NULL);
7c673cae
FG
2124 int get_bucket_stats_async(RGWBucketInfo& bucket_info, int shard_id, RGWGetBucketStats_CB *cb);
2125 int get_user_stats(const rgw_user& user, RGWStorageStats& stats);
2126 int get_user_stats_async(const rgw_user& user, RGWGetUserStats_CB *cb);
2127 void get_bucket_instance_obj(const rgw_bucket& bucket, rgw_raw_obj& obj);
2128 void get_bucket_meta_oid(const rgw_bucket& bucket, string& oid);
2129
2130 int put_bucket_entrypoint_info(const string& tenant_name, const string& bucket_name, RGWBucketEntryPoint& entry_point,
2131 bool exclusive, RGWObjVersionTracker& objv_tracker, ceph::real_time mtime,
2132 map<string, bufferlist> *pattrs);
2133 int put_bucket_instance_info(RGWBucketInfo& info, bool exclusive, ceph::real_time mtime, map<string, bufferlist> *pattrs);
11fdf7f2 2134 int get_bucket_entrypoint_info(RGWSysObjectCtx& obj_ctx, const string& tenant_name, const string& bucket_name,
7c673cae 2135 RGWBucketEntryPoint& entry_point, RGWObjVersionTracker *objv_tracker,
b32b8144
FG
2136 ceph::real_time *pmtime, map<string, bufferlist> *pattrs, rgw_cache_entry_info *cache_info = NULL,
2137 boost::optional<obj_version> refresh_version = boost::none);
11fdf7f2
TL
2138 int get_bucket_instance_info(RGWSysObjectCtx& obj_ctx, const string& meta_key, RGWBucketInfo& info, ceph::real_time *pmtime, map<string, bufferlist> *pattrs);
2139 int get_bucket_instance_info(RGWSysObjectCtx& obj_ctx, const rgw_bucket& bucket, RGWBucketInfo& info, ceph::real_time *pmtime, map<string, bufferlist> *pattrs);
2140 int get_bucket_instance_from_oid(RGWSysObjectCtx& obj_ctx, const string& oid, RGWBucketInfo& info, ceph::real_time *pmtime, map<string, bufferlist> *pattrs,
b32b8144
FG
2141 rgw_cache_entry_info *cache_info = NULL,
2142 boost::optional<obj_version> refresh_version = boost::none);
7c673cae 2143
11fdf7f2 2144 int convert_old_bucket_info(RGWSysObjectCtx& obj_ctx, const string& tenant_name, const string& bucket_name);
7c673cae 2145 static void make_bucket_entry_name(const string& tenant_name, const string& bucket_name, string& bucket_entry);
b32b8144
FG
2146
2147
2148private:
11fdf7f2 2149 int _get_bucket_info(RGWSysObjectCtx& obj_ctx, const string& tenant,
b32b8144
FG
2150 const string& bucket_name, RGWBucketInfo& info,
2151 real_time *pmtime,
2152 map<string, bufferlist> *pattrs,
2153 boost::optional<obj_version> refresh_version);
2154public:
2155
11fdf7f2
TL
2156 bool call(std::string_view command, const cmdmap_t& cmdmap,
2157 std::string_view format,
3a9019d9
FG
2158 bufferlist& out) override final;
2159
3a9019d9 2160protected:
3a9019d9
FG
2161 // `call_list` must iterate over all cache entries and call
2162 // `cache_list_dump_helper` with the supplied Formatter on any that
2163 // include `filter` as a substring.
2164 //
11fdf7f2 2165 void call_list(const std::optional<std::string>& filter,
3a9019d9
FG
2166 Formatter* format);
2167 // `call_inspect` must look up the requested target and, if found,
2168 // dump it to the supplied Formatter and return true. If not found,
2169 // it must return false.
2170 //
11fdf7f2 2171 bool call_inspect(const std::string& target, Formatter* format);
3a9019d9
FG
2172
2173 // `call_erase` must erase the requested target and return true. If
2174 // the requested target does not exist, it should return false.
11fdf7f2 2175 bool call_erase(const std::string& target);
3a9019d9
FG
2176
2177 // `call_zap` must erase the cache.
11fdf7f2 2178 void call_zap();
3a9019d9 2179public:
b32b8144 2180
11fdf7f2 2181 int get_bucket_info(RGWSysObjectCtx& obj_ctx,
b32b8144
FG
2182 const string& tenant_name, const string& bucket_name,
2183 RGWBucketInfo& info,
2184 ceph::real_time *pmtime, map<string, bufferlist> *pattrs = NULL);
2185
81eedcae
TL
2186 // Returns 0 on successful refresh. Returns error code if there was
2187 // an error or the version stored on the OSD is the same as that
b32b8144
FG
2188 // presented in the BucketInfo structure.
2189 //
2190 int try_refresh_bucket_info(RGWBucketInfo& info,
2191 ceph::real_time *pmtime,
2192 map<string, bufferlist> *pattrs = nullptr);
2193
7c673cae 2194 int put_linked_bucket_info(RGWBucketInfo& info, bool exclusive, ceph::real_time mtime, obj_version *pep_objv,
b32b8144 2195 map<string, bufferlist> *pattrs, bool create_entry_point);
7c673cae 2196
31f18b77
FG
2197 int cls_obj_prepare_op(BucketShard& bs, RGWModifyOp op, string& tag, rgw_obj& obj, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
2198 int cls_obj_complete_op(BucketShard& bs, const rgw_obj& obj, RGWModifyOp op, string& tag, int64_t pool, uint64_t epoch,
2199 rgw_bucket_dir_entry& ent, RGWObjCategory category, list<rgw_obj_index_key> *remove_objs, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
2200 int cls_obj_complete_add(BucketShard& bs, const rgw_obj& obj, string& tag, int64_t pool, uint64_t epoch, rgw_bucket_dir_entry& ent,
2201 RGWObjCategory category, list<rgw_obj_index_key> *remove_objs, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
7c673cae 2202 int cls_obj_complete_del(BucketShard& bs, string& tag, int64_t pool, uint64_t epoch, rgw_obj& obj,
31f18b77
FG
2203 ceph::real_time& removed_mtime, list<rgw_obj_index_key> *remove_objs, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
2204 int cls_obj_complete_cancel(BucketShard& bs, string& tag, rgw_obj& obj, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
7c673cae 2205 int cls_obj_set_bucket_tag_timeout(RGWBucketInfo& bucket_info, uint64_t timeout);
1adf2230 2206 int cls_bucket_list_ordered(RGWBucketInfo& bucket_info, int shard_id,
11fdf7f2
TL
2207 const rgw_obj_index_key& start,
2208 const string& prefix,
1adf2230
AA
2209 uint32_t num_entries, bool list_versions,
2210 map<string, rgw_bucket_dir_entry>& m,
2211 bool *is_truncated,
2212 rgw_obj_index_key *last_entry,
2213 bool (*force_check_filter)(const string& name) = nullptr);
2214 int cls_bucket_list_unordered(RGWBucketInfo& bucket_info, int shard_id,
11fdf7f2
TL
2215 const rgw_obj_index_key& start,
2216 const string& prefix,
1adf2230
AA
2217 uint32_t num_entries, bool list_versions,
2218 vector<rgw_bucket_dir_entry>& ent_list,
2219 bool *is_truncated, rgw_obj_index_key *last_entry,
2220 bool (*force_check_filter)(const string& name) = nullptr);
a8e16298 2221 int cls_bucket_head(const RGWBucketInfo& bucket_info, int shard_id, vector<rgw_bucket_dir_header>& headers, map<int, string> *bucket_instance_ids = NULL);
7c673cae
FG
2222 int cls_bucket_head_async(const RGWBucketInfo& bucket_info, int shard_id, RGWGetDirHeader_CB *ctx, int *num_aio);
2223 int list_bi_log_entries(RGWBucketInfo& bucket_info, int shard_id, string& marker, uint32_t max, std::list<rgw_bi_log_entry>& result, bool *truncated);
2224 int trim_bi_log_entries(RGWBucketInfo& bucket_info, int shard_id, string& marker, string& end_marker);
c07f9fc5
FG
2225 int resync_bi_log_entries(RGWBucketInfo& bucket_info, int shard_id);
2226 int stop_bi_log_entries(RGWBucketInfo& bucket_info, int shard_id);
7c673cae
FG
2227 int get_bi_log_status(RGWBucketInfo& bucket_info, int shard_id, map<int, string>& max_marker);
2228
a8e16298
TL
2229 int bi_get_instance(const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_bucket_dir_entry *dirent);
2230 int bi_get_olh(const RGWBucketInfo& bucket_info, const rgw_obj& obj, rgw_bucket_olh_entry *olh);
2231 int bi_get(const RGWBucketInfo& bucket_info, const rgw_obj& obj, BIIndexType index_type, rgw_cls_bi_entry *entry);
7c673cae
FG
2232 void bi_put(librados::ObjectWriteOperation& op, BucketShard& bs, rgw_cls_bi_entry& entry);
2233 int bi_put(BucketShard& bs, rgw_cls_bi_entry& entry);
2234 int bi_put(rgw_bucket& bucket, rgw_obj& obj, rgw_cls_bi_entry& entry);
2235 int bi_list(rgw_bucket& bucket, int shard_id, const string& filter_obj, const string& marker, uint32_t max, list<rgw_cls_bi_entry> *entries, bool *is_truncated);
2236 int bi_list(BucketShard& bs, const string& filter_obj, const string& marker, uint32_t max, list<rgw_cls_bi_entry> *entries, bool *is_truncated);
2237 int bi_list(rgw_bucket& bucket, const string& obj_name, const string& marker, uint32_t max,
2238 list<rgw_cls_bi_entry> *entries, bool *is_truncated);
2239 int bi_remove(BucketShard& bs);
2240
2241 int cls_obj_usage_log_add(const string& oid, rgw_usage_log_info& info);
11fdf7f2
TL
2242 int cls_obj_usage_log_read(const string& oid, const string& user, const string& bucket, uint64_t start_epoch,
2243 uint64_t end_epoch, uint32_t max_entries, string& read_iter, map<rgw_user_bucket,
2244 rgw_usage_log_entry>& usage, bool *is_truncated);
2245 int cls_obj_usage_log_trim(const string& oid, const string& user, const string& bucket, uint64_t start_epoch,
2246 uint64_t end_epoch);
2247 int cls_obj_usage_log_clear(string& oid);
7c673cae
FG
2248
2249 int key_to_shard_id(const string& key, int max_shards);
2250 void shard_name(const string& prefix, unsigned max_shards, const string& key, string& name, int *shard_id);
2251 void shard_name(const string& prefix, unsigned max_shards, const string& section, const string& key, string& name);
2252 void shard_name(const string& prefix, unsigned shard_id, string& name);
2253 int get_target_shard_id(const RGWBucketInfo& bucket_info, const string& obj_key, int *shard_id);
2254 void time_log_prepare_entry(cls_log_entry& entry, const ceph::real_time& ut, const string& section, const string& key, bufferlist& bl);
2255 int time_log_add_init(librados::IoCtx& io_ctx);
2256 int time_log_add(const string& oid, list<cls_log_entry>& entries,
2257 librados::AioCompletion *completion, bool monotonic_inc = true);
2258 int time_log_add(const string& oid, const ceph::real_time& ut, const string& section, const string& key, bufferlist& bl);
2259 int time_log_list(const string& oid, const ceph::real_time& start_time, const ceph::real_time& end_time,
2260 int max_entries, list<cls_log_entry>& entries,
2261 const string& marker, string *out_marker, bool *truncated);
2262 int time_log_info(const string& oid, cls_log_header *header);
2263 int time_log_info_async(librados::IoCtx& io_ctx, const string& oid, cls_log_header *header, librados::AioCompletion *completion);
2264 int time_log_trim(const string& oid, const ceph::real_time& start_time, const ceph::real_time& end_time,
2265 const string& from_marker, const string& to_marker,
2266 librados::AioCompletion *completion = nullptr);
2267
2268 string objexp_hint_get_shardname(int shard_num);
2269 int objexp_key_shard(const rgw_obj_index_key& key);
2270 void objexp_get_shard(int shard_num,
2271 string& shard); /* out */
2272 int objexp_hint_add(const ceph::real_time& delete_at,
2273 const string& tenant_name,
2274 const string& bucket_name,
2275 const string& bucket_id,
2276 const rgw_obj_index_key& obj_key);
2277 int objexp_hint_list(const string& oid,
2278 const ceph::real_time& start_time,
2279 const ceph::real_time& end_time,
2280 const int max_entries,
2281 const string& marker,
2282 list<cls_timeindex_entry>& entries, /* out */
2283 string *out_marker, /* out */
2284 bool *truncated); /* out */
2285 int objexp_hint_parse(cls_timeindex_entry &ti_entry,
2286 objexp_hint_entry& hint_entry); /* out */
2287 int objexp_hint_trim(const string& oid,
2288 const ceph::real_time& start_time,
2289 const ceph::real_time& end_time,
2290 const string& from_marker = std::string(),
2291 const string& to_marker = std::string());
2292
11fdf7f2
TL
2293 int lock_exclusive(const rgw_pool& pool, const string& oid, ceph::timespan& duration, string& zone_id, string& owner_id);
2294 int unlock(const rgw_pool& pool, const string& oid, string& zone_id, string& owner_id);
7c673cae
FG
2295
2296 void update_gc_chain(rgw_obj& head_obj, RGWObjManifest& manifest, cls_rgw_obj_chain *chain);
2297 int send_chain_to_gc(cls_rgw_obj_chain& chain, const string& tag, bool sync);
2298 int gc_operate(string& oid, librados::ObjectWriteOperation *op);
11fdf7f2 2299 int gc_aio_operate(string& oid, librados::ObjectWriteOperation *op, librados::AioCompletion **pc = nullptr);
7c673cae
FG
2300 int gc_operate(string& oid, librados::ObjectReadOperation *op, bufferlist *pbl);
2301
2302 int list_gc_objs(int *index, string& marker, uint32_t max, bool expired_only, std::list<cls_rgw_gc_obj_info>& result, bool *truncated);
11fdf7f2 2303 int process_gc(bool expired_only);
1adf2230 2304 bool process_expire_objects();
7c673cae
FG
2305 int defer_gc(void *ctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj);
2306
2307 int process_lc();
2308 int list_lc_progress(const string& marker, uint32_t max_entries, map<string, int> *progress_map);
2309
2310 int bucket_check_index(RGWBucketInfo& bucket_info,
2311 map<RGWObjCategory, RGWStorageStats> *existing_stats,
2312 map<RGWObjCategory, RGWStorageStats> *calculated_stats);
2313 int bucket_rebuild_index(RGWBucketInfo& bucket_info);
f64942e4 2314 int bucket_set_reshard(const RGWBucketInfo& bucket_info, const cls_rgw_bucket_instance_entry& entry);
7c673cae
FG
2315 int remove_objs_from_index(RGWBucketInfo& bucket_info, list<rgw_obj_index_key>& oid_list);
2316 int move_rados_obj(librados::IoCtx& src_ioctx,
2317 const string& src_oid, const string& src_locator,
2318 librados::IoCtx& dst_ioctx,
2319 const string& dst_oid, const string& dst_locator);
2320 int fix_head_obj_locator(const RGWBucketInfo& bucket_info, bool copy_obj, bool remove_bad, rgw_obj_key& key);
2321 int fix_tail_obj_locator(const RGWBucketInfo& bucket_info, rgw_obj_key& key, bool fix, bool *need_fix);
2322
2323 int cls_user_get_header(const string& user_id, cls_user_header *header);
94b18763 2324 int cls_user_reset_stats(const string& user_id);
7c673cae
FG
2325 int cls_user_get_header_async(const string& user_id, RGWGetUserHeader_CB *ctx);
2326 int cls_user_sync_bucket_stats(rgw_raw_obj& user_obj, const RGWBucketInfo& bucket_info);
2327 int cls_user_list_buckets(rgw_raw_obj& obj,
2328 const string& in_marker,
2329 const string& end_marker,
2330 int max_entries,
2331 list<cls_user_bucket_entry>& entries,
2332 string *out_marker,
2333 bool *truncated);
2334 int cls_user_add_bucket(rgw_raw_obj& obj, const cls_user_bucket_entry& entry);
2335 int cls_user_update_buckets(rgw_raw_obj& obj, list<cls_user_bucket_entry>& entries, bool add);
2336 int cls_user_complete_stats_sync(rgw_raw_obj& obj);
2337 int complete_sync_user_stats(const rgw_user& user_id);
7c673cae 2338 int cls_user_remove_bucket(rgw_raw_obj& obj, const cls_user_bucket& bucket);
c07f9fc5 2339 int cls_user_get_bucket_stats(const rgw_bucket& bucket, cls_user_bucket_entry& entry);
7c673cae
FG
2340
2341 int check_quota(const rgw_user& bucket_owner, rgw_bucket& bucket,
11fdf7f2 2342 RGWQuotaInfo& user_quota, RGWQuotaInfo& bucket_quota, uint64_t obj_size, bool check_size_only = false);
7c673cae 2343
224ce89b 2344 int check_bucket_shards(const RGWBucketInfo& bucket_info, const rgw_bucket& bucket,
31f18b77
FG
2345 RGWQuotaInfo& bucket_quota);
2346
2347 int add_bucket_to_reshard(const RGWBucketInfo& bucket_info, uint32_t new_num_shards);
2348
7c673cae 2349 uint64_t instance_id();
3efd9988 2350
7c673cae
FG
2351 librados::Rados* get_rados_handle();
2352
2353 int delete_raw_obj_aio(const rgw_raw_obj& obj, list<librados::AioCompletion *>& handles);
2354 int delete_obj_aio(const rgw_obj& obj, RGWBucketInfo& info, RGWObjState *astate,
2355 list<librados::AioCompletion *>& handles, bool keep_index_consistent);
11fdf7f2
TL
2356
2357 /* mfa/totp stuff */
2358 private:
2359 void prepare_mfa_write(librados::ObjectWriteOperation *op,
2360 RGWObjVersionTracker *objv_tracker,
2361 const ceph::real_time& mtime);
2362 public:
2363 string get_mfa_oid(const rgw_user& user);
2364 int get_mfa_ref(const rgw_user& user, rgw_rados_ref *ref);
2365 int check_mfa(const rgw_user& user, const string& otp_id, const string& pin);
2366 int create_mfa(const rgw_user& user, const rados::cls::otp::otp_info_t& config,
2367 RGWObjVersionTracker *objv_tracker, const ceph::real_time& mtime);
2368 int remove_mfa(const rgw_user& user, const string& id,
2369 RGWObjVersionTracker *objv_tracker, const ceph::real_time& mtime);
2370 int get_mfa(const rgw_user& user, const string& id, rados::cls::otp::otp_info_t *result);
2371 int list_mfa(const rgw_user& user, list<rados::cls::otp::otp_info_t> *result);
2372 int otp_get_current_time(const rgw_user& user, ceph::real_time *result);
2373
2374 /* mfa interfaces used by metadata engine */
2375 int set_mfa(const string& oid, const list<rados::cls::otp::otp_info_t>& entries, bool reset_obj,
2376 RGWObjVersionTracker *objv_tracker, const ceph::real_time& mtime);
2377 int list_mfa(const string& oid, list<rados::cls::otp::otp_info_t> *result,
2378 RGWObjVersionTracker *objv_tracker, ceph::real_time *pmtime);
7c673cae
FG
2379 private:
2380 /**
2381 * This is a helper method, it generates a list of bucket index objects with the given
2382 * bucket base oid and number of shards.
2383 *
2384 * bucket_oid_base [in] - base name of the bucket index object;
2385 * num_shards [in] - number of bucket index object shards.
2386 * bucket_objs [out] - filled by this method, a list of bucket index objects.
2387 */
2388 void get_bucket_index_objects(const string& bucket_oid_base, uint32_t num_shards,
2389 map<int, string>& bucket_objs, int shard_id = -1);
2390
2391 /**
2392 * Get the bucket index object with the given base bucket index object and object key,
2393 * and the number of bucket index shards.
2394 *
2395 * bucket_oid_base [in] - bucket object base name.
2396 * obj_key [in] - object key.
2397 * num_shards [in] - number of bucket index shards.
2398 * hash_type [in] - type of hash to find the shard ID.
2399 * bucket_obj [out] - the bucket index object for the given object.
2400 *
2401 * Return 0 on success, a failure code otherwise.
2402 */
2403 int get_bucket_index_object(const string& bucket_oid_base, const string& obj_key,
2404 uint32_t num_shards, RGWBucketInfo::BIShardsHashType hash_type, string *bucket_obj, int *shard);
2405
2406 void get_bucket_index_object(const string& bucket_oid_base, uint32_t num_shards,
2407 int shard_id, string *bucket_obj);
2408
2409 /**
2410 * Check the actual on-disk state of the object specified
2411 * by list_state, and fill in the time and size of object.
2412 * Then append any changes to suggested_updates for
2413 * the rgw class' dir_suggest_changes function.
2414 *
2415 * Note that this can maul list_state; don't use it afterwards. Also
2416 * it expects object to already be filled in from list_state; it only
2417 * sets the size and mtime.
2418 *
2419 * Returns 0 on success, -ENOENT if the object doesn't exist on disk,
2420 * and -errno on other failures. (-ENOENT is not a failure, and it
2421 * will encode that info as a suggested update.)
2422 */
2423 int check_disk_state(librados::IoCtx io_ctx,
2424 const RGWBucketInfo& bucket_info,
2425 rgw_bucket_dir_entry& list_state,
2426 rgw_bucket_dir_entry& object,
2427 bufferlist& suggested_updates);
2428
2429 /**
2430 * Init pool iteration
31f18b77 2431 * pool: pool to use for the ctx initialization
7c673cae
FG
2432 * ctx: context object to use for the iteration
2433 * Returns: 0 on success, -ERR# otherwise.
2434 */
2435 int pool_iterate_begin(const rgw_pool& pool, RGWPoolIterCtx& ctx);
31f18b77 2436
181888fb
FG
2437 /**
2438 * Init pool iteration
2439 * pool: pool to use
2440 * cursor: position to start iteration
2441 * ctx: context object to use for the iteration
2442 * Returns: 0 on success, -ERR# otherwise.
2443 */
2444 int pool_iterate_begin(const rgw_pool& pool, const string& cursor, RGWPoolIterCtx& ctx);
2445
2446 /**
2447 * Get pool iteration position
2448 * ctx: context object to use for the iteration
2449 * Returns: string representation of position
2450 */
2451 string pool_iterate_get_cursor(RGWPoolIterCtx& ctx);
2452
7c673cae
FG
2453 /**
2454 * Iterate over the pool and return object names, applying an optional filter
2455 * ctx: iteration context, initialized with pool_iterate_begin()
2456 * num: max number of objects to return
2457 * objs: a vector that the results will append into
2458 * is_truncated: if not NULL, will hold true iff iteration is not yet complete (more objects remain)
2459 * filter: if not NULL, will be used to filter returned objects
2460 * Returns: 0 on success, -ERR# otherwise.
2461 */
2462 int pool_iterate(RGWPoolIterCtx& ctx, uint32_t num, vector<rgw_bucket_dir_entry>& objs,
2463 bool *is_truncated, RGWAccessListFilter *filter);
2464
2465 uint64_t next_bucket_id();
2466};
2467
2468class RGWStoreManager {
2469public:
2470 RGWStoreManager() {}
28e407b8
AA
2471 static RGWRados *get_storage(CephContext *cct, bool use_gc_thread, bool use_lc_thread, bool quota_threads,
2472 bool run_sync_thread, bool run_reshard_thread, bool use_cache = true) {
31f18b77 2473 RGWRados *store = init_storage_provider(cct, use_gc_thread, use_lc_thread, quota_threads, run_sync_thread,
28e407b8 2474 run_reshard_thread, use_cache);
7c673cae
FG
2475 return store;
2476 }
2477 static RGWRados *get_raw_storage(CephContext *cct) {
2478 RGWRados *store = init_raw_storage_provider(cct);
2479 return store;
2480 }
28e407b8 2481 static RGWRados *init_storage_provider(CephContext *cct, bool use_gc_thread, bool use_lc_thread, bool quota_threads, bool run_sync_thread, bool run_reshard_thread, bool use_metadata_cache);
7c673cae
FG
2482 static RGWRados *init_raw_storage_provider(CephContext *cct);
2483 static void close_storage(RGWRados *store);
2484
2485};
2486
7c673cae
FG
/**
 * Builds and parses the object names used by a multipart upload.
 *
 * For a key <oid>, an upload id <upload_id> and a part-unique string <uniq>
 * (which defaults to the upload id):
 *   meta object : <oid>.<upload_id> followed by MP_META_SUFFIX
 *   part object : <oid>.<uniq>.<part>
 *
 * An instance initialised with an empty oid is left in the cleared state
 * (all fields empty).
 */
class RGWMPObj {
  string oid;        // object key
  string prefix;     // "<oid>.<uniq>", the base of every part name
  string meta;       // name of the upload's meta object
  string upload_id;
public:
  RGWMPObj() {}
  RGWMPObj(const string& _oid, const string& _upload_id) {
    init(_oid, _upload_id, _upload_id);
  }

  /* Initialise using the upload id as the part-unique string. */
  void init(const string& _oid, const string& _upload_id) {
    init(_oid, _upload_id, _upload_id);
  }

  /* Recompute all derived names. An empty _oid clears the object instead. */
  void init(const string& _oid, const string& _upload_id, const string& part_unique_str) {
    if (_oid.empty()) {
      clear();
      return;
    }
    oid = _oid;
    upload_id = _upload_id;
    prefix = oid + ".";
    meta = prefix + upload_id + MP_META_SUFFIX;
    prefix.append(part_unique_str);
  }

  const string& get_meta() const { return meta; }

  /* Name of part number `num`: "<prefix>.<num>". */
  string get_part(int num) const {
    return prefix + "." + std::to_string(num);
  }

  /* Name of the part identified by the string `part`: "<prefix>.<part>". */
  string get_part(const string& part) const {
    string s = prefix;
    s.append(".");
    s.append(part);
    return s;
  }

  const string& get_upload_id() const {
    return upload_id;
  }
  const string& get_key() const {
    return oid;
  }

  /**
   * Parse a meta object name of the form <key>.<upload_id>.<suffix> and
   * re-initialise this object from it.
   *
   * Only the positions of the last two dots are used; the suffix text itself
   * is not validated. Note that the parameter shadows the `meta` member.
   *
   * Returns true on success. Returns false, leaving the object untouched,
   * when the name has fewer than two dots or when the key part would be
   * empty. Previously an empty key, or a name whose only dot was its first
   * character, cleared the object and still reported success.
   */
  bool from_meta(const string& meta) {
    // Last dot separates the upload id from the suffix (e.g. ".meta").
    const string::size_type end_pos = meta.rfind('.');
    if (end_pos == string::npos || end_pos == 0)
      return false;
    // Previous dot separates <key> from <upload_id>; end_pos > 0 here, so
    // end_pos - 1 cannot wrap around.
    const string::size_type mid_pos = meta.rfind('.', end_pos - 1);
    if (mid_pos == string::npos || mid_pos == 0)
      return false;
    // Parse into locals first: `meta` may alias one of our own members, and
    // init() overwrites them.
    const string key = meta.substr(0, mid_pos);
    const string id = meta.substr(mid_pos + 1, end_pos - mid_pos - 1);
    init(key, id, id);
    return true;
  }

  void clear() {
    oid.clear();
    prefix.clear();
    meta.clear();
    upload_id.clear();
  }
}; // class RGWMPObj
7c673cae 2550
11fdf7f2
TL
2551
2552class RGWRadosThread {
2553 class Worker : public Thread {
2554 CephContext *cct;
2555 RGWRadosThread *processor;
2556 Mutex lock;
2557 Cond cond;
2558
2559 void wait() {
2560 Mutex::Locker l(lock);
2561 cond.Wait(lock);
2562 };
2563
2564 void wait_interval(const utime_t& wait_time) {
2565 Mutex::Locker l(lock);
2566 cond.WaitInterval(lock, wait_time);
2567 }
2568
2569 public:
2570 Worker(CephContext *_cct, RGWRadosThread *_p) : cct(_cct), processor(_p), lock("RGWRadosThread::Worker") {}
2571 void *entry() override;
2572 void signal() {
2573 Mutex::Locker l(lock);
2574 cond.Signal();
2575 }
2576 };
2577
2578 Worker *worker;
7c673cae
FG
2579
2580protected:
11fdf7f2
TL
2581 CephContext *cct;
2582 RGWRados *store;
2583
2584 std::atomic<bool> down_flag = { false };
2585
2586 string thread_name;
2587
2588 virtual uint64_t interval_msec() = 0;
2589 virtual void stop_process() {}
7c673cae 2590public:
11fdf7f2
TL
2591 RGWRadosThread(RGWRados *_store, const string& thread_name = "radosgw")
2592 : worker(NULL), cct(_store->ctx()), store(_store), thread_name(thread_name) {}
2593 virtual ~RGWRadosThread() {
2594 stop();
2595 }
2596
2597 virtual int init() { return 0; }
2598 virtual int process() = 0;
2599
2600 bool going_down() { return down_flag; }
2601
2602 void start();
2603 void stop();
2604
2605 void signal() {
2606 if (worker) {
2607 worker->signal();
2608 }
2609 }
2610};
2611
7c673cae 2612#endif